diff --git a/.archon/scripts/echo-args.js b/.archon/scripts/echo-args.js new file mode 100644 index 0000000000..140a9ae4c9 --- /dev/null +++ b/.archon/scripts/echo-args.js @@ -0,0 +1,3 @@ +// Simple script node test — echoes input as JSON +const input = process.argv[2] ?? 'no-input'; +console.log(JSON.stringify({ echoed: input, timestamp: new Date().toISOString() })); diff --git a/.archon/workflows/defaults/archon-adversarial-dev.yaml b/.archon/workflows/defaults/archon-adversarial-dev.yaml index 2ab207dc03..9b5bdbd4ee 100644 --- a/.archon/workflows/defaults/archon-adversarial-dev.yaml +++ b/.archon/workflows/defaults/archon-adversarial-dev.yaml @@ -18,6 +18,7 @@ model: sonnet nodes: # ─── Phase 1: Planning ─────────────────────────────────────────────── - id: plan + model: opus prompt: | You are a product planning expert. Your job is to take a short user prompt and expand it into a comprehensive product specification. diff --git a/.archon/workflows/defaults/archon-architect.yaml b/.archon/workflows/defaults/archon-architect.yaml index a41a75cd33..e57c74d839 100644 --- a/.archon/workflows/defaults/archon-architect.yaml +++ b/.archon/workflows/defaults/archon-architect.yaml @@ -253,6 +253,7 @@ nodes: # ═══════════════════════════════════════════════════════════════ - id: fix-failures + model: haiku prompt: | Review the validation output below. @@ -298,6 +299,7 @@ nodes: # ═══════════════════════════════════════════════════════════════ - id: create-pr + model: haiku prompt: | Create a pull request for the architectural improvements. diff --git a/.archon/workflows/defaults/archon-create-issue.yaml b/.archon/workflows/defaults/archon-create-issue.yaml index 24d59f8e0c..354bf50396 100644 --- a/.archon/workflows/defaults/archon-create-issue.yaml +++ b/.archon/workflows/defaults/archon-create-issue.yaml @@ -433,6 +433,7 @@ nodes: # ═══════════════════════════════════════════════════════════════ - id: report-failure + model: haiku prompt: | The issue could not be reproduced. Report this to the user with actionable detail. @@ -557,6 +558,7 @@ nodes: # ═══════════════════════════════════════════════════════════════ - id: create-issue + model: haiku prompt: | Create the GitHub issue using the drafted content. diff --git a/.archon/workflows/defaults/archon-fix-github-issue.yaml b/.archon/workflows/defaults/archon-fix-github-issue.yaml index 12ad675de9..bea9a92145 100644 --- a/.archon/workflows/defaults/archon-fix-github-issue.yaml +++ b/.archon/workflows/defaults/archon-fix-github-issue.yaml @@ -26,6 +26,7 @@ nodes: # ═══════════════════════════════════════════════════════════════ - id: extract-issue-number + model: haiku prompt: | Find the GitHub issue number for this request. @@ -149,6 +150,7 @@ nodes: # ═══════════════════════════════════════════════════════════════ - id: create-pr + model: haiku prompt: | Create a draft pull request for the current branch. @@ -169,7 +171,7 @@ nodes: 4. Check if a PR already exists for this branch: `gh pr list --head $(git branch --show-current)` - If PR exists, skip creation and capture its number 5. Look for the project's PR template at `.github/pull_request_template.md`, `.github/PULL_REQUEST_TEMPLATE.md`, or `docs/PULL_REQUEST_TEMPLATE.md`. Read whichever one exists. - 6. Create a DRAFT PR: `gh pr create --draft --base $BASE_BRANCH` + 6. Create a PR: `gh pr create --base $BASE_BRANCH` - Title: concise, imperative mood, under 70 chars - Body: if a PR template was found, fill in **every section** with details from the artifacts. 
Don't skip sections or leave placeholders. If no template, write a body with summary, changes, validation evidence, and `Fixes #...`. - Link to issue: include `Fixes #...` or `Closes #...` @@ -308,3 +310,81 @@ nodes: command: archon-issue-completion-report depends_on: [simplify] context: fresh + + # ═══════════════════════════════════════════════════════════════ + # PHASE 11: MARK PR READY & TRIGGER CI + # ═══════════════════════════════════════════════════════════════ + + - id: prepare-merge + bash: | + PR_NUMBER=$(cat "$ARTIFACTS_DIR/.pr-number" 2>/dev/null || echo "") + if [ -z "$PR_NUMBER" ]; then + echo "No PR number found, skipping auto-merge" + exit 0 + fi + # Mark PR as ready for review (remove draft status) + gh pr ready "$PR_NUMBER" 2>/dev/null || true + # Push an empty commit to re-trigger CI on the final state. + # Prior steps (simplify, self-fix) may have pushed after the last CI run, + # leaving required status checks missing on HEAD. + git commit --allow-empty -m "ci: re-trigger checks for auto-merge" + git push + # Enable auto-merge early — GitHub will merge once CI passes. + # This way, if the watch-and-merge loop exits (max iterations), + # the PR still merges automatically when CI eventually goes green. + gh pr merge "$PR_NUMBER" --squash --auto --delete-branch 2>&1 || echo "Auto-merge not available, will retry in watch loop" + echo "$PR_NUMBER" + depends_on: [report] + + # ═══════════════════════════════════════════════════════════════ + # PHASE 12: WATCH CI, FIX FAILURES, AUTO-MERGE + # ═══════════════════════════════════════════════════════════════ + + - id: watch-and-merge + depends_on: [prepare-merge] + context: fresh + model: haiku + loop: + prompt: | + You are watching CI for a pull request. Auto-merge is already enabled — + GitHub will merge automatically once CI passes. Your job is to fix CI + failures so that auto-merge can proceed. + + ## Setup + + 1. Read the PR number from `$ARTIFACTS_DIR/.pr-number` + 2. If empty, output CI_DONE immediately. + + ## Check CI Status + + Run: `gh pr checks $(cat $ARTIFACTS_DIR/.pr-number) --watch --fail-fast 2>&1 || true` + + This blocks until all checks complete. Then check the exit code / output. + + ## If ALL checks passed + + Auto-merge is already enabled and GitHub will handle the merge. + Output: CI_DONE + + ## If any check FAILED + + 1. Identify which check failed from the output + 2. Get the failure logs: `gh run view <run-id> --log-failed 2>&1 | tail -80` (take the run ID from the failing check's details URL; `gh run view` needs an explicit ID when run non-interactively) + 3. Read the relevant source files and fix the issue + 4. Run the project's local validation/test commands to verify your fix + 5. Commit the fix with a descriptive message and push: + ```bash + git add -A + git commit -m "fix: <short description of the fix>" + git push + ``` + 6. End this iteration normally (do NOT output CI_DONE) — the next iteration will re-check CI + + ## Important + + - Be concise. Focus on fixing the failure, not explaining it. + - Only fix what CI reports as broken — don't refactor or improve unrelated code. + - If the failure is environmental (flaky test, infra issue), retry once before trying to fix code.
+ until: CI_DONE + max_iterations: 4 + fresh_context: true diff --git a/.archon/workflows/defaults/archon-idea-to-pr.yaml b/.archon/workflows/defaults/archon-idea-to-pr.yaml index 9329c55021..16585d30cc 100644 --- a/.archon/workflows/defaults/archon-idea-to-pr.yaml +++ b/.archon/workflows/defaults/archon-idea-to-pr.yaml @@ -134,3 +134,18 @@ nodes: command: archon-workflow-summary depends_on: [implement-fixes] context: fresh + + # ═══════════════════════════════════════════════════════════════════ + # PHASE 9: AUTO-MERGE + # ═══════════════════════════════════════════════════════════════════ + + - id: auto-merge + bash: | + PR_NUMBER=$(cat "$ARTIFACTS_DIR/.pr-number" 2>/dev/null || echo "") + if [ -z "$PR_NUMBER" ]; then + echo "No PR number found, skipping auto-merge" + exit 0 + fi + gh pr ready "$PR_NUMBER" 2>/dev/null || true + gh pr merge "$PR_NUMBER" --squash --auto --delete-branch 2>&1 || echo "Auto-merge not available, PR left open for manual merge" + depends_on: [workflow-summary] diff --git a/.archon/workflows/defaults/archon-piv-loop.yaml b/.archon/workflows/defaults/archon-piv-loop.yaml index 7227900c2f..880c764ba2 100644 --- a/.archon/workflows/defaults/archon-piv-loop.yaml +++ b/.archon/workflows/defaults/archon-piv-loop.yaml @@ -614,6 +614,7 @@ nodes: # ═══════════════════════════════════════════════════════════════ - id: fix-feedback + model: haiku depends_on: [code-review] loop: prompt: | @@ -734,7 +735,7 @@ nodes: cat .github/pull_request_template.md 2>/dev/null || echo "NO_TEMPLATE" ``` - Create with `gh pr create --draft --base $BASE_BRANCH`: + Create with `gh pr create --base $BASE_BRANCH`: - Title from the plan's feature name - Body summarizing the implementation - Use a HEREDOC for the body diff --git a/.archon/workflows/defaults/archon-pr-maintenance.yaml b/.archon/workflows/defaults/archon-pr-maintenance.yaml new file mode 100644 index 0000000000..cfe9a47983 --- /dev/null +++ b/.archon/workflows/defaults/archon-pr-maintenance.yaml @@ -0,0 +1,171 @@ +name: archon-pr-maintenance +description: | + Use when: A single open PR needs rebasing, conflict resolution, or CI fixing. + Triggers: "fix stale PRs", "maintain PRs", "rebase open PRs", "merge open PRs", + "clean up PRs", "PR maintenance", "fix PR #123". + NOT for: Creating new PRs, reviewing code, fixing specific issues from scratch. + + Picks the highest-priority open PR needing attention (or a specific PR if given), + rebases it, resolves conflicts, fixes CI failures, and enables auto-merge. + Designed to be run on a cron — one PR per run, zero AI cost if nothing to do. + +provider: claude +model: sonnet + +nodes: + # ═══════════════════════════════════════════════════════════════ + # PHASE 1: FIND THE NEXT PR TO FIX (bash only — zero AI cost) + # ═══════════════════════════════════════════════════════════════ + + - id: find-pr + bash: | + # If a specific PR number was passed, use that + REQUESTED=$(echo "$ARGUMENTS" | grep -oE '[0-9]+' | head -1) + + if [ -n "$REQUESTED" ]; then + # Validate the PR exists and is open + STATE=$(gh pr view "$REQUESTED" --json state -q '.state' 2>/dev/null) + if [ "$STATE" = "OPEN" ]; then + echo "$REQUESTED" + exit 0 + else + echo "PR #$REQUESTED is not open (state: $STATE)" >&2 + exit 1 + fi + fi + + # No specific PR requested — find the highest priority actionable one. + # Priority: BEHIND (just needs rebase) > DIRTY (conflicts) > UNSTABLE (CI failing) + # Skip CLEAN (handled by wrapper script) and BLOCKED (can't do anything). 
+ for STATUS in BEHIND DIRTY UNSTABLE UNKNOWN; do + PR=$(gh pr list --state open --json number,mergeStateStatus,isDraft \ + --jq "[.[] | select(.isDraft == false and .mergeStateStatus == \"$STATUS\")] | .[0].number // empty") + if [ -n "$PR" ]; then + echo "$PR" + exit 0 + fi + done + + echo "No PRs need maintenance" >&2 + exit 1 + timeout: 30000 + + # ═══════════════════════════════════════════════════════════════ + # PHASE 2: CHECKOUT & DIAGNOSE (bash only — zero AI cost) + # ═══════════════════════════════════════════════════════════════ + + - id: checkout-and-diagnose + bash: | + PR_NUMBER="$find-pr.output" + + # Checkout the PR branch + gh pr checkout "$PR_NUMBER" + + # Gather diagnostic info + echo "=== PR INFO ===" + gh pr view "$PR_NUMBER" --json number,title,headRefName,baseRefName,mergeable,mergeStateStatus,statusCheckRollup \ + --jq '{number, title, head: .headRefName, base: .baseRefName, mergeable, mergeState: .mergeStateStatus, checks: [.statusCheckRollup[]? | {name, status, conclusion}]}' + + echo "" + echo "=== BEHIND COUNT ===" + BASE=$(gh pr view "$PR_NUMBER" --json baseRefName -q '.baseRefName') + git fetch origin "$BASE" --quiet + BEHIND=$(git rev-list --count HEAD.."origin/$BASE") + echo "$BEHIND commits behind $BASE" + + echo "" + echo "=== FILES CHANGED ===" + gh pr diff "$PR_NUMBER" --name-only 2>/dev/null | head -30 + depends_on: [find-pr] + timeout: 60000 + + # ═══════════════════════════════════════════════════════════════ + # PHASE 3: FIX THE PR (AI — only runs if phases 1-2 succeeded) + # ═══════════════════════════════════════════════════════════════ + + - id: fix-pr + prompt: | + You are a PR maintenance agent. Fix this single PR so it can merge. + + ## PR Diagnostics + + ``` + $checkout-and-diagnose.output + ``` + + ## Instructions + + You are already on the PR's branch. Follow these steps: + + ### Step 1: Rebase onto base branch + + ```bash + BASE=$(gh pr view $find-pr.output --json baseRefName -q '.baseRefName') + git rebase "origin/$BASE" + ``` + + If conflicts arise: + - Check conflicting files: `git diff --name-only --diff-filter=U` + - For each file, read the full file to understand the conflict markers + - Resolve based on intent: + - Both sides added different things → keep both + - One side updated, other didn't → keep the update + - Both changed same lines → understand the PR's intent from the title/diff and resolve accordingly + - Stage each resolved file: `git add <file>` + - Continue: `git rebase --continue` + - If the rebase has more than 5 conflicting files, abort (`git rebase --abort`) and report failure + + ### Step 2: Validate + + Run the project's validation. Check for these in order and run the first that exists: + ```bash + # Check what package manager / validation commands exist + if [ -f "pubspec.yaml" ]; then + flutter analyze && flutter test + elif [ -f "bun.lockb" ] || [ -f "bunfig.toml" ]; then + bun run type-check 2>/dev/null; bun test 2>/dev/null + elif [ -f "package.json" ]; then + npm test 2>/dev/null + elif [ -f "pyproject.toml" ] || [ -f "requirements.txt" ]; then + python -m pytest 2>/dev/null + elif [ -f "go.mod" ]; then + go test ./... 2>/dev/null + fi + ``` + + If validation fails, fix the issue and re-run. If you can't fix it after one attempt, + proceed anyway — CI will catch it.
+ + ### Step 3: Push + + ```bash + git push --force-with-lease + ``` + + ### Step 4: Enable auto-merge + + ```bash + gh pr merge $find-pr.output --squash --auto --delete-branch 2>&1 || \ + echo "Auto-merge not available" + ``` + + If auto-merge is not available (no branch protection rules), check CI status + and merge directly once passing: + ```bash + gh pr checks $find-pr.output --watch --fail-fast 2>&1 || true + gh pr merge $find-pr.output --squash --delete-branch 2>&1 || \ + echo "Could not merge — may need manual review" + ``` + + ### Step 5: Report result + + Output a brief summary: PR number, what you did, whether it merged or has auto-merge enabled. + + ## Important + + - Be concise. Fix the problem, don't explain it at length. + - Only fix what's needed for the PR to merge — don't refactor or improve unrelated code. + - If a CI failure is environmental (flaky test, infra issue), push an empty commit to retry. + depends_on: [checkout-and-diagnose] + context: fresh + model: sonnet diff --git a/.archon/workflows/defaults/archon-ralph-dag.yaml b/.archon/workflows/defaults/archon-ralph-dag.yaml index 5c0d7c9099..18be2e5f18 100644 --- a/.archon/workflows/defaults/archon-ralph-dag.yaml +++ b/.archon/workflows/defaults/archon-ralph-dag.yaml @@ -528,7 +528,7 @@ nodes: If no template was found, write a summary with: problem, what changed, stories table, and validation evidence. - 3. **Create a draft PR** using `gh pr create --draft --base $BASE_BRANCH --title "feat: {PRD feature name}"` with the filled-in template as the body. Use a HEREDOC for the body. + 3. **Create a PR** using `gh pr create --base $BASE_BRANCH --title "feat: {PRD feature name}"` with the filled-in template as the body. Use a HEREDOC for the body. 4. **Output completion signal:** ``` diff --git a/.archon/workflows/defaults/archon-refactor-safely.yaml b/.archon/workflows/defaults/archon-refactor-safely.yaml index 56bc96ac36..4cc0723ecf 100644 --- a/.archon/workflows/defaults/archon-refactor-safely.yaml +++ b/.archon/workflows/defaults/archon-refactor-safely.yaml @@ -207,7 +207,7 @@ nodes: # ═══════════════════════════════════════════════════════════════ - id: execute-refactor - model: claude-opus-4-6[1m] + model: sonnet prompt: | You are executing a refactoring plan with strict safety guardrails. @@ -327,6 +327,7 @@ nodes: # ═══════════════════════════════════════════════════════════════ - id: fix-failures + model: haiku prompt: | Review the validation output below. @@ -430,6 +431,7 @@ nodes: # ═══════════════════════════════════════════════════════════════ - id: create-pr + model: haiku prompt: | Create a pull request for the refactoring. diff --git a/.archon/workflows/defaults/archon-security-audit.yaml b/.archon/workflows/defaults/archon-security-audit.yaml new file mode 100644 index 0000000000..ab2d00ab64 --- /dev/null +++ b/.archon/workflows/defaults/archon-security-audit.yaml @@ -0,0 +1,388 @@ +name: archon-security-audit +description: | + Use when: User wants a deep security and privacy audit of the codebase. + Triggers: "security audit", "privacy audit", "security review", "vulnerability scan", + "check for security issues", "owasp review", "pentest the code". + Does: Scans attack surface -> 5 parallel specialized security agents -> synthesize findings -> + auto-fix critical/high issues -> create PR for fixes, issues for the rest. + NOT for: Single-file fixes, feature development, architecture review. 
+ +provider: claude + +nodes: + # ═══════════════════════════════════════════════════════════════ + # PHASE 1: ENUMERATE ATTACK SURFACE + # ═══════════════════════════════════════════════════════════════ + + - id: scan-surface + bash: | + echo "=== PROJECT STRUCTURE ===" + find . -name '*.py' -o -name '*.ts' -o -name '*.tsx' -o -name '*.js' -o -name '*.jsx' \ + | grep -v node_modules | grep -v .git | grep -v dist | grep -v __pycache__ | head -100 + + echo "" + echo "=== API ENDPOINTS ===" + # FastAPI/Flask routes + grep -rn '@app\.\(get\|post\|put\|delete\|patch\)\|@router\.\(get\|post\|put\|delete\|patch\)\|app\.route' \ + --include='*.py' --exclude-dir=node_modules --exclude-dir=.git --exclude-dir=__pycache__ . 2>/dev/null | head -50 + # Express/Next.js routes + grep -rn 'app\.\(get\|post\|put\|delete\|patch\)\|router\.\(get\|post\|put\|delete\|patch\)\|export.*\(GET\|POST\|PUT\|DELETE\)' \ + --include='*.ts' --include='*.js' --exclude-dir=node_modules --exclude-dir=.git --exclude-dir=dist . 2>/dev/null | head -50 + + echo "" + echo "=== AUTHENTICATION / AUTH ===" + grep -rn 'auth\|token\|session\|cookie\|jwt\|oauth\|password\|credential\|api.key\|secret' \ + --include='*.py' --include='*.ts' --include='*.js' --include='*.tsx' --include='*.jsx' \ + --exclude-dir=node_modules --exclude-dir=.git --exclude-dir=dist --exclude-dir=__pycache__ \ + -il . 2>/dev/null | head -30 + + echo "" + echo "=== DATABASE QUERIES ===" + grep -rn 'execute\|raw_sql\|text(\|\.query\|sql\.\|SELECT\|INSERT\|UPDATE\|DELETE.*FROM' \ + --include='*.py' --include='*.ts' --include='*.js' \ + --exclude-dir=node_modules --exclude-dir=.git --exclude-dir=dist --exclude-dir=__pycache__ . 2>/dev/null | head -30 + + echo "" + echo "=== USER INPUT HANDLING ===" + grep -rn 'request\.\(form\|args\|json\|body\|query\|params\|files\)\|req\.body\|req\.query\|req\.params\|FormData\|useFormData\|innerHTML\|dangerouslySetInnerHTML' \ + --include='*.py' --include='*.ts' --include='*.js' --include='*.tsx' --include='*.jsx' \ + --exclude-dir=node_modules --exclude-dir=.git --exclude-dir=dist --exclude-dir=__pycache__ . 2>/dev/null | head -30 + + echo "" + echo "=== FILE OPERATIONS ===" + grep -rn 'open(\|write\|upload\|multer\|FormData\|multipart\|file_path\|os\.path\|shutil' \ + --include='*.py' --include='*.ts' --include='*.js' \ + --exclude-dir=node_modules --exclude-dir=.git --exclude-dir=dist --exclude-dir=__pycache__ . 2>/dev/null | head -20 + + echo "" + echo "=== ENV / SECRETS ===" + grep -rn 'process\.env\|os\.environ\|os\.getenv\|dotenv\|\.env\|SECRET\|API_KEY\|PASSWORD\|TOKEN' \ + --include='*.py' --include='*.ts' --include='*.js' --include='*.tsx' \ + --exclude-dir=node_modules --exclude-dir=.git --exclude-dir=dist --exclude-dir=__pycache__ . 2>/dev/null | head -30 + echo "" + echo "=== HARDCODED SECRETS CHECK ===" + grep -rn "sk-\|ghp_\|gho_\|glpat-\|xoxb-\|xoxp-\|AKIA\|password.*=.*['\"]" \ + --include='*.py' --include='*.ts' --include='*.js' --include='*.tsx' --include='*.jsx' \ + --exclude-dir=node_modules --exclude-dir=.git --exclude-dir=dist --exclude-dir=__pycache__ . 
2>/dev/null | head -20 + + echo "" + echo "=== DEPENDENCIES ===" + cat requirements.txt 2>/dev/null || cat backend/requirements.txt 2>/dev/null || true + cat package.json 2>/dev/null | grep -A 999 '"dependencies"' | head -40 || true + + echo "" + echo "=== CORS / HEADERS ===" + grep -rn 'cors\|CORS\|Access-Control\|X-Frame\|Content-Security-Policy\|helmet\|CSP' \ + --include='*.py' --include='*.ts' --include='*.js' \ + --exclude-dir=node_modules --exclude-dir=.git --exclude-dir=dist --exclude-dir=__pycache__ . 2>/dev/null | head -20 + + echo "" + echo "=== DOCKER / DEPLOY CONFIG ===" + cat Dockerfile 2>/dev/null | head -30 || true + cat docker-compose.yml 2>/dev/null | head -30 || true + timeout: 60000 + + # ═══════════════════════════════════════════════════════════════ + # PHASE 2: PARALLEL SECURITY ANALYSIS (5 specialized agents) + # ═══════════════════════════════════════════════════════════════ + + - id: injection-agent + prompt: | + You are a security specialist focused on **injection vulnerabilities**. + + ## Attack Surface + $scan-surface.output + + ## Scope + Audit the ENTIRE codebase for injection attacks. Read every file that handles user input, database queries, or shell commands. + + ## Checklist + - **SQL injection**: raw queries, string interpolation in SQL, ORM misuse, unparameterized queries + - **Command injection**: subprocess calls with user input, os.system, exec, eval + - **Template injection**: user input in templates, SSTI + - **Path traversal**: user-controlled file paths, directory escapes, file uploads without path sanitization + - **LDAP/NoSQL injection**: if applicable + - **XSS**: reflected, stored, DOM-based — innerHTML, dangerouslySetInnerHTML, unescaped output + - **Header injection**: CRLF in headers, host header attacks + + ## Output + Report your findings in your response (your output feeds the synthesize node; you have no Write access) with: + - Finding ID (INJ-001, etc.) + - Severity: CRITICAL / HIGH / MEDIUM / LOW + - File and line number + - Description of the vulnerability + - Proof: show the vulnerable code + - Fix: specific code change needed + depends_on: [scan-surface] + context: fresh + denied_tools: [Write, Edit, Bash] + + - id: auth-agent + prompt: | + You are a security specialist focused on **authentication and authorization**. + + ## Attack Surface + $scan-surface.output + + ## Scope + Audit the ENTIRE codebase for auth vulnerabilities. Read every file related to authentication, sessions, tokens, and access control. + + ## Checklist + - **Broken authentication**: weak password policies, no rate limiting on login, credential stuffing + - **Session management**: session fixation, insecure cookies (missing Secure/HttpOnly/SameSite), session timeout + - **JWT issues**: algorithm confusion, missing expiration, weak signing keys, token in URL + - **OAuth issues**: state parameter missing, redirect URI validation, token storage + - **Broken access control**: IDOR, missing authorization checks on endpoints, privilege escalation + - **CSRF**: missing CSRF tokens on state-changing operations, SameSite cookie config + - **API authentication**: missing auth on endpoints, API key exposure, bearer token handling + + ## Output + Report findings in your response — same format as injection agent (AUTH-001, etc.) + depends_on: [scan-surface] + context: fresh + denied_tools: [Write, Edit, Bash] + + - id: data-privacy-agent + prompt: | + You are a security specialist focused on **data privacy and exposure**.
+ + ## Attack Surface + $scan-surface.output + + ## Scope + Audit the ENTIRE codebase for data privacy issues. Read every file that handles user data, logging, error responses, or external API calls. + + ## Checklist + - **Sensitive data exposure**: PII in logs, verbose error messages leaking internals, stack traces in production + - **Data at rest**: unencrypted sensitive fields in DB, plaintext passwords, API keys in code + - **Data in transit**: HTTP instead of HTTPS, missing TLS, insecure websocket + - **Secrets management**: hardcoded secrets, secrets in git history, .env in Docker image, exposed in client bundle + - **Third-party data leakage**: sending user data to analytics/CDN without consent, excessive data in API responses + - **Logging**: logging passwords, tokens, PII, credit cards + - **Error handling**: different error messages revealing valid/invalid usernames, timing attacks + - **GDPR/privacy**: data retention, deletion capability, consent mechanisms, right to export + + ## Output + Report findings in your response — same format (PRIV-001, etc.) + depends_on: [scan-surface] + context: fresh + denied_tools: [Write, Edit, Bash] + + - id: config-deps-agent + prompt: | + You are a security specialist focused on **configuration and dependency security**. + + ## Attack Surface + $scan-surface.output + + ## Scope + Audit the ENTIRE codebase for configuration and dependency issues. Read all config files, Dockerfiles, CI configs, and dependency manifests. + + ## Checklist + - **Security headers**: missing HSTS, CSP, X-Frame-Options, X-Content-Type-Options, Referrer-Policy + - **CORS misconfiguration**: wildcard origins, credentials with wildcard, overly permissive + - **Docker security**: running as root, unnecessary capabilities, secrets in build layers, outdated base images + - **Dependency vulnerabilities**: check versions against known CVEs, outdated packages with known issues + - **CI/CD security**: secrets in logs, permissive permissions, unsigned artifacts + - **Debug/dev in production**: debug mode enabled, development credentials, verbose logging + - **TLS/SSL**: weak cipher suites, self-signed certs, missing certificate validation + - **Rate limiting**: missing rate limits on auth endpoints, API abuse protection + + ## Output + Report findings in your response — same format (CFG-001, etc.) + depends_on: [scan-surface] + context: fresh + denied_tools: [Write, Edit, Bash] + + - id: logic-agent + prompt: | + You are a security specialist focused on **business logic and application security**. + + ## Attack Surface + $scan-surface.output + + ## Scope + Audit the ENTIRE codebase for business logic vulnerabilities. Read the core application logic, state machines, payment flows, and user-facing operations.
+ + ## Checklist + - **Race conditions**: TOCTOU, double-submit, concurrent modification without locks + - **Business logic bypass**: skipping steps in multi-step flows, parameter tampering, mass assignment + - **Insecure direct object references**: predictable IDs, missing ownership checks + - **File upload**: unrestricted file types, missing size limits, path traversal in filenames, executable uploads + - **Denial of service**: regex DoS (ReDoS), unbounded queries, missing pagination, resource exhaustion + - **Integer overflow/underflow**: in financial calculations, ratings, scores + - **Insecure randomness**: using Math.random or random for security-sensitive operations + - **Open redirects**: unvalidated redirect URLs + + ## Output + Report findings in your response — same format (LOGIC-001, etc.) + depends_on: [scan-surface] + context: fresh + denied_tools: [Write, Edit, Bash] + + # ═══════════════════════════════════════════════════════════════ + # PHASE 3: SYNTHESIZE + # ═══════════════════════════════════════════════════════════════ + + - id: synthesize + prompt: | + You are the lead security auditor. Synthesize all findings from the parallel security agents. + + ## Findings + + ### Injection Analysis + $injection-agent.output + + ### Authentication & Authorization + $auth-agent.output + + ### Data Privacy + $data-privacy-agent.output + + ### Configuration & Dependencies + $config-deps-agent.output + + ### Business Logic + $logic-agent.output + + ## Instructions + + 1. Deduplicate findings across agents (same issue found by multiple agents) + 2. Validate findings — check if the agent's assessment is accurate by reading the actual code + 3. Assign final severity: CRITICAL / HIGH / MEDIUM / LOW / FALSE_POSITIVE + 4. Categorize: FIXABLE_NOW (can be fixed in code) vs NEEDS_DISCUSSION (architectural, needs human input) + 5. Prioritize by: severity × exploitability × data impact + + ## Output + + Write to $ARTIFACTS_DIR/security-report.md: + + ### Executive Summary + - Total findings by severity + - Overall security posture assessment + - Top 3 most urgent issues + + ### CRITICAL & HIGH Findings (to fix now) + For each: ID, title, severity, file:line, description, fix + + ### MEDIUM & LOW Findings (to track as issues) + For each: ID, title, severity, file:line, description + + ### False Positives (dismissed) + For each: ID, why it was dismissed + + Also write $ARTIFACTS_DIR/fix-plan.md with specific code changes for all CRITICAL and HIGH findings, ordered by dependency. + depends_on: [injection-agent, auth-agent, data-privacy-agent, config-deps-agent, logic-agent] + trigger_rule: one_success + context: fresh + + # ═══════════════════════════════════════════════════════════════ + # PHASE 4: FIX CRITICAL & HIGH + # ═══════════════════════════════════════════════════════════════ + + - id: fix-security + prompt: | + You are implementing security fixes for CRITICAL and HIGH severity findings. + + ## Fix Plan + $synthesize.output + + Read $ARTIFACTS_DIR/fix-plan.md for the ordered list of fixes. + + ## Rules + - Fix ONLY CRITICAL and HIGH severity issues + - Each fix must be minimal — do not refactor surrounding code + - After each fix, verify the vulnerability is actually resolved + - If a fix would break functionality, skip it and note why + - Commit each logical fix separately with a descriptive message + + ## Instructions + 1. Read the fix plan + 2. For each fix: read the vulnerable code, apply the fix, verify it works + 3.
Run tests after all fixes to ensure nothing is broken + 4. Do a final `git diff --stat` to verify scope + depends_on: [synthesize] + context: fresh + model: sonnet + hooks: + PostToolUse: + - matcher: "Write|Edit" + response: + systemMessage: > + You just applied a security fix. Verify NOW: + 1. Is the vulnerability actually fixed? Re-read the code. + 2. Did you introduce any new issues? + 3. Run relevant tests to confirm nothing broke. + + # ═══════════════════════════════════════════════════════════════ + # PHASE 5: VALIDATE + # ═══════════════════════════════════════════════════════════════ + + - id: validate + bash: | + echo "=== RUNNING PROJECT TESTS ===" + # Try common test runners + if [ -f "pyproject.toml" ] || [ -f "backend/requirements.txt" ]; then + echo "--- Python tests ---" + python -m pytest 2>&1 || pytest 2>&1 || echo "No pytest found" + fi + if [ -f "package.json" ]; then + echo "--- JS/TS tests ---" + npm test 2>&1 || bun run test 2>&1 || echo "No JS tests found" + fi + if [ -f "frontend/package.json" ]; then + echo "--- Frontend tests ---" + cd frontend && npm test 2>&1 || bun run test 2>&1; cd .. + fi + echo "VALIDATION_COMPLETE" + depends_on: [fix-security] + timeout: 300000 + + # ═══════════════════════════════════════════════════════════════ + # PHASE 6: CREATE PR + ISSUES + # ═══════════════════════════════════════════════════════════════ + + - id: create-pr + prompt: | + Create a PR for the security fixes and GitHub issues for remaining findings. + + ## Context + - Security report: $synthesize.output + - Validation: $validate.output + + ## Instructions + + ### Step 1: Create PR for fixes (if any changes were made) + 1. Check `git diff --stat` — if no changes, skip PR creation + 2. Stage and commit all changes if uncommitted + 3. Push: `git push -u origin HEAD` + 4. Create PR: + - Title: "security: fix [N] critical/high vulnerabilities" + - Body: list each fix with the finding ID, what was vulnerable, what was changed + - Include the executive summary from the security report + 5. Save PR number: `echo "$(gh pr view --json number -q '.number')" > $ARTIFACTS_DIR/.pr-number` + 6. Mark ready and enable auto-merge: + ```bash + PR_NUM=$(cat $ARTIFACTS_DIR/.pr-number) + gh pr ready "$PR_NUM" + gh pr merge "$PR_NUM" --squash --auto --delete-branch + ``` + + ### Step 2: Create issues for MEDIUM/LOW findings + Read $ARTIFACTS_DIR/security-report.md. For each MEDIUM or LOW finding that was NOT fixed: + - Create a GitHub issue with label "security" + - Title: "[Security] FINDING_ID: brief description" + - Body: severity, file:line, description, suggested fix + + Do NOT create issues for findings marked as FALSE_POSITIVE. + depends_on: [validate] + context: fresh + model: haiku + hooks: + PreToolUse: + - matcher: "Write|Edit" + response: + hookSpecificOutput: + hookEventName: PreToolUse + permissionDecision: deny + permissionDecisionReason: "PR creation node — do not modify source files." diff --git a/.archon/workflows/defaults/archon-test-audit.yaml b/.archon/workflows/defaults/archon-test-audit.yaml new file mode 100644 index 0000000000..36e25dc44c --- /dev/null +++ b/.archon/workflows/defaults/archon-test-audit.yaml @@ -0,0 +1,357 @@ +name: archon-test-audit +description: | + Use when: User wants to improve test coverage, fix flaky tests, or audit test quality. + Triggers: "test audit", "improve coverage", "fix flaky tests", "test coverage", + "add tests", "test quality", "stability audit", "robustness". 
+ Does: Measures current coverage -> identifies critical gaps -> fixes flaky tests -> + writes new tests for uncovered code -> validates -> creates PR. + NOT for: Writing tests for a specific feature (use archon-fix-github-issue), + PR-scoped test review (that's the test-coverage-agent in review workflows). + +provider: claude + +nodes: + # ═══════════════════════════════════════════════════════════════ + # PHASE 1: MEASURE CURRENT STATE + # ═══════════════════════════════════════════════════════════════ + + - id: measure-coverage + bash: | + echo "=== PROJECT DETECTION ===" + # Detect project type + HAS_PYTHON=false + HAS_NODE=false + [ -f "requirements.txt" ] || [ -f "backend/requirements.txt" ] || [ -f "pyproject.toml" ] && HAS_PYTHON=true + [ -f "package.json" ] || [ -f "frontend/package.json" ] && HAS_NODE=true + echo "Python: $HAS_PYTHON" + echo "Node: $HAS_NODE" + + echo "" + echo "=== PYTHON COVERAGE ===" + if [ "$HAS_PYTHON" = "true" ]; then + pip install pytest-cov 2>/dev/null || true + # Try common pytest locations + if [ -f "backend/requirements.txt" ]; then + python -m pytest backend/tests/ --cov=backend --cov-report=term-missing --tb=no -q 2>&1 || echo "pytest failed" + elif [ -f "pyproject.toml" ]; then + python -m pytest --cov=. --cov-report=term-missing --tb=no -q 2>&1 || echo "pytest failed" + else + python -m pytest --cov=. --cov-report=term-missing --tb=no -q 2>&1 || echo "pytest failed" + fi + else + echo "No Python project detected" + fi + + echo "" + echo "=== NODE COVERAGE ===" + if [ "$HAS_NODE" = "true" ]; then + # Try running coverage from root or frontend + if [ -f "package.json" ]; then + npx vitest run --coverage --reporter=verbose 2>&1 | tail -60 || \ + npm test -- --coverage 2>&1 | tail -60 || \ + echo "Node tests failed at root" + fi + if [ -f "frontend/package.json" ]; then + echo "--- Frontend coverage ---" + cd frontend + npx vitest run --coverage --reporter=verbose 2>&1 | tail -60 || \ + npm test -- --coverage 2>&1 | tail -60 || \ + echo "Frontend tests failed" + cd .. + fi + else + echo "No Node project detected" + fi + + echo "" + echo "=== TEST FILE INVENTORY ===" + echo "Python test files:" + find . -name 'test_*.py' -o -name '*_test.py' | grep -v node_modules | grep -v .git | sort + echo "" + echo "JS/TS test files:" + find . -name '*.test.*' -o -name '*.spec.*' | grep -v node_modules | grep -v .git | sort + + echo "" + echo "=== SOURCE FILES WITHOUT TESTS ===" + echo "Python source files without corresponding tests:" + for f in $(find . -name '*.py' -not -name 'test_*' -not -name '*_test.py' -not -name '__init__.py' -not -path '*/tests/*' -not -path '*/migrations/*' -not -path '*/.git/*' -not -path '*/node_modules/*' | sort); do + base=$(basename "$f" .py) + if ! find . -name "test_${base}.py" -o -name "${base}_test.py" 2>/dev/null | grep -q .; then + echo " UNTESTED: $f" + fi + done + + echo "" + echo "JS/TS source files without corresponding tests:" + for f in $(find . -name '*.ts' -o -name '*.tsx' -o -name '*.js' -o -name '*.jsx' | grep -v node_modules | grep -v .git | grep -v dist | grep -v '.test.' | grep -v '.spec.' | grep -v '__tests__' | sort); do + base=$(basename "$f" | sed 's/\.\(ts\|tsx\|js\|jsx\)$//') + if ! find . 
-name "${base}.test.*" -o -name "${base}.spec.*" 2>/dev/null | grep -v node_modules | grep -q .; then + echo " UNTESTED: $f" + fi + done + + echo "" + echo "=== FLAKY TEST INDICATORS ===" + echo "Skipped tests:" + grep -rn '@pytest.mark.skip\|pytest.skip\|@skip\|xit(\|xdescribe(\|test.todo\|test.skip\|\.skip(' \ + --include='*.py' --include='*.ts' --include='*.js' --include='*.tsx' --include='*.jsx' \ + --exclude-dir=node_modules --exclude-dir=.git --exclude-dir=dist . 2>/dev/null | head -20 + + echo "" + echo "Retry/flaky patterns:" + grep -rn 'flaky\|retry\|eventually\|wait_for\|sleep.*assert\|xfail' \ + --include='*.py' --include='*.ts' --include='*.js' --include='*.tsx' --include='*.jsx' \ + --exclude-dir=node_modules --exclude-dir=.git --exclude-dir=dist . 2>/dev/null | head -20 + timeout: 300000 + + # ═══════════════════════════════════════════════════════════════ + # PHASE 2: ANALYZE GAPS (parallel agents) + # ═══════════════════════════════════════════════════════════════ + + - id: coverage-gap-agent + prompt: | + You are a test coverage specialist. Analyze the coverage report and identify the most critical gaps. + + ## Coverage Data + $measure-coverage.output + + ## Focus: $ARGUMENTS + + ## Instructions + + 1. Read the coverage report — identify files/functions with lowest coverage + 2. For each untested source file, read it and assess criticality: + - Business logic (payment, auth, core algorithms) = CRITICAL + - API endpoints handling user input = HIGH + - Data transformations, validators = MEDIUM + - Utilities, formatters, config = LOW + 3. For files with partial coverage, read them and identify which branches/paths are untested + 4. Prioritize: what would catch the most bugs if tested? + + ## Output + + Write to $ARTIFACTS_DIR/coverage-gaps.md: + - Total coverage percentage (if available) + - Top 10 most critical untested code paths, ordered by risk + - For each: file, function/method, why it's critical, what test to write + - Estimated number of tests needed to reach next coverage milestone + depends_on: [measure-coverage] + context: fresh + model: sonnet + denied_tools: [Write, Edit, Bash] + + - id: flaky-test-agent + prompt: | + You are a test stability specialist. Find and diagnose flaky, slow, or unreliable tests. + + ## Test Data + $measure-coverage.output + + ## Instructions + + 1. Read every skipped/xfail test and determine: is it actually broken, or just neglected? + 2. Look for common flaky patterns: + - Time-dependent tests (sleep, wall clock, timeouts) + - Order-dependent tests (shared state, missing teardown) + - Network-dependent tests (external APIs without mocking) + - Race conditions (async without proper awaiting) + - Hardcoded paths or ports + 3. 
Read the test files and identify tests that are: + - Slow (>5s) — check for unnecessary sleeps, full DB rebuilds + - Brittle — testing implementation details instead of behavior + - Missing assertions — tests that always pass + + ## Output + + Report in your response (your output feeds the plan node; you have no Write access): + - List of skipped tests with recommendation: fix, delete, or keep skipped + - List of flaky patterns found with specific file:line references + - List of slow tests with optimization suggestions + - List of tests with weak/missing assertions + depends_on: [measure-coverage] + context: fresh + model: sonnet + denied_tools: [Write, Edit, Bash] + + # ═══════════════════════════════════════════════════════════════ + # PHASE 3: PLAN + # ═══════════════════════════════════════════════════════════════ + + - id: plan + prompt: | + You are planning test improvements. Synthesize the coverage analysis and flaky test findings. + + ## Coverage Gaps + $coverage-gap-agent.output + + ## Flaky Tests + $flaky-test-agent.output + + ## Principles + + - Fix flaky tests FIRST — unreliable tests are worse than no tests + - Delete tests that can never pass or test nothing + - Write tests for critical business logic before edge cases + - Each new test must test BEHAVIOR, not implementation + - Don't mock what you don't own — use fakes/fixtures for external dependencies + - Keep tests fast — mock/stub slow dependencies (network, disk) + - Max scope: 15 test changes per PR to keep reviews manageable + + ## Instructions + + 1. From both reports, select up to 15 highest-impact improvements: + - Fix/delete flaky tests (up to 5) + - Write new tests for critical gaps (up to 10) + 2. For each: specify file, what to test, expected assertions, any fixtures needed + 3. Order by: flaky fixes first, then critical coverage gaps, then medium gaps + + ## Output + + Numbered plan. Be specific — the implement node will follow this literally. + depends_on: [coverage-gap-agent, flaky-test-agent] + context: fresh + allowed_tools: [Read] + + # ═══════════════════════════════════════════════════════════════ + # PHASE 4: IMPLEMENT + # ═══════════════════════════════════════════════════════════════ + + - id: implement + prompt: | + You are implementing test improvements. + + ## Plan + $plan.output + + ## Rules + + - Follow the plan exactly — do not add extra tests beyond the plan + - Each test must have clear arrange/act/assert structure + - Use the project's existing test patterns and fixtures — read existing tests first + - Every test must have a descriptive name that explains what behavior it verifies + - After writing each test, run it individually to verify it passes + - If a test requires new fixtures or mocks, create them in the appropriate conftest/setup file + - Commit each logical group of tests separately + + ## Instructions + + 1. Read existing test files to understand patterns, fixtures, and conventions + 2. Work through the plan items in order + 3. For flaky test fixes: read the test, diagnose, fix, verify it's stable + 4. For new tests: write, run, verify passing + 5. After all changes, run the full test suite to ensure nothing regressed + depends_on: [plan] + context: fresh + hooks: + PostToolUse: + - matcher: "Write|Edit" + response: + systemMessage: > + You just modified a test file. Run the specific test NOW to verify it passes. + Do not batch — verify each test immediately after writing it.
+ + # ═══════════════════════════════════════════════════════════════ + # PHASE 5: VALIDATE + # ═══════════════════════════════════════════════════════════════ + + - id: validate + bash: | + echo "=== RUNNING FULL TEST SUITE ===" + PASS=true + + if [ -f "backend/requirements.txt" ] || [ -f "pyproject.toml" ]; then + echo "--- Python tests ---" + if ! python -m pytest -v 2>&1; then + PASS=false + fi + fi + + if [ -f "package.json" ]; then + echo "--- Root JS/TS tests ---" + if ! npx vitest run 2>&1 && ! npm test 2>&1; then + PASS=false + fi + fi + + if [ -f "frontend/package.json" ]; then + echo "--- Frontend tests ---" + cd frontend + if ! npx vitest run 2>&1 && ! npm test 2>&1; then + PASS=false + fi + cd .. + fi + + if [ "$PASS" = "true" ]; then + echo "VALIDATION_STATUS: PASS" + else + echo "VALIDATION_STATUS: FAIL" + fi + depends_on: [implement] + timeout: 300000 + + # ═══════════════════════════════════════════════════════════════ + # PHASE 6: FIX FAILURES + # ═══════════════════════════════════════════════════════════════ + + - id: fix-failures + prompt: | + Review the validation output below. + + ## Validation Output + $validate.output + + ## Instructions + + If "VALIDATION_STATUS: PASS", respond with "All tests pass" and stop. + + If there are failures: + 1. Read the failing test output carefully + 2. Fix ONLY the failing tests — do not modify passing tests + 3. If a new test is fundamentally wrong (bad assumption), delete it rather than forcing it to pass + 4. Run the failing test after each fix to verify + 5. After all fixes, run the full test suite + depends_on: [validate] + context: fresh + model: haiku + + # ═══════════════════════════════════════════════════════════════ + # PHASE 7: CREATE PR + # ═══════════════════════════════════════════════════════════════ + + - id: create-pr + prompt: | + Create a PR for the test improvements. + + ## Context + - Coverage gaps: $coverage-gap-agent.output + - Flaky tests: $flaky-test-agent.output + - Plan: $plan.output + - Validation: $validate.output + + ## Instructions + + 1. Check `git diff --stat` — if no changes, skip PR creation + 2. Stage and commit all changes if uncommitted + 3. Push: `git push -u origin HEAD` + 4. Create PR: + - Title: "test: improve coverage and fix flaky tests" + - Body: summary of what was added/fixed, coverage before/after if available + 5. Save PR number: `echo "$(gh pr view --json number -q '.number')" > $ARTIFACTS_DIR/.pr-number` + 6. Mark ready and auto-merge: + ```bash + PR_NUM=$(cat $ARTIFACTS_DIR/.pr-number) + gh pr ready "$PR_NUM" + gh pr merge "$PR_NUM" --squash --auto --delete-branch + ``` + depends_on: [fix-failures] + context: fresh + model: haiku + hooks: + PreToolUse: + - matcher: "Write|Edit" + response: + hookSpecificOutput: + hookEventName: PreToolUse + permissionDecision: deny + permissionDecisionReason: "PR creation node — do not modify source files." diff --git a/.archon/workflows/defaults/archon-test-loop-dag.yaml b/.archon/workflows/defaults/archon-test-loop-dag.yaml index 99f6ee896f..d554653142 100644 --- a/.archon/workflows/defaults/archon-test-loop-dag.yaml +++ b/.archon/workflows/defaults/archon-test-loop-dag.yaml @@ -12,6 +12,7 @@ nodes: echo "Counter initialized to 0" - id: loop-counter + model: haiku depends_on: [setup] loop: prompt: | @@ -42,6 +43,7 @@ nodes: fresh_context: false - id: report + model: haiku depends_on: [loop-counter] prompt: | The loop counter test has completed. 
The loop node output was: diff --git a/.archon/workflows/defaults/archon-workflow-builder.yaml b/.archon/workflows/defaults/archon-workflow-builder.yaml index a311b8d970..877c7a9f43 100644 --- a/.archon/workflows/defaults/archon-workflow-builder.yaml +++ b/.archon/workflows/defaults/archon-workflow-builder.yaml @@ -202,6 +202,7 @@ nodes: depends_on: [generate-yaml] - id: save-or-report + model: haiku prompt: | You are a workflow installer. Save the generated workflow and report to the user. diff --git a/.archon/workflows/e2e-all-nodes.yaml b/.archon/workflows/e2e-all-nodes.yaml new file mode 100644 index 0000000000..a3962b9740 --- /dev/null +++ b/.archon/workflows/e2e-all-nodes.yaml @@ -0,0 +1,51 @@ +# E2E smoke test — all node types +# Verifies: bash, prompt, script, structured output, model override, $nodeId.output refs +name: e2e-all-nodes +description: "Comprehensive E2E test exercising bash, prompt, script, and structured output nodes." +provider: claude + +nodes: + # 1. Bash node — no AI, runs shell, stdout captured as output + - id: bash-check + bash: "echo '{\"status\":\"ok\",\"cwd\":\"'$(pwd)'\"}'" + + # 2. Prompt node — simple AI call, verifies sendQuery works + - id: prompt-simple + prompt: "The bash node returned: $bash-check.output — confirm you received it by saying 'received'. Say nothing else." + depends_on: [bash-check] + + # 3. Prompt with model override — verifies model selection + - id: prompt-haiku + prompt: "Say 'haiku-ok' and nothing else." + model: haiku + depends_on: [bash-check] + + # 4. Structured output node — verifies output_format translation + - id: structured + prompt: "Classify the text 'hello world' as either 'greeting' or 'math'." + output_format: + type: object + properties: + category: + type: string + enum: ["greeting", "math"] + required: ["category"] + additionalProperties: false + depends_on: [prompt-simple] + + # 5. Bash node using $nodeId.output from structured node + - id: bash-read-output + bash: "echo 'Structured output category: $structured.output'" + depends_on: [structured] + + # 6. Script node (bun runtime) — verifies script execution + - id: script-echo + script: echo-args + runtime: bun + depends_on: [bash-check] + + # 7. Prompt with effort control — verifies effort passes through to SDK + - id: prompt-effort + prompt: "Say 'effort-ok' and nothing else." + effort: low + depends_on: [bash-check] diff --git a/.archon/workflows/e2e-claude-smoke.yaml b/.archon/workflows/e2e-claude-smoke.yaml new file mode 100644 index 0000000000..e4b0f776a4 --- /dev/null +++ b/.archon/workflows/e2e-claude-smoke.yaml @@ -0,0 +1,23 @@ +# E2E smoke test — Claude provider +# Verifies: provider selection, sendQuery, structured output, tool use +name: e2e-claude-smoke +description: "E2E smoke test for Claude provider. Runs a simple prompt + structured output node." +provider: claude + +nodes: + - id: simple + prompt: "What is 2+2? Answer with just the number, nothing else." + + - id: structured + prompt: "Classify this input as 'math' or 'text': '2+2=4'" + output_format: + type: object + properties: + category: + type: string + enum: ["math", "text"] + depends_on: [simple] + + - id: tool-use + prompt: "Read the file packages/providers/package.json and tell me the package name. Answer with just the name." 
+ depends_on: [simple] diff --git a/.archon/workflows/e2e-codex-smoke.yaml b/.archon/workflows/e2e-codex-smoke.yaml new file mode 100644 index 0000000000..6650f92215 --- /dev/null +++ b/.archon/workflows/e2e-codex-smoke.yaml @@ -0,0 +1,21 @@ +# E2E smoke test — Codex provider +# Verifies: provider selection, sendQuery, structured output +name: e2e-codex-smoke +description: "E2E smoke test for Codex provider. Runs a simple prompt + structured output node." +provider: codex + +nodes: + - id: simple + prompt: "What is 2+2? Answer with just the number, nothing else." + + - id: structured + prompt: "Classify this input as 'math' or 'text': '2+2=4'. Return JSON only." + output_format: + type: object + properties: + category: + type: string + enum: ["math", "text"] + required: ["category"] + additionalProperties: false + depends_on: [simple] diff --git a/.archon/workflows/e2e-mixed-providers.yaml b/.archon/workflows/e2e-mixed-providers.yaml new file mode 100644 index 0000000000..6922056e50 --- /dev/null +++ b/.archon/workflows/e2e-mixed-providers.yaml @@ -0,0 +1,27 @@ +# E2E smoke test — mixed providers (Claude + Codex in same workflow) +# Verifies: per-node provider override, cross-provider $nodeId.output refs +name: e2e-mixed-providers +description: "Tests Claude and Codex providers in the same workflow with cross-provider output refs." + +# Default provider is claude +provider: claude + +nodes: + # 1. Claude node — default provider + - id: claude-node + prompt: "Say 'claude-ok' and nothing else." + + # 2. Codex node — provider override + - id: codex-node + prompt: "Say 'codex-ok' and nothing else." + provider: codex + + # 3. Claude node reads Codex output — cross-provider ref + - id: claude-reads-codex + prompt: "The codex node said: '$codex-node.output'. Confirm you received it by saying 'cross-provider-ok'. Say nothing else." + depends_on: [codex-node] + + # 4. Bash node verifies both outputs + - id: verify + bash: "echo 'claude=$claude-node.output codex=$codex-node.output cross=$claude-reads-codex.output'" + depends_on: [claude-node, codex-node, claude-reads-codex] diff --git a/.claude/commands/plan-feature.md b/.claude/commands/plan-feature.md index d4562e0f84..c3a12c4eab 100644 --- a/.claude/commands/plan-feature.md +++ b/.claude/commands/plan-feature.md @@ -23,7 +23,7 @@ Restate the feature request in your own words. Identify: 3. **Scope boundaries** — What is explicitly in scope vs. out of scope? 4. **Package impact** — Which of the 8 packages are affected? (`paths`, `git`, `isolation`, `workflows`, `core`, `adapters`, `server`, `web`) -5. **Interface changes** — Does this touch `IPlatformAdapter`, `IAssistantClient`, +5. **Interface changes** — Does this touch `IPlatformAdapter`, `IAgentProvider`, `IDatabase`, or `IWorkflowStore`? New interfaces needed? --- @@ -85,7 +85,7 @@ Before writing tasks, reason through: **Interface design:** - Prefer extending existing narrow interfaces over creating fat ones. - New interface methods only if they have a concrete current caller. -- Avoid adding methods to `IPlatformAdapter` or `IAssistantClient` unless essential. +- Avoid adding methods to `IPlatformAdapter` or `IAgentProvider` unless essential. **Test isolation strategy:** - `mock.module()` is process-global and permanent in Bun — plan test file placement carefully. 
diff --git a/.claude/commands/prime-backend.md b/.claude/commands/prime-backend.md index e2ff9dafee..7c34a3bee7 100644 --- a/.claude/commands/prime-backend.md +++ b/.claude/commands/prime-backend.md @@ -39,11 +39,11 @@ Read `packages/core/src/state/session-transitions.ts` in full — `TransitionTri ### 5. Understand AI Client Patterns -List clients: -!`ls packages/core/src/clients/` +List providers: +!`ls packages/core/src/providers/` -Read `packages/core/src/clients/factory.ts` for provider selection logic. -Read `packages/core/src/clients/claude.ts` first 50 lines — `IAssistantClient` implementation +Read `packages/core/src/providers/factory.ts` for provider selection logic. +Read `packages/core/src/providers/claude.ts` first 50 lines — `IAgentProvider` implementation with streaming event loop pattern. ### 6. Understand Database Layer @@ -52,7 +52,7 @@ List DB modules: !`ls packages/core/src/db/` Read `packages/core/src/types/index.ts` (or the main types file) first 60 lines for key -interfaces: `IPlatformAdapter`, `IAssistantClient`, `Conversation`, `Session`. +interfaces: `IPlatformAdapter`, `IAgentProvider`, `Conversation`, `Session`. ### 7. Understand the Server @@ -81,9 +81,9 @@ Summarize (under 250 words): - `TransitionTrigger` values and their behaviors - Only `plan-to-execute` immediately creates a new session; others deactivate first -### AI Clients -- `ClaudeClient` (claude-agent-sdk) and `CodexClient` (codex-sdk) -- `IAssistantClient` streaming pattern: `for await (const event of events)` +### AI Providers +- `ClaudeProvider` (claude-agent-sdk) and `CodexProvider` (codex-sdk) +- `IAgentProvider` streaming pattern: `for await (const event of events)` ### Key Database Tables - conversations, sessions, codebases, isolation_environments, workflow_runs, workflow_events, messages diff --git a/.claude/commands/prime-workflows.md b/.claude/commands/prime-workflows.md index 25509de48f..464d8f2e67 100644 --- a/.claude/commands/prime-workflows.md +++ b/.claude/commands/prime-workflows.md @@ -51,7 +51,7 @@ bridges these to SSE via `WorkflowEventBridge`. ### 7. Understand Dependency Injection Read `packages/workflows/src/deps.ts` — `WorkflowDeps` type: `IWorkflowPlatform`, -`IWorkflowAssistantClient`, `IWorkflowStore` injected at runtime. No direct DB or AI imports +`IWorkflowAgentProvider`, `IWorkflowStore` injected at runtime. No direct DB or AI imports inside this package. ### 8. See What Workflows Are Available diff --git a/.claude/commands/prime.md b/.claude/commands/prime.md index 50e5f45b4c..0a70ebe35f 100644 --- a/.claude/commands/prime.md +++ b/.claude/commands/prime.md @@ -64,8 +64,8 @@ Provide a concise summary (under 300 words) covering: ### Architecture - Package dependency order and each package's responsibility -- Key interfaces: `IPlatformAdapter`, `IAssistantClient`, `IDatabase`, `IWorkflowStore` -- Message flow: platform adapter → orchestrator-agent → command handler OR AI client +- Key interfaces: `IPlatformAdapter`, `IAgentProvider`, `IDatabase`, `IWorkflowStore` +- Message flow: platform adapter → orchestrator-agent → command handler OR AI provider - Workflow execution: `discoverWorkflows` → router → `executeWorkflow` (steps / loop / DAG) ### Current State diff --git a/.claude/commands/validate.md b/.claude/commands/validate.md index 7e86a0dae4..658bc00def 100644 --- a/.claude/commands/validate.md +++ b/.claude/commands/validate.md @@ -21,7 +21,7 @@ Runs `tsc --noEmit` across all 8 packages via `bun --filter '*' type-check`. 
**What to look for:** - Missing return types (explicit return types required on all functions) -- Incorrect interface implementations (`IPlatformAdapter`, `IAssistantClient`, etc.) +- Incorrect interface implementations (`IPlatformAdapter`, `IAgentProvider`, etc.) - Import type errors (use `import type` for type-only imports) - Package boundary violations (e.g., `@archon/workflows` importing from `@archon/core`) diff --git a/.claude/docs/architecture-deep-dive.md b/.claude/docs/architecture-deep-dive.md index f5126d6fb4..d5e542b59b 100644 --- a/.claude/docs/architecture-deep-dive.md +++ b/.claude/docs/architecture-deep-dive.md @@ -33,7 +33,7 @@ Slack event → Otherwise → buildOrchestratorPrompt() (prompt-builder.ts:116) → Prompt includes: registered projects, discovered workflows, /invoke-workflow format → sessionDb.getActiveSession() → transitionSession('first-message') if none (orchestrator-agent.ts:462) - → getAssistantClient(conversation.ai_assistant_type) (orchestrator-agent.ts:470) + → getAgentProvider(conversation.ai_assistant_type) (orchestrator-agent.ts:470) → cwd = getArchonWorkspacesPath() (orchestrator-agent.ts:458) → handleBatchMode() or handleStreamMode() based on getStreamingMode() @@ -313,7 +313,7 @@ Narrows `IPlatformAdapter` to `WebAdapter` for web-specific methods: `setConvers | Message entry | `adapters/src/chat/slack/adapter.ts`, `server/src/index.ts` | | Orchestration | `core/src/orchestrator/orchestrator-agent.ts`, `core/src/orchestrator/orchestrator.ts` | | Locking | `core/src/utils/conversation-lock.ts` | -| AI clients | `core/src/clients/claude.ts`, `core/src/clients/factory.ts` | +| AI providers | `core/src/providers/claude.ts`, `core/src/providers/factory.ts` | | Commands | `core/src/handlers/command-handler.ts` | | Sessions | `core/src/db/sessions.ts`, `core/src/state/session-transitions.ts` | | Workflows | `workflows/src/executor.ts`, `workflows/src/dag-executor.ts`, `workflows/src/loader.ts` | diff --git a/.claude/rules/adapters.md b/.claude/rules/adapters.md deleted file mode 100644 index d49e683378..0000000000 --- a/.claude/rules/adapters.md +++ /dev/null @@ -1,44 +0,0 @@ ---- -paths: - - "packages/adapters/**/*.ts" ---- - -# Adapters Conventions - -## Key Patterns - -- **Auth is inside adapters** — every adapter checks authorization before calling `onMessage()`. Silent rejection (no error response), log with masked user ID: `userId.slice(0, 4) + '***'`. -- **Whitelist parsing in constructor** — parse env var (`SLACK_ALLOWED_USER_IDS`, `TELEGRAM_ALLOWED_USER_IDS`, `GITHUB_ALLOWED_USERS`) using a co-located `parseAllowedUserIds()` / `parseAllowedUsers()` function. Empty list = open access. -- **Lazy logger pattern** — ALL adapter files use a module-level `cachedLog` + `getLog()` getter so test mocks intercept `createLogger` before the logger is instantiated. Never initialize logger at module scope. -- **Two handler patterns** (both valid): - - **Chat adapters** (Slack, Telegram, Discord): `onMessage(handler)` — adapter owns the event loop (polling/WebSocket), fires registered callback. Lock manager lives in the server's callback closure. Errors handled by caller via `createMessageErrorHandler`. - - **Forge adapters** (GitHub, Gitea): `handleWebhook(payload, signature)` — server HTTP route calls directly, returns 200 immediately. Full pipeline inside adapter (signature verification, repo cloning, command loading, context building). Lock manager injected in constructor. Errors caught internally and posted to issue/PR. 
-- **Message splitting** — use shared `splitIntoParagraphChunks(message, maxLength)` from `../../utils/message-splitting`. Two-pass: paragraph breaks first, then line breaks. Limits: Slack 12000, Telegram 4096, GitHub 65000. -- **`ensureThread()` is often a no-op** — Slack returns the same ID (already encoded as `channel:ts`), Telegram has no threads, GitHub issues are inherently threaded. - -## Conversation ID Formats - -| Platform | Format | Example | -|----------|--------|---------| -| Slack | `channel:thread_ts` | `C123ABC:1234567890.123456` | -| Telegram | numeric chat ID as string | `"1234567890"` | -| GitHub | `owner/repo#number` | `"acme/api#42"` | -| Web | user-provided string | `"my-chat"` | -| Discord | channel ID string | `"987654321098765432"` | - -## Architecture - -- All chat adapters implement `IPlatformAdapter` from `@archon/core` -- GitHub adapter is webhook-based (no polling); Slack/Telegram/Discord use polling -- GitHub adapter holds its own `ConversationLockManager` (injected in constructor) -- Slack conversation ID encodes both channel and thread: `sendMessage()` splits on `:` to extract `thread_ts` -- GitHub adapter adds a hidden HTML-comment marker to its own comments to prevent self-triggering loops -- GitHub only responds to `issue_comment.created` events — NOT `issues.opened` / `pull_request.opened` (descriptions contain documentation, not commands; see #96) - -## Anti-patterns - -- Never put auth logic outside the adapter (no auth middleware in server routes) -- Never throw from `onMessage` handlers; errors surface to the caller -- Never call `sendMessage()` with a raw token or credential string in the message -- Never use the generic `exec` — always use `execFileAsync` for subprocess calls -- Never add a new adapter method to `IPlatformAdapter` unless ALL adapters need it; use optional methods (`sendStructuredEvent?`) for platform-specific capabilities diff --git a/.claude/rules/cli.md b/.claude/rules/cli.md deleted file mode 100644 index a954b6bd18..0000000000 --- a/.claude/rules/cli.md +++ /dev/null @@ -1,89 +0,0 @@ ---- -paths: - - "packages/cli/**/*.ts" ---- - -# CLI Conventions - -## Commands - -```bash -# Workflow commands (require git repo) -bun run cli workflow list [--json] -bun run cli workflow run <workflow> [message] [--branch <name>] [--from-branch <branch>] [--no-worktree] [--resume] -bun run cli workflow status [runId] - -# Isolation commands -bun run cli isolation list -bun run cli isolation cleanup [days] # default: 7 days -bun run cli isolation cleanup --merged # removes merged branches + remote refs -bun run cli complete <branch> [--force] # full lifecycle: worktree + local/remote branches - -# Interactive -bun run cli chat [--cwd <path>] - -# Setup -bun run cli setup -bun run cli version -``` - -## Startup Behavior - -1. `@archon/paths/strip-cwd-env-boot` (first import) removes all Bun-auto-loaded CWD `.env` keys from `process.env` -2. Loads `~/.archon/.env` with `override: true` (Archon config wins over shell-inherited vars) -3. Smart Claude auth default: if no `CLAUDE_API_KEY` or `CLAUDE_CODE_OAUTH_TOKEN`, sets `CLAUDE_USE_GLOBAL_AUTH=true` -4.
Imports all commands AFTER dotenv setup - -## WorkflowRunOptions Interface - -```typescript -interface WorkflowRunOptions { - branchName?: string; // Explicit branch name for the worktree - fromBranch?: string; // Override base branch (start-point for worktree) - noWorktree?: boolean; // Opt out of isolation, run in live checkout - resume?: boolean; // Reuse worktree from last failed run -} -``` - -**Default behavior**: Creates worktree with auto-generated branch name (`archon/task-{workflow}-{timestamp}`). - -**Mutually exclusive** (enforced in both `cli.ts` pre-flight and `workflowRunCommand`): -- `--branch` + `--no-worktree` -- `--from` + `--no-worktree` -- `--resume` + `--branch` - -- `--branch feature-auth` → creates/reuses worktree for that branch -- (no flags) → creates worktree with auto-generated `archon/task-*` branch (isolation by default) -- `--no-worktree` → runs directly in live checkout (opt-out of isolation) -- `--from dev` → overrides the start-point for new worktree (works with or without `--branch`) -- `--resume` → resumes last run for this conversation (mutually exclusive with `--branch`) - -## Git Repo Requirement - -Workflow and isolation commands resolve CWD to the git repo root. Run from within a git repository (subdirectories work). The CLI calls `git rev-parse --show-toplevel` to find the root. - -## Conversation ID Format - -CLI generates: `cli-{timestamp}-{random6}` (e.g., `cli-1703123456789-a7f3bc`) - -## Port Allocation - -Worktree-aware: same hash-based algorithm as server (3190–4089 range). Running `bun dev` in a worktree auto-allocates a unique port. Same worktree always gets same port. - -## CLIAdapter - -The `CLIAdapter` implements `IPlatformAdapter`. It streams output to stdout. `getStreamingMode()` defaults to `'batch'` (configurable via constructor options). No auth needed — CLI is local only. 
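The mutual-exclusion rules above are straightforward to mirror in a pre-flight guard. A minimal sketch (the helper name and error wording are hypothetical, not the actual `cli.ts` code), using the `WorkflowRunOptions` interface documented above:

```typescript
// Hypothetical pre-flight guard mirroring the documented exclusivity rules.
function assertRunOptions(opts: WorkflowRunOptions): void {
  if (opts.noWorktree && (opts.branchName !== undefined || opts.fromBranch !== undefined)) {
    throw new Error('--branch/--from cannot be combined with --no-worktree');
  }
  if (opts.resume && opts.branchName !== undefined) {
    throw new Error('--resume cannot be combined with --branch');
  }
}
```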
- -## Architecture - -- `@archon/cli` depends on `@archon/core`, `@archon/workflows`, `@archon/git`, `@archon/isolation`, `@archon/paths` -- Uses `createWorkflowDeps()` from `@archon/core/workflows/store-adapter` to build workflow deps -- Database shared with server (same `~/.archon/archon.db` or `DATABASE_URL`) -- Conversation lifecycle: create → run workflow → persist messages (same DB as web UI) - -## Anti-patterns - -- Never run CLI commands without being inside a git repository (workflow/isolation commands will fail) -- Never set `DATABASE_URL` in `~/.archon/.env` to point at a target app's database -- Never use `--force` on `complete` unless branch is truly safe to delete (skips uncommitted check) -- Never add interactive prompts inside CLI commands — use flags for all options (non-interactive tool) diff --git a/.claude/rules/database.md b/.claude/rules/database.md deleted file mode 100644 index 0f579cc1a2..0000000000 --- a/.claude/rules/database.md +++ /dev/null @@ -1,90 +0,0 @@ ---- -paths: - - "packages/core/src/db/**/*.ts" - - "migrations/**/*.sql" ---- - -# Database Conventions - -## 7 Tables (all prefixed `remote_agent_`) - -| Table | Purpose | -|-------|---------| -| `remote_agent_conversations` | Platform conversations, soft-delete (`deleted_at`), title, `hidden` flag | -| `remote_agent_sessions` | AI SDK sessions with `parent_session_id` audit chain, `transition_reason` | -| `remote_agent_codebases` | Repository metadata, `commands` JSONB | -| `remote_agent_isolation_environments` | Git worktree tracking, `workflow_type`, `workflow_id` | -| `remote_agent_workflow_runs` | Execution state, `working_path`, `last_activity_at` | -| `remote_agent_workflow_events` | Step-level event log per run | -| `remote_agent_messages` | Conversation history, tool call metadata as JSONB | - -## IDatabase Interface - -Auto-detects at startup: PostgreSQL if `DATABASE_URL` set, SQLite (`~/.archon/archon.db`) otherwise. - -```typescript -import { pool, getDialect } from './connection'; // pool = IDatabase instance - -// $1, $2 placeholders work for both PostgreSQL and SQLite -const result = await pool.query( - 'SELECT * FROM remote_agent_conversations WHERE id = $1', - [id] -); -const row = result.rows[0]; // rows is readonly T[] -``` - -Use `getDialect()` for dialect-specific expressions: `dialect.generateUuid()`, `dialect.now()`, `dialect.jsonMerge(col, paramIdx)`, `dialect.jsonArrayContains(col, path, paramIdx)`, `dialect.nowMinusDays(paramIdx)`. - -## Import Pattern — Namespaced Exports - -```typescript -// Use namespace imports for DB modules (consistent project-wide pattern) -import * as conversationDb from '@archon/core/db/conversations'; -import * as sessionDb from '@archon/core/db/sessions'; -import * as codebaseDb from '@archon/core/db/codebases'; -import * as workflowDb from '@archon/core/db/workflows'; -import * as messageDb from '@archon/core/db/messages'; -``` - -## INSERT Error Handling - -```typescript -try { - const result = await pool.query('INSERT INTO remote_agent_conversations ...', params); - return result.rows[0]; -} catch (error) { - log.error({ err: error, params }, 'db_insert_failed'); - throw new Error('Failed to create conversation'); -} -``` - -## UPDATE with rowCount Verification - -`updateConversation()` and similar throw `ConversationNotFoundError` / `SessionNotFoundError` when `rowCount === 0`. 
Callers must handle: - -```typescript -try { - await db.updateConversation(conversationId, { codebase_id: codebaseId }); -} catch (error) { - if (error instanceof ConversationNotFoundError) { - // Handle missing conversation specifically - } - throw error; // Re-throw unexpected errors -} -``` - -## Session Audit Trail - -Sessions are immutable. Every new session links back: `parent_session_id` → previous session, `transition_reason: TransitionTrigger`. Query the chain to understand history. `active = true` means the current session. - -## Soft Delete - -Conversations use soft-delete: `deleted_at IS NULL` filter should be included in all user-facing queries. `hidden = true` conversations are worker conversations (background workflows) — excluded from UI listings. - -## Anti-patterns - -- Never `SELECT *` in production queries on large tables — select specific columns -- Never write raw SQL strings in application code outside `packages/core/src/db/` modules -- Never bypass the `IDatabase` interface to call database drivers directly from other packages -- Never assume `rows[0]` exists without null-checking — queries can return empty arrays -- Never use `RETURNING *` in UPDATE when only checking success — check `rowCount` instead diff --git a/.claude/rules/dx-quirks.md b/.claude/rules/dx-quirks.md deleted file mode 100644 index 3d05e1f843..0000000000 --- a/.claude/rules/dx-quirks.md +++ /dev/null @@ -1,22 +0,0 @@ -# DX Quirks - -## Bun Log Elision - -When running `bun dev` from repo root, `--filter` truncates logs to `[N lines elided]`. -To see full logs: `cd packages/server && bun --watch src/index.ts` or `bun --cwd packages/server run dev`. - -## mock.module() Pollution - -`mock.module()` is process-global and irreversible — `mock.restore()` does NOT undo it. -Never add `afterAll(() => mock.restore())` for `mock.module()` cleanup. -Use `spyOn()` for internal modules (spy.mockRestore() DOES work). -When adding tests with `mock.module()`, ensure package.json runs it in a separate `bun test` invocation. - -## Worktree Port Allocation - -Worktrees auto-allocate ports (3190-4089 range, hash-based on path). Same worktree always gets same port. -Main repo defaults to 3090. Override: `PORT=4000 bun dev`. - -## bun run test vs bun test - -NEVER run `bun test` from repo root — it discovers all test files across packages in one process, causing ~135 mock pollution failures. Always use `bun run test` (which uses `bun --filter '*' test` for per-package isolation). diff --git a/.claude/rules/isolation-patterns.md b/.claude/rules/isolation-patterns.md deleted file mode 100644 index 0e763e03a2..0000000000 --- a/.claude/rules/isolation-patterns.md +++ /dev/null @@ -1,40 +0,0 @@ -# Isolation Architecture Patterns - -## Core Design - -- ALL isolation logic is centralized in the orchestrator — adapters are thin -- Every @mention auto-creates a worktree (simplicity > efficiency; worktrees are cheap) -- Data model is work-centric (`isolation_environments` table), enabling cross-platform sharing -- Cleanup is a separate service using git-first checks - -## Directory Structure - -``` -~/.archon/workspaces/owner/repo/ -├── source/ # Clone or symlink to local path -├── worktrees/ # Git worktrees for this project -├── artifacts/ # Workflow artifacts (NEVER in git) -│ ├── runs/{id}/ # Per-run artifacts ($ARTIFACTS_DIR) -│ └── uploads/{convId}/ # Web UI file uploads (ephemeral) -└── logs/ # Workflow execution logs -``` - -## Resolution Flow - -1. 
Adapter provides `IsolationHints` (conversationId, workflowId, branch preference) -2. Orchestrator's `validateAndResolveIsolation()` resolves hints → environment -3. WorktreeProvider creates worktree if needed, syncs with origin first -4. Environment tracked in `isolation_environments` table - -## Key Packages - -- `@archon/isolation` (`packages/isolation/src/`) — types, providers, resolver, error classifiers -- `@archon/git` (`packages/git/src/`) — branch, worktree, repo operations -- `@archon/paths` (`packages/paths/src/`) — path resolution utilities - -## Safety Rules - -- NEVER run `git clean -fd` — permanently deletes untracked files -- Use `classifyIsolationError()` to map git errors to user-friendly messages -- Trust git's natural guardrails (refuse to remove worktree with uncommitted changes) -- Use `execFileAsync` (not `exec`) when calling git directly diff --git a/.claude/rules/isolation.md b/.claude/rules/isolation.md deleted file mode 100644 index 1b849e7eca..0000000000 --- a/.claude/rules/isolation.md +++ /dev/null @@ -1,77 +0,0 @@ ---- -paths: - - "packages/isolation/**/*.ts" - - "packages/git/**/*.ts" ---- - -# Isolation & Git Conventions - -## Branded Types (packages/git/src/types.ts) - -Always use the branded constructors — they reject empty strings at runtime and prevent passing the wrong string type: - -```typescript -import { toRepoPath, toBranchName, toWorktreePath } from '@archon/git'; -import type { RepoPath, BranchName, WorktreePath } from '@archon/git'; - -const repo = toRepoPath('/home/user/owner/repo'); // RepoPath -const branch = toBranchName('feature-auth'); // BranchName -const wt = toWorktreePath('/home/.archon/worktrees/x'); // WorktreePath -``` - -Git operations return a `GitResult<T>` discriminated union: `{ ok: true; value: T }` or `{ ok: false; error: GitError }`. Always check `.ok` before accessing `.value`. - -## IsolationResolver — 7-Step Resolution Order - -1. **Existing env** — use `existingEnvId` if worktree still exists on disk -2. **No codebase** — skip isolation entirely, return `status: 'none'` -3. **Workflow reuse** — find active env with same `(codebaseId, workflowType, workflowId)` -4. **Linked issue sharing** — PR can reuse the worktree from a linked issue -5. **PR branch adoption** — find existing worktree by branch name (`findWorktreeByBranch`) -6. **Limit check + auto-cleanup** — if at `maxWorktrees` (default 25), try `makeRoom()` first -7. **Create new** — call `provider.create(isolationRequest)` then `store.create()` - -If `store.create()` fails after `provider.create()` succeeds, the orphaned worktree is cleaned up best-effort before re-throwing. - -## Error Handling Pattern - -```typescript -import { classifyIsolationError, isKnownIsolationError } from '@archon/isolation'; - -try { - await provider.create(request); -} catch (error) { - const err = error instanceof Error ? error : new Error(String(error)); - if (!isKnownIsolationError(err)) { - throw err; // Unknown = programming bug, propagate as crash - } - const userMessage = classifyIsolationError(err); // Maps to friendly message - // ...send userMessage to platform, return blocked resolution -} -``` - -Known error patterns: `permission denied`, `eacces`, `timeout`, `no space left`, `enospc`, `not a git repository`, `branch not found`. - -`IsolationBlockedError` signals ALL message handling should stop — the user has already been notified. - -## Git Safety Rules - -- **NEVER run `git clean -fd`** — permanently deletes untracked files. Use `git checkout .` instead.
-- **Always use `execFileAsync`** (from `@archon/git/exec`), never `exec` or `execSync` -- `hasUncommittedChanges()` returns `true` on unexpected errors (conservative — prevents data loss) -- Worktree paths follow project-scoped layout: `~/.archon/workspaces/{owner}/{repo}/worktrees/{branch}` - -## Architecture - -- `@archon/git` — zero `@archon/*` dependencies; only branded types and `execFileAsync` wrapper -- `@archon/isolation` — depends only on `@archon/git` + `@archon/paths` -- `IIsolationStore` interface injected into `IsolationResolver` — never call DB directly from git package -- `IIsolationProvider` interface — `WorktreeProvider` is the only implementation -- Stale env cleanup is best-effort: `markDestroyedBestEffort()` logs errors but never throws - -## Anti-patterns - -- Never call `git` via `exec()` or shell string — always `execFileAsync('git', [...args])` -- Never treat `IsolationBlockedError` as recoverable — it means user was notified, stop processing -- Never use a plain `string` where `RepoPath` / `BranchName` / `WorktreePath` is expected -- Never skip the `isKnownIsolationError()` check — unknown errors must propagate as crashes diff --git a/.claude/rules/orchestrator.md b/.claude/rules/orchestrator.md deleted file mode 100644 index acc3d64fa0..0000000000 --- a/.claude/rules/orchestrator.md +++ /dev/null @@ -1,121 +0,0 @@ ---- -paths: - - "packages/core/src/orchestrator/**/*.ts" - - "packages/core/src/handlers/**/*.ts" - - "packages/core/src/state/**/*.ts" ---- - -# Orchestrator Conventions - -## Message Flow — Routing Agent Architecture - -``` -Platform message - → ConversationLockManager.acquireLock() - → handleMessage() (orchestrator-agent.ts:383) - → inheritThreadContext() — copy parent's codebase/cwd if child thread - → Deterministic gate: 10 commands (help, status, reset, workflow, register-project, update-project, remove-project, commands, init, worktree) - → Everything else → AI routing call: - → listCodebases() + discoverAllWorkflows() - → buildFullPrompt() → buildOrchestratorPrompt() or buildProjectScopedPrompt() - → AI responds with natural language ± /invoke-workflow or /register-project - → parseOrchestratorCommands() extracts structured commands from AI response - → If /invoke-workflow found → dispatchOrchestratorWorkflow() - → If /register-project found → handleRegisterProject() - → Otherwise → send AI text to user -``` - -Lock manager returns `{ status: 'started' | 'queued-conversation' | 'queued-capacity' }`. Always use the return value to decide whether to emit a "queued" notice — never call `isActive()` separately (TOCTOU race). - -## Deterministic Commands (command-handler.ts) - -Only **10 commands** are handled deterministically: - -| Command | Behavior | -|---------|----------| -| `/help` | Show available commands | -| `/status` | Show conversation/session state | -| `/reset` | Deactivate current session | -| `/workflow` | Subcommands: `list`, `run`, `status`, `cancel`, `reload` | -| `/register-project` | Handled inline — creates codebase DB record | -| `/update-project` | Handled inline — updates codebase path | -| `/remove-project` | Handled inline — deletes codebase DB record | -| `/commands` | List registered codebase commands | -| `/init` | Scaffold `.archon/` in current repo | -| `/worktree` | Worktree subcommands | - -**All other slash commands fall through to the AI router.** Unrecognized commands return an "Unknown command" error. 
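To make the TOCTOU warning concrete, a sketch of the intended call pattern for the lock manager described above (the exact `acquireLock()` signature is assumed here for illustration):

```typescript
// Sketch: branch on the atomic acquire result instead of probing isActive() first.
const lock = await lockManager.acquireLock(conversationId);
if (lock.status === 'queued-conversation' || lock.status === 'queued-capacity') {
  // Safe to notify: the status came from the same atomic call that queued us.
  await adapter.sendMessage(conversationId, 'Request queued — it will run shortly.');
}
```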
- -## Routing AI — Prompt Building (prompt-builder.ts) - -The choice between prompts depends on whether the conversation has an attached project: - -- **No project** → `buildOrchestratorPrompt()` (prompt-builder.ts:116) — lists all projects equally, asks user to clarify if ambiguous -- **Has project** → `buildProjectScopedPrompt()` (prompt-builder.ts:153) — active project shown first, ambiguous requests default to it - -Both prompts include: registered projects, discovered workflows, and the `/invoke-workflow` + `/register-project` format specification. - -### `/invoke-workflow` Protocol - -The AI emits: `/invoke-workflow <workflow> --project <project> --prompt "user's intent"` - -`parseOrchestratorCommands()` (orchestrator-agent.ts:90) parses this with: -- Workflow name validated against discovered workflows via `findWorkflow()` -- Project name validated via `findCodebaseByName()` — case-insensitive, supports partial path segment match (e.g., `"repo"` matches `"owner/repo"`) -- `--project` must appear before `--prompt` - -### `filterToolIndicators()` (orchestrator-agent.ts:163) - -Batch mode only. Strips paragraphs starting with emoji tool indicators (🔧💭📝✏️🗑️📂🔍) from accumulated AI response before sending to user. - -## Session Transitions - -Sessions are **immutable** — never mutated, only deactivated and replaced. The audit trail is via `parent_session_id` + `transition_reason`. - -**Only `plan-to-execute` immediately creates a new session.** All other triggers only deactivate; the new session is created on the next AI message. - -```typescript -import { getTriggerForCommand, shouldCreateNewSession } from '../state/session-transitions'; - -const trigger = getTriggerForCommand('reset'); // 'reset-requested' -if (shouldCreateNewSession(trigger)) { - // plan-to-execute only -} -``` - -`TransitionTrigger` values: `'first-message'`, `'plan-to-execute'`, `'isolation-changed'`, `'reset-requested'`, `'worktree-removed'`, `'conversation-closed'`. - -## Isolation Resolution - -`validateAndResolveIsolation()` (orchestrator.ts:108) delegates to `IsolationResolver` and handles: -- Sending contextual messages to the platform (e.g., "Reusing worktree from issue #42") -- Updating the DB (`conversation.isolation_env_id`, `conversation.cwd`) -- Retrying once when a stale reference is found (`stale_cleaned`) -- Throwing `IsolationBlockedError` after platform notification when blocked - -When isolation is blocked, **stop all further processing** — `IsolationBlockedError` means the user was already notified. - -## Background Workflow Dispatch (Web only) - -`dispatchBackgroundWorkflow()` (orchestrator.ts:256) creates a hidden worker conversation (`web-worker-{timestamp}-{random}`), sets up event bridging from worker SSE → parent SSE, pre-creates the workflow run row (prevents 404 on immediate UI navigation), and fires-and-forgets `executeWorkflow()`. On completion, surfaces `result.summary` to the parent conversation.
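Returning to the `/invoke-workflow` protocol above: for illustration only, a simplified extraction of that line (the real `parseOrchestratorCommands()` additionally validates names via `findWorkflow()` / `findCodebaseByName()` and enforces flag order):

```typescript
// Simplified /invoke-workflow extraction — a sketch, not the production parser.
const INVOKE = /\/invoke-workflow\s+(\S+)\s+--project\s+(\S+)\s+--prompt\s+"([^"]*)"/;

function extractInvocation(
  aiResponse: string,
): { workflow: string; project: string; prompt: string } | undefined {
  const m = aiResponse.match(INVOKE);
  if (!m) return undefined;
  const [, workflow, project, prompt] = m;
  return { workflow, project, prompt };
}
```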
- -## Lazy Logger Pattern - -All files in this area use the deferred logger pattern — NEVER initialize at module scope: - -```typescript -let cachedLog: ReturnType<typeof createLogger> | undefined; -function getLog(): ReturnType<typeof createLogger> { - if (!cachedLog) cachedLog = createLogger('orchestrator'); - return cachedLog; -} -``` - -## Anti-patterns - -- Never call `isActive()` and then `acquireLock()` — race condition, use the lock return value -- Never access `conversation.isolation_env_id` directly without going through the resolver -- Never skip `IsolationBlockedError` — it must propagate to stop all further message handling -- Never add platform-specific logic to the orchestrator; it uses `IPlatformAdapter` interface only -- Never transition sessions by mutating them; always deactivate and create a new linked session -- Never assume a slash command is deterministic — only the 10 listed above bypass the AI router diff --git a/.claude/rules/server-api.md b/.claude/rules/server-api.md deleted file mode 100644 index 912e7db877..0000000000 --- a/.claude/rules/server-api.md +++ /dev/null @@ -1,109 +0,0 @@ ---- -paths: - - "packages/server/**/*.ts" ---- - -# Server API Conventions - -## Hono Framework - -```typescript -import { Hono } from 'hono'; -import { streamSSE } from 'hono/streaming'; -import { cors } from 'hono/cors'; - -// CORS: allow-all for single-developer tool (override with WEB_UI_ORIGIN) -app.use('/api/*', cors({ origin: process.env.WEB_UI_ORIGIN || '*' })); - -// Error response helper pattern -function apiError(c: Context, status: 400 | 404 | 500, message: string): Response { - return c.json({ error: message }, status); -} -``` - -## SSE Streaming - -Always check `stream.closed` before writing. Use `stream.onAbort()` for cleanup. Hono's `streamSSE` callback receives an SSE writer: - -```typescript -app.get('/api/stream/:id', (c) => { - return streamSSE(c, async (stream) => { - stream.onAbort(() => { - transport.removeStream(conversationId, writer); - }); - // Write events: - if (!stream.closed) { - await stream.writeSSE({ data: JSON.stringify(event) }); - } - }); -}); -``` - -`SSETransport` in `src/adapters/web/transport.ts` manages the stream registry. `removeStream()` accepts an `expectedStream` reference to prevent race conditions (StrictMode double-mount). - -## Webhook Signature Verification - -```typescript -// ALWAYS use c.req.text() for raw webhook body — JSON.parse separately -const payload = await c.req.text(); -const signature = c.req.header('X-Hub-Signature-256') ?? ''; - -// timingSafeEqual prevents timing attacks -const hmac = createHmac('sha256', webhookSecret); -const digest = 'sha256=' + hmac.update(payload).digest('hex'); -const isValid = timingSafeEqual(Buffer.from(digest), Buffer.from(signature)); -``` - -Return 200 immediately for webhook events; process async. Never log the full signature. - -## Auto Port Allocation (Worktrees) - -`getPort()` from `@archon/core` returns: -- Main repo: `PORT` env var or `3090` -- Worktrees: hash-based port in range 3190–4089 (deterministic per worktree path) - -Same worktree always gets same port. Override with `PORT=4000` env var. - -## Static SPA Fallback - -```typescript -// Serve web dist; fall back to index.html for client-side routing -app.use('/*', serveStatic({ root: path.join(import.meta.dir, '../../web/dist') })); -app.get('*', (c) => c.html(/* index.html */)); -``` - -Use `import.meta.dir` (absolute) NOT relative paths — `bun --filter @archon/server start` changes CWD to `packages/server/`.
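The deterministic worktree-port behavior described above is easy to picture with a hash sketch (assumption: the real `getPort()` hashes the worktree path, but its exact hash and mapping may differ):

```typescript
import { createHash } from 'node:crypto';

// Sketch: map a worktree path into the 3190–4089 range (900 ports).
// Same path → same digest → same port, matching the documented behavior.
function worktreePort(worktreePath: string): number {
  const digest = createHash('sha256').update(worktreePath).digest();
  return 3190 + (digest.readUInt32BE(0) % 900);
}
```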
- -## Graceful Shutdown - -```typescript -process.on('SIGTERM', () => { - stopCleanupScheduler(); - void pool.close(); - process.exit(0); -}); -``` - -## Key API Routes - -| Method | Path | Purpose | -|--------|------|---------| -| GET | `/api/conversations` | List conversations | -| POST | `/api/conversations` | Create conversation | -| POST | `/api/conversations/:id/message` | Send message | -| GET | `/api/stream/:id` | SSE stream | -| GET | `/api/workflows` | List workflows | -| POST | `/api/workflows/validate` | Validate YAML (in-memory) | -| GET | `/api/workflows/:name` | Get single workflow | -| PUT | `/api/workflows/:name` | Save workflow | -| DELETE | `/api/workflows/:name` | Delete workflow | -| GET | `/api/commands` | List commands | -| POST | `/webhooks/github` | GitHub webhook | - -## Anti-patterns - -- Never use `c.req.json()` for webhooks — signature must be verified against raw body -- Never expose API keys in JSON error responses -- Never serve static files with relative paths (use `import.meta.dir`) -- Never skip the `stream.closed` check before writing SSE -- Never call platform adapters directly from route handlers — use `handleMessage()` + lock manager diff --git a/.claude/rules/testing.md b/.claude/rules/testing.md deleted file mode 100644 index 030f697539..0000000000 --- a/.claude/rules/testing.md +++ /dev/null @@ -1,105 +0,0 @@ ---- -paths: - - "**/*.test.ts" - - "**/*.spec.ts" ---- - -# Testing Conventions - -## CRITICAL: mock.module() Pollution Rules - -`mock.module()` permanently replaces modules in the **process-wide module cache**. `mock.restore()` does NOT undo it ([oven-sh/bun#7823](https://github.com/oven-sh/bun/issues/7823)). - -**Rules:** -1. **Never add `afterAll(() => mock.restore())` for `mock.module()` calls** — it has no effect -2. **Never have two test files `mock.module()` the same path with different implementations in the same `bun test` invocation** -3. **Use `spyOn()` for internal modules** — `spy.mockRestore()` DOES work for spies - -```typescript -// CORRECT: spy (restorable) -import * as git from '@archon/git'; -const spy = spyOn(git, 'checkout'); -spy.mockImplementation(async () => ({ ok: true, value: undefined })); -// afterEach: -spy.mockRestore(); - -// CORRECT: mock.module() for external deps (not restorable — isolate in separate test file) -mock.module('@slack/bolt', () => ({ App: mock(() => mockApp), LogLevel: { INFO: 'info' } })); -``` - -## Test Batching Per Package - -Each package splits tests into separate `bun test` invocations to prevent pollution: - -| Package | Batches | -|---------|---------| -| `@archon/core` | 7 batches (clients, handlers, db+utils, path-validation, cleanup-service, title-generator, workflows, orchestrator) | -| `@archon/workflows` | 5 batches | -| `@archon/adapters` | 3 batches (chat+community+forge-auth, github-adapter, github-context) | -| `@archon/isolation` | 3 batches | - -**Never run `bun test` from the repo root** — causes ~135 mock pollution failures. Always use: - -```bash -bun run test # Correct: per-package isolation via bun --filter '*' test -bun run test --watch # Watch mode (single package) -``` - -## Mock Pattern for Lazy Loggers - -All adapter/db/orchestrator files use lazy logger pattern. 
Mock before import: - -```typescript -// MUST come before import of the module under test -const mockLogger = { - fatal: mock(() => undefined), error: mock(() => undefined), - warn: mock(() => undefined), info: mock(() => undefined), - debug: mock(() => undefined), trace: mock(() => undefined), -}; -mock.module('@archon/paths', () => ({ createLogger: mock(() => mockLogger) })); - -import { SlackAdapter } from './adapter'; // Import AFTER mock -``` - -## Database Test Mocking - -```typescript -import { createQueryResult, mockPostgresDialect } from '../test/mocks/database'; - -const mockQuery = mock(() => Promise.resolve(createQueryResult([]))); -mock.module('./connection', () => ({ - pool: { query: mockQuery }, - getDialect: () => mockPostgresDialect, -})); - -// In tests: -mockQuery.mockResolvedValueOnce(createQueryResult([existingRow])); -mockQuery.mockClear(); // in beforeEach -``` - -## Test Structure - -```typescript -import { describe, test, expect, mock, beforeEach, afterEach } from 'bun:test'; - -describe('ComponentName', () => { - beforeEach(() => { - mockFn.mockClear(); // Reset call counts - }); - - test('does thing when condition', async () => { - mockQuery.mockResolvedValueOnce(createQueryResult([fixture])); - const result = await functionUnderTest(input); - expect(result).toEqual(expected); - expect(mockQuery).toHaveBeenCalledTimes(1); - }); -}); -``` - -## Anti-patterns - -- Never `import` a module before all `mock.module()` calls for its dependencies -- Never use `afterAll(() => mock.restore())` for `mock.module()` — it silently does nothing -- Never test with real database or filesystem in unit tests — always mock -- Never run `bun test` from the repo root -- Never add a new test file with conflicting `mock.module()` to an existing batch — create a new batch in the package's `package.json` test script diff --git a/.claude/rules/web-frontend.md b/.claude/rules/web-frontend.md deleted file mode 100644 index 7811997fde..0000000000 --- a/.claude/rules/web-frontend.md +++ /dev/null @@ -1,90 +0,0 @@ ---- -paths: - - "packages/web/**/*.tsx" - - "packages/web/**/*.ts" - - "packages/web/**/*.css" ---- - -# Web Frontend Conventions - -## Tech Stack - -- React 19 + Vite 6 + TypeScript -- Tailwind CSS v4 (CSS-first config) -- shadcn/ui components -- TanStack Query v5 for REST data -- React Router v7 (`react-router`, NOT `react-router-dom`) -- Manual `EventSource` for SSE streaming (no library) -- **Dark theme only** — no light mode toggle - -## Tailwind v4 Critical Differences - -```css -/* CORRECT: CSS-first import */ -@import 'tailwindcss'; -@import 'tw-animate-css'; /* NOT tailwindcss-animate */ - -/* CORRECT: theme variables in @theme inline block */ -@theme inline { - --color-surface: var(--surface); - --color-accent-bright: var(--accent-bright); -} - -/* WRONG: never use @tailwind base/components/utilities */ -``` - -Plugin in `vite.config.ts`: `import tailwindcss from '@tailwindcss/vite'` — uses Vite plugin, **not PostCSS**. `components.json` has blank `tailwind.config` for v4. - -## Color Palette (oklch) - -All custom colors are OKLCH. 
Key tokens (defined in `:root` in `index.css`): -- `--surface` (0.18): main surface -- `--surface-elevated` (0.22): cards, popovers -- `--background` (0.14): page background -- `--primary` / `--ring`: blue accent at oklch(0.65 0.18 250) -- `--text-primary` (0.93), `--text-secondary` (0.65), `--text-tertiary` (0.45) -- `--success` (green 155), `--warning` (yellow 75), `--error` (red 25) - -Use CSS variables via Tailwind utilities: `bg-surface`, `text-text-primary`, `border-border`, `text-accent-bright`, etc. - -## SSE Streaming Pattern - -`useSSE()` in `src/hooks/useSSE.ts` is the single SSE consumer. It: -- Opens `EventSource` to `/api/stream/{conversationId}` -- Batches text events (50ms flush timer) to reduce re-renders -- Flushes immediately before `tool_call`, `tool_result`, `workflow_dispatch` events -- Marks disconnected only on `CLOSED` state (not `CONNECTING` — avoids flicker) -- `handlersRef` pattern ensures stable EventSource with fresh handlers - -Event types: `text`, `tool_call`, `tool_result`, `error`, `conversation_lock`, `session_info`, `workflow_step`, `workflow_status`, `parallel_agent`, `workflow_artifact`, `dag_node`, `workflow_dispatch`, `workflow_output_preview`, `warning`, `retract`, `heartbeat`. - -## Routing - -```tsx -// CORRECT -import { BrowserRouter, Routes, Route } from 'react-router'; -// WRONG -import { BrowserRouter } from 'react-router-dom'; -``` - -Routes: `/` (Dashboard), `/chat`, `/chat/*`, `/workflows`, `/workflows/builder`, `/workflows/runs/:runId`, `/settings`. - -## API Client Pattern - -```typescript -// src/lib/api.ts exports SSE_BASE_URL and REST functions -import { SSE_BASE_URL } from '@/lib/api'; -// In dev: Vite proxies /api/* to localhost:{VITE_API_PORT} -// API port injected at build time: import.meta.env.VITE_API_PORT -``` - -TanStack Query `staleTime: 10_000`, `refetchOnWindowFocus: true`. - -## Anti-patterns - -- Never add a light mode — dark-only is intentional -- Never use `react-router-dom` — use `react-router` (v7) -- Never configure Tailwind in `tailwind.config.js/ts` — v4 is CSS-first -- Never use `tailwindcss-animate` — use `tw-animate-css` -- Never open a second `EventSource` per conversation — `useSSE()` handles it -- Never pass inline style objects for theme colors — use Tailwind classes with CSS variables diff --git a/.claude/rules/workflows.md b/.claude/rules/workflows.md deleted file mode 100644 index 99cf6f8913..0000000000 --- a/.claude/rules/workflows.md +++ /dev/null @@ -1,101 +0,0 @@ ---- -paths: - - "packages/workflows/**/*.ts" - - ".archon/workflows/**/*.yaml" - - ".archon/commands/**/*.md" ---- - -# Workflows Conventions - -## DAG Workflow Format - -All workflows use the DAG (Directed Acyclic Graph) format with `nodes:`. Loop nodes are supported as a node type within DAGs. - -```yaml -nodes: - - id: classify - prompt: "Is this a bug or feature? Answer JSON: {type: 'BUG'|'FEATURE'}" - output_format: {type: object, properties: {type: {type: string}}} - - id: implement - command: execute - depends_on: [classify] - when: "$classify.output.type == 'FEATURE'" - - id: run_lint - bash: "bun run lint" - depends_on: [implement] - - id: iterate - loop: - until: "COMPLETE" - max_iterations: 10 - prompt: "Iterate until the tests pass. Signal COMPLETE when done." 
- depends_on: [run_lint] -``` - -## Variable Substitution - -| Variable | Resolved to | -|----------|-------------| -| `$1`, `$2`, `$3` | Positional arguments from user message | -| `$ARGUMENTS` | All user arguments as single string | -| `$ARTIFACTS_DIR` | Pre-created external artifacts directory | -| `$WORKFLOW_ID` | Current workflow run ID | -| `$BASE_BRANCH` | Base branch from config or auto-detected | -| `$DOCS_DIR` | Documentation directory path (default: `docs/`) | -| `$nodeId.output` | Captured stdout/AI output from completed DAG node | - -## WorkflowDeps — Dependency Injection - -`@archon/workflows` has ZERO `@archon/core` dependency. Everything is injected: - -```typescript -interface WorkflowDeps { - store: IWorkflowStore; // DB abstraction - getAssistantClient: AssistantClientFactory; // Returns claude or codex client - loadConfig: (cwd: string) => Promise; -} - -// Core creates the adapter: -import { createWorkflowDeps } from '@archon/core/workflows/store-adapter'; -const deps = createWorkflowDeps(); -await executeWorkflow(deps, platform, conversationId, cwd, workflow, ...); -``` - -## DAG Node Types - -- `command:` — named file from `.archon/commands/`, AI-executed -- `prompt:` — inline prompt string, AI-executed -- `bash:` — shell script, no AI; stdout captured as `$nodeId.output`; default timeout 120000ms -- `script:` — inline code or named file from `.archon/scripts/`, runs via `runtime: bun` (`.ts`/`.js`) or `runtime: uv` (`.py`), no AI; stdout captured as `$nodeId.output`; supports `deps:` for dependency installation and `timeout:` (ms); runtime availability checked at load time with a warning if binary is missing - -DAG node options: `depends_on`, `when` (condition expression), `trigger_rule` (`all_success` | `one_success` | `none_failed_min_one_success` | `all_done`), `output_format` (JSON Schema, Claude only), `allowed_tools` / `denied_tools` (Claude only), `idle_timeout` (ms), `context: 'fresh'`, per-node `provider` and `model`, `deps` (script nodes only — dependency list), `runtime` (script nodes only — `'bun'` or `'uv'`). - -## Event Emitter for Observability - -```typescript -import { getWorkflowEventEmitter } from '@archon/workflows'; - -const emitter = getWorkflowEventEmitter(); -emitter.registerRun(runId, conversationId); - -// Subscribe (returns unsubscribe fn) -const unsubscribe = emitter.subscribeForConversation(conversationId, (event) => { - // event.type: 'step_started' | 'step_completed' | 'node_started' | ... -}); -``` - -Listener errors never propagate to the executor — fire-and-forget with internal catch. 
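As a mental model for the variable table above, a simplified substitution pass (illustrative only — the real resolver also injects run-context values such as `$ARTIFACTS_DIR` and `$BASE_BRANCH`):

```typescript
// Sketch: resolve $ARGUMENTS, $nodeId.output, and positional $1/$2 in a template.
function substitute(
  template: string,
  args: string[],
  nodeOutputs: Record<string, string>,
): string {
  return template
    .replace(/\$ARGUMENTS/g, args.join(' '))
    .replace(/\$(\w+)\.output/g, (_m: string, id: string) => nodeOutputs[id] ?? '')
    .replace(/\$(\d+)/g, (_m: string, n: string) => args[Number(n) - 1] ?? '');
}
```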
- -## Architecture - -- Model validation at load time — invalid provider/model combinations fail `parseWorkflow()` with clear error -- Resilient discovery — one broken YAML doesn't abort `discoverWorkflows()`; errors returned in `WorkflowLoadResult.errors` -- Bundled defaults embedded in binary builds; loaded from filesystem in source builds -- Repo workflows override bundled defaults by name -- Router fallback: if no `/invoke-workflow` produced → falls back to `archon-assist`; raw AI response only when `archon-assist` unavailable - -## Anti-patterns - -- Never import `@archon/core` from `@archon/workflows` (circular dependency) -- Never add `clearContext: true` to every step — context continuity is valuable; use sparingly -- Never put `output_format` on Codex nodes — it logs a warning and is ignored -- Never set `allowed_tools: undefined` expecting "no tools" — use `allowed_tools: []` for that diff --git a/.gitignore b/.gitignore index a2f33c5d5c..1ab645070c 100644 --- a/.gitignore +++ b/.gitignore @@ -105,3 +105,6 @@ packages/server/.env skills-lock.json test-results/ .archon/ralph/ + +# Keystores — must never be committed +*.jks diff --git a/CHANGELOG.md b/CHANGELOG.md index 29fb4e1166..a2201632b2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -179,7 +179,7 @@ Chat-first navigation redesign, DAG graph viewer, per-node MCP and skills, and e - Idle timeout not detecting stuck tool calls during execution (#649) - `commitAllChanges` failing on empty commits (#745) - Explicit base branch config now required for worktree creation (#686) -- Subprocess-level retry added to CodexClient (#641) +- Subprocess-level retry added to CodexProvider (#641) - Validate `cwd` query param against registered codebases (#630) - Server-internal paths redacted from `/api/config` response (#632) - SQLite conversations index missing `WHERE deleted_at IS NULL` (#629) @@ -231,7 +231,7 @@ DAG hardening, security fixes, validate-pr workflow, and worktree lifecycle mana - **`--json` flag for `workflow list`** — machine-readable workflow output (#594) - **`archon-validate-pr` workflow** with per-node idle timeout support (#635) - **Typed SessionMetadata** with Zod validation for safer metadata handling (#600) -- **`persistSession: false`** in ClaudeClient to avoid disk pollution from session transcripts (#626) +- **`persistSession: false`** in ClaudeProvider to avoid disk pollution from session transcripts (#626) - **DAG workflow for GitHub issue resolution** with structured node pipeline ### Changed diff --git a/CLAUDE.md b/CLAUDE.md index 0e902537dd..a2b9d8d973 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -68,7 +68,7 @@ These are implementation constraints, not slogans. Apply them by default. 
**SRP + ISP — Single Responsibility + Interface Segregation** - Keep each module and package focused on one concern -- Extend behavior by implementing existing narrow interfaces (`IPlatformAdapter`, `IAssistantClient`, `IDatabase`, `IWorkflowStore`) whenever possible +- Extend behavior by implementing existing narrow interfaces (`IPlatformAdapter`, `IAgentProvider`, `IDatabase`, `IWorkflowStore`) whenever possible - Avoid fat interfaces and "god modules" that mix policy, transport, and storage - Do not add unrelated methods to an existing interface — define a new one @@ -122,7 +122,7 @@ bun test --watch # Watch mode (single package) bun test packages/core/src/handlers/command-handler.test.ts # Single file ``` -**Test isolation (mock.module pollution):** Bun's `mock.module()` permanently replaces modules in the process-wide cache — `mock.restore()` does NOT undo it ([oven-sh/bun#7823](https://github.com/oven-sh/bun/issues/7823)). To prevent cross-file pollution, packages that have conflicting `mock.module()` calls split their tests into separate `bun test` invocations: `@archon/core` (7 batches), `@archon/workflows` (5), `@archon/adapters` (4), `@archon/isolation` (3). See each package's `package.json` for the exact splits. +**Test isolation (mock.module pollution):** Bun's `mock.module()` permanently replaces modules in the process-wide cache — `mock.restore()` does NOT undo it ([oven-sh/bun#7823](https://github.com/oven-sh/bun/issues/7823)). To prevent cross-file pollution, packages that have conflicting `mock.module()` calls split their tests into separate `bun test` invocations: `@archon/core` (7 batches), `@archon/workflows` (5), `@archon/adapters` (3), `@archon/isolation` (3). See each package's `package.json` for the exact splits. **Do NOT run `bun test` from the repo root** — it discovers all test files across all packages and runs them in one process, causing ~135 mock pollution failures. Always use `bun run test` (which uses `bun --filter '*' test` for per-package isolation). @@ -198,10 +198,6 @@ bun run cli workflow run implement --branch feature-auth "Add auth" # Opt out of isolation (run in live checkout) bun run cli workflow run quick-fix --no-worktree "Fix typo" -# Grant env-leak-gate consent during auto-registration (for repos whose .env -# contains sensitive keys). Audit-logged with actor: 'user-cli'. -bun run cli workflow run plan --cwd /path/to/leaky/repo --allow-env-keys "..." 
- # Show running workflows bun run cli workflow status @@ -266,9 +262,16 @@ packages/ │ ├── adapters/ # CLI adapter (stdout output) │ ├── commands/ # CLI command implementations │ └── cli.ts # CLI entry point +├── providers/ # @archon/providers - AI agent providers (SDK deps live here) +│ └── src/ +│ ├── types.ts # Contract layer (IAgentProvider, SendQueryOptions, MessageChunk — ZERO SDK deps) +│ ├── factory.ts # getAgentProvider() switch (built-in: claude, codex) +│ ├── errors.ts # UnknownProviderError +│ ├── claude/ # ClaudeProvider + parseClaudeConfig + MCP/hooks/skills translation +│ ├── codex/ # CodexProvider + parseCodexConfig + binary-resolver +│ └── index.ts # Package exports ├── core/ # @archon/core - Shared business logic │ └── src/ -│ ├── clients/ # AI SDK clients (Claude, Codex) │ ├── config/ # YAML config loading │ ├── db/ # Database connection, queries │ ├── handlers/ # Command handler (slash commands) @@ -289,7 +292,7 @@ packages/ │ ├── executor.ts # Workflow execution orchestrator (executeWorkflow) │ ├── dag-executor.ts # DAG-specific execution logic │ ├── store.ts # IWorkflowStore interface (database abstraction) -│ ├── deps.ts # WorkflowDeps injection types (IWorkflowPlatform, IWorkflowAssistantClient) +│ ├── deps.ts # WorkflowDeps injection types (IWorkflowPlatform, imports from @archon/providers/types) │ ├── event-emitter.ts # Workflow observability events │ ├── logger.ts # JSONL file logger │ ├── validator.ts # Resource validation (command files, MCP configs, skill dirs) @@ -401,10 +404,11 @@ import type { DagNode, WorkflowDefinition } from '@/lib/api'; **Package Split:** - **@archon/paths**: Path resolution utilities, Pino logger factory, web dist cache path (`getWebDistDir`), CWD env stripper (`stripCwdEnv`, `strip-cwd-env-boot`) (no @archon/* deps; `pino` and `dotenv` are allowed external deps) - **@archon/git**: Git operations - worktrees, branches, repos, exec wrappers (depends only on @archon/paths) +- **@archon/providers**: AI agent providers (Claude, Codex) — owns SDK deps, `IAgentProvider` interface, `sendQuery()` contract, and provider-specific option translation. `@archon/providers/types` is the contract subpath (zero SDK deps, zero runtime side effects) that `@archon/workflows` imports from. Providers receive raw `nodeConfig` + `assistantConfig` and translate to SDK-specific options internally. 
- **@archon/isolation**: Worktree isolation types, providers, resolver, error classifiers (depends only on @archon/git + @archon/paths) -- **@archon/workflows**: Workflow engine - loader, router, executor, DAG, logger, bundled defaults (depends only on @archon/git + @archon/paths + @hono/zod-openapi + zod; DB/AI/config injected via `WorkflowDeps`) +- **@archon/workflows**: Workflow engine - loader, router, executor, DAG, logger, bundled defaults (depends only on @archon/git + @archon/paths + @archon/providers/types + @hono/zod-openapi + zod; DB/AI/config injected via `WorkflowDeps`) - **@archon/cli**: Command-line interface for running workflows and starting the web UI server (depends on @archon/server + @archon/adapters for the serve command) -- **@archon/core**: Business logic, database, orchestration, AI clients (provides `createWorkflowStore()` adapter bridging core DB → `IWorkflowStore`) +- **@archon/core**: Business logic, database, orchestration (depends on @archon/providers for AI; provides `createWorkflowStore()` adapter bridging core DB → `IWorkflowStore`) - **@archon/adapters**: Platform adapters for Slack, Telegram, GitHub, Discord (depends on @archon/core) - **@archon/server**: OpenAPIHono HTTP server (Zod + OpenAPI spec generation via `@hono/zod-openapi`), Web adapter (SSE), API routes, Web UI static serving (depends on @archon/adapters) - **@archon/web**: React frontend (Vite + Tailwind v4 + shadcn/ui + Zustand), SSE streaming to server. `WorkflowRunStatus`, `WorkflowDefinition`, and `DagNode` are all derived from `src/lib/api.generated.d.ts` (generated from the OpenAPI spec via `bun generate:types`; never import from `@archon/workflows`) @@ -429,7 +433,8 @@ import type { DagNode, WorkflowDefinition } from '@/lib/api'; **2. Command Handler** (`packages/core/src/handlers/`) - Process slash commands (deterministic, no AI) -- Commands: `/command-set`, `/load-commands`, `/clone`, `/getcwd`, `/setcwd`, `/repos`, `/repo`, `/repo-remove`, `/worktree`, `/workflow`, `/status`, `/commands`, `/help`, `/reset`, `/reset-context`, `/init` +- The orchestrator treats only these top-level commands as deterministic: `/help`, `/status`, `/reset`, `/workflow`, `/register-project`, `/update-project`, `/remove-project`, `/commands`, `/init`, `/worktree` +- `/workflow` handles subcommands like `list`, `run`, `status`, `cancel`, `resume`, `abandon`, `approve`, `reject` - Update database, perform operations, return responses **3. Orchestrator** (`packages/core/src/orchestrator/`) @@ -439,10 +444,10 @@ import type { DagNode, WorkflowDefinition } from '@/lib/api'; - Session management: Create new or resume existing - Stream AI responses to platform -**4. AI Assistant Clients** (`packages/core/src/clients/`) -- Implement `IAssistantClient` interface -- **ClaudeClient**: `@anthropic-ai/claude-agent-sdk` -- **CodexClient**: `@openai/codex-sdk` +**4. 
AI Agent Providers** (`packages/providers/src/`) +- Implement `IAgentProvider` interface +- **ClaudeProvider**: `@anthropic-ai/claude-agent-sdk` +- **CodexProvider**: `@openai/codex-sdk` - Streaming: `for await (const event of events) { await platform.send(event) }` ### Configuration @@ -530,7 +535,7 @@ curl http://localhost:3637/api/conversations/<id>/messages ``` ~/.archon/ ├── workspaces/owner/repo/ # Project-centric layout -│ ├── source/ # Clone (from /clone) or symlink → local path +│ ├── source/ # Cloned repo or symlink → local path │ ├── worktrees/ # Git worktrees for this project │ ├── artifacts/ # Workflow artifacts (NEVER in git) │ │ ├── runs/{id}/ # Per-run artifacts ($ARTIFACTS_DIR) @@ -561,7 +566,7 @@ curl http://localhost:3637/api/conversations/<id>/messages **Quick reference:** - **Platform Adapters**: Implement `IPlatformAdapter`, handle auth, polling/webhooks -- **AI Clients**: Implement `IAssistantClient`, session management, streaming +- **AI Providers**: Implement `IAgentProvider`, session management, streaming - **Slash Commands**: Add to command-handler.ts, update database, no AI - **Database Operations**: Use `IDatabase` interface (supports PostgreSQL and SQLite via adapters) @@ -675,8 +680,8 @@ async function createSession(conversationId: string, codebaseId: string) { 1. **Codebase Commands** (per-repo): - Stored in `.archon/commands/` (plain text/markdown) - - Auto-detected via `/clone` or `/load-commands <path>` - - Loaded by `/clone` or `/load-commands`, invoked by AI via orchestrator routing + - Discovered from the repository `.archon/commands/` directory + - Surfaced via `GET /api/commands` for the workflow builder and invoked by workflow `command:` nodes 2. **Workflows** (YAML-based): - Stored in `.archon/workflows/` (searched recursively) @@ -759,9 +764,11 @@ Pattern: Use `classifyIsolationError()` (from `@archon/isolation`) to map git er **Codebases:** - `GET /api/codebases` / `GET /api/codebases/:id` - List / fetch codebases -- `POST /api/codebases` - Register a codebase (clone or local path); body accepts `allowEnvKeys` for the env-leak gate -- `PATCH /api/codebases/:id` - Flip the `allow_env_keys` consent bit; body: `{ allowEnvKeys: boolean }`.
Audit-logged at `warn` level on every grant/revoke (`env_leak_consent_granted` / `env_leak_consent_revoked`) with `codebaseId`, `path`, `files`, `keys`, `scanStatus`, `actor` +- `POST /api/codebases` - Register a codebase (clone or local path) - `DELETE /api/codebases/:id` - Delete a codebase and clean up resources +- `GET /api/codebases/:id/env` - List env var keys for a codebase (never returns values) +- `PUT /api/codebases/:id/env` / `DELETE /api/codebases/:id/env/:key` - Upsert / delete a single codebase env var +- `GET /api/codebases/:id/environments` - List tracked isolation environments for a codebase **Artifact Files:** - `GET /api/artifacts/:runId/*` - Serve a workflow artifact file by run ID and relative path; returns `text/markdown` for `.md` files, `text/plain` otherwise; 400 on path traversal (`..`), 404 if run or file not found @@ -770,6 +777,7 @@ Pattern: Use `classifyIsolationError()` (from `@archon/isolation`) to map git er - `GET /api/commands` - List available command names (bundled + project-defined); optional `?cwd=`; returns `{ commands: [{ name, source: 'bundled' | 'project' }] }` **System:** +- `GET /api/health` - Health check with adapter/system status - `GET /api/update-check` - Check for available updates; returns `{ updateAvailable, currentVersion, latestVersion, releaseUrl }`; skips GitHub API call for non-binary builds **OpenAPI Spec:** diff --git a/Dockerfile b/Dockerfile index da4783e019..139b3efaf7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -24,6 +24,7 @@ COPY packages/docs-web/package.json ./packages/docs-web/ COPY packages/git/package.json ./packages/git/ COPY packages/isolation/package.json ./packages/isolation/ COPY packages/paths/package.json ./packages/paths/ +COPY packages/providers/package.json ./packages/providers/ COPY packages/server/package.json ./packages/server/ COPY packages/web/package.json ./packages/web/ COPY packages/workflows/package.json ./packages/workflows/ @@ -130,6 +131,7 @@ COPY packages/docs-web/package.json ./packages/docs-web/ COPY packages/git/package.json ./packages/git/ COPY packages/isolation/package.json ./packages/isolation/ COPY packages/paths/package.json ./packages/paths/ +COPY packages/providers/package.json ./packages/providers/ COPY packages/server/package.json ./packages/server/ COPY packages/web/package.json ./packages/web/ COPY packages/workflows/package.json ./packages/workflows/ @@ -144,6 +146,7 @@ COPY packages/core/ ./packages/core/ COPY packages/git/ ./packages/git/ COPY packages/isolation/ ./packages/isolation/ COPY packages/paths/ ./packages/paths/ +COPY packages/providers/ ./packages/providers/ COPY packages/server/ ./packages/server/ COPY packages/workflows/ ./packages/workflows/ diff --git a/bun.lock b/bun.lock index 04517f4fbf..3ade585b26 100644 --- a/bun.lock +++ b/bun.lock @@ -6,6 +6,7 @@ "name": "archon", "dependencies": { "@anthropic-ai/claude-agent-sdk": "^0.2.74", + "@openai/codex-sdk": "^0.121.0", }, "devDependencies": { "@eslint/js": "^9.39.1", @@ -23,7 +24,7 @@ }, "packages/adapters": { "name": "@archon/adapters", - "version": "0.3.5", + "version": "0.3.6", "dependencies": { "@archon/core": "workspace:*", "@archon/git": "workspace:*", @@ -41,7 +42,7 @@ }, "packages/cli": { "name": "@archon/cli", - "version": "0.3.5", + "version": "0.3.6", "bin": { "archon": "./src/cli.ts", }, @@ -51,6 +52,7 @@ "@archon/git": "workspace:*", "@archon/isolation": "workspace:*", "@archon/paths": "workspace:*", + "@archon/providers": "workspace:*", "@archon/server": "workspace:*", "@archon/workflows": 
"workspace:*", "@clack/prompts": "^1.0.0", @@ -62,14 +64,13 @@ }, "packages/core": { "name": "@archon/core", - "version": "0.3.5", + "version": "0.3.6", "dependencies": { - "@anthropic-ai/claude-agent-sdk": "^0.2.89", "@archon/git": "workspace:*", "@archon/isolation": "workspace:*", "@archon/paths": "workspace:*", + "@archon/providers": "workspace:*", "@archon/workflows": "workspace:*", - "@openai/codex-sdk": "^0.116.0", "pg": "^8.11.0", "zod": "^3", }, @@ -83,7 +84,7 @@ }, "packages/docs-web": { "name": "@archon/docs-web", - "version": "0.3.5", + "version": "0.3.6", "dependencies": { "@astrojs/starlight": "^0.38.0", "astro": "^6.1.0", @@ -92,7 +93,7 @@ }, "packages/git": { "name": "@archon/git", - "version": "0.3.5", + "version": "0.3.6", "dependencies": { "@archon/paths": "workspace:*", }, @@ -102,7 +103,7 @@ }, "packages/isolation": { "name": "@archon/isolation", - "version": "0.3.5", + "version": "0.3.6", "dependencies": { "@archon/git": "workspace:*", "@archon/paths": "workspace:*", @@ -113,7 +114,7 @@ }, "packages/paths": { "name": "@archon/paths", - "version": "0.3.5", + "version": "0.3.6", "dependencies": { "dotenv": "^17", "pino": "^9", @@ -123,14 +124,30 @@ "typescript": "^5.0.0", }, }, + "packages/providers": { + "name": "@archon/providers", + "version": "0.3.6", + "dependencies": { + "@anthropic-ai/claude-agent-sdk": "^0.2.89", + "@archon/paths": "workspace:*", + "@openai/codex-sdk": "^0.116.0", + }, + "devDependencies": { + "pino": "^9", + }, + "peerDependencies": { + "typescript": "^5.0.0", + }, + }, "packages/server": { "name": "@archon/server", - "version": "0.3.5", + "version": "0.3.6", "dependencies": { "@archon/adapters": "workspace:*", "@archon/core": "workspace:*", "@archon/git": "workspace:*", "@archon/paths": "workspace:*", + "@archon/providers": "workspace:*", "@archon/workflows": "workspace:*", "@hono/zod-openapi": "^0.19.6", "dotenv": "^17.2.3", @@ -143,7 +160,7 @@ }, "packages/web": { "name": "@archon/web", - "version": "0.3.5", + "version": "0.3.6", "dependencies": { "@dagrejs/dagre": "^2.0.4", "@radix-ui/react-alert-dialog": "^1.1.15", @@ -195,10 +212,11 @@ }, "packages/workflows": { "name": "@archon/workflows", - "version": "0.3.5", + "version": "0.3.6", "dependencies": { "@archon/git": "workspace:*", "@archon/paths": "workspace:*", + "@archon/providers": "workspace:*", "@hono/zod-openapi": "^0.19.6", "zod": "^3.25.28", }, @@ -231,6 +249,8 @@ "@archon/paths": ["@archon/paths@workspace:packages/paths"], + "@archon/providers": ["@archon/providers@workspace:packages/providers"], + "@archon/server": ["@archon/server@workspace:packages/server"], "@archon/web": ["@archon/web@workspace:packages/web"], @@ -565,21 +585,21 @@ "@open-draft/until": ["@open-draft/until@2.1.0", "", {}, "sha512-U69T3ItWHvLwGg5eJ0n3I62nWuE6ilHlmz7zM0npLBRvPRd7e6NYmg54vvRtP5mZG7kZqZCFVdsTWo7BPtBujg=="], - "@openai/codex": ["@openai/codex@0.116.0", "", { "optionalDependencies": { "@openai/codex-darwin-arm64": "npm:@openai/codex@0.116.0-darwin-arm64", "@openai/codex-darwin-x64": "npm:@openai/codex@0.116.0-darwin-x64", "@openai/codex-linux-arm64": "npm:@openai/codex@0.116.0-linux-arm64", "@openai/codex-linux-x64": "npm:@openai/codex@0.116.0-linux-x64", "@openai/codex-win32-arm64": "npm:@openai/codex@0.116.0-win32-arm64", "@openai/codex-win32-x64": "npm:@openai/codex@0.116.0-win32-x64" }, "bin": { "codex": "bin/codex.js" } }, "sha512-K6q9P2ZmpnzGmpS6Ybjvsdtvu8AbJx3f/Z4KmjH1u85StSS9TWMSQB8z0PPObKMejbtiIkHwhGyEIHi4iBYjig=="], + "@openai/codex": ["@openai/codex@0.121.0", "", { "optionalDependencies": 
{ "@openai/codex-darwin-arm64": "npm:@openai/codex@0.121.0-darwin-arm64", "@openai/codex-darwin-x64": "npm:@openai/codex@0.121.0-darwin-x64", "@openai/codex-linux-arm64": "npm:@openai/codex@0.121.0-linux-arm64", "@openai/codex-linux-x64": "npm:@openai/codex@0.121.0-linux-x64", "@openai/codex-win32-arm64": "npm:@openai/codex@0.121.0-win32-arm64", "@openai/codex-win32-x64": "npm:@openai/codex@0.121.0-win32-x64" }, "bin": { "codex": "bin/codex.js" } }, "sha512-kCJ2NeATd4QBQRmqV04ymdN1ZU3MSwnJQDm/KzjpuzGvCuUVEn7no/T2mRyxQ2x77AACqriNOyPPoM/yufyvNg=="], - "@openai/codex-darwin-arm64": ["@openai/codex@0.116.0-darwin-arm64", "", { "os": "darwin", "cpu": "arm64" }, "sha512-WkdL083p8uMeASpg8bwV0DPGgzkm48LjN3MyU2m/YukujbiLnknAmG29O2q2rFCLm0oLSDIGUK8EnXA4ZcAF9Q=="], + "@openai/codex-darwin-arm64": ["@openai/codex@0.121.0-darwin-arm64", "", { "os": "darwin", "cpu": "arm64" }, "sha512-ZyBqIB6Fb4I0hGb/h65Vu7ePYjHSmGiqqfm+/1djEuxDPkqjfi4wkxYxNYNY+6najyNGN4UijOSTTf19eDCrqw=="], - "@openai/codex-darwin-x64": ["@openai/codex@0.116.0-darwin-x64", "", { "os": "darwin", "cpu": "x64" }, "sha512-Ax8uTwYSNIwGrzcNRcn0jJQhZzNcKGDbbn00Emde7gGOemjSLhRALjUaKjckAaW5xWnNqHTGdtzzPB4phNlDYg=="], + "@openai/codex-darwin-x64": ["@openai/codex@0.121.0-darwin-x64", "", { "os": "darwin", "cpu": "x64" }, "sha512-1/OAtdkAZ5yPI3xqaEFlHuPziS1yCqL2gOZdswE7HTmmwpIxi6Z3FCo60JWDPluIp89z4tftdjq73/OCN0YVcw=="], - "@openai/codex-linux-arm64": ["@openai/codex@0.116.0-linux-arm64", "", { "os": "linux", "cpu": "arm64" }, "sha512-X7cL8rBSGDB+RSZc2FoKiqcMVeLPMmo06bkss/en4lLQsV1XG2DZI56WuXg92IOX3SjYl6Av/eOWgsb1t3UeLQ=="], + "@openai/codex-linux-arm64": ["@openai/codex@0.121.0-linux-arm64", "", { "os": "linux", "cpu": "arm64" }, "sha512-2UgMmdo237o7SCMsfb529cOSEM2HFUgN6OBkv5SBLwfNY1NO2Ex6JnUjlppEXlX6/4cXfZ5qjDghVz5j/+B9zw=="], - "@openai/codex-linux-x64": ["@openai/codex@0.116.0-linux-x64", "", { "os": "linux", "cpu": "x64" }, "sha512-S9InOgJT3tj6uQp55NqrCA1k5tklwFaH00JdC2ElbRmxchm7ard4WxHSJZX9TiY8enj4cQoLIC04NFTUCO+/PQ=="], + "@openai/codex-linux-x64": ["@openai/codex@0.121.0-linux-x64", "", { "os": "linux", "cpu": "x64" }, "sha512-vlpNJXIqss800J+32Vy7TUZzv31n61b45OLxmsVQGFkTNLJcjFrj9jDUC7I62eC4F16gLioilefNfv4CdJQOEw=="], - "@openai/codex-sdk": ["@openai/codex-sdk@0.116.0", "", { "dependencies": { "@openai/codex": "0.116.0" } }, "sha512-qrn1Pu5G1GJ9w4m/Lk3L3466ulMGG9SfyR0LPAaXdisuQI1rqgoUOuoZ4byX7cCzn0x1g2+WPc0apZgjMEK04Q=="], + "@openai/codex-sdk": ["@openai/codex-sdk@0.121.0", "", { "dependencies": { "@openai/codex": "0.121.0" } }, "sha512-LfDbIBIrRYya6Y+zR8i5ci+wGx8zyTpzTy9iSeRTzZnb4N8Cn30cW+un1Vd2JfjoWhxGXcOTpXy8sSjlSeyvKA=="], - "@openai/codex-win32-arm64": ["@openai/codex@0.116.0-win32-arm64", "", { "os": "win32", "cpu": "arm64" }, "sha512-kX2oAUzkgZX9OsYpd4omv9IGf+9VWj4Vy3UtIAnQKBu1DTSzmTJmXDuDn87mkyUciSZadm2QbeqQQzm2NC0NYw=="], + "@openai/codex-win32-arm64": ["@openai/codex@0.121.0-win32-arm64", "", { "os": "win32", "cpu": "arm64" }, "sha512-m88q4f3XI5npn1t6OG0nWGHWWAjO5FgjRwxh4hdujbLO6t9CiCNfhfPZIOSsoATbrCNwLC+6S77m3cjbNToPNg=="], - "@openai/codex-win32-x64": ["@openai/codex@0.116.0-win32-x64", "", { "os": "win32", "cpu": "x64" }, "sha512-6sBIMOoA9FNuxQvCCnK0P548Wqrlk3I9SMdtOCUg2zYzYU7jOF2mWS1VpRQ6R+Jvo2x50dxeJZ+W37dBmXfprw=="], + "@openai/codex-win32-x64": ["@openai/codex@0.121.0-win32-x64", "", { "os": "win32", "cpu": "x64" }, "sha512-Fp0ecVOyM+VcBi/y4HVvRzhifO9YqRiHzhV3rhtAppC7flh22WPguLC4kmvXYAR0p3RPzbo35M2CedWnkOT+cw=="], "@oslojs/encoding": ["@oslojs/encoding@1.1.0", "", {}, 
"sha512-70wQhgYmndg4GCPxPPxPGevRKqTIJ2Nh4OkiMWmDAVYsTQ+Ta7Sq+rPevXyXGdzr30/qZBnyOalCszoMxlyldQ=="], @@ -2437,7 +2457,9 @@ "@antfu/ni/tinyexec": ["tinyexec@1.0.2", "", {}, "sha512-W/KYk+NFhkmsYpuHq5JykngiOCnxeVL8v8dFnqxSD8qEEdRfXk1SDM6JzNqcERbcGYj9tMrDQBYV9cjgnunFIg=="], - "@archon/core/@anthropic-ai/claude-agent-sdk": ["@anthropic-ai/claude-agent-sdk@0.2.89", "", { "dependencies": { "@anthropic-ai/sdk": "^0.74.0", "@modelcontextprotocol/sdk": "^1.27.1" }, "optionalDependencies": { "@img/sharp-darwin-arm64": "^0.34.2", "@img/sharp-darwin-x64": "^0.34.2", "@img/sharp-linux-arm": "^0.34.2", "@img/sharp-linux-arm64": "^0.34.2", "@img/sharp-linux-x64": "^0.34.2", "@img/sharp-linuxmusl-arm64": "^0.34.2", "@img/sharp-linuxmusl-x64": "^0.34.2", "@img/sharp-win32-arm64": "^0.34.2", "@img/sharp-win32-x64": "^0.34.2" }, "peerDependencies": { "zod": "^4.0.0" } }, "sha512-/9W0lyBGuGHw1uu7pQafsp6BLpxfqCv1QYE0Z/eZTX6lGHht4j4Q+O3UImzjsiyEE9cGkOAwZBGAEHDEqt+QUA=="], + "@archon/providers/@anthropic-ai/claude-agent-sdk": ["@anthropic-ai/claude-agent-sdk@0.2.89", "", { "dependencies": { "@anthropic-ai/sdk": "^0.74.0", "@modelcontextprotocol/sdk": "^1.27.1" }, "optionalDependencies": { "@img/sharp-darwin-arm64": "^0.34.2", "@img/sharp-darwin-x64": "^0.34.2", "@img/sharp-linux-arm": "^0.34.2", "@img/sharp-linux-arm64": "^0.34.2", "@img/sharp-linux-x64": "^0.34.2", "@img/sharp-linuxmusl-arm64": "^0.34.2", "@img/sharp-linuxmusl-x64": "^0.34.2", "@img/sharp-win32-arm64": "^0.34.2", "@img/sharp-win32-x64": "^0.34.2" }, "peerDependencies": { "zod": "^4.0.0" } }, "sha512-/9W0lyBGuGHw1uu7pQafsp6BLpxfqCv1QYE0Z/eZTX6lGHht4j4Q+O3UImzjsiyEE9cGkOAwZBGAEHDEqt+QUA=="], + + "@archon/core/@openai/codex-sdk": ["@openai/codex-sdk@0.116.0", "", { "dependencies": { "@openai/codex": "0.116.0" } }, "sha512-qrn1Pu5G1GJ9w4m/Lk3L3466ulMGG9SfyR0LPAaXdisuQI1rqgoUOuoZ4byX7cCzn0x1g2+WPc0apZgjMEK04Q=="], "@astrojs/markdown-remark/remark-parse": ["remark-parse@11.0.0", "", { "dependencies": { "@types/mdast": "^4.0.0", "mdast-util-from-markdown": "^2.0.0", "micromark-util-types": "^2.0.0", "unified": "^11.0.0" } }, "sha512-FCxlKLNGknS5ba/1lmpYijMUzX2esxW5xQqjWxw2eHFfS2MSdaHVINFmhjo+qN1WhZhNimq0dZATN9pH0IDrpA=="], @@ -2737,6 +2759,8 @@ "yargs/yargs-parser": ["yargs-parser@21.1.1", "", {}, "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw=="], + "@archon/core/@openai/codex-sdk/@openai/codex": ["@openai/codex@0.116.0", "", { "optionalDependencies": { "@openai/codex-darwin-arm64": "npm:@openai/codex@0.116.0-darwin-arm64", "@openai/codex-darwin-x64": "npm:@openai/codex@0.116.0-darwin-x64", "@openai/codex-linux-arm64": "npm:@openai/codex@0.116.0-linux-arm64", "@openai/codex-linux-x64": "npm:@openai/codex@0.116.0-linux-x64", "@openai/codex-win32-arm64": "npm:@openai/codex@0.116.0-win32-arm64", "@openai/codex-win32-x64": "npm:@openai/codex@0.116.0-win32-x64" }, "bin": { "codex": "bin/codex.js" } }, "sha512-K6q9P2ZmpnzGmpS6Ybjvsdtvu8AbJx3f/Z4KmjH1u85StSS9TWMSQB8z0PPObKMejbtiIkHwhGyEIHi4iBYjig=="], + "@astrojs/markdown-remark/unified/bail": ["bail@2.0.2", "", {}, "sha512-0xO6mYd7JB2YesxDKplafRpsiOzPt9V02ddPCLbY1xYGPOX24NTyN50qnUxgCPcSoYMhKpAuBTjQoRZCAkUDRw=="], "@astrojs/markdown-remark/unified/is-plain-obj": ["is-plain-obj@4.1.0", "", {}, "sha512-+Pgi+vMuUNkJyExiMBt5IlFoMyKnr5zhJ4Uspz58WOhBF5QoIZkFyNHIbBAtHwzVAgk5RtndVNsDRN61/mmDqg=="], @@ -3065,6 +3089,18 @@ "yargs/string-width/strip-ansi": ["strip-ansi@6.0.1", "", { "dependencies": { "ansi-regex": "^5.0.1" } }, 
"sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A=="], + "@archon/core/@openai/codex-sdk/@openai/codex/@openai/codex-darwin-arm64": ["@openai/codex@0.116.0-darwin-arm64", "", { "os": "darwin", "cpu": "arm64" }, "sha512-WkdL083p8uMeASpg8bwV0DPGgzkm48LjN3MyU2m/YukujbiLnknAmG29O2q2rFCLm0oLSDIGUK8EnXA4ZcAF9Q=="], + + "@archon/core/@openai/codex-sdk/@openai/codex/@openai/codex-darwin-x64": ["@openai/codex@0.116.0-darwin-x64", "", { "os": "darwin", "cpu": "x64" }, "sha512-Ax8uTwYSNIwGrzcNRcn0jJQhZzNcKGDbbn00Emde7gGOemjSLhRALjUaKjckAaW5xWnNqHTGdtzzPB4phNlDYg=="], + + "@archon/core/@openai/codex-sdk/@openai/codex/@openai/codex-linux-arm64": ["@openai/codex@0.116.0-linux-arm64", "", { "os": "linux", "cpu": "arm64" }, "sha512-X7cL8rBSGDB+RSZc2FoKiqcMVeLPMmo06bkss/en4lLQsV1XG2DZI56WuXg92IOX3SjYl6Av/eOWgsb1t3UeLQ=="], + + "@archon/core/@openai/codex-sdk/@openai/codex/@openai/codex-linux-x64": ["@openai/codex@0.116.0-linux-x64", "", { "os": "linux", "cpu": "x64" }, "sha512-S9InOgJT3tj6uQp55NqrCA1k5tklwFaH00JdC2ElbRmxchm7ard4WxHSJZX9TiY8enj4cQoLIC04NFTUCO+/PQ=="], + + "@archon/core/@openai/codex-sdk/@openai/codex/@openai/codex-win32-arm64": ["@openai/codex@0.116.0-win32-arm64", "", { "os": "win32", "cpu": "arm64" }, "sha512-kX2oAUzkgZX9OsYpd4omv9IGf+9VWj4Vy3UtIAnQKBu1DTSzmTJmXDuDn87mkyUciSZadm2QbeqQQzm2NC0NYw=="], + + "@archon/core/@openai/codex-sdk/@openai/codex/@openai/codex-win32-x64": ["@openai/codex@0.116.0-win32-x64", "", { "os": "win32", "cpu": "x64" }, "sha512-6sBIMOoA9FNuxQvCCnK0P548Wqrlk3I9SMdtOCUg2zYzYU7jOF2mWS1VpRQ6R+Jvo2x50dxeJZ+W37dBmXfprw=="], + "@dotenvx/dotenvx/execa/onetime/mimic-fn": ["mimic-fn@2.1.0", "", {}, "sha512-OqbOk5oEQeAZ8WXWydlu9HJjz9WVdEIvamMCcXmuqUYjTknH/sqsWvhQ3vgwKFRR1HpjvNBKQ37nbJgYzGqGcg=="], "@inquirer/core/wrap-ansi/string-width/emoji-regex": ["emoji-regex@8.0.0", "", {}, "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A=="], diff --git a/eslint.config.mjs b/eslint.config.mjs index 69bf635bd5..a7ba5b4c74 100644 --- a/eslint.config.mjs +++ b/eslint.config.mjs @@ -20,6 +20,7 @@ export default tseslint.config( '**/*.js', '*.mjs', '**/*.test.ts', + '**/src/test/**', // Test helper files (mock factories, fixtures) '*.d.ts', // Root-level declaration files (not in tsconfig project scope) '**/*.generated.d.ts', // Auto-generated declaration files (e.g. 
openapi-typescript output) 'packages/web/vite.config.ts', // Vite config doesn't need type-checked linting diff --git a/homebrew/archon.rb b/homebrew/archon.rb index 59c801c015..0bac58a339 100644 --- a/homebrew/archon.rb +++ b/homebrew/archon.rb @@ -7,28 +7,28 @@ class Archon < Formula desc "Remote agentic coding platform - control AI assistants from anywhere" homepage "https://github.com/coleam00/Archon" - version "0.3.5" + version "0.3.6" license "MIT" on_macos do on_arm do url "https://github.com/coleam00/Archon/releases/download/v#{version}/archon-darwin-arm64" - sha256 "2c2065e580a085baaea02504cb5451be3f68e0d9fdb13a364cd45194d5b22de1" + sha256 "96b6dac50b046eece9eddbb988a0c39b4f9a0e2faac66e49b977ba6360069e86" end on_intel do url "https://github.com/coleam00/Archon/releases/download/v#{version}/archon-darwin-x64" - sha256 "515aca3b2bc30d3b5d4dfb67c04648f70b66e8ed345ea6ab039e76e6578e82fe" + sha256 "09f1dbe12417b4300b7b07b531eb7391a286305f8d4eafc11e7f61f5d26eb8eb" end end on_linux do on_arm do url "https://github.com/coleam00/Archon/releases/download/v#{version}/archon-linux-arm64" - sha256 "96920d98ae0d4dc7ef78e6de4f9018a9ba2031b9c2b010fd5d748d9513c49f60" + sha256 "80b06a6ff699ec57cd4a3e49cfe7b899a3e8212688d70285f5a887bf10086731" end on_intel do url "https://github.com/coleam00/Archon/releases/download/v#{version}/archon-linux-x64" - sha256 "80e7d115da424d5ee47b7db773382c9b8d0db728408f9815c05081872da6b74f" + sha256 "09f5dac6db8037ed6f3e5b7e9c5eb8e37f19822a4ed2bf4cd7e654780f9d00de" end end diff --git a/package.json b/package.json index b296d638ca..3dcb4479a2 100644 --- a/package.json +++ b/package.json @@ -49,6 +49,7 @@ "test-exclude": "^7.0.1" }, "dependencies": { - "@anthropic-ai/claude-agent-sdk": "^0.2.74" + "@anthropic-ai/claude-agent-sdk": "^0.2.74", + "@openai/codex-sdk": "^0.121.0" } } diff --git a/packages/cli/package.json b/packages/cli/package.json index bd8c7390bf..f39e530ffd 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -17,6 +17,7 @@ "@archon/git": "workspace:*", "@archon/isolation": "workspace:*", "@archon/paths": "workspace:*", + "@archon/providers": "workspace:*", "@archon/server": "workspace:*", "@archon/workflows": "workspace:*", "@clack/prompts": "^1.0.0", diff --git a/packages/cli/src/cli.ts b/packages/cli/src/cli.ts index d7dedf4810..f64416369c 100755 --- a/packages/cli/src/cli.ts +++ b/packages/cli/src/cli.ts @@ -125,9 +125,6 @@ Options: --json Output machine-readable JSON (for workflow list) --workflow Workflow to run for 'continue' (default: archon-assist) --no-context Skip context injection for 'continue' - --allow-env-keys Grant env-key consent during auto-registration - (bypasses the env-leak gate for this codebase; - logs an audit entry) --port Override server port for 'serve' (default: 3090) --download-only Download web UI without starting the server @@ -207,7 +204,6 @@ async function main(): Promise { reason: { type: 'string' }, workflow: { type: 'string' }, 'no-context': { type: 'boolean' }, - 'allow-env-keys': { type: 'boolean' }, port: { type: 'string' }, 'download-only': { type: 'boolean' }, }, @@ -231,8 +227,6 @@ async function main(): Promise { const resumeFlag = values.resume as boolean | undefined; const spawnFlag = values.spawn as boolean | undefined; const jsonFlag = values.json as boolean | undefined; - const allowEnvKeysFlag = values['allow-env-keys'] as boolean | undefined; - // Handle help flag if (values.help) { printUsage(); @@ -344,7 +338,6 @@ async function main(): Promise { fromBranch, noWorktree, resume: 
resumeFlag, - allowEnvKeys: allowEnvKeysFlag, quiet: values.quiet as boolean | undefined, verbose: values.verbose as boolean | undefined, }; diff --git a/packages/cli/src/commands/workflow.ts b/packages/cli/src/commands/workflow.ts index 89dd5911e4..6ba31d1256 100644 --- a/packages/cli/src/commands/workflow.ts +++ b/packages/cli/src/commands/workflow.ts @@ -62,8 +62,6 @@ export interface WorkflowRunOptions { noWorktree?: boolean; resume?: boolean; codebaseId?: string; // Passed by resume/approve to skip path-based lookup - /** When true, skip the env-leak-gate during auto-registration. */ - allowEnvKeys?: boolean; quiet?: boolean; verbose?: boolean; /** Platform conversation ID (e.g. `cli-{ts}-{rand}`), NOT a DB UUID. */ @@ -325,7 +323,7 @@ export async function workflowRunCommand( const repoRoot = await git.findRepoRoot(cwd); if (repoRoot) { try { - const result = await registerRepository(repoRoot, options.allowEnvKeys, 'register-cli'); + const result = await registerRepository(repoRoot); codebase = await codebaseDb.getCodebase(result.codebaseId); if (!result.alreadyExisted) { getLog().info({ name: result.name }, 'cli.codebase_auto_registered'); diff --git a/packages/core/package.json b/packages/core/package.json index 8aa397ea33..970b01e4d4 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -9,7 +9,6 @@ "./types": "./src/types/index.ts", "./db": "./src/db/index.ts", "./db/*": "./src/db/*.ts", - "./clients": "./src/clients/index.ts", "./operations": "./src/operations/index.ts", "./operations/*": "./src/operations/*.ts", "./workflows": "./src/workflows/index.ts", @@ -23,17 +22,16 @@ "./state/*": "./src/state/*.ts" }, "scripts": { - "test": "bun test src/clients/codex-binary-guard.test.ts && bun test src/utils/codex-binary-resolver.test.ts && bun test src/utils/codex-binary-resolver-dev.test.ts && bun test src/clients/claude.test.ts src/clients/codex.test.ts src/clients/factory.test.ts && bun test src/handlers/command-handler.test.ts && bun test src/handlers/clone.test.ts && bun test src/db/adapters/postgres.test.ts && bun test src/db/adapters/sqlite.test.ts src/db/codebases.test.ts src/db/connection.test.ts src/db/conversations.test.ts src/db/env-vars.test.ts src/db/isolation-environments.test.ts src/db/messages.test.ts src/db/sessions.test.ts src/db/workflow-events.test.ts src/db/workflows.test.ts src/utils/defaults-copy.test.ts src/utils/worktree-sync.test.ts src/utils/conversation-lock.test.ts src/utils/credential-sanitizer.test.ts src/utils/port-allocation.test.ts src/utils/error.test.ts src/utils/error-formatter.test.ts src/utils/github-graphql.test.ts src/utils/env-leak-scanner.test.ts src/config/ src/state/ && bun test src/utils/path-validation.test.ts && bun test src/services/cleanup-service.test.ts && bun test src/services/title-generator.test.ts && bun test src/workflows/ && bun test src/operations/workflow-operations.test.ts && bun test src/operations/isolation-operations.test.ts && bun test src/orchestrator/orchestrator.test.ts && bun test src/orchestrator/orchestrator-agent.test.ts && bun test src/orchestrator/orchestrator-isolation.test.ts", + "test": "bun test src/handlers/command-handler.test.ts && bun test src/handlers/clone.test.ts && bun test src/db/adapters/postgres.test.ts && bun test src/db/adapters/sqlite.test.ts src/db/codebases.test.ts src/db/connection.test.ts src/db/conversations.test.ts src/db/env-vars.test.ts src/db/isolation-environments.test.ts src/db/messages.test.ts src/db/sessions.test.ts src/db/workflow-events.test.ts 
src/db/workflows.test.ts src/utils/defaults-copy.test.ts src/utils/worktree-sync.test.ts src/utils/conversation-lock.test.ts src/utils/credential-sanitizer.test.ts src/utils/port-allocation.test.ts src/utils/error.test.ts src/utils/error-formatter.test.ts src/utils/github-graphql.test.ts src/config/ src/state/ && bun test src/utils/path-validation.test.ts && bun test src/services/cleanup-service.test.ts && bun test src/services/title-generator.test.ts && bun test src/workflows/ && bun test src/operations/workflow-operations.test.ts && bun test src/operations/isolation-operations.test.ts && bun test src/orchestrator/orchestrator.test.ts && bun test src/orchestrator/orchestrator-agent.test.ts && bun test src/orchestrator/orchestrator-isolation.test.ts", "type-check": "bun x tsc --noEmit", "build": "echo 'No build needed - Bun runs TypeScript directly'" }, "dependencies": { - "@anthropic-ai/claude-agent-sdk": "^0.2.89", "@archon/git": "workspace:*", "@archon/isolation": "workspace:*", "@archon/paths": "workspace:*", + "@archon/providers": "workspace:*", "@archon/workflows": "workspace:*", - "@openai/codex-sdk": "^0.116.0", "pg": "^8.11.0", "zod": "^3" }, diff --git a/packages/core/src/clients/claude.ts b/packages/core/src/clients/claude.ts deleted file mode 100644 index 90595e1d25..0000000000 --- a/packages/core/src/clients/claude.ts +++ /dev/null @@ -1,657 +0,0 @@ -/** - * Claude Agent SDK wrapper - * Provides async generator interface for streaming Claude responses - * - * Type Safety Pattern: - * - Uses `Options` type from SDK for query configuration - * - SDK message types (SDKMessage, SDKAssistantMessage, etc.) have strict - * type checking that requires explicit type handling for content blocks - * - Content blocks are typed via inline assertions for clarity - * - * Authentication: - * - CLAUDE_USE_GLOBAL_AUTH=true: Use global auth from `claude /login`, filter env tokens - * - CLAUDE_USE_GLOBAL_AUTH=false: Use explicit tokens from env vars - * - Not set: Auto-detect - use tokens if present in env, otherwise global auth - */ -import { - query, - type Options, - type HookCallback, - type HookCallbackMatcher, -} from '@anthropic-ai/claude-agent-sdk'; -// The `/embed` entry point uses `import ... with { type: 'file' }` to embed -// the SDK's `cli.js` into the compiled binary's $bunfs virtual filesystem, -// then extracts it to a temp path at runtime so the subprocess can exec it. -// Without this, the SDK falls back to resolving `cli.js` from -// `import.meta.url` of its own module — which bun freezes at build time to -// the build host's absolute node_modules path, producing a "Module not found -// /Users/runner/..." error on any machine other than the CI runner. -// Safe in dev too: resolves to the real on-disk cli.js. -import cliPath from '@anthropic-ai/claude-agent-sdk/embed'; -import { - type AssistantRequestOptions, - type IAssistantClient, - type MessageChunk, - type TokenUsage, -} from '../types'; -import { createLogger } from '@archon/paths'; -// No env filtering here — process.env is already clean: -// stripCwdEnv() at entry point stripped CWD .env keys + CLAUDECODE markers, -// then ~/.archon/.env was loaded as the trusted source. All keys the user sets -// in ~/.archon/.env are intentional and pass through to the subprocess. 
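
A minimal sketch of the two-phase env hygiene the header comment above describes (strip the target repo's `.env` keys, then load `~/.archon/.env` as the trusted source). dotenv's `parse`/`config` calls and the `override: true` option are documented APIs; the function name `stripCwdEnv` and the exact marker keys are taken from the comments, and the rest is illustrative, not Archon's actual implementation:

```ts
// Sketch of the env hygiene flow described above — assumptions noted inline.
import { existsSync, readFileSync } from 'node:fs';
import { homedir } from 'node:os';
import { join } from 'node:path';
import * as dotenv from 'dotenv';

// Phase 1: drop any keys the target repo's .env leaked into process.env,
// plus the CLAUDECODE markers inherited when Archon itself runs under Claude Code.
function stripCwdEnv(cwd: string): void {
  const envPath = join(cwd, '.env');
  if (existsSync(envPath)) {
    for (const key of Object.keys(dotenv.parse(readFileSync(envPath, 'utf8')))) {
      delete process.env[key];
    }
  }
  delete process.env.CLAUDECODE;
  delete process.env.CLAUDE_CODE_ENTRYPOINT;
}

// Phase 2: load the trusted ~/.archon/.env, overriding anything left over.
stripCwdEnv(process.cwd());
dotenv.config({ path: join(homedir(), '.archon', '.env'), override: true });
// After this point, `{ ...process.env }` is what buildSubprocessEnv() hands
// to the assistant subprocess.
```
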
-import { scanPathForSensitiveKeys, EnvLeakError } from '../utils/env-leak-scanner'; -import * as codebaseDb from '../db/codebases'; -import { loadConfig } from '../config/config-loader'; - -/** Lazy-initialized logger (deferred so test mocks can intercept createLogger) */ -let cachedLog: ReturnType | undefined; -function getLog(): ReturnType { - if (!cachedLog) cachedLog = createLogger('client.claude'); - return cachedLog; -} - -/** - * Content block type for assistant messages - * Represents text or tool_use blocks from Claude API responses - */ -interface ContentBlock { - type: 'text' | 'tool_use'; - text?: string; - name?: string; - input?: Record; - /** Stable Anthropic `tool_use_id` — used to pair `tool_call`/`tool_result` events. */ - id?: string; -} - -function normalizeClaudeUsage(usage?: { - input_tokens?: number; - output_tokens?: number; - total_tokens?: number; -}): TokenUsage | undefined { - if (!usage) return undefined; - const input = usage.input_tokens; - const output = usage.output_tokens; - if (typeof input !== 'number' || typeof output !== 'number') return undefined; - const total = usage.total_tokens; - - return { - input, - output, - ...(typeof total === 'number' ? { total } : {}), - }; -} - -/** - * Build environment for Claude subprocess. - * - * process.env is already clean at this point: - * - stripCwdEnv() at entry point removed CWD .env keys + CLAUDECODE markers - * - ~/.archon/.env loaded with override:true as the trusted source - * - * Auth mode is determined by the SDK based on what tokens are present: - * - Tokens in env → SDK uses them (explicit auth) - * - No tokens → SDK uses `claude /login` credentials (global auth) - * - User controls this by what they put in ~/.archon/.env - * - * We log the detected mode for diagnostics but don't filter — the user's - * config is trusted. See coleam00/Archon#1067 for design rationale. - */ -function buildSubprocessEnv(): NodeJS.ProcessEnv { - const hasExplicitTokens = Boolean( - process.env.CLAUDE_CODE_OAUTH_TOKEN ?? process.env.CLAUDE_API_KEY - ); - const authMode = hasExplicitTokens ? 'explicit' : 'global'; - getLog().info( - { authMode }, - authMode === 'global' ? 'using_global_auth' : 'using_explicit_tokens' - ); - - return { ...process.env }; -} - -/** Max retries for transient subprocess failures (3 = 4 total attempts). - * SDK subprocess crashes (exit code 1) are often intermittent — AJV schema validation - * regressions, stale HTTP/2 connections, and other transient SDK issues typically - * succeed on retry 3 or 4. See: anthropics/claude-code#22973, claude-code-action#853 */ -const MAX_SUBPROCESS_RETRIES = 3; - -/** Delay between retries in milliseconds */ -const RETRY_BASE_DELAY_MS = 2000; - -/** Patterns indicating rate limiting in stderr/error messages */ -const RATE_LIMIT_PATTERNS = ['rate limit', 'too many requests', '429', 'overloaded']; - -/** Patterns indicating auth issues in stderr/error messages */ -const AUTH_PATTERNS = [ - 'credit balance', - 'unauthorized', - 'authentication', - 'invalid token', - '401', - '403', -]; - -/** Patterns indicating the subprocess crashed (transient, worth retrying) */ -const SUBPROCESS_CRASH_PATTERNS = [ - 'exited with code', - 'killed', - 'signal', - // "Operation aborted" can appear when the SDK's PostToolUse hook tries to write() - // back to a subprocess pipe that was closed by an abort signal. This is a race - // condition in SDK cleanup — safe to classify as a crash and retry. 
- 'operation aborted', -]; - -function classifySubprocessError( - errorMessage: string, - stderrOutput: string -): 'rate_limit' | 'auth' | 'crash' | 'unknown' { - const combined = `${errorMessage} ${stderrOutput}`.toLowerCase(); - if (RATE_LIMIT_PATTERNS.some(p => combined.includes(p))) return 'rate_limit'; - if (AUTH_PATTERNS.some(p => combined.includes(p))) return 'auth'; - if (SUBPROCESS_CRASH_PATTERNS.some(p => combined.includes(p))) return 'crash'; - return 'unknown'; -} - -/** Default timeout for first SDK message (ms). Configurable via env var. */ -function getFirstEventTimeoutMs(): number { - const raw = process.env.ARCHON_CLAUDE_FIRST_EVENT_TIMEOUT_MS; - if (raw) { - const parsed = Number(raw); - if (Number.isFinite(parsed) && parsed > 0) return parsed; - } - return 60_000; -} - -/** Build a diagnostic payload for claude.first_event_timeout log */ -function buildFirstEventHangDiagnostics( - subprocessEnv: Record, - model: string | undefined -): Record { - return { - subprocessEnvKeys: Object.keys(subprocessEnv), - parentClaudeKeys: Object.keys(process.env).filter( - k => k === 'CLAUDECODE' || k.startsWith('CLAUDE_CODE_') || k.startsWith('ANTHROPIC_') - ), - model, - platform: process.platform, - uid: getProcessUid(), - isTTY: process.stdout.isTTY ?? false, - claudeCode: process.env.CLAUDECODE, - claudeCodeEntrypoint: process.env.CLAUDE_CODE_ENTRYPOINT, - }; -} - -/** Sentinel error class to identify timeout rejections in withFirstMessageTimeout. */ -class FirstEventTimeoutError extends Error {} - -/** - * Wraps an async generator so that the first call to .next() must resolve - * within `timeoutMs`. If it doesn't, aborts the controller and throws a - * descriptive error. Subsequent .next() calls are forwarded directly. - * - * Uses Promise.race() — not just AbortController — because the pathological - * case is "SDK ignores abort", so we need an independent unblocking mechanism. - */ -export async function* withFirstMessageTimeout( - gen: AsyncGenerator, - controller: AbortController, - timeoutMs: number, - diagnostics: Record -): AsyncGenerator { - // Race first event against timeout - let timerId: ReturnType | undefined; - let firstValue: IteratorResult; - try { - firstValue = await Promise.race([ - gen.next(), - new Promise((_, reject) => { - timerId = setTimeout(() => { - reject(new FirstEventTimeoutError()); - }, timeoutMs); - }), - ]); - } catch (err) { - if (err instanceof FirstEventTimeoutError) { - controller.abort(); - getLog().error({ ...diagnostics, timeoutMs }, 'claude.first_event_timeout'); - throw new Error( - 'Claude Code subprocess produced no output within ' + - timeoutMs + - 'ms. ' + - 'See logs for claude.first_event_timeout diagnostic dump. ' + - 'Details: https://github.com/coleam00/Archon/issues/1067' - ); - } - throw err; - } finally { - clearTimeout(timerId); - } - - if (firstValue.done) return; - yield firstValue.value; - - // Forward remaining events directly - yield* gen; -} - -/** - * Returns the current process UID, or undefined on platforms that don't support it (e.g. Windows). - * Exported for testing — spyOn(claudeModule, 'getProcessUid') works cross-platform. - */ -export function getProcessUid(): number | undefined { - return typeof process.getuid === 'function' ? 
process.getuid() : undefined; -} - -/** - * Claude AI assistant client - * Implements generic IAssistantClient interface - */ -export class ClaudeClient implements IAssistantClient { - private readonly retryBaseDelayMs: number; - - constructor(options?: { retryBaseDelayMs?: number }) { - // Claude Code SDK silently rejects bypassPermissions when running as root (UID 0). - // Check once at construction time so the error surfaces early, not on first query. - // IS_SANDBOX=1 bypasses this check — the SDK itself honours this env var in sandboxed - // environments (Docker, VPS, CI) where running as root is expected. - if (getProcessUid() === 0 && process.env.IS_SANDBOX !== '1') { - throw new Error( - 'Claude Code SDK does not support bypassPermissions when running as root (UID 0). ' + - 'Run as a non-root user, set IS_SANDBOX=1, or use the Dockerfile which creates a non-root appuser.' - ); - } - this.retryBaseDelayMs = options?.retryBaseDelayMs ?? RETRY_BASE_DELAY_MS; - } - - /** - * Send a query to Claude and stream responses. - * Includes retry logic for transient failures (up to 3 retries with exponential backoff). - * Enriches errors with stderr context and classification. - */ - async *sendQuery( - prompt: string, - cwd: string, - resumeSessionId?: string, - requestOptions?: AssistantRequestOptions - ): AsyncGenerator { - // Pre-spawn: check for env key leak if codebase is not explicitly consented. - // Use prefix lookup so worktree paths (e.g. .../worktrees/feature-branch) still - // match the registered source cwd (e.g. .../source). - const codebase = - (await codebaseDb.findCodebaseByDefaultCwd(cwd)) ?? - (await codebaseDb.findCodebaseByPathPrefix(cwd)); - if (codebase && !codebase.allow_env_keys) { - // Fail-closed: a config load failure (corrupt YAML, permission denied) - // must NOT silently bypass the gate. Catch, log, and treat as - // `allowTargetRepoKeys = false` so the scanner still runs. - let allowTargetRepoKeys = false; - try { - const merged = await loadConfig(cwd); - allowTargetRepoKeys = merged.allowTargetRepoKeys; - } catch (configErr) { - getLog().warn({ err: configErr, cwd }, 'env_leak_gate.config_load_failed_gate_enforced'); - } - if (!allowTargetRepoKeys) { - const report = scanPathForSensitiveKeys(cwd); - if (report.findings.length > 0) { - throw new EnvLeakError(report, 'spawn-existing'); - } - } - } - - // Note: If subprocess crashes mid-stream after yielding chunks, those chunks - // are already consumed by the caller. Retry starts a fresh subprocess, so the - // caller may receive partial output from the failed attempt followed by full - // output from the retry. This is a known limitation of async generator retries. - let lastError: Error | undefined; - - for (let attempt = 0; attempt <= MAX_SUBPROCESS_RETRIES; attempt++) { - // Check if already aborted before starting attempt - if (requestOptions?.abortSignal?.aborted) { - throw new Error('Query aborted'); - } - - const stderrLines: string[] = []; - const toolResultQueue: { toolName: string; toolOutput: string; toolCallId?: string }[] = []; - - // Create per-attempt abort controller and wire to caller's signal - const controller = new AbortController(); - if (requestOptions?.abortSignal) { - requestOptions.abortSignal.addEventListener( - 'abort', - () => { - controller.abort(); - }, - { once: true } - ); - } - - const options: Options = { - cwd, - pathToClaudeCodeExecutable: cliPath, - env: requestOptions?.env - ? 
{ ...buildSubprocessEnv(), ...requestOptions.env } - : buildSubprocessEnv(), - model: requestOptions?.model, - abortController: controller, - ...(requestOptions?.tools !== undefined ? { tools: requestOptions.tools } : {}), - ...(requestOptions?.disallowedTools !== undefined - ? { disallowedTools: requestOptions.disallowedTools } - : {}), - // Pass outputFormat for json_schema structured output (Claude Agent SDK v0.2.45+) - ...(requestOptions?.outputFormat !== undefined - ? { outputFormat: requestOptions.outputFormat } - : {}), - // Note: hooks are merged below (line with `hooks: { ... }`) — not spread here - // Pass MCP servers for per-node MCP support (Claude Agent SDK v0.2.74+) - ...(requestOptions?.mcpServers !== undefined - ? { mcpServers: requestOptions.mcpServers } - : {}), - // Pass allowedTools for MCP tool wildcards (e.g., 'mcp__github__*') - ...(requestOptions?.allowedTools !== undefined - ? { allowedTools: requestOptions.allowedTools } - : {}), - // Pass agents/agent for per-node skill scoping via AgentDefinition wrapping - ...(requestOptions?.agents !== undefined ? { agents: requestOptions.agents } : {}), - ...(requestOptions?.agent !== undefined ? { agent: requestOptions.agent } : {}), - // Skip writing session transcripts to ~/.claude/projects/ — Archon manages its own - // session persistence. persistSession: false reduces disk I/O and keeps the session - // directory clean. Claude Agent SDK v0.2.74+. - ...(requestOptions?.persistSession !== undefined - ? { persistSession: requestOptions.persistSession } - : {}), - // When forkSession is true, the SDK copies the prior session's history into a new - // session file, leaving the original untouched — safe to use on retries. - ...(requestOptions?.forkSession !== undefined - ? { forkSession: requestOptions.forkSession } - : {}), - // Forward Claude-only SDK options (effort, thinking, maxBudgetUsd, fallbackModel, betas, sandbox) - ...(requestOptions?.effort !== undefined ? { effort: requestOptions.effort } : {}), - ...(requestOptions?.thinking !== undefined ? { thinking: requestOptions.thinking } : {}), - ...(requestOptions?.maxBudgetUsd !== undefined - ? { maxBudgetUsd: requestOptions.maxBudgetUsd } - : {}), - ...(requestOptions?.fallbackModel !== undefined - ? { fallbackModel: requestOptions.fallbackModel } - : {}), - // betas: string[] from user config; SDK expects SdkBeta[] (string literal union). - // User-provided values are validated upstream — cast is safe. - ...(requestOptions?.betas !== undefined - ? { betas: requestOptions.betas as Options['betas'] } - : {}), - ...(requestOptions?.sandbox !== undefined ? { sandbox: requestOptions.sandbox } : {}), - permissionMode: 'bypassPermissions', - allowDangerouslySkipPermissions: true, - systemPrompt: requestOptions?.systemPrompt ?? { type: 'preset', preset: 'claude_code' }, - settingSources: requestOptions?.settingSources ?? ['project'], - // Merge user-provided hooks with our PostToolUse capture hook - hooks: { - ...(requestOptions?.hooks ?? {}), - PostToolUse: [ - ...((requestOptions?.hooks?.PostToolUse ?? []) as HookCallbackMatcher[]), - { - hooks: [ - (async (input: Record): Promise<{ continue: true }> => { - const toolName = (input as { tool_name?: string }).tool_name ?? 'unknown'; - const toolUseId = (input as { tool_use_id?: string }).tool_use_id; - const toolResponse = (input as { tool_response?: unknown }).tool_response; - const output = - typeof toolResponse === 'string' - ? toolResponse - : JSON.stringify(toolResponse ?? 
''); - // Truncate large outputs (e.g., file reads) to prevent DB bloat - const maxLen = 10_000; - toolResultQueue.push({ - toolName, - toolOutput: output.length > maxLen ? output.slice(0, maxLen) + '...' : output, - ...(toolUseId !== undefined ? { toolCallId: toolUseId } : {}), - }); - return { continue: true }; - }) as HookCallback, - ], - }, - ], - // Without this, errored / interrupted / permission-denied tools never produce - // a paired tool_result chunk and the corresponding UI card spins forever. - // SDK type: PostToolUseFailureHookInput { tool_name, tool_use_id, error, is_interrupt? } - PostToolUseFailure: [ - ...((requestOptions?.hooks?.PostToolUseFailure ?? []) as HookCallbackMatcher[]), - { - hooks: [ - (async (input: Record): Promise<{ continue: true }> => { - // Always return { continue: true } even on internal errors so a - // malformed SDK payload can never crash the hook dispatch silently. - try { - const toolName = (input as { tool_name?: string }).tool_name ?? 'unknown'; - const toolUseId = (input as { tool_use_id?: string }).tool_use_id; - const rawError = (input as { error?: string }).error; - if (rawError === undefined) { - getLog().debug({ input }, 'claude.post_tool_use_failure_no_error_field'); - } - const errorText = rawError ?? 'tool failed'; - const isInterrupt = (input as { is_interrupt?: boolean }).is_interrupt === true; - const prefix = isInterrupt ? '⚠️ Interrupted' : '❌ Error'; - toolResultQueue.push({ - toolName, - toolOutput: `${prefix}: ${errorText}`, - ...(toolUseId !== undefined ? { toolCallId: toolUseId } : {}), - }); - } catch (e) { - getLog().error({ err: e, input }, 'claude.post_tool_use_failure_hook_error'); - } - return { continue: true }; - }) as HookCallback, - ], - }, - ], - }, - stderr: (data: string) => { - const output = data.trim(); - if (!output) return; - - // Always capture stderr for diagnostics — previous filtering discarded - // useful SDK startup output, leaving stderrContext empty on crashes. - stderrLines.push(output); - - const isError = - output.toLowerCase().includes('error') || - output.toLowerCase().includes('fatal') || - output.toLowerCase().includes('failed') || - output.toLowerCase().includes('exception') || - output.includes('at ') || - output.includes('Error:'); - - const isInfoMessage = - output.includes('Spawning Claude Code') || - output.includes('--output-format') || - output.includes('--permission-mode'); - - if (isError && !isInfoMessage) { - getLog().error({ stderr: output }, 'subprocess_error'); - } - }, - }; - - if (resumeSessionId) { - options.resume = resumeSessionId; - getLog().debug( - { sessionId: resumeSessionId, forkSession: requestOptions?.forkSession }, - 'resuming_session' - ); - } else { - getLog().debug({ cwd, attempt }, 'starting_new_session'); - } - - try { - const rawEvents = query({ prompt, options }); - const timeoutMs = getFirstEventTimeoutMs(); - const diagnostics = buildFirstEventHangDiagnostics( - options.env as Record, - options.model - ); - const events = withFirstMessageTimeout(rawEvents, controller, timeoutMs, diagnostics); - for await (const msg of events) { - // Drain tool results captured by PostToolUse hook before processing the next message - while (toolResultQueue.length > 0) { - const tr = toolResultQueue.shift(); - if (tr) { - yield { - type: 'tool_result', - toolName: tr.toolName, - toolOutput: tr.toolOutput, - ...(tr.toolCallId !== undefined ? 
{ toolCallId: tr.toolCallId } : {}), - }; - } - } - - if (msg.type === 'assistant') { - const message = msg as { message: { content: ContentBlock[] } }; - const content = message.message.content; - - for (const block of content) { - if (block.type === 'text' && block.text) { - yield { type: 'assistant', content: block.text }; - } else if (block.type === 'tool_use' && block.name) { - yield { - type: 'tool', - toolName: block.name, - toolInput: block.input ?? {}, - ...(block.id !== undefined ? { toolCallId: block.id } : {}), - }; - } - } - } else if (msg.type === 'system') { - // Check MCP server connection status from system/init - const sysMsg = msg as { - subtype?: string; - mcp_servers?: { name: string; status: string }[]; - }; - if (sysMsg.subtype === 'init' && sysMsg.mcp_servers) { - const failed = sysMsg.mcp_servers.filter(s => s.status !== 'connected'); - if (failed.length > 0) { - const names = failed.map(s => `${s.name} (${s.status})`).join(', '); - yield { type: 'system', content: `MCP server connection failed: ${names}` }; - } - } else { - getLog().debug({ subtype: sysMsg.subtype }, 'claude.system_message_unhandled'); - } - } else if (msg.type === 'rate_limit_event') { - const rateLimitMsg = msg as { rate_limit_info?: Record }; - getLog().warn( - { rateLimitInfo: rateLimitMsg.rate_limit_info }, - 'claude.rate_limit_event' - ); - yield { type: 'rate_limit', rateLimitInfo: rateLimitMsg.rate_limit_info ?? {} }; - } else if (msg.type === 'result') { - const resultMsg = msg as { - session_id?: string; - is_error?: boolean; - subtype?: string; - usage?: { input_tokens?: number; output_tokens?: number; total_tokens?: number }; - structured_output?: unknown; - total_cost_usd?: number; - stop_reason?: string | null; - num_turns?: number; - model_usage?: Record< - string, - { - input_tokens: number; - output_tokens: number; - cache_read_input_tokens?: number; - cache_creation_input_tokens?: number; - } - >; - }; - const tokens = normalizeClaudeUsage(resultMsg.usage); - yield { - type: 'result', - sessionId: resultMsg.session_id, - ...(tokens ? { tokens } : {}), - ...(resultMsg.structured_output !== undefined - ? { structuredOutput: resultMsg.structured_output } - : {}), - ...(resultMsg.is_error ? { isError: true, errorSubtype: resultMsg.subtype } : {}), - ...(resultMsg.total_cost_usd !== undefined ? { cost: resultMsg.total_cost_usd } : {}), - ...(resultMsg.stop_reason != null ? { stopReason: resultMsg.stop_reason } : {}), - ...(resultMsg.num_turns !== undefined ? { numTurns: resultMsg.num_turns } : {}), - ...(resultMsg.model_usage - ? { modelUsage: resultMsg.model_usage as Record } - : {}), - }; - } - } - // Drain any remaining tool results from the hook queue. - // Must mirror the in-loop drain — PostToolUseFailure results commonly land - // here (they fire just before the SDK's terminal `result` message), so - // dropping toolCallId here would defeat the stable-pairing fix. - while (toolResultQueue.length > 0) { - const tr = toolResultQueue.shift(); - if (tr) { - yield { - type: 'tool_result', - toolName: tr.toolName, - toolOutput: tr.toolOutput, - ...(tr.toolCallId !== undefined ? 
{ toolCallId: tr.toolCallId } : {}), - }; - } - } - return; // Success - exit retry loop - } catch (error) { - const err = error as Error; - - // Don't retry aborted queries - if (controller.signal.aborted) { - throw new Error('Query aborted'); - } - - const stderrContext = stderrLines.join('\n'); - const errorClass = classifySubprocessError(err.message, stderrContext); - - getLog().error( - { err, stderrContext, errorClass, attempt, maxRetries: MAX_SUBPROCESS_RETRIES }, - 'query_error' - ); - - // Don't retry auth errors - they won't resolve - if (errorClass === 'auth') { - const enrichedError = new Error( - `Claude Code auth error: ${err.message}${stderrContext ? ` (${stderrContext})` : ''}` - ); - enrichedError.cause = error; - throw enrichedError; - } - - // Retry transient failures (rate limit, crash) - if ( - attempt < MAX_SUBPROCESS_RETRIES && - (errorClass === 'rate_limit' || errorClass === 'crash') - ) { - const delayMs = this.retryBaseDelayMs * Math.pow(2, attempt); - getLog().info({ attempt, delayMs, errorClass }, 'retrying_subprocess'); - await new Promise(resolve => setTimeout(resolve, delayMs)); - lastError = err; - continue; - } - - // Final failure - enrich and throw - const enrichedMessage = stderrContext - ? `Claude Code ${errorClass}: ${err.message} (stderr: ${stderrContext})` - : `Claude Code ${errorClass}: ${err.message}`; - const enrichedError = new Error(enrichedMessage); - enrichedError.cause = error; - throw enrichedError; - } - } - - // Should not reach here, but handle defensively - throw lastError ?? new Error('Claude Code query failed after retries'); - } - - /** - * Get the assistant type identifier - */ - getType(): string { - return 'claude'; - } -} diff --git a/packages/core/src/clients/codex.ts b/packages/core/src/clients/codex.ts deleted file mode 100644 index e6e9d1dd09..0000000000 --- a/packages/core/src/clients/codex.ts +++ /dev/null @@ -1,581 +0,0 @@ -/** - * Codex SDK wrapper - * Provides async generator interface for streaming Codex responses - * - * With Bun runtime, we can directly import ESM packages without the - * dynamic import workaround that was needed for CommonJS/Node.js. - */ -import { - Codex, - type ThreadOptions, - type TurnOptions, - type TurnCompletedEvent, -} from '@openai/codex-sdk'; -import { - type AssistantRequestOptions, - type IAssistantClient, - type MessageChunk, - type TokenUsage, -} from '../types'; -import { createLogger } from '@archon/paths'; -import { scanPathForSensitiveKeys, EnvLeakError } from '../utils/env-leak-scanner'; -import * as codebaseDb from '../db/codebases'; -import { loadConfig } from '../config/config-loader'; -import { resolveCodexBinaryPath } from '../utils/codex-binary-resolver'; - -/** Lazy-initialized logger (deferred so test mocks can intercept createLogger) */ -let cachedLog: ReturnType | undefined; -function getLog(): ReturnType { - if (!cachedLog) cachedLog = createLogger('client.codex'); - return cachedLog; -} - -// Singleton Codex instance (async because binary path resolution is async) -let codexInstance: Codex | null = null; -let codexInitPromise: Promise | null = null; - -/** Reset singleton state. Exported for tests only. */ -export function resetCodexSingleton(): void { - codexInstance = null; - codexInitPromise = null; -} - -/** - * Get or create Codex SDK instance. - * Async because in compiled binary mode, binary path resolution is async. - * Once initialized, the binary path is fixed for the process lifetime. 
- */ -async function getCodex(configCodexBinaryPath?: string): Promise { - if (codexInstance) return codexInstance; - - // Prevent concurrent initialization race - if (!codexInitPromise) { - codexInitPromise = (async (): Promise => { - const codexPathOverride = await resolveCodexBinaryPath(configCodexBinaryPath); - const instance = new Codex({ codexPathOverride }); - codexInstance = instance; - return instance; - })().catch(err => { - // Clear promise so next call can retry (e.g. after user installs Codex) - codexInitPromise = null; - throw err; - }); - } - return codexInitPromise; -} - -/** - * Build thread options for Codex SDK - * Extracted to avoid duplication across thread creation paths - */ -function buildThreadOptions(cwd: string, options?: AssistantRequestOptions): ThreadOptions { - return { - workingDirectory: cwd, - skipGitRepoCheck: true, - sandboxMode: 'danger-full-access', // Full filesystem access (needed for git worktree operations) - networkAccessEnabled: true, // Allow network calls (GitHub CLI, HTTP requests) - approvalPolicy: 'never', // Auto-approve all operations without user confirmation - model: options?.model, - modelReasoningEffort: options?.modelReasoningEffort, - webSearchMode: options?.webSearchMode, - additionalDirectories: options?.additionalDirectories, - }; -} - -const CODEX_MODEL_FALLBACKS: Record = { - 'gpt-5.3-codex': 'gpt-5.2-codex', -}; - -function isModelAccessError(errorMessage: string): boolean { - const m = errorMessage.toLowerCase(); - const hasModel = m.includes('model'); - const hasAvailabilitySignal = - m.includes('not available') || m.includes('not found') || m.includes('access denied'); - return hasModel && hasAvailabilitySignal; -} - -function buildModelAccessMessage(model?: string): string { - const normalizedModel = model?.trim(); - const selectedModel = normalizedModel || 'the configured model'; - const suggested = normalizedModel ? CODEX_MODEL_FALLBACKS[normalizedModel] : undefined; - - const fixLine = suggested - ? `To fix: update your model in ~/.archon/config.yaml:\n assistants:\n codex:\n model: ${suggested}` - : 'To fix: update your model in ~/.archon/config.yaml to one your account can access.'; - - const workflowLine = suggested - ? `Or set it per-workflow with \`model: ${suggested}\` in workflow YAML.` - : 'Or set it per-workflow with a valid `model:` in workflow YAML.'; - - return `❌ Model "${selectedModel}" is not available for your account.\n\n${fixLine}\n\n${workflowLine}`; -} - -/** Max retries for transient failures (3 = 4 total attempts). - * Mirrors ClaudeClient retry logic — Codex process crashes are similarly intermittent. 
*/ -const MAX_SUBPROCESS_RETRIES = 3; - -/** Delay between retries in milliseconds */ -const RETRY_BASE_DELAY_MS = 2000; - -/** Patterns indicating rate limiting in error messages */ -const RATE_LIMIT_PATTERNS = ['rate limit', 'too many requests', '429', 'overloaded']; - -/** Patterns indicating auth issues in error messages */ -const AUTH_PATTERNS = [ - 'credit balance', - 'unauthorized', - 'authentication', - 'invalid token', - '401', - '403', -]; - -/** Patterns indicating a transient process crash (worth retrying) */ -const SUBPROCESS_CRASH_PATTERNS = ['exited with code', 'killed', 'signal', 'codex exec']; - -function classifyCodexError( - errorMessage: string -): 'rate_limit' | 'auth' | 'crash' | 'model_access' | 'unknown' { - if (isModelAccessError(errorMessage)) return 'model_access'; - const m = errorMessage.toLowerCase(); - if (RATE_LIMIT_PATTERNS.some(p => m.includes(p))) return 'rate_limit'; - if (AUTH_PATTERNS.some(p => m.includes(p))) return 'auth'; - if (SUBPROCESS_CRASH_PATTERNS.some(p => m.includes(p))) return 'crash'; - return 'unknown'; -} - -function extractUsageFromCodexEvent(event: TurnCompletedEvent): TokenUsage { - if (!event.usage) { - getLog().warn({ eventType: event.type }, 'codex.usage_null_on_turn_completed'); - return { input: 0, output: 0 }; - } - return { - input: event.usage.input_tokens, - output: event.usage.output_tokens, - }; -} - -/** - * Codex AI assistant client - * Implements generic IAssistantClient interface - */ -export class CodexClient implements IAssistantClient { - private readonly retryBaseDelayMs: number; - - constructor(options?: { retryBaseDelayMs?: number }) { - this.retryBaseDelayMs = options?.retryBaseDelayMs ?? RETRY_BASE_DELAY_MS; - } - - /** - * Send a query to Codex and stream responses - * @param prompt - User message or prompt - * @param cwd - Working directory for Codex - * @param resumeSessionId - Optional thread ID to resume - */ - async *sendQuery( - prompt: string, - cwd: string, - resumeSessionId?: string, - options?: AssistantRequestOptions - ): AsyncGenerator { - // Load config once — used for env-leak gate and (on first call) codexBinaryPath resolution. - let mergedConfig: Awaited> | undefined; - try { - mergedConfig = await loadConfig(cwd); - } catch (configErr) { - // Fail-closed: config load failure enforces the env-leak gate (allowTargetRepoKeys stays false) - getLog().warn({ err: configErr, cwd }, 'env_leak_gate.config_load_failed_gate_enforced'); - } - - // Pre-spawn: check for env key leak if codebase is not explicitly consented. - // Use prefix lookup so worktree paths (e.g. .../worktrees/feature-branch) still - // match the registered source cwd (e.g. .../source). - const codebase = - (await codebaseDb.findCodebaseByDefaultCwd(cwd)) ?? - (await codebaseDb.findCodebaseByPathPrefix(cwd)); - if (codebase && !codebase.allow_env_keys) { - // Fail-closed: a config load failure must NOT silently bypass the gate. - const allowTargetRepoKeys = mergedConfig?.allowTargetRepoKeys ?? false; - if (!allowTargetRepoKeys) { - const report = scanPathForSensitiveKeys(cwd); - if (report.findings.length > 0) { - throw new EnvLeakError(report, 'spawn-existing'); - } - } - } - - // Initialize Codex SDK with binary path override (resolved from env/config/vendor). - // In dev mode, resolveCodexBinaryPath returns undefined and the SDK uses node_modules. - // In binary mode, it resolves from env/config/vendor or throws with install instructions. 
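
A rough sketch of the binary resolution order those comments describe. Only `codexPathOverride` (the Codex SDK constructor option used by `getCodex` above) is confirmed by this diff; the env-var name, compiled-binary detection, and vendor path below are assumptions, and the real logic in `src/utils/codex-binary-resolver` may differ:

```ts
import { existsSync } from 'node:fs';

// Illustrative resolution order only — not the actual resolver.
async function resolveCodexBinaryPathSketch(
  configPath?: string
): Promise<string | undefined> {
  // Dev mode: return undefined so the SDK falls back to node_modules.
  const isCompiledBinary = Boolean(process.env.ARCHON_COMPILED); // hypothetical flag
  if (!isCompiledBinary) return undefined;

  // Binary mode: env override > config.yaml > vendored copy.
  const candidates = [
    process.env.ARCHON_CODEX_BINARY, // hypothetical env override
    configPath, // assistants.codex.codexBinaryPath from ~/.archon/config.yaml
    '/usr/local/share/archon/vendor/codex', // hypothetical vendor location
  ].filter((p): p is string => Boolean(p));

  for (const candidate of candidates) {
    if (existsSync(candidate)) return candidate;
  }
  throw new Error(
    'Codex binary not found. Install Codex or set codexBinaryPath in ~/.archon/config.yaml.'
  );
}
// Usage mirrors getCodex():
//   new Codex({ codexPathOverride: await resolveCodexBinaryPathSketch(path) })
```
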
- const codex = await getCodex(mergedConfig?.assistants.codex.codexBinaryPath); - const threadOptions = buildThreadOptions(cwd, options); - - // Check if already aborted before starting - if (options?.abortSignal?.aborted) { - throw new Error('Query aborted'); - } - - // Track if we fell back from a failed resume (to notify user) - let sessionResumeFailed = false; - - // Get or create thread (synchronous operations!) - let thread; - if (resumeSessionId) { - getLog().debug({ sessionId: resumeSessionId }, 'resuming_thread'); - try { - // NOTE: resumeThread is synchronous, not async - // IMPORTANT: Must pass options when resuming! - thread = codex.resumeThread(resumeSessionId, threadOptions); - } catch (error) { - getLog().error({ err: error, sessionId: resumeSessionId }, 'resume_thread_failed'); - // Fall back to creating new thread - try { - thread = codex.startThread(threadOptions); - } catch (startError) { - const err = startError as Error; - if (isModelAccessError(err.message)) { - throw new Error(buildModelAccessMessage(options?.model)); - } - throw new Error(`Codex query failed: ${err.message}`); - } - sessionResumeFailed = true; - } - } else { - getLog().debug({ cwd }, 'starting_new_thread'); - // NOTE: startThread is synchronous, not async - try { - thread = codex.startThread(threadOptions); - } catch (error) { - const err = error as Error; - if (isModelAccessError(err.message)) { - throw new Error(buildModelAccessMessage(options?.model)); - } - throw new Error(`Codex query failed: ${err.message}`); - } - } - - // Notify user if session resume failed (don't silently lose context) - if (sessionResumeFailed) { - yield { - type: 'system', - content: '⚠️ Could not resume previous session. Starting fresh conversation.', - }; - } - - let lastTodoListSignature: string | undefined; - let lastError: Error | undefined; - - for (let attempt = 0; attempt <= MAX_SUBPROCESS_RETRIES; attempt++) { - // Check abort signal before each attempt - if (options?.abortSignal?.aborted) { - throw new Error('Query aborted'); - } - - // On retries, create a fresh thread (crashed thread is invalid) - if (attempt > 0) { - getLog().debug({ cwd, attempt }, 'starting_new_thread'); - try { - thread = codex.startThread(threadOptions); - } catch (startError) { - const err = startError as Error; - if (isModelAccessError(err.message)) { - throw new Error(buildModelAccessMessage(options?.model)); - } - throw new Error(`Codex query failed: ${err.message}`); - } - } - - try { - // Build per-turn options (structured output schema, abort signal) - const turnOptions: TurnOptions = {}; - if (options?.outputFormat) { - turnOptions.outputSchema = options.outputFormat.schema; - } - if (options?.abortSignal) { - turnOptions.signal = options.abortSignal; - } - - // Run streamed query (this IS async) - const result = await thread.runStreamed(prompt, turnOptions); - - // Process streaming events - for await (const event of result.events) { - // Check abort signal between events - if (options?.abortSignal?.aborted) { - getLog().info('query_aborted_between_events'); - break; - } - - // Log progress for item.started (visibility fix for Codex appearing to hang) - if (event.type === 'item.started') { - const item = event.item; - getLog().debug( - { eventType: event.type, itemType: item.type, itemId: item.id }, - 'item_started' - ); - } - - // Handle error events - if (event.type === 'error') { - getLog().error({ message: event.message }, 'stream_error'); - // Don't send MCP timeout errors (they're optional) - if 
(!event.message.includes('MCP client')) { - yield { type: 'system', content: `⚠️ ${event.message}` }; - } - continue; - } - - // Handle turn failed events - if (event.type === 'turn.failed') { - const errorObj = event.error as { message?: string } | undefined; - const errorMessage = errorObj?.message ?? 'Unknown error'; - getLog().error({ errorMessage }, 'turn_failed'); - yield { - type: 'system', - content: `❌ Turn failed: ${errorMessage}`, - }; - break; - } - - // Handle item.completed events - map to MessageChunk types - if (event.type === 'item.completed') { - const item = event.item; - - // Log progress with context for debugging - const logContext: Record = { - eventType: event.type, - itemType: item.type, - itemId: item.id, - }; - if (item.type === 'command_execution' && item.command) { - logContext.command = item.command; - } - getLog().debug(logContext, 'item_completed'); - - switch (item.type) { - case 'agent_message': - // Agent text response - if (item.text) { - yield { type: 'assistant', content: item.text }; - } - break; - - case 'command_execution': - // Tool/command execution. The Codex SDK only emits item.completed - // once the command has fully run, so we emit the start + result - // back-to-back to close the UI's tool card immediately. Without - // the paired tool_result, the card spins forever until lock release. - if (item.command) { - yield { type: 'tool', toolName: item.command }; - const exitSuffix = - item.exit_code != null && item.exit_code !== 0 - ? `\n[exit code: ${item.exit_code}]` - : ''; - yield { - type: 'tool_result', - toolName: item.command, - toolOutput: (item.aggregated_output ?? '') + exitSuffix, - }; - } else { - getLog().warn({ itemId: item.id }, 'command_execution_missing_command'); - } - break; - - case 'reasoning': - // Agent reasoning/thinking - if (item.text) { - yield { type: 'thinking', content: item.text }; - } - break; - - case 'web_search': - if (item.query) { - const searchToolName = `🔍 Searching: ${item.query}`; - yield { type: 'tool', toolName: searchToolName }; - // Web search items only fire on completion, so close the card immediately. - yield { type: 'tool_result', toolName: searchToolName, toolOutput: '' }; - } else { - getLog().debug({ itemId: item.id }, 'web_search_missing_query'); - } - break; - - case 'todo_list': - if (Array.isArray(item.items) && item.items.length > 0) { - const normalizedItems = item.items.map(t => ({ - text: typeof t.text === 'string' ? t.text : '(unnamed task)', - completed: t.completed ?? false, - })); - const signature = JSON.stringify(normalizedItems); - if (signature !== lastTodoListSignature) { - lastTodoListSignature = signature; - const taskList = normalizedItems - .map(t => `${t.completed ? '✅' : '⬜'} ${t.text}`) - .join('\n'); - yield { type: 'system', content: `📋 Tasks:\n${taskList}` }; - } - } else { - getLog().debug({ itemId: item.id }, 'todo_list_empty_or_invalid'); - } - break; - - case 'file_change': { - const statusIcon = item.status === 'failed' ? '❌' : '✅'; - const rawError = 'error' in item ? (item as { error?: unknown }).error : undefined; - const fileErrorMessage = - typeof rawError === 'string' - ? rawError - : typeof rawError === 'object' && rawError !== null && 'message' in rawError - ? String((rawError as { message: unknown }).message) - : undefined; - - if (Array.isArray(item.changes) && item.changes.length > 0) { - const changeList = item.changes - .map(c => { - const icon = c.kind === 'add' ? '➕' : c.kind === 'delete' ? '➖' : '📝'; - return `${icon} ${c.path ?? 
'(unknown file)'}`; - }) - .join('\n'); - const errorSuffix = - item.status === 'failed' && fileErrorMessage ? `\n${fileErrorMessage}` : ''; - yield { - type: 'system', - content: `${statusIcon} File changes:\n${changeList}${errorSuffix}`, - }; - } else if (item.status === 'failed') { - getLog().warn( - { itemId: item.id, status: item.status }, - 'file_change_failed_no_changes' - ); - const failMsg = fileErrorMessage - ? `❌ File change failed: ${fileErrorMessage}` - : '❌ File change failed'; - yield { type: 'system', content: failMsg }; - } else { - getLog().debug( - { itemId: item.id, status: item.status }, - 'file_change_no_changes' - ); - } - break; - } - - case 'mcp_tool_call': { - const toolInfo = - item.server && item.tool - ? `${item.server}/${item.tool}` - : (item.tool ?? item.server ?? 'MCP tool'); - const mcpToolName = `🔌 MCP: ${toolInfo}`; - - // Always emit start+result so the UI card closes. item.completed - // fires once the call is final (completed or failed). - yield { type: 'tool', toolName: mcpToolName }; - - if (item.status === 'failed') { - getLog().warn( - { server: item.server, tool: item.tool, error: item.error, itemId: item.id }, - 'mcp_tool_call_failed' - ); - const errMsg = item.error?.message - ? `❌ Error: ${item.error.message}` - : '❌ Error: MCP tool failed'; - yield { type: 'tool_result', toolName: mcpToolName, toolOutput: errMsg }; - } else { - // status === 'completed' (or 'in_progress', which shouldn't reach - // item.completed but is closed defensively). - let toolOutput = ''; - if (item.result?.content) { - if (Array.isArray(item.result.content)) { - toolOutput = JSON.stringify(item.result.content); - } else { - getLog().warn( - { - itemId: item.id, - server: item.server, - tool: item.tool, - resultType: typeof item.result.content, - }, - 'mcp_tool_call_unexpected_result_shape' - ); - } - } - yield { type: 'tool_result', toolName: mcpToolName, toolOutput }; - } - break; - } - - // Other item types are ignored (like file edits, etc.) - } - } - - // Handle turn.completed event - if (event.type === 'turn.completed') { - getLog().debug('turn_completed'); - // Yield result with thread ID for persistence - const usage = extractUsageFromCodexEvent(event); - yield { - type: 'result', - sessionId: thread.id ?? undefined, - tokens: usage, - }; - // CRITICAL: Break out of event loop - turn is complete! 
- // Without this, the loop waits for stream to end (causes 90s timeout) - break; - } - } - return; // Success - exit retry loop - } catch (error) { - const err = error as Error; - - // Don't retry aborted queries - if (options?.abortSignal?.aborted) { - throw new Error('Query aborted'); - } - - const errorClass = classifyCodexError(err.message); - getLog().error( - { err, errorClass, attempt, maxRetries: MAX_SUBPROCESS_RETRIES }, - 'query_error' - ); - - // Model access errors are never retryable - if (errorClass === 'model_access') { - throw new Error(buildModelAccessMessage(options?.model)); - } - - // Auth errors won't resolve on retry - if (errorClass === 'auth') { - const enrichedError = new Error(`Codex auth error: ${err.message}`); - enrichedError.cause = error; - throw enrichedError; - } - - // Retry transient failures (rate limit, crash) - if ( - attempt < MAX_SUBPROCESS_RETRIES && - (errorClass === 'rate_limit' || errorClass === 'crash') - ) { - const delayMs = this.retryBaseDelayMs * Math.pow(2, attempt); - getLog().info({ attempt, delayMs, errorClass }, 'retrying_query'); - await new Promise(resolve => setTimeout(resolve, delayMs)); - lastError = err; - continue; - } - - // Final failure - enrich and throw - const enrichedError = new Error(`Codex ${errorClass}: ${err.message}`); - enrichedError.cause = error; - throw enrichedError; - } - } - - // Should not reach here, but handle defensively - throw lastError ?? new Error('Codex query failed after retries'); - } - - /** - * Get the assistant type identifier - */ - getType(): string { - return 'codex'; - } -} diff --git a/packages/core/src/clients/factory.test.ts b/packages/core/src/clients/factory.test.ts deleted file mode 100644 index a8aed89f0b..0000000000 --- a/packages/core/src/clients/factory.test.ts +++ /dev/null @@ -1,48 +0,0 @@ -import { describe, test, expect } from 'bun:test'; -import { getAssistantClient } from './factory'; - -describe('factory', () => { - describe('getAssistantClient', () => { - test('returns ClaudeClient for claude type', () => { - const client = getAssistantClient('claude'); - - expect(client).toBeDefined(); - expect(client.getType()).toBe('claude'); - expect(typeof client.sendQuery).toBe('function'); - }); - - test('returns CodexClient for codex type', () => { - const client = getAssistantClient('codex'); - - expect(client).toBeDefined(); - expect(client.getType()).toBe('codex'); - expect(typeof client.sendQuery).toBe('function'); - }); - - test('throws error for unknown type', () => { - expect(() => getAssistantClient('unknown')).toThrow( - "Unknown assistant type: unknown. Supported types: 'claude', 'codex'" - ); - }); - - test('throws error for empty string', () => { - expect(() => getAssistantClient('')).toThrow( - "Unknown assistant type: . Supported types: 'claude', 'codex'" - ); - }); - - test('is case sensitive - Claude throws', () => { - expect(() => getAssistantClient('Claude')).toThrow( - "Unknown assistant type: Claude. 
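The retry loop above only retries what can plausibly succeed on a second attempt. A condensed sketch of the same shape, assuming a classifier like classifyCodexError; the model-access special message and logging are omitted:

```typescript
type ErrorClass = 'rate_limit' | 'crash' | 'auth' | 'model_access';

async function withRetries<T>(
  fn: () => Promise<T>,
  classify: (message: string) => ErrorClass,
  maxRetries = 3,
  baseDelayMs = 1000
): Promise<T> {
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    try {
      return await fn();
    } catch (error) {
      const err = error as Error;
      const errorClass = classify(err.message);
      const transient = errorClass === 'rate_limit' || errorClass === 'crash';
      if (!transient || attempt === maxRetries) {
        // auth/model_access never resolve on retry; enrich and rethrow
        const enriched = new Error(`Codex ${errorClass}: ${err.message}`);
        enriched.cause = error;
        throw enriched;
      }
      // Exponential backoff: base * 2^attempt (1s, 2s, 4s, ...)
      await new Promise(resolve => setTimeout(resolve, baseDelayMs * 2 ** attempt));
    }
  }
  throw new Error('unreachable');
}
```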
Supported types: 'claude', 'codex'"
-      );
-    });
-
-    test('each call returns new instance', () => {
-      const client1 = getAssistantClient('claude');
-      const client2 = getAssistantClient('claude');
-
-      // Each call should return a new instance
-      expect(client1).not.toBe(client2);
-    });
-  });
-});
diff --git a/packages/core/src/clients/factory.ts b/packages/core/src/clients/factory.ts
deleted file mode 100644
index 027f9843fa..0000000000
--- a/packages/core/src/clients/factory.ts
+++ /dev/null
@@ -1,37 +0,0 @@
-/**
- * AI Assistant Client Factory
- *
- * Dynamically instantiates the appropriate AI assistant client based on type string.
- * Supports Claude and Codex assistants.
- */
-import type { IAssistantClient } from '../types';
-import { ClaudeClient } from './claude';
-import { CodexClient } from './codex';
-import { createLogger } from '@archon/paths';
-
-/** Lazy-initialized logger (deferred so test mocks can intercept createLogger) */
-let cachedLog: ReturnType<typeof createLogger> | undefined;
-function getLog(): ReturnType<typeof createLogger> {
-  if (!cachedLog) cachedLog = createLogger('client.factory');
-  return cachedLog;
-}
-
-/**
- * Get the appropriate AI assistant client based on type
- *
- * @param type - Assistant type identifier ('claude' or 'codex')
- * @returns Instantiated assistant client
- * @throws Error if assistant type is unknown
- */
-export function getAssistantClient(type: string): IAssistantClient {
-  switch (type) {
-    case 'claude':
-      getLog().debug({ provider: 'claude' }, 'client_selected');
-      return new ClaudeClient();
-    case 'codex':
-      getLog().debug({ provider: 'codex' }, 'client_selected');
-      return new CodexClient();
-    default:
-      throw new Error(`Unknown assistant type: ${type}. Supported types: 'claude', 'codex'`);
-  }
-}
diff --git a/packages/core/src/clients/index.ts b/packages/core/src/clients/index.ts
deleted file mode 100644
index 98b1d10f20..0000000000
--- a/packages/core/src/clients/index.ts
+++ /dev/null
@@ -1,16 +0,0 @@
-/**
- * AI Assistant Clients
- *
- * Prefer importing from '@archon/core' for most use cases:
- *   import { ClaudeClient, getAssistantClient } from '@archon/core';
- *
- * Use this submodule path when you only need client-specific code:
- *   import { ClaudeClient } from '@archon/core/clients';
- */
-
-export { ClaudeClient } from './claude';
-export { CodexClient } from './codex';
-export { getAssistantClient } from './factory';
-
-// Re-export types for consumers importing from this submodule directly
-export type { IAssistantClient, MessageChunk } from '../types';
diff --git a/packages/core/src/config/config-loader.ts b/packages/core/src/config/config-loader.ts
index 8ee702c613..f0f51ba0a4 100644
--- a/packages/core/src/config/config-loader.ts
+++ b/packages/core/src/config/config-loader.ts
@@ -38,24 +38,6 @@ function getLog(): ReturnType<typeof createLogger> {
   return cachedLog;
 }
 
-/**
- * Tracks which env-leak-gate-disabled sources have already warned in this
- * process. `loadConfig()` is called once per pre-spawn check (per workflow
- * step), so without this guard the warn would flood logs and break alert
- * rate-limiting downstream.
- */
-const envLeakGateDisabledWarnedSources = new Set<'global_config' | 'repo_config'>();
-function warnEnvLeakGateDisabledOnce(source: 'global_config' | 'repo_config'): void {
-  if (envLeakGateDisabledWarnedSources.has(source)) return;
-  envLeakGateDisabledWarnedSources.add(source);
-  getLog().warn({ source }, 'env_leak_gate_disabled');
-}
-
-// Test-only: reset the warn-once state so unit tests can re-trigger the log.
-export function resetEnvLeakGateWarnedSourcesForTests(): void { - envLeakGateDisabledWarnedSources.clear(); -} - /** * Parse YAML using Bun's native YAML parser */ @@ -216,7 +198,6 @@ function getDefaults(): MergedConfig { loadDefaultCommands: true, loadDefaultWorkflows: true, }, - allowTargetRepoKeys: false, }; } @@ -321,12 +302,6 @@ function mergeGlobalConfig(defaults: MergedConfig, global: GlobalConfig): Merged result.concurrency.maxConversations = global.concurrency.maxConversations; } - // Env-leak gate bypass (global) - if (global.allow_target_repo_keys === true) { - result.allowTargetRepoKeys = true; - warnEnvLeakGateDisabledOnce('global_config'); - } - return result; } @@ -400,14 +375,6 @@ function mergeRepoConfig(merged: MergedConfig, repo: RepoConfig): MergedConfig { result.envVars = { ...result.envVars, ...repo.env }; } - // Repo-level env-leak gate override (wins over global) - if (repo.allow_target_repo_keys !== undefined) { - result.allowTargetRepoKeys = repo.allow_target_repo_keys; - if (repo.allow_target_repo_keys) { - warnEnvLeakGateDisabledOnce('repo_config'); - } - } - return result; } diff --git a/packages/core/src/config/config-types.ts b/packages/core/src/config/config-types.ts index 3baa3dfdca..983720c13b 100644 --- a/packages/core/src/config/config-types.ts +++ b/packages/core/src/config/config-types.ts @@ -10,25 +10,12 @@ * Global configuration (non-secret user preferences) * Located at ~/.archon/config.yaml */ -import type { ModelReasoningEffort, WebSearchMode } from '../types'; -export interface AssistantDefaults { - model?: string; - modelReasoningEffort?: ModelReasoningEffort; - webSearchMode?: WebSearchMode; - additionalDirectories?: string[]; - /** Path to the Codex CLI binary. Overrides auto-detection in compiled Archon builds. - * Only relevant for the Codex provider; ignored for Claude. */ - codexBinaryPath?: string; -} +// Provider config defaults — canonical definitions live in @archon/providers/types. +// Imported and re-exported here so existing consumers don't break. +import type { ClaudeProviderDefaults, CodexProviderDefaults } from '@archon/providers/types'; -export interface ClaudeAssistantDefaults { - model?: string; - /** Claude Code settingSources — controls which CLAUDE.md files are loaded. - * @default ['project'] - * @see https://github.com/anthropics/claude-agent-sdk */ - settingSources?: ('project' | 'user')[]; -} +export type { ClaudeProviderDefaults, CodexProviderDefaults }; export interface GlobalConfig { /** @@ -47,8 +34,8 @@ export interface GlobalConfig { * Assistant-specific defaults (model, reasoning effort, etc.) */ assistants?: { - claude?: ClaudeAssistantDefaults; - codex?: AssistantDefaults; + claude?: ClaudeProviderDefaults; + codex?: CodexProviderDefaults; }; /** @@ -87,20 +74,6 @@ export interface GlobalConfig { */ maxConversations?: number; }; - - /** - * Bypass the env-leak gate globally. When true, Archon will not refuse to - * register or spawn subprocesses for codebases whose auto-loaded .env files - * contain sensitive keys (ANTHROPIC_API_KEY, OPENAI_API_KEY, etc). - * - * WARNING: Weakens the env-leak gate. Keys in the target repo's .env will - * be auto-loaded by Bun subprocesses (Claude/Codex) and bypass Archon's - * env allowlist. Use only on trusted machines. 
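These hunks complete the move of provider concerns out of core: the client factory is deleted above, and the config types below are now re-exported from @archon/providers/types. The replacement getAgentProvider lives in @archon/providers and is not part of this diff; a hypothetical sketch of a registry-style equivalent, with illustrative names throughout, showing how the old lookup-plus-error contract can survive the move:

```typescript
// Illustrative only; the real @archon/providers API may differ.
interface AgentProvider {
  getType(): string;
  getCapabilities(): Record<string, unknown>;
}

const registry = new Map<string, () => AgentProvider>();

function registerProvider(type: string, factory: () => AgentProvider): void {
  registry.set(type, factory);
}

function getAgentProvider(type: string): AgentProvider {
  const factory = registry.get(type);
  if (!factory) {
    const known = [...registry.keys()].map(k => `'${k}'`).join(', ');
    throw new Error(`Unknown assistant type: ${type}. Supported types: ${known}`);
  }
  // New instance per call, matching the deleted factory's contract
  return factory();
}
```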
- * - * YAML key: `allow_target_repo_keys` - * @default false - */ - allow_target_repo_keys?: boolean; } /** @@ -118,8 +91,8 @@ export interface RepoConfig { * Assistant-specific defaults for this repository */ assistants?: { - claude?: ClaudeAssistantDefaults; - codex?: AssistantDefaults; + claude?: ClaudeProviderDefaults; + codex?: CodexProviderDefaults; }; /** @@ -175,12 +148,6 @@ export interface RepoConfig { */ env?: Record; - /** - * Per-repo override for the env-leak gate bypass. Repo value wins over global. - * YAML key: `allow_target_repo_keys` - */ - allow_target_repo_keys?: boolean; - /** * Default commands/workflows configuration */ @@ -217,8 +184,8 @@ export interface MergedConfig { botName: string; assistant: 'claude' | 'codex'; assistants: { - claude: ClaudeAssistantDefaults; - codex: AssistantDefaults; + claude: ClaudeProviderDefaults; + codex: CodexProviderDefaults; }; streaming: { telegram: 'stream' | 'batch'; @@ -263,14 +230,6 @@ export interface MergedConfig { * Undefined when no env vars are configured. */ envVars?: Record; - - /** - * Effective value of the env-leak gate bypass. When true, the env scanner - * is skipped during registration and pre-spawn. Repo-level override wins - * over global (explicit `false` at repo level re-enables the gate). - * @default false - */ - allowTargetRepoKeys: boolean; } /** @@ -281,8 +240,8 @@ export interface SafeConfig { botName: string; assistant: 'claude' | 'codex'; assistants: { - claude: Pick; - codex: Pick; + claude: Pick; + codex: Pick; }; streaming: { telegram: 'stream' | 'batch'; diff --git a/packages/core/src/db/adapters/sqlite.ts b/packages/core/src/db/adapters/sqlite.ts index 2864e4fc43..485706d040 100644 --- a/packages/core/src/db/adapters/sqlite.ts +++ b/packages/core/src/db/adapters/sqlite.ts @@ -215,22 +215,6 @@ export class SqliteAdapter implements IDatabase { } catch (e: unknown) { getLog().warn({ err: e as Error }, 'db.sqlite_migration_session_columns_failed'); } - - // Codebases columns (added in #983 — env-leak gate consent bit) - try { - const cbCols = this.db.prepare("PRAGMA table_info('remote_agent_codebases')").all() as { - name: string; - }[]; - const cbColNames = new Set(cbCols.map(c => c.name)); - - if (!cbColNames.has('allow_env_keys')) { - this.db.run( - 'ALTER TABLE remote_agent_codebases ADD COLUMN allow_env_keys INTEGER DEFAULT 0' - ); - } - } catch (e: unknown) { - getLog().warn({ err: e as Error }, 'db.sqlite_migration_codebases_columns_failed'); - } } /** @@ -252,7 +236,6 @@ export class SqliteAdapter implements IDatabase { default_cwd TEXT NOT NULL, default_branch TEXT DEFAULT 'main', ai_assistant_type TEXT DEFAULT 'claude', - allow_env_keys INTEGER DEFAULT 0, commands TEXT DEFAULT '{}', created_at TEXT DEFAULT (datetime('now')), updated_at TEXT DEFAULT (datetime('now')) diff --git a/packages/core/src/db/codebases.test.ts b/packages/core/src/db/codebases.test.ts index ec3c249d14..26c269a085 100644 --- a/packages/core/src/db/codebases.test.ts +++ b/packages/core/src/db/codebases.test.ts @@ -22,7 +22,6 @@ import { findCodebaseByDefaultCwd, findCodebaseByName, updateCodebase, - updateCodebaseAllowEnvKeys, deleteCodebase, } from './codebases'; @@ -37,7 +36,6 @@ describe('codebases', () => { repository_url: 'https://github.com/user/repo', default_cwd: '/workspace/test-project', ai_assistant_type: 'claude', - allow_env_keys: false, commands: { plan: { path: '.claude/commands/plan.md', description: 'Plan feature' } }, created_at: new Date(), updated_at: new Date(), @@ -56,8 +54,8 @@ 
describe('codebases', () => { expect(result).toEqual(mockCodebase); expect(mockQuery).toHaveBeenCalledWith( - 'INSERT INTO remote_agent_codebases (name, repository_url, default_cwd, ai_assistant_type, allow_env_keys) VALUES ($1, $2, $3, $4, $5) RETURNING *', - ['test-project', 'https://github.com/user/repo', '/workspace/test-project', 'claude', false] + 'INSERT INTO remote_agent_codebases (name, repository_url, default_cwd, ai_assistant_type) VALUES ($1, $2, $3, $4) RETURNING *', + ['test-project', 'https://github.com/user/repo', '/workspace/test-project', 'claude'] ); }); @@ -75,8 +73,8 @@ describe('codebases', () => { expect(result).toEqual(codebaseWithoutOptional); expect(mockQuery).toHaveBeenCalledWith( - 'INSERT INTO remote_agent_codebases (name, repository_url, default_cwd, ai_assistant_type, allow_env_keys) VALUES ($1, $2, $3, $4, $5) RETURNING *', - ['test-project', null, '/workspace/test-project', 'claude', false] + 'INSERT INTO remote_agent_codebases (name, repository_url, default_cwd, ai_assistant_type) VALUES ($1, $2, $3, $4) RETURNING *', + ['test-project', null, '/workspace/test-project', 'claude'] ); }); @@ -299,7 +297,6 @@ describe('codebases', () => { name: 'test-repo', default_cwd: '/workspace/test-repo', ai_assistant_type: 'claude', - allow_env_keys: false, repository_url: null, commands: {}, created_at: new Date(), @@ -399,26 +396,6 @@ describe('codebases', () => { }); }); - describe('updateCodebaseAllowEnvKeys', () => { - test('flips the consent bit', async () => { - mockQuery.mockResolvedValueOnce(createQueryResult([], 1)); - - await updateCodebaseAllowEnvKeys('codebase-123', true); - - expect(mockQuery).toHaveBeenCalledWith( - 'UPDATE remote_agent_codebases SET allow_env_keys = $1, updated_at = NOW() WHERE id = $2', - [true, 'codebase-123'] - ); - }); - - test('throws when codebase not found', async () => { - mockQuery.mockResolvedValueOnce(createQueryResult([], 0)); - await expect(updateCodebaseAllowEnvKeys('missing', false)).rejects.toThrow( - 'Codebase missing not found' - ); - }); - }); - describe('deleteCodebase', () => { test('should unlink sessions, conversations, and delete codebase', async () => { // First call: unlink sessions diff --git a/packages/core/src/db/codebases.ts b/packages/core/src/db/codebases.ts index b9f45578b6..f3947fb6c1 100644 --- a/packages/core/src/db/codebases.ts +++ b/packages/core/src/db/codebases.ts @@ -17,13 +17,11 @@ export async function createCodebase(data: { repository_url?: string; default_cwd: string; ai_assistant_type?: string; - allow_env_keys?: boolean; }): Promise { const assistantType = data.ai_assistant_type ?? 'claude'; - const allowEnvKeys = data.allow_env_keys ?? false; const result = await pool.query( - 'INSERT INTO remote_agent_codebases (name, repository_url, default_cwd, ai_assistant_type, allow_env_keys) VALUES ($1, $2, $3, $4, $5) RETURNING *', - [data.name, data.repository_url ?? null, data.default_cwd, assistantType, allowEnvKeys] + 'INSERT INTO remote_agent_codebases (name, repository_url, default_cwd, ai_assistant_type) VALUES ($1, $2, $3, $4) RETURNING *', + [data.name, data.repository_url ?? null, data.default_cwd, assistantType] ); if (!result.rows[0]) { throw new Error('Failed to create codebase: INSERT succeeded but no row returned'); @@ -158,21 +156,6 @@ export async function updateCodebase( } } -/** - * Flip the `allow_env_keys` consent bit for an existing codebase. - * Throws when the codebase does not exist. 
- */ -export async function updateCodebaseAllowEnvKeys(id: string, allowEnvKeys: boolean): Promise { - const dialect = getDialect(); - const result = await pool.query( - `UPDATE remote_agent_codebases SET allow_env_keys = $1, updated_at = ${dialect.now()} WHERE id = $2`, - [allowEnvKeys, id] - ); - if ((result.rowCount ?? 0) === 0) { - throw new Error(`Codebase ${id} not found`); - } -} - export async function listCodebases(): Promise { const result = await pool.query( 'SELECT * FROM remote_agent_codebases ORDER BY name ASC' diff --git a/packages/core/src/handlers/clone.test.ts b/packages/core/src/handlers/clone.test.ts index 7f948cfb33..c913c1a78c 100644 --- a/packages/core/src/handlers/clone.test.ts +++ b/packages/core/src/handlers/clone.test.ts @@ -20,7 +20,6 @@ const mockCreateCodebase = mock(() => repository_url: 'https://github.com/owner/repo', default_cwd: '/home/test/.archon/workspaces/owner/repo/source', ai_assistant_type: 'claude', - allow_env_keys: false, commands: {}, created_at: new Date(), updated_at: new Date(), @@ -67,20 +66,6 @@ mock.module('../utils/commands', () => ({ findMarkdownFilesRecursive: mockFindMarkdownFilesRecursive, })); -// ── env-leak-scanner mock ─────────────────────────────────────────────────── -class MockEnvLeakError extends Error { - constructor(public report: unknown) { - super('Cannot add codebase — /test/path contains keys that will leak into AI subprocesses'); - this.name = 'EnvLeakError'; - } -} - -const mockScanPathForSensitiveKeys = mock(() => ({ path: '', findings: [] })); -mock.module('../utils/env-leak-scanner', () => ({ - scanPathForSensitiveKeys: mockScanPathForSensitiveKeys, - EnvLeakError: MockEnvLeakError, -})); - // ── Import module under test AFTER mocks are registered ──────────────────── import { cloneRepository, registerRepository } from './clone'; @@ -118,7 +103,6 @@ function clearMocks(): void { mockFindCodebaseByName.mockReset(); mockUpdateCodebase.mockReset(); mockFindMarkdownFilesRecursive.mockReset(); - mockScanPathForSensitiveKeys.mockReset(); mockLogger.info.mockClear(); mockLogger.debug.mockClear(); mockLogger.warn.mockClear(); @@ -132,7 +116,6 @@ function clearMocks(): void { mockFindCodebaseByName.mockResolvedValue(null); mockUpdateCodebase.mockResolvedValue(undefined); mockFindMarkdownFilesRecursive.mockResolvedValue([]); - mockScanPathForSensitiveKeys.mockReturnValue({ path: '', findings: [] }); } afterAll(() => { @@ -157,7 +140,6 @@ function makeCodebase( repository_url: 'https://github.com/owner/repo', default_cwd: '/home/test/.archon/workspaces/owner/repo/source', ai_assistant_type: 'claude', - allow_env_keys: false, commands: {}, created_at: new Date(), updated_at: new Date(), @@ -948,33 +930,4 @@ describe('RegisterResult shape', () => { expect(result.alreadyExisted).toBe(true); expect(result.commandCount).toBe(0); }); - - describe('env leak gate', () => { - test('throws EnvLeakError when scanner finds sensitive keys and allowEnvKeys is false', async () => { - mockScanPathForSensitiveKeys.mockReturnValueOnce({ - path: '/home/test/.archon/workspaces/owner/repo/source', - findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }], - }); - - await expect(cloneRepository('https://github.com/owner/repo')).rejects.toThrow( - 'Cannot add codebase' - ); - }); - - test('does not throw when allowEnvKeys is true, even with scanner findings present', async () => { - mockCreateCodebase.mockResolvedValueOnce(makeCodebase() as ReturnType); - // Scanner is still called for the audit-log payload (files/keys), but the - // gate must 
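The update helpers in this file share one guard worth noting: an UPDATE that matches zero rows throws instead of silently succeeding, so a missing id surfaces immediately. A generic sketch of the shape (pool and table names are stand-ins, and callers pass constants, never user input):

```typescript
interface QueryResult { rowCount: number | null }
interface Pool { query(sql: string, params: unknown[]): Promise<QueryResult> }

async function updateOrThrow(
  pool: Pool,
  table: string,   // constant, not user input
  id: string,
  column: string,  // constant, not user input
  value: unknown
): Promise<void> {
  const result = await pool.query(
    `UPDATE ${table} SET ${column} = $1, updated_at = NOW() WHERE id = $2`,
    [value, id]
  );
  // rowCount of 0 means the id did not match any row: fail loudly
  if ((result.rowCount ?? 0) === 0) {
    throw new Error(`${table} row ${id} not found`);
  }
}
```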
NOT throw — the per-call grant is the bypass. - mockScanPathForSensitiveKeys.mockReturnValueOnce({ - path: '/home/test/.archon/workspaces/owner/repo/source', - findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }], - }); - - const result = await cloneRepository('https://github.com/owner/repo', true); - - expect(result.codebaseId).toBe('codebase-uuid-1'); - // Scanner is called once — for the audit log, not as a gate - expect(mockScanPathForSensitiveKeys).toHaveBeenCalledTimes(1); - }); - }); }); diff --git a/packages/core/src/handlers/clone.ts b/packages/core/src/handlers/clone.ts index 3dc96f499c..fe7e4d9570 100644 --- a/packages/core/src/handlers/clone.ts +++ b/packages/core/src/handlers/clone.ts @@ -16,12 +16,6 @@ import { parseOwnerRepo, } from '@archon/paths'; import { findMarkdownFilesRecursive } from '../utils/commands'; -import { - scanPathForSensitiveKeys, - EnvLeakError, - type LeakErrorContext, -} from '../utils/env-leak-scanner'; -import { loadConfig } from '../config/config-loader'; import { createLogger } from '@archon/paths'; /** Lazy-initialized logger (deferred so test mocks can intercept createLogger) */ @@ -46,53 +40,8 @@ export interface RegisterResult { async function registerRepoAtPath( targetPath: string, name: string, - repositoryUrl: string | null, - allowEnvKeys = false, - context: LeakErrorContext = 'register-ui' + repositoryUrl: string | null ): Promise { - // Scan for sensitive keys in auto-loaded .env files before registering. - // Two bypass paths exist (in order of precedence): - // 1. Per-call `allowEnvKeys=true` (Web UI checkbox or CLI --allow-env-keys) - // 2. Config-level `allow_target_repo_keys: true` (global YAML) - // When the per-call bypass is used we still emit an audit-log entry so the - // grant has a permanent breadcrumb (parity with the PATCH route's - // `env_leak_consent_granted` log). - if (!allowEnvKeys) { - const merged = await loadConfig(targetPath); - if (!merged.allowTargetRepoKeys) { - const report = scanPathForSensitiveKeys(targetPath); - if (report.findings.length > 0) { - throw new EnvLeakError(report, context); - } - } - } else { - // Per-call grant — emit audit log mirroring the PATCH route shape so the - // CLI/UI add-with-consent paths leave the same breadcrumbs. - let files: string[] = []; - let keys: string[] = []; - let scanStatus: 'ok' | 'skipped' = 'ok'; - try { - const report = scanPathForSensitiveKeys(targetPath); - files = report.findings.map(f => f.file); - keys = Array.from(new Set(report.findings.flatMap(f => f.keys))); - } catch (scanErr) { - scanStatus = 'skipped'; - getLog().warn({ err: scanErr, path: targetPath }, 'env_leak_consent_scan_skipped'); - } - const actor = context === 'register-cli' ? 'user-cli' : 'user-ui'; - getLog().warn( - { - name, - path: targetPath, - files, - keys, - scanStatus, - actor, - }, - 'env_leak_consent_granted' - ); - } - // Auto-detect assistant type based on folder structure let suggestedAssistant = 'claude'; const codexFolder = join(targetPath, '.codex'); @@ -173,7 +122,6 @@ async function registerRepoAtPath( repository_url: repositoryUrl ?? undefined, default_cwd: targetPath, ai_assistant_type: suggestedAssistant, - allow_env_keys: allowEnvKeys, }); // Auto-load commands if found @@ -242,15 +190,11 @@ function normalizeRepoUrl(rawUrl: string): { * Local paths (starting with /, ~, or .) are delegated to registerRepository * to avoid wrong owner/repo naming. See #383 for broader rethink. 
*/ -export async function cloneRepository( - repoUrl: string, - allowEnvKeys?: boolean, - context: LeakErrorContext = 'register-ui' -): Promise { +export async function cloneRepository(repoUrl: string): Promise { // Local paths should be registered (symlink), not cloned (copied) if (repoUrl.startsWith('/') || repoUrl.startsWith('~') || repoUrl.startsWith('.')) { const resolvedPath = repoUrl.startsWith('~') ? expandTilde(repoUrl) : resolve(repoUrl); - return registerRepository(resolvedPath, allowEnvKeys, context); + return registerRepository(resolvedPath); } const { workingUrl, ownerName, repoName, targetPath } = normalizeRepoUrl(repoUrl); @@ -331,13 +275,7 @@ export async function cloneRepository( await execFileAsync('git', ['config', '--global', '--add', 'safe.directory', targetPath]); getLog().debug({ path: targetPath }, 'safe_directory_added'); - const result = await registerRepoAtPath( - targetPath, - `${ownerName}/${repoName}`, - workingUrl, - allowEnvKeys, - context - ); + const result = await registerRepoAtPath(targetPath, `${ownerName}/${repoName}`, workingUrl); getLog().info({ url: workingUrl, targetPath }, 'clone_completed'); return result; } @@ -345,11 +283,7 @@ export async function cloneRepository( /** * Register an existing local repository in the database (no git clone). */ -export async function registerRepository( - localPath: string, - allowEnvKeys?: boolean, - context: LeakErrorContext = 'register-ui' -): Promise { +export async function registerRepository(localPath: string): Promise { // Validate path exists and is a git repo try { await execFileAsync('git', ['-C', localPath, 'rev-parse', '--git-dir']); @@ -415,5 +349,5 @@ export async function registerRepository( ); // default_cwd is the real local path (not the symlink) - return registerRepoAtPath(localPath, name, remoteUrl, allowEnvKeys, context); + return registerRepoAtPath(localPath, name, remoteUrl); } diff --git a/packages/core/src/handlers/command-handler.test.ts b/packages/core/src/handlers/command-handler.test.ts index 4f29e7247b..de6516cb98 100644 --- a/packages/core/src/handlers/command-handler.test.ts +++ b/packages/core/src/handlers/command-handler.test.ts @@ -511,7 +511,6 @@ describe('CommandHandler', () => { repository_url: 'https://github.com/user/my-repo', default_cwd: '/workspace/my-repo', ai_assistant_type: 'claude', - allow_env_keys: false, commands: {}, created_at: new Date(), updated_at: new Date(), @@ -567,7 +566,6 @@ describe('CommandHandler', () => { repository_url: 'https://github.com/owner/repo', default_cwd: '/workspace/repo', ai_assistant_type: 'claude', - allow_env_keys: false, commands: {}, created_at: new Date(), updated_at: new Date(), @@ -606,7 +604,6 @@ describe('CommandHandler', () => { repository_url: 'https://github.com/owner/orphaned-repo', default_cwd: '/workspace/orphaned-repo', ai_assistant_type: 'claude', - allow_env_keys: false, commands: {}, created_at: new Date(), updated_at: new Date(), @@ -721,7 +718,6 @@ describe('CommandHandler', () => { repository_url: 'https://github.com/user/my-repo', default_cwd: '/workspace/my-repo', ai_assistant_type: 'claude', - allow_env_keys: false, commands: {}, created_at: new Date(), updated_at: new Date(), diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index e212eb10c9..8c5e928a98 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -24,8 +24,6 @@ export { type IWebPlatformAdapter, isWebAdapter, type MessageMetadata, - type MessageChunk, - type IAssistantClient, } from './types'; // 
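The first branch of cloneRepository encodes a simple routing rule: inputs that look like local paths are registered in place rather than cloned. The rule in isolation, as a sketch:

```typescript
import { resolve } from 'path';
import { homedir } from 'os';

function expandTilde(p: string): string {
  return p.startsWith('~') ? p.replace(/^~/, homedir()) : p;
}

// Local paths start with '/', '~', or '.'; everything else is treated as a URL.
function isLocalPath(input: string): boolean {
  return input.startsWith('/') || input.startsWith('~') || input.startsWith('.');
}

function routeRepoInput(
  input: string
): { kind: 'register'; path: string } | { kind: 'clone'; url: string } {
  if (isLocalPath(input)) {
    // Register (symlink) in place to avoid copying and wrong owner/repo naming
    return { kind: 'register', path: resolve(expandTilde(input)) };
  }
  return { kind: 'clone', url: input };
}
```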
============================================================================= @@ -52,13 +50,6 @@ export * as messageDb from './db/messages'; // Re-export SessionNotFoundError for error handling export { SessionNotFoundError } from './db/sessions'; -// ============================================================================= -// AI Clients -// ============================================================================= -export { ClaudeClient } from './clients/claude'; -export { CodexClient } from './clients/codex'; -export { getAssistantClient } from './clients/factory'; - // ============================================================================= // Workflows // ============================================================================= @@ -145,15 +136,6 @@ export { toError } from './utils/error'; // Credential sanitization export { sanitizeCredentials, sanitizeError } from './utils/credential-sanitizer'; -// Env leak scanner -export { - EnvLeakError, - scanPathForSensitiveKeys, - formatLeakError, - type LeakReport, - type LeakErrorContext, -} from './utils/env-leak-scanner'; - // GitHub GraphQL export { getLinkedIssueNumbers } from './utils/github-graphql'; diff --git a/packages/core/src/orchestrator/orchestrator-agent.test.ts b/packages/core/src/orchestrator/orchestrator-agent.test.ts index 70080cc01a..dfde310bbd 100644 --- a/packages/core/src/orchestrator/orchestrator-agent.test.ts +++ b/packages/core/src/orchestrator/orchestrator-agent.test.ts @@ -93,10 +93,11 @@ mock.module('@archon/workflows/executor', () => ({ executeWorkflow: mockExecuteWorkflow, })); -mock.module('../clients/factory', () => ({ - getAssistantClient: mock(() => ({ +mock.module('@archon/providers', () => ({ + getAgentProvider: mock(() => ({ sendQuery: mock(async function* () {}), getType: mock(() => 'claude'), + getCapabilities: mock(() => ({})), })), })); @@ -181,7 +182,6 @@ function makeCodebase(name: string, id = `id-${name}`): Codebase { repository_url: null, default_cwd: `/repos/${name}`, ai_assistant_type: 'claude', - allow_env_keys: false, commands: {}, created_at: new Date(), updated_at: new Date(), @@ -805,7 +805,6 @@ function makeCodebaseForSync() { repository_url: 'https://github.com/test/repo', default_cwd: '/repos/test-repo', ai_assistant_type: 'claude', - allow_env_keys: false, commands: {}, created_at: new Date(), updated_at: new Date(), @@ -971,7 +970,6 @@ describe('workflow dispatch routing — interactive flag', () => { repository_url: null, default_cwd: '/repos/test-repo', ai_assistant_type: 'claude' as const, - allow_env_keys: false, commands: {}, created_at: new Date(), updated_at: new Date(), @@ -1072,7 +1070,6 @@ describe('natural-language approval routing', () => { repository_url: null, default_cwd: '/repos/test-repo', ai_assistant_type: 'claude' as const, - allow_env_keys: false, commands: {}, created_at: new Date(), updated_at: new Date(), diff --git a/packages/core/src/orchestrator/orchestrator-agent.ts b/packages/core/src/orchestrator/orchestrator-agent.ts index 97d989f47c..ca86f79a68 100644 --- a/packages/core/src/orchestrator/orchestrator-agent.ts +++ b/packages/core/src/orchestrator/orchestrator-agent.ts @@ -13,9 +13,9 @@ import type { HandleMessageContext, Conversation, Codebase, - AssistantRequestOptions, AttachedFile, } from '../types'; +import type { SendQueryOptions } from '@archon/providers/types'; import { ConversationNotFoundError } from '../types'; import * as db from '../db/conversations'; import * as codebaseDb from '../db/codebases'; @@ -24,7 +24,7 @@ import * as 
commandHandler from '../handlers/command-handler'; import { formatToolCall } from '@archon/workflows/utils/tool-formatter'; import { classifyAndFormatError } from '../utils/error-formatter'; import { toError } from '../utils/error'; -import { getAssistantClient } from '../clients/factory'; +import { getAgentProvider } from '@archon/providers'; import { getArchonHome, getArchonWorkspacesPath } from '@archon/paths'; import { syncArchonToWorktree } from '../utils/worktree-sync'; import { syncWorkspace, toRepoPath } from '@archon/git'; @@ -751,17 +751,16 @@ export async function handleMessage( }); } - // 5. Send to AI client - const aiClient = getAssistantClient(conversation.ai_assistant_type); + // 5. Send to AI provider + const aiClient = getAgentProvider(conversation.ai_assistant_type); getLog().debug({ assistantType: conversation.ai_assistant_type }, 'sending_to_ai'); // Reuse the config already loaded during workflow discovery (avoids a second disk read). // Fall back to loadConfig only when no codebase is scoped (discoveredConfig is undefined). const config = discoveredConfig ?? (await loadConfig()); - const requestOptions: AssistantRequestOptions = { - ...(conversation.ai_assistant_type === 'claude' && config.assistants.claude.settingSources - ? { settingSources: config.assistants.claude.settingSources } - : {}), + const providerKey = conversation.ai_assistant_type as 'claude' | 'codex'; + const requestOptions: SendQueryOptions = { + assistantConfig: (config.assistants[providerKey] ?? {}) as Record, }; const mode = platform.getStreamingMode(); @@ -824,14 +823,14 @@ async function handleStreamMode( originalMessage: string, codebases: readonly Codebase[], workflows: readonly WorkflowDefinition[], - aiClient: ReturnType, + aiClient: ReturnType, fullPrompt: string, cwd: string, session: { id: string; assistant_session_id: string | null }, isolationHints: HandleMessageContext['isolationHints'], conversation: Conversation, issueContext?: string, - requestOptions?: AssistantRequestOptions + requestOptions?: SendQueryOptions ): Promise { const allMessages: string[] = []; let newSessionId: string | undefined; @@ -940,14 +939,14 @@ async function handleBatchMode( originalMessage: string, codebases: readonly Codebase[], workflows: readonly WorkflowDefinition[], - aiClient: ReturnType, + aiClient: ReturnType, fullPrompt: string, cwd: string, session: { id: string; assistant_session_id: string | null }, isolationHints: HandleMessageContext['isolationHints'], conversation: Conversation, issueContext?: string, - requestOptions?: AssistantRequestOptions + requestOptions?: SendQueryOptions ): Promise { const allChunks: { type: string; content: string }[] = []; const assistantMessages: string[] = []; diff --git a/packages/core/src/orchestrator/orchestrator-isolation.test.ts b/packages/core/src/orchestrator/orchestrator-isolation.test.ts index f46930f02c..6bcbedb697 100644 --- a/packages/core/src/orchestrator/orchestrator-isolation.test.ts +++ b/packages/core/src/orchestrator/orchestrator-isolation.test.ts @@ -50,14 +50,14 @@ mock.module('../handlers/command-handler', () => ({ })), })); -mock.module('../clients/factory', () => ({ - getAssistantClient: mock(() => null), +mock.module('@archon/providers', () => ({ + getAgentProvider: mock(() => null), })); mock.module('../workflows/store-adapter', () => ({ createWorkflowDeps: mock(() => ({ store: {}, - getAssistantClient: () => ({}), + getAgentProvider: () => ({}), loadConfig: async () => ({}), })), })); @@ -176,7 +176,6 @@ function makeCodebase(overrides?: 
Partial): Codebase { id: 'cb-1', name: 'test-repo', default_cwd: '/workspace/test-repo', - allow_env_keys: false, commands: {}, created_at: new Date(), updated_at: new Date(), diff --git a/packages/core/src/orchestrator/orchestrator.test.ts b/packages/core/src/orchestrator/orchestrator.test.ts index d5e81038da..de4618ed15 100644 --- a/packages/core/src/orchestrator/orchestrator.test.ts +++ b/packages/core/src/orchestrator/orchestrator.test.ts @@ -79,11 +79,11 @@ mock.module('../handlers/command-handler', () => ({ parseCommand: mockParseCommand, })); -// AI client mock -const mockGetAssistantClient = mock(() => null); +// AI provider mock +const mockGetAgentProvider = mock(() => null); -mock.module('../clients/factory', () => ({ - getAssistantClient: mockGetAssistantClient, +mock.module('@archon/providers', () => ({ + getAgentProvider: mockGetAgentProvider, })); // Workflow mocks @@ -96,7 +96,7 @@ const mockFindWorkflow = mock((name: string, workflows: readonly WorkflowDefinit mock.module('../workflows/store-adapter', () => ({ createWorkflowDeps: mock(() => ({ store: {}, - getAssistantClient: () => ({}), + getAgentProvider: () => ({}), loadConfig: async () => ({}), })), })); @@ -216,7 +216,6 @@ const mockCodebase: Codebase = { repository_url: 'https://github.com/user/repo', default_cwd: '/workspace/test-project', ai_assistant_type: 'claude', - allow_env_keys: false, commands: {}, created_at: new Date(), updated_at: new Date(), @@ -274,7 +273,7 @@ function clearAllMocks(): void { mockTransitionSession.mockClear(); mockHandleCommand.mockClear(); mockParseCommand.mockClear(); - mockGetAssistantClient.mockClear(); + mockGetAgentProvider.mockClear(); mockDiscoverWorkflows.mockClear(); mockExecuteWorkflow.mockClear(); mockFindWorkflow.mockClear(); @@ -457,7 +456,7 @@ describe('orchestrator-agent handleMessage', () => { mockGetActiveSession.mockResolvedValue(null); mockCreateSession.mockResolvedValue(mockSession); mockTransitionSession.mockResolvedValue(mockSession); - mockGetAssistantClient.mockReturnValue(mockClient); + mockGetAgentProvider.mockReturnValue(mockClient); mockDiscoverWorkflows.mockResolvedValue({ workflows: [], errors: [] }); mockParseCommand.mockImplementation((message: string) => { const parts = message.split(/\s+/); @@ -479,7 +478,7 @@ describe('orchestrator-agent handleMessage', () => { expect(mockHandleCommand).toHaveBeenCalled(); expect(platform.sendMessage).toHaveBeenCalledWith('chat-456', 'Status info'); - expect(mockGetAssistantClient).not.toHaveBeenCalled(); + expect(mockGetAgentProvider).not.toHaveBeenCalled(); }); test('delegates /help to command handler', async () => { @@ -676,7 +675,7 @@ describe('orchestrator-agent handleMessage', () => { await handleMessage(platform, 'chat-456', 'hello'); expect(mockTransitionSession).not.toHaveBeenCalled(); - // Should pass existing assistant_session_id to AI client + // Should pass existing assistant_session_id to AI provider expect(mockClient.sendQuery).toHaveBeenCalledWith( expect.any(String), expect.any(String), @@ -699,8 +698,8 @@ describe('orchestrator-agent handleMessage', () => { // ─── settingSources forwarding ──────────────────────────────────────── - describe('settingSources forwarding', () => { - test('passes settingSources from config to AI client for claude', async () => { + describe('assistantConfig forwarding', () => { + test('passes assistantConfig with settingSources for claude', async () => { mockLoadConfig.mockResolvedValueOnce({ botName: 'Archon', assistant: 'claude', @@ -725,11 +724,13 @@ 
describe('orchestrator-agent handleMessage', () => { expect.any(String), expect.any(String), expect.anything(), - expect.objectContaining({ settingSources: ['project', 'user'] }) + expect.objectContaining({ + assistantConfig: expect.objectContaining({ settingSources: ['project', 'user'] }), + }) ); }); - test('does not pass settingSources for non-claude assistant', async () => { + test('passes codex assistantConfig for codex assistant', async () => { const codexConversation: Conversation = { ...mockConversation, ai_assistant_type: 'codex', @@ -754,15 +755,16 @@ describe('orchestrator-agent handleMessage', () => { yield { type: 'result', sessionId: 'codex-session' }; }), }; - mockGetAssistantClient.mockReturnValueOnce(codexClient); + mockGetAgentProvider.mockReturnValueOnce(codexClient); await handleMessage(platform, 'chat-456', 'hello'); - // settingSources should NOT be in requestOptions since assistant type is codex + // Should pass codex assistantConfig, not claude's const callArgs = codexClient.sendQuery.mock.calls[0]; const requestOptions = callArgs?.[3] as Record | undefined; expect(requestOptions).toBeDefined(); expect(requestOptions).not.toHaveProperty('settingSources'); + expect(requestOptions?.assistantConfig).toBeDefined(); }); }); diff --git a/packages/core/src/services/title-generator.test.ts b/packages/core/src/services/title-generator.test.ts index a53499a543..0d85e43c78 100644 --- a/packages/core/src/services/title-generator.test.ts +++ b/packages/core/src/services/title-generator.test.ts @@ -31,13 +31,13 @@ const mockSendQuery = mock(async function* (): AsyncGenerator { ) => AsyncGenerator >; -const mockGetAssistantClient = mock(() => ({ +const mockGetAgentProvider = mock(() => ({ sendQuery: mockSendQuery, getType: () => 'claude', })); -mock.module('../clients/factory', () => ({ - getAssistantClient: mockGetAssistantClient, +mock.module('@archon/providers', () => ({ + getAgentProvider: mockGetAgentProvider, })); // ─── Import module under test (AFTER all mocks) ───────────────────────────── @@ -50,7 +50,7 @@ describe('title-generator', () => { beforeEach(() => { mockUpdateConversationTitle.mockClear(); mockSendQuery.mockClear(); - mockGetAssistantClient.mockClear(); + mockGetAgentProvider.mockClear(); // Reset to default happy-path behavior mockSendQuery.mockImplementation(async function* (): AsyncGenerator { @@ -58,7 +58,7 @@ describe('title-generator', () => { yield { type: 'result' }; }); - mockGetAssistantClient.mockImplementation(() => ({ + mockGetAgentProvider.mockImplementation(() => ({ sendQuery: mockSendQuery, getType: () => 'claude', })); @@ -167,11 +167,14 @@ describe('title-generator', () => { expect(optionsArg.model).toBeUndefined(); }); - test('passes tools: [] to disable tool access', async () => { + test('passes nodeConfig with allowed_tools: [] to disable tool access', async () => { await generateAndSetTitle('conv-11', 'Some message', 'claude', '/tmp'); - const optionsArg = mockSendQuery.mock.calls[0][3] as { model?: string; tools?: string[] }; - expect(optionsArg.tools).toEqual([]); + const optionsArg = mockSendQuery.mock.calls[0][3] as { + model?: string; + nodeConfig?: { allowed_tools?: string[] }; + }; + expect(optionsArg.nodeConfig?.allowed_tools).toEqual([]); }); test('handles double failure gracefully (AI fails + fallback DB write fails)', async () => { diff --git a/packages/core/src/services/title-generator.ts b/packages/core/src/services/title-generator.ts index 7bfb8f9179..fdb9cdaab8 100644 --- a/packages/core/src/services/title-generator.ts +++ 
b/packages/core/src/services/title-generator.ts @@ -5,7 +5,7 @@ * Optionally uses TITLE_GENERATION_MODEL env var for a cheaper/faster model. * Designed to be fire-and-forget — never throws, all errors logged internally. */ -import { getAssistantClient } from '../clients/factory'; +import { getAgentProvider } from '@archon/providers'; import * as conversationDb from '../db/conversations'; import { createLogger } from '@archon/paths'; @@ -47,12 +47,12 @@ export async function generateAndSetTitle( const titlePrompt = buildTitlePrompt(userMessage, workflowName); // Use the configured AI client with no tools (pure text generation) - const client = getAssistantClient(assistantType); + const client = getAgentProvider(assistantType); let generatedTitle = ''; for await (const chunk of client.sendQuery(titlePrompt, cwd, undefined, { model: titleModel, - tools: [], // No tool access — pure text generation + nodeConfig: { allowed_tools: [] }, // No tool access — pure text generation })) { if (chunk.type === 'assistant') { generatedTitle += chunk.content; diff --git a/packages/core/src/test/mocks/streaming.ts b/packages/core/src/test/mocks/streaming.ts deleted file mode 100644 index dd7cc76906..0000000000 --- a/packages/core/src/test/mocks/streaming.ts +++ /dev/null @@ -1,35 +0,0 @@ -import { mock, type Mock } from 'bun:test'; - -export interface StreamEvent { - type: 'text' | 'tool' | 'error' | 'complete'; - content?: string; - toolName?: string; - toolInput?: Record; - error?: Error; -} - -export async function* createMockStream(events: StreamEvent[]): AsyncGenerator { - for (const event of events) { - yield event; - } -} - -export const createMockAssistantClient = ( - events: StreamEvent[] = [] -): { - sendMessage: Mock<() => AsyncGenerator>; - getType: Mock<() => string>; - resumeSession: Mock<() => AsyncGenerator>; -} => ({ - sendMessage: mock(async function* () { - for (const event of events) { - yield event; - } - }), - getType: mock(() => 'claude'), - resumeSession: mock(async function* () { - for (const event of events) { - yield event; - } - }), -}); diff --git a/packages/core/src/types/index.ts b/packages/core/src/types/index.ts index 549891f35e..74966e3b2c 100644 --- a/packages/core/src/types/index.ts +++ b/packages/core/src/types/index.ts @@ -3,9 +3,11 @@ */ import type { TransitionTrigger } from '../state/session-transitions'; import type { WorkflowDefinition } from '@archon/workflows/schemas/workflow'; -import type { McpServerConfig, AgentDefinition } from '@anthropic-ai/claude-agent-sdk'; import { z } from 'zod'; +// MessageChunk imported for use in IPlatformAdapter/IWebPlatformAdapter below +import type { MessageChunk } from '@archon/providers/types'; + /** * Custom error for when a conversation is not found during update operations * Allows callers to programmatically handle this specific error case @@ -57,7 +59,6 @@ export interface Codebase { repository_url: string | null; default_cwd: string; ai_assistant_type: string; - allow_env_keys: boolean; commands: Record; created_at: Date; updated_at: Date; @@ -182,53 +183,7 @@ export function isWebAdapter(adapter: IPlatformAdapter): adapter is IWebPlatform return adapter.getPlatformType() === 'web'; } -/** - * Message chunk from AI assistant. - * Discriminated union with per-type required fields for type safety. 
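The title generator above illustrates the provider-facing call shape after this refactor: allowed_tools: [] inside nodeConfig disables tools entirely, which is distinct from omitting the field (that keeps the default tool set). A trimmed sketch with simplified provider and chunk types; the prompt text is illustrative:

```typescript
interface TextChunk { type: 'assistant' | 'result'; content?: string }

interface Provider {
  sendQuery(
    prompt: string,
    cwd: string,
    resumeSessionId?: string,
    options?: { model?: string; nodeConfig?: { allowed_tools?: string[] } }
  ): AsyncGenerator<TextChunk>;
}

async function generateTitle(provider: Provider, message: string, cwd: string): Promise<string> {
  let title = '';
  const prompt = `Summarize this request as a short title: ${message}`;
  for await (const chunk of provider.sendQuery(prompt, cwd, undefined, {
    nodeConfig: { allowed_tools: [] }, // no tools: pure text generation
  })) {
    if (chunk.type === 'assistant' && chunk.content) title += chunk.content;
  }
  return title.trim();
}
```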
- */ -export interface TokenUsage { - input: number; - output: number; - total?: number; - cost?: number; -} - -export type MessageChunk = - | { type: 'assistant'; content: string } - | { type: 'system'; content: string } - | { type: 'thinking'; content: string } - | { - type: 'result'; - sessionId?: string; - tokens?: TokenUsage; - structuredOutput?: unknown; - isError?: boolean; - errorSubtype?: string; - cost?: number; - stopReason?: string; - numTurns?: number; - modelUsage?: Record; - } - | { type: 'rate_limit'; rateLimitInfo: Record } - | { - type: 'tool'; - toolName: string; - toolInput?: Record; - /** Stable per-call ID from the underlying SDK (e.g. Claude `tool_use_id`). - * When present, the platform adapter uses it directly instead of generating - * one — guarantees `tool_call`/`tool_result` pair correctly even when - * multiple tools with the same name run concurrently. */ - toolCallId?: string; - } - | { - type: 'tool_result'; - toolName: string; - toolOutput: string; - /** Matching ID for the originating `tool` chunk. See `tool` variant above. */ - toolCallId?: string; - } - | { type: 'workflow_dispatch'; workerConversationId: string; workflowName: string }; - +// Re-export workflow schema types for config-types.ts compatibility import type { ModelReasoningEffort, WebSearchMode } from '@archon/workflows/schemas/workflow'; export type { ModelReasoningEffort, WebSearchMode }; import type { @@ -237,147 +192,3 @@ import type { SandboxSettings, } from '@archon/workflows/schemas/dag-node'; export type { EffortLevel, ThinkingConfig, SandboxSettings }; - -export interface AssistantRequestOptions { - model?: string; - modelReasoningEffort?: ModelReasoningEffort; - webSearchMode?: WebSearchMode; - additionalDirectories?: string[]; - /** - * Restrict the set of built-in tools available to the assistant. - * - `[]` — disable all built-in tools (Claude SDK only; Codex ignores this field) - * - `string[]` — restrict to the named tools - * Omit entirely to use the assistant's default tool set. - * Note: `undefined` (omitted) and `[]` have different semantics — do not confuse them. - */ - tools?: string[]; - /** - * Remove specific tools from the assistant's available set. - * Applied after `tools` whitelist (if both are set, denied tools are removed from the whitelist result). - * Claude SDK only — Codex ignores this field. - */ - disallowedTools?: string[]; - /** - * Structured output schema. - * Claude: passed as outputFormat option to Claude Agent SDK. - * Codex: passed as outputSchema in TurnOptions to Codex SDK (v0.116.0+). - * Shape: { type: 'json_schema', schema: } - */ - outputFormat?: { type: 'json_schema'; schema: Record }; - /** SDK hooks configuration. Passed directly to Claude Agent SDK Options.hooks. Claude only — ignored for Codex. */ - hooks?: Partial< - Record< - string, - { - matcher?: string; - hooks: (( - input: unknown, - toolUseID: string | undefined, - options: { signal: AbortSignal } - ) => Promise)[]; - timeout?: number; - }[] - > - >; - /** - * MCP server configuration passed to Claude Agent SDK Options.mcpServers. - * Uses SDK type directly — @archon/core already depends on the SDK. - * Claude only — Codex ignores this. - */ - mcpServers?: Record; - /** Tools to auto-allow without permission prompts (e.g., MCP tool wildcards). - * Passed to Claude Agent SDK Options.allowedTools. Claude only. */ - allowedTools?: string[]; - /** Custom subagent definitions passed to Claude Agent SDK Options.agents. - * Used for per-node skill scoping via AgentDefinition wrapping. 
Claude only. */ - agents?: Record; - /** Name of agent definition for the main thread. References a key in `agents`. Claude only. */ - agent?: string; - /** - * Abort signal for cancelling in-flight AI requests. - * When aborted, the AI client should terminate the subprocess/query gracefully. - */ - abortSignal?: AbortSignal; - /** - * When false (default), skips writing session transcript to ~/.claude/projects/. - * Claude Agent SDK v0.2.74+. The SDK default is true, but Archon overrides it to false - * to avoid disk pollution. Set to true only when session persistence is explicitly needed. - */ - persistSession?: boolean; - /** - * When true, the SDK copies the prior session's history into a new session file - * before appending, leaving the original untouched. Use with `resume` to safely - * preserve conversation context without risk of corrupting the source session. - * Claude only — ignored for Codex. - */ - forkSession?: boolean; - /** - * Claude Code settingSources — controls which CLAUDE.md files are loaded. - * Passed directly to Claude Agent SDK Options.settingSources. - * Claude only — ignored for Codex. - * @default ['project'] - */ - settingSources?: ('project' | 'user')[]; - /** - * Additional env vars merged into Claude subprocess environment after buildSubprocessEnv(). - * Final env: { ...buildSubprocessEnv(), ...env } (auth tokens conditionally filtered). - * Claude only — Codex SDK does not support env injection. - */ - env?: Record; - /** - * Controls reasoning depth for Claude. Claude only — ignored for Codex. - */ - effort?: EffortLevel; - /** - * Controls Claude's thinking/reasoning behavior. Claude only — ignored for Codex. - */ - thinking?: ThinkingConfig; - /** - * Maximum USD cost budget. SDK returns error_max_budget_usd result if exceeded. - * Claude only — ignored for Codex. - */ - maxBudgetUsd?: number; - /** - * Per-node system prompt string. Overrides the default claude_code preset. - * Claude only — ignored for Codex. - */ - systemPrompt?: string; - /** - * Fallback model if primary fails. Claude only — ignored for Codex. - */ - fallbackModel?: string; - /** - * SDK beta feature flags. Claude only — ignored for Codex. - */ - betas?: string[]; - /** - * OS-level sandbox settings passed to Claude subprocess. - * Claude only — ignored for Codex. - */ - sandbox?: SandboxSettings; -} - -/** - * Generic AI assistant client interface - * Allows supporting multiple AI assistants (Claude, Codex, etc.) 
- */ -export interface IAssistantClient { - /** - * Send a message and get streaming response - * @param prompt - User message or prompt - * @param cwd - Working directory for the assistant - * @param resumeSessionId - Optional session ID to resume - * @param options - Optional request options (model, provider-specific settings) - */ - sendQuery( - prompt: string, - cwd: string, - resumeSessionId?: string, - options?: AssistantRequestOptions - ): AsyncGenerator; - - /** - * Get the assistant type identifier - */ - getType(): string; -} diff --git a/packages/core/src/utils/env-leak-scanner.test.ts b/packages/core/src/utils/env-leak-scanner.test.ts deleted file mode 100644 index 4d436bbc24..0000000000 --- a/packages/core/src/utils/env-leak-scanner.test.ts +++ /dev/null @@ -1,133 +0,0 @@ -import { describe, it, expect, beforeEach, afterEach } from 'bun:test'; -import { writeFileSync, mkdirSync, rmSync } from 'fs'; -import { join } from 'path'; -import { - scanPathForSensitiveKeys, - EnvLeakError, - formatLeakError, - SENSITIVE_KEYS, - AUTOLOADED_FILES, -} from './env-leak-scanner'; - -describe('scanPathForSensitiveKeys', () => { - const tmpDir = '/tmp/archon-test-env-scan'; - - beforeEach(() => { - mkdirSync(tmpDir, { recursive: true }); - }); - afterEach(() => { - rmSync(tmpDir, { recursive: true, force: true }); - }); - - it('returns empty findings for clean directory', () => { - const report = scanPathForSensitiveKeys(tmpDir); - expect(report.findings).toHaveLength(0); - }); - - it('returns empty findings for non-existent directory', () => { - const report = scanPathForSensitiveKeys('/tmp/archon-test-nonexistent-dir'); - expect(report.findings).toHaveLength(0); - }); - - // Each sensitive key × each auto-loaded filename - for (const key of SENSITIVE_KEYS) { - for (const filename of AUTOLOADED_FILES) { - it(`detects ${key} in ${filename}`, () => { - writeFileSync(join(tmpDir, filename), `${key}=sk-test-value\nOTHER=safe\n`); - const report = scanPathForSensitiveKeys(tmpDir); - expect(report.findings).toHaveLength(1); - expect(report.findings[0].file).toBe(filename); - expect(report.findings[0].keys).toContain(key); - // Clean up for next iteration - rmSync(join(tmpDir, filename)); - }); - } - } - - it('ignores commented-out keys', () => { - writeFileSync(join(tmpDir, '.env'), '# ANTHROPIC_API_KEY=value\n'); - const report = scanPathForSensitiveKeys(tmpDir); - expect(report.findings).toHaveLength(0); - }); - - it('ignores lines without =', () => { - writeFileSync(join(tmpDir, '.env'), 'ANTHROPIC_API_KEY\n'); - const report = scanPathForSensitiveKeys(tmpDir); - expect(report.findings).toHaveLength(0); - }); - - it('reports multiple files with findings', () => { - writeFileSync(join(tmpDir, '.env'), 'ANTHROPIC_API_KEY=sk-1\n'); - writeFileSync(join(tmpDir, '.env.local'), 'OPENAI_API_KEY=sk-2\n'); - const report = scanPathForSensitiveKeys(tmpDir); - expect(report.findings).toHaveLength(2); - }); - - it('reports multiple keys in same file', () => { - writeFileSync(join(tmpDir, '.env'), 'ANTHROPIC_API_KEY=sk-1\nOPENAI_API_KEY=sk-2\n'); - const report = scanPathForSensitiveKeys(tmpDir); - expect(report.findings).toHaveLength(1); - expect(report.findings[0].keys).toHaveLength(2); - }); - - it('ignores non-autoloaded filenames', () => { - writeFileSync(join(tmpDir, '.env.secrets'), 'ANTHROPIC_API_KEY=sk-1\n'); - const report = scanPathForSensitiveKeys(tmpDir); - expect(report.findings).toHaveLength(0); - }); - - it('ignores safe keys', () => { - writeFileSync(join(tmpDir, '.env'), 
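The MessageChunk union deleted above (now owned by @archon/providers/types) is a discriminated union, so a consumer switching on chunk.type gets each variant's required fields narrowed automatically. A sketch with the union trimmed to three variants:

```typescript
type Chunk =
  | { type: 'assistant'; content: string }
  | { type: 'tool'; toolName: string; toolCallId?: string }
  | { type: 'result'; sessionId?: string };

async function drain(stream: AsyncGenerator<Chunk>): Promise<string | undefined> {
  let sessionId: string | undefined;
  for await (const chunk of stream) {
    switch (chunk.type) {
      case 'assistant':
        process.stdout.write(chunk.content); // narrowed: content is required here
        break;
      case 'tool':
        console.log(`[tool] ${chunk.toolName}`);
        break;
      case 'result':
        sessionId = chunk.sessionId; // keep for session resume
        break;
    }
  }
  return sessionId;
}
```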
'DATABASE_URL=postgres://localhost\nNODE_ENV=dev\n'); - const report = scanPathForSensitiveKeys(tmpDir); - expect(report.findings).toHaveLength(0); - }); -}); - -describe('EnvLeakError', () => { - it('is instanceof EnvLeakError and Error', () => { - const report = { path: '/tmp', findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }] }; - const err = new EnvLeakError(report); - expect(err).toBeInstanceOf(Error); - expect(err).toBeInstanceOf(EnvLeakError); - expect(err.name).toBe('EnvLeakError'); - expect(err.message).toContain('ANTHROPIC_API_KEY'); - expect(err.report).toBe(report); - }); - - it('defaults context to register-ui and stores it on the error', () => { - const report = { path: '/x', findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }] }; - const err = new EnvLeakError(report); - expect(err.context).toBe('register-ui'); - expect(err.message).toContain('Add Project'); - }); - - it('produces distinct remediation bodies per context', () => { - const report = { path: '/x', findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }] }; - const ui = formatLeakError(report, 'register-ui'); - const cli = formatLeakError(report, 'register-cli'); - const spawn = formatLeakError(report, 'spawn-existing'); - expect(ui).toContain('Add Project'); - expect(cli).toContain('--allow-env-keys'); - expect(cli).toContain('allow_target_repo_keys'); - expect(spawn).toContain('Settings'); - expect(spawn).toContain('already-registered'); - // headers differ between register and spawn - expect(ui).toContain('Cannot add codebase'); - expect(spawn).toContain('Cannot run workflow'); - }); - - it('formats multiple findings', () => { - const report = { - path: '/test', - findings: [ - { file: '.env', keys: ['ANTHROPIC_API_KEY'] }, - { file: '.env.local', keys: ['OPENAI_API_KEY', 'GEMINI_API_KEY'] }, - ], - }; - const err = new EnvLeakError(report); - expect(err.message).toContain('.env'); - expect(err.message).toContain('.env.local'); - expect(err.message).toContain('OPENAI_API_KEY'); - expect(err.message).toContain('GEMINI_API_KEY'); - }); -}); diff --git a/packages/core/src/utils/env-leak-scanner.ts b/packages/core/src/utils/env-leak-scanner.ts deleted file mode 100644 index 48edc2c6b7..0000000000 --- a/packages/core/src/utils/env-leak-scanner.ts +++ /dev/null @@ -1,155 +0,0 @@ -import { readFileSync, existsSync } from 'fs'; -import { join } from 'path'; - -export const SENSITIVE_KEYS = new Set([ - 'ANTHROPIC_API_KEY', - 'ANTHROPIC_AUTH_TOKEN', - 'CLAUDE_API_KEY', - 'CLAUDE_CODE_OAUTH_TOKEN', - 'OPENAI_API_KEY', - 'CODEX_API_KEY', - 'GEMINI_API_KEY', -]); - -export const AUTOLOADED_FILES = [ - '.env', - '.env.local', - '.env.development', - '.env.production', - '.env.development.local', - '.env.production.local', -]; - -export interface LeakFinding { - file: string; - keys: string[]; -} - -export interface LeakReport { - path: string; - findings: LeakFinding[]; -} - -/** - * Context in which the env-leak error is being surfaced. Drives the remediation - * copy so users see guidance that matches how they hit the gate. 
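The deleted tests above pin the scanner's parse rule: commented-out keys do not count, lines without '=' do not count, and only the key (never the value) is compared against the sensitive set, as the implementation further below confirms. The rule in isolation:

```typescript
// Subset of the real sensitive-key set, for illustration.
const SENSITIVE = new Set(['ANTHROPIC_API_KEY', 'OPENAI_API_KEY']);

function sensitiveKeysIn(envFileContents: string): string[] {
  const found: string[] = [];
  for (const line of envFileContents.split('\n')) {
    const trimmed = line.trim();
    // Skip comments and lines that are not KEY=value assignments
    if (trimmed.startsWith('#') || !trimmed.includes('=')) continue;
    const key = trimmed.split('=')[0].trim();
    if (SENSITIVE.has(key)) found.push(key); // values are never read
  }
  return found;
}

// sensitiveKeysIn('# OPENAI_API_KEY=x\nANTHROPIC_API_KEY=sk-1\n')
//   returns ['ANTHROPIC_API_KEY']
```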
- * - * - `register-ui`: Add-Project flow in the Web UI (checkbox is visible) - * - `register-cli`: CLI auto-register path (no Web UI) - * - `spawn-existing`: Pre-spawn check for an already-registered codebase - */ -export type LeakErrorContext = 'register-ui' | 'register-cli' | 'spawn-existing'; - -export class EnvLeakError extends Error { - public readonly context: LeakErrorContext; - constructor( - public readonly report: LeakReport, - context: LeakErrorContext = 'register-ui' - ) { - super(formatLeakError(report, context)); - this.name = 'EnvLeakError'; - this.context = context; - } -} - -/** - * Scan `dirPath` for auto-loaded .env files containing sensitive keys. - * Pure function — no side effects. - */ -export function scanPathForSensitiveKeys(dirPath: string): LeakReport { - const findings: LeakFinding[] = []; - - for (const filename of AUTOLOADED_FILES) { - const fullPath = join(dirPath, filename); - if (!existsSync(fullPath)) continue; - - let contents: string; - try { - contents = readFileSync(fullPath, 'utf8'); - } catch (err) { - // File exists but is unreadable — treat as a finding to avoid silently bypassing the gate - const code = (err as NodeJS.ErrnoException).code; - findings.push({ file: filename, keys: [`[unreadable — ${code ?? 'unknown error'}]`] }); - continue; - } - - const foundKeys: string[] = []; - for (const line of contents.split('\n')) { - const trimmed = line.trim(); - if (trimmed.startsWith('#') || !trimmed.includes('=')) continue; - const key = trimmed.split('=')[0].trim(); - if (SENSITIVE_KEYS.has(key)) { - foundKeys.push(key); - } - } - - if (foundKeys.length > 0) { - findings.push({ file: filename, keys: foundKeys }); - } - } - - return { path: dirPath, findings }; -} - -/** - * Exhaustive per-context consent remediation copy. Using `switch` with a - * `never` default means adding a new `LeakErrorContext` variant without - * handling it here is a compile error — important for a security-visible path. - */ -function consentCopy(context: LeakErrorContext): string { - switch (context) { - case 'register-cli': - return ` 3. Acknowledge the risk and allow this codebase to use its .env key: - Re-run the CLI command with --allow-env-keys, or set - 'allow_target_repo_keys: true' in ~/.archon/config.yaml to bypass this - gate globally.`; - case 'spawn-existing': - return ` 3. Acknowledge the risk for this already-registered codebase: - Open the Web UI (Settings → Projects), find this project, and toggle - "Allow env keys". Or set 'allow_target_repo_keys: true' in - ~/.archon/config.yaml to bypass this gate globally.`; - case 'register-ui': - return ` 3. Acknowledge the risk and allow this codebase to use its .env key: - Open the web UI (Settings → Projects → Add Project) and tick - "Allow env keys (I understand the risk)" when adding this project.`; - default: { - const exhaustive: never = context; - return exhaustive; - } - } -} - -export function formatLeakError( - report: LeakReport, - context: LeakErrorContext = 'register-ui' -): string { - const fileList = report.findings.map(f => ` ${f.file} — ${f.keys.join(', ')}`).join('\n'); - - const header = - context === 'spawn-existing' - ? `Cannot run workflow — ${report.path} contains keys that will leak into AI subprocesses` - : `Cannot add codebase — ${report.path} contains keys that will leak into AI subprocesses`; - - const consent = consentCopy(context); - - return `${header} - - Found: -${fileList} - - Why this matters: - Bun subprocesses auto-load .env from their working directory. 
Archon cleans
-  its own environment, but Claude/Codex subprocesses running with cwd=<target repo>
-  will re-inject these keys at their own startup, bypassing Archon's allowlist.
-  This can bill the wrong API account silently.
-
-  Choose one:
-  1. Remove the key from this repo's .env (recommended):
-       grep -v '^ANTHROPIC_API_KEY=' .env > .env.tmp && mv .env.tmp .env
-
-  2. Rename to a non-auto-loaded file:
-       mv .env .env.secrets
-       # update your app to load it explicitly
-
-${consent}`;
-}
diff --git a/packages/core/src/workflows/store-adapter.test.ts b/packages/core/src/workflows/store-adapter.test.ts
index 0501a88000..f193a2075c 100644
--- a/packages/core/src/workflows/store-adapter.test.ts
+++ b/packages/core/src/workflows/store-adapter.test.ts
@@ -44,8 +44,8 @@ mock.module('../db/codebases', () => ({
   getCodebase: mockGetCodebase,
 }));
 
-mock.module('../clients/factory', () => ({
-  getAssistantClient: mock(() => ({})),
+mock.module('@archon/providers', () => ({
+  getAgentProvider: mock(() => ({})),
 }));
 
 mock.module('../config/config-loader', () => ({
@@ -145,10 +145,10 @@ describe('createWorkflowStore', () => {
 });
 
 describe('createWorkflowDeps', () => {
-  test('returns WorkflowDeps with store, getAssistantClient, and loadConfig', () => {
+  test('returns WorkflowDeps with store, getAgentProvider, and loadConfig', () => {
     const deps = createWorkflowDeps();
     expect(deps.store).toBeDefined();
-    expect(typeof deps.getAssistantClient).toBe('function');
+    expect(typeof deps.getAgentProvider).toBe('function');
     expect(typeof deps.loadConfig).toBe('function');
   });
 
diff --git a/packages/core/src/workflows/store-adapter.ts b/packages/core/src/workflows/store-adapter.ts
index 0bf8683fb8..67040fda93 100644
--- a/packages/core/src/workflows/store-adapter.ts
+++ b/packages/core/src/workflows/store-adapter.ts
@@ -10,7 +10,7 @@ import * as workflowDb from '../db/workflows';
 import * as workflowEventDb from '../db/workflow-events';
 import * as codebaseDb from '../db/codebases';
 import * as envVarDb from '../db/env-vars';
-import { getAssistantClient } from '../clients/factory';
+import { getAgentProvider } from '@archon/providers';
 import { loadConfig as loadMergedConfig } from '../config/config-loader';
 import { createLogger } from '@archon/paths';
 
@@ -69,7 +69,7 @@ export function createWorkflowStore(): IWorkflowStore {
 export function createWorkflowDeps(): WorkflowDeps {
   return {
     store: createWorkflowStore(),
-    getAssistantClient,
+    getAgentProvider,
     loadConfig: loadMergedConfig,
   };
 }
diff --git a/packages/docs-web/src/content/docs/guides/skills.md b/packages/docs-web/src/content/docs/guides/skills.md
index 02f2fa0a74..8cfc5e5e81 100644
--- a/packages/docs-web/src/content/docs/guides/skills.md
+++ b/packages/docs-web/src/content/docs/guides/skills.md
@@ -124,7 +124,7 @@ Step-by-step content here. The agent loads this when the skill activates.
 ## Skill Discovery
 
 Skills are discovered from these locations (via `settingSources: ['project']`
-set in ClaudeClient):
+set in ClaudeProvider):
 
 | Location | Scope |
 |----------|-------|
diff --git a/packages/docs-web/src/content/docs/reference/api.md b/packages/docs-web/src/content/docs/reference/api.md
index 0e2fa8aa37..511355e091 100644
--- a/packages/docs-web/src/content/docs/reference/api.md
+++ b/packages/docs-web/src/content/docs/reference/api.md
@@ -138,7 +138,6 @@ Performs a soft delete -- the conversation is hidden but not destroyed.
| GET | `/api/codebases` | List registered codebases | | GET | `/api/codebases/{id}` | Get a single codebase | | POST | `/api/codebases` | Register a codebase (clone or local path) | -| PATCH | `/api/codebases/{id}` | Update env-key consent (`allowEnvKeys`) | | DELETE | `/api/codebases/{id}` | Delete a codebase and clean up resources | | GET | `/api/codebases/{id}/environments` | List isolation environments for a codebase | @@ -166,16 +165,6 @@ curl -X POST http://localhost:3090/api/codebases \ -d '{"path": "/home/user/projects/my-repo"}' ``` -### Update Env-Key Consent - -Flip the env-leak-gate consent bit (`allow_env_keys`) on an existing codebase. Audit-logged on every grant and revoke as `env_leak_consent_granted` / `env_leak_consent_revoked` (warn-level) including `codebaseId`, `path`, scanned `files`, matched `keys`, `scanStatus`, and `actor`. - -```bash -curl -X PATCH http://localhost:3090/api/codebases/{id} \ - -H "Content-Type: application/json" \ - -d '{"allowEnvKeys": true}' -``` - ### Delete a Codebase ```bash diff --git a/packages/docs-web/src/content/docs/reference/architecture.md b/packages/docs-web/src/content/docs/reference/architecture.md index 4f5c16c01e..1a5badb8f7 100644 --- a/packages/docs-web/src/content/docs/reference/architecture.md +++ b/packages/docs-web/src/content/docs/reference/architecture.md @@ -10,7 +10,7 @@ sidebar: Comprehensive guide to understanding and extending Archon. -**Navigation:** [Overview](#system-overview) | [Platforms](#adding-platform-adapters) | [AI Assistants](#adding-ai-assistant-clients) | [Isolation](#isolation-providers) | [Commands](#command-system) | [Streaming](#streaming-modes) | [Database](#database-schema) +**Navigation:** [Overview](#system-overview) | [Platforms](#adding-platform-adapters) | [AI Providers](#adding-ai-agent-providers) | [Isolation](#isolation-providers) | [Commands](#command-system) | [Streaming](#streaming-modes) | [Database](#database-schema) --- @@ -43,11 +43,11 @@ Archon is a **platform-agnostic AI coding assistant orchestrator** that connects │ │ │ ▼ ▼ ▼ ┌───────────┐ ┌───────────────┐ ┌───────────────────┐ -│ Command │ │ AI Assistant │ │ Isolation │ -│ Handler │ │ Clients │ │ Providers │ +│ Command │ │ AI Agent │ │ Isolation │ +│ Handler │ │ Providers │ │ Providers │ │ │ │ │ │ │ -│ (Slash │ │ IAssistant- │ │ IIsolationProvider│ -│ commands) │ │ Client │ │ (worktree, etc.) │ +│ (Slash │ │ IAgent- │ │ IIsolationProvider│ +│ commands) │ │ Provider │ │ (worktree, etc.) │ └─────┬─────┘ └───────┬───────┘ └─────────┬─────────┘ │ │ │ └───────────────┼───────────────────┘ @@ -62,7 +62,7 @@ Archon is a **platform-agnostic AI coding assistant orchestrator** that connects ### Key Design Principles -1. **Interface-driven**: Both platform adapters and AI clients implement strict interfaces for swappability +1. **Interface-driven**: Both platform adapters and AI providers implement strict interfaces for swappability 2. **Streaming-first**: All AI responses stream through async generators for real-time delivery 3. **Session persistence**: AI sessions survive container restarts via database storage 4. **Generic commands**: Users define commands in Git-versioned markdown files, not hardcoded @@ -296,16 +296,16 @@ async handleWebhook(payload: any, signature: string): Promise { --- -## Adding AI Assistant Clients +## Adding AI Agent Providers -AI assistant clients wrap AI SDKs and provide a unified streaming interface. Implement the `IAssistantClient` interface to add new assistants. 
+AI agent providers wrap AI SDKs and provide a unified streaming interface. Implement the `IAgentProvider` interface to add new providers.
 
-### IAssistantClient Interface
+### IAgentProvider Interface
 
 **Location:** `packages/core/src/types/index.ts`
 
 ```typescript
-export interface IAssistantClient {
+export interface IAgentProvider {
   // Send a query and get streaming response
   sendQuery(prompt: string, cwd: string, resumeSessionId?: string): AsyncGenerator<MessageChunk>;
 
@@ -328,14 +328,14 @@ interface MessageChunk {
 
 ### Implementation Guide
 
-**1. Create client file:** `packages/core/src/clients/your-assistant.ts`
+**1. Create provider file:** `packages/providers/src/your-assistant/provider.ts`
 
 **2. Implement the interface:**
 
 ```typescript
-import { IAssistantClient, MessageChunk } from '../types';
+import { IAgentProvider, MessageChunk } from '../types';
 
-export class YourAssistantClient implements IAssistantClient {
+export class YourAssistantProvider implements IAgentProvider {
   async *sendQuery(
     prompt: string,
     cwd: string,
     resumeSessionId?: string
@@ -377,21 +377,21 @@
   }
 }
 ```
 
-**3. Register in factory:** `packages/core/src/clients/factory.ts`
+**3. Register in factory:** `packages/providers/src/factory.ts`
 
 ```typescript
-import { YourAssistantClient } from './your-assistant';
+import { YourAssistantProvider } from './your-assistant';
 
-export function getAssistantClient(type: string): IAssistantClient {
+export function getAgentProvider(type: string): IAgentProvider {
   switch (type) {
     case 'claude':
-      return new ClaudeClient();
+      return new ClaudeProvider();
     case 'codex':
-      return new CodexClient();
+      return new CodexProvider();
     case 'your-assistant':
-      return new YourAssistantClient();
+      return new YourAssistantProvider();
     default:
-      throw new Error(`Unknown assistant type: ${type}`);
+      throw new Error(`Unknown provider type: ${type}`);
   }
 }
 ```
 
@@ -440,7 +440,7 @@ if (trigger && shouldCreateNewSession(trigger)) {
 
 Different SDKs use different event types.
Map them to MessageChunk types: -**Claude Code SDK** (`packages/core/src/clients/claude.ts`): +**Claude Code SDK** (`packages/providers/src/claude/provider.ts`): ```typescript for await (const msg of query({ prompt, options })) { @@ -462,7 +462,7 @@ for await (const msg of query({ prompt, options })) { } ``` -**Codex SDK** (`packages/core/src/clients/codex.ts`): +**Codex SDK** (`packages/providers/src/codex/provider.ts`): ```typescript for await (const event of result.events) { @@ -1180,7 +1180,7 @@ Variable substitution (no args in this case) | Get or create session | -ClaudeClient.sendQuery(prompt, cwd, sessionId) +ClaudeProvider.sendQuery(prompt, cwd, sessionId) | Stream mode: Send each chunk immediately | @@ -1212,7 +1212,7 @@ Load command file, substitute variables | Get or create session | -CodexClient.sendQuery(prompt, cwd, sessionId) +CodexProvider.sendQuery(prompt, cwd, sessionId) | Batch mode: Accumulate all chunks | @@ -1236,14 +1236,14 @@ Post single comment on issue with summary - [ ] Add environment variables to `.env.example` - [ ] Test with both stream and batch modes -### Adding a New AI Assistant Client +### Adding a New AI Agent Provider -- [ ] Create `packages/core/src/clients/your-assistant.ts` -- [ ] Implement `IAssistantClient` interface +- [ ] Create `packages/providers/src/your-assistant/provider.ts` +- [ ] Implement `IAgentProvider` interface - [ ] Map SDK events to `MessageChunk` types - [ ] Handle session creation and resumption - [ ] Implement error handling and recovery -- [ ] Add to `packages/core/src/clients/factory.ts` +- [ ] Add to `packages/providers/src/factory.ts` - [ ] Add environment variables to `.env.example` - [ ] Test session persistence across restarts - [ ] Test plan-to-execute transition (new session) @@ -1341,9 +1341,9 @@ Context is passed as a dedicated `issueContext` parameter to `handleMessage()`, ## Key Takeaways -1. **Interfaces enable extensibility**: `IPlatformAdapter`, `IAssistantClient`, and `IIsolationProvider` allow adding platforms, AI assistants, and isolation strategies without modifying core logic +1. **Interfaces enable extensibility**: `IPlatformAdapter`, `IAgentProvider`, and `IIsolationProvider` allow adding platforms, AI providers, and isolation strategies without modifying core logic -2. **Async generators for streaming**: All AI clients return `AsyncGenerator` for unified streaming across different SDKs +2. **Async generators for streaming**: All AI providers return `AsyncGenerator` for unified streaming across different SDKs 3. **Session persistence is critical**: Store `assistant_session_id` in database to maintain context across restarts @@ -1353,7 +1353,7 @@ Context is passed as a dedicated `issueContext` parameter to `handleMessage()`, 6. **Plan-to-execute is special**: Only transition requiring new session (prevents token bloat during implementation) -7. **Factory pattern**: `getAssistantClient()` and `getIsolationProvider()` instantiate correct implementations based on configuration +7. **Factory pattern**: `getAgentProvider()` and `getIsolationProvider()` instantiate correct implementations based on configuration 8. 
**Error recovery**: Always provide `/reset` escape hatch for users when sessions get stuck
 
@@ -1364,7 +1364,7 @@ Context is passed as a dedicated `issueContext` parameter to `handleMessage()`,
 
 **For detailed implementation examples, see:**
 - Platform adapter: `packages/adapters/src/chat/telegram/adapter.ts`, `packages/adapters/src/forge/github/adapter.ts`
-- AI client: `packages/core/src/clients/claude.ts`, `packages/core/src/clients/codex.ts`
+- AI provider: `packages/providers/src/claude/provider.ts`, `packages/providers/src/codex/provider.ts`
 - Isolation provider: `packages/isolation/src/providers/worktree.ts`
 - Isolation resolver: `packages/isolation/src/resolver.ts`
 - Isolation factory: `packages/isolation/src/factory.ts`
diff --git a/packages/docs-web/src/content/docs/reference/cli.md b/packages/docs-web/src/content/docs/reference/cli.md
index 33f6436884..ff492962b3 100644
--- a/packages/docs-web/src/content/docs/reference/cli.md
+++ b/packages/docs-web/src/content/docs/reference/cli.md
@@ -122,7 +122,6 @@ Progress events (node start/complete/fail/skip, approval gates) are written to s
 | `--from <branch>`, `--from-branch <branch>` | Override base branch (start-point for worktree) |
 | `--no-worktree` | Opt out of isolation -- run directly in live checkout |
 | `--resume` | Resume from last failed run at the working path (skips completed nodes) |
-| `--allow-env-keys` | Grant env-leak-gate consent during auto-registration (bypasses the gate for this codebase). Audit-logged as `env_leak_consent_granted` with `actor: 'user-cli'`. See [security.md](/reference/security/#env-leak-gate-target-repo-env-keys). |
 | `--quiet`, `-q` | Suppress all progress output to stderr |
 | `--verbose`, `-v` | Also show tool-level events (tool name and duration) |
diff --git a/packages/docs-web/src/content/docs/reference/configuration.md b/packages/docs-web/src/content/docs/reference/configuration.md
index c126b968f1..1e8d867abe 100644
--- a/packages/docs-web/src/content/docs/reference/configuration.md
+++ b/packages/docs-web/src/content/docs/reference/configuration.md
@@ -83,11 +83,6 @@ paths:
 
 concurrency:
   maxConversations: 10
-
-# Env-leak gate bypass (last resort — weakens a security control)
-# allow_target_repo_keys: false  # Set true to skip the env-leak-gate
-                                 # globally for all codebases on this machine.
-                                 # `env_leak_gate_disabled` is logged once per
-                                 # process per source. See security.md.
 ```
 
 ## Repository Configuration
@@ -135,11 +130,6 @@ defaults:
 #   MY_API_KEY: value
 #   CUSTOM_ENDPOINT: https://...
-
-# Per-repo override for the env-leak-gate bypass.
-# Set to `false` to re-enable the gate for THIS repo even when the global
-# config has `allow_target_repo_keys: true`. Set to `true` to grant the
-# bypass for THIS repo only. Wins over the global flag in either direction.
-# allow_target_repo_keys: false
 ```
 
 ### Claude settingSources
diff --git a/packages/docs-web/src/content/docs/reference/security.md b/packages/docs-web/src/content/docs/reference/security.md
index 4a2907d855..b3d1696e04 100644
--- a/packages/docs-web/src/content/docs/reference/security.md
+++ b/packages/docs-web/src/content/docs/reference/security.md
@@ -124,36 +124,20 @@ The GitHub and Gitea adapters verify webhook signatures to ensure payloads origi
 - Per-codebase env vars configured via `codebase_env_vars` or `.archon/config.yaml` `env:` are merged on top at workflow execution time.
 - CWD `.env` keys are the **only** untrusted source. They belong to the target project, not to Archon.
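The layering is easiest to see as a spread merge. A minimal sketch of the precedence, assuming the `requestOptions.env` merge the workflow executor performs (variable names here are illustrative):

```typescript
// Sketch only: mirrors the merge order the env-integration tests simulate.
// process.env has already had CWD .env keys stripped at boot via stripCwdEnv().
const managedEnv: Record<string, string> = {
  MY_API_KEY: 'from .archon/config.yaml env: or the Archon DB', // trusted, managed
};
// Managed values are spread last, so they win over anything inherited.
const subprocessEnv: NodeJS.ProcessEnv = { ...process.env, ...managedEnv };
```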
-### Env-leak gate (target repo `.env` keys)
+### Target repo `.env` isolation
 
-As a second layer of defense, Archon scans target repos for sensitive keys **before spawning** AI subprocesses. A Claude or Codex subprocess started with `cwd=/path/to/target/repo` inherits Bun's auto-loaded `.env` from that CWD — the env-leak gate catches this by scanning the target repo's `.env` files at registration and pre-spawn time.
+Archon prevents target repo `.env` from leaking into subprocesses through structural protection:
 
-**What Archon scans:** auto-loaded filenames `.env`, `.env.local`, `.env.development`, `.env.production`, `.env.development.local`, `.env.production.local`.
+1. **Boot cleanup:** `stripCwdEnv()` removes Bun-auto-loaded CWD `.env` keys from `process.env` before any application code runs.
+2. **Claude Code subprocess:** `executableArgs: ['--no-env-file']` prevents Bun from auto-loading `.env` in the Claude Code subprocess CWD.
+3. **Bun script nodes:** `bun --no-env-file` prevents script node subprocesses from loading target repo `.env`.
+4. **Bash nodes:** Not affected — bash does not auto-load `.env` files.
 
-**Scanned keys:** `ANTHROPIC_API_KEY`, `ANTHROPIC_AUTH_TOKEN`, `CLAUDE_API_KEY`, `CLAUDE_CODE_OAUTH_TOKEN`, `OPENAI_API_KEY`, `CODEX_API_KEY`, `GEMINI_API_KEY`.
+Archon's own env sources (`~/.archon/.env`, dev `.env`) are loaded after the CWD strip and pass through to subprocesses normally.
 
-:::caution
-Renaming the file to `.env.local`, `.env.development`, etc. **does not work** — Bun auto-loads those too. Only `.env.secrets` (or any non-auto-loaded name) is safe.
-:::
-
-**Where the gate runs:**
-
-| Failure point | When | What you see |
-| --- | --- | --- |
-| Registration (Web UI) | Adding a project via Settings → Add Project | 422 with the "Allow env keys" checkbox shown inline |
-| Registration (CLI) | First `archon workflow run --cwd <path>` auto-registers | Error message points at `--allow-env-keys` and the global config flag |
-| Pre-spawn | Existing codebase, before each Claude/Codex query | Error message points at Settings → Projects → "Allow env keys" toggle |
-
-**Primary remediation (recommended):**
-1. Remove the key from the target repo's `.env`, or
-2. Rename the file to `.env.secrets` and load it explicitly from your app code.
-
-**Secondary remediation (consent grants):**
-- **Web UI:** Settings → Projects → click "Allow env keys" on the row. Revoke from the same place. Each grant/revoke writes a `warn`-level audit log (`env_leak_consent_granted` / `env_leak_consent_revoked`) including `codebaseId`, `path`, scanned `files`, matched `keys`, `scanStatus` (`'ok'` or `'skipped'`), and `actor`.
-- **CLI:** `archon workflow run "your message" --cwd <path> --allow-env-keys` grants consent during this run's auto-registration. The grant is persisted (the codebase row is created with `allow_env_keys = true`) and logged as `env_leak_consent_granted` with `actor: 'user-cli'`.
-- **Global bypass:** set `allow_target_repo_keys: true` in `~/.archon/config.yaml` to disable the gate for all codebases on this machine. `env_leak_gate_disabled` is logged at most once per process per source (global vs. repo) the first time `loadConfig` resolves the bypass as active. A repo-level `.archon/config.yaml` with `allow_target_repo_keys: false` re-enables the gate for that repo.
- -**Startup scan:** When `allow_target_repo_keys` is not set, the server scans every registered codebase with `allow_env_keys = false` and emits one `startup_env_leak_gate_will_block` warning per codebase **that has findings** (i.e. would actually be blocked). This gives you a chance to grant consent before hitting a fatal error mid-workflow. The scan is skipped entirely when the global bypass is active. +**If you need env vars available during workflow execution**, use managed env injection: +- `.archon/config.yaml` `env:` section (per-repo, checked into version control) +- Web UI: Settings → Projects → Env Vars (per-codebase, stored in Archon DB) **CORS:** - API routes use `WEB_UI_ORIGIN` to restrict CORS. The default is `*` (allow all), which is appropriate for local single-developer use. Set a specific origin when exposing the server publicly. diff --git a/packages/paths/src/env-integration.test.ts b/packages/paths/src/env-integration.test.ts index 0654c1a22e..5bb2dd036b 100644 --- a/packages/paths/src/env-integration.test.ts +++ b/packages/paths/src/env-integration.test.ts @@ -20,6 +20,8 @@ const TEST_KEYS = [ 'ANTHROPIC_API_KEY', 'CLAUDE_API_KEY', 'CLAUDE_CODE_OAUTH_TOKEN', + 'CLAUDE_CODE_USE_BEDROCK', + 'CLAUDE_CODE_USE_VERTEX', 'CLAUDE_USE_GLOBAL_AUTH', 'DATABASE_URL', 'LOG_LEVEL', @@ -31,6 +33,11 @@ const TEST_KEYS = [ 'CLAUDE_CODE_ENTRYPOINT', 'NODE_OPTIONS', 'REDIS_URL', + 'OPENAI_API_KEY', + 'ELEVENLABS_API_KEY', + 'SSH_AUTH_SOCK', + 'HTTP_PROXY', + 'MANAGED_SECRET', ]; describe('env isolation integration', () => { @@ -132,9 +139,11 @@ describe('env isolation integration', () => { expect(subprocessEnv.ANTHROPIC_API_KEY).toBeUndefined(); // Archon key present expect(subprocessEnv.ARCHON_ONLY_KEY).toBe('trusted'); - // Shell-inherited keys present - expect(subprocessEnv.PATH).toBeDefined(); - expect(subprocessEnv.HOME).toBeDefined(); + // Shell-inherited keys present (Windows uses "Path" casing and USERPROFILE instead of HOME) + const hasPath = subprocessEnv.PATH ?? subprocessEnv.Path; + expect(hasPath).toBeDefined(); + const hasHome = subprocessEnv.HOME ?? subprocessEnv.USERPROFILE; + expect(hasHome).toBeDefined(); }); it('scenario 4: same key in both CWD and archon env — archon value wins', () => { @@ -205,4 +214,117 @@ describe('env isolation integration', () => { expect(subprocessEnv.CLAUDE_CODE_ENTRYPOINT).toBeUndefined(); expect(subprocessEnv.CLAUDE_CODE_OAUTH_TOKEN).toBe('sk-ant-oat01-keep-this'); }); + + // ── Multiple .env file variants ──────────────────────────────────────── + + /** Simulate Bun auto-loading a specific .env file into process.env. 
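+   * Parses the file with dotenv in isolation (processEnv: {}) and then copies
+   * each parsed key into the real process.env, mirroring what Bun's auto-load
+   * does at startup.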
*/ + function simulateBunAutoLoad(filePath: string): void { + const parsed = config({ path: filePath, processEnv: {} }); + if (parsed.parsed) { + for (const [key, value] of Object.entries(parsed.parsed)) { + process.env[key] = value; + } + } + } + + it('strips keys from .env.local in addition to .env', () => { + // Bun auto-loads .env.local too — keys from there must also be stripped + writeFileSync(join(cwdDir, '.env.local'), 'OPENAI_API_KEY=sk-local-leaked\n'); + simulateBunAutoLoad(join(cwdDir, '.env.local')); + + const subprocessEnv = simulateEntryPointFlow( + 'ANTHROPIC_API_KEY=sk-main-leaked\n', + 'CLAUDE_USE_GLOBAL_AUTH=true\n' + ); + + expect(subprocessEnv.ANTHROPIC_API_KEY).toBeUndefined(); + expect(subprocessEnv.OPENAI_API_KEY).toBeUndefined(); + expect(subprocessEnv.CLAUDE_USE_GLOBAL_AUTH).toBe('true'); + }); + + it('strips keys from .env.development', () => { + writeFileSync(join(cwdDir, '.env.development'), 'ELEVENLABS_API_KEY=el-dev-leaked\n'); + simulateBunAutoLoad(join(cwdDir, '.env.development')); + + const subprocessEnv = simulateEntryPointFlow('', ''); + + expect(subprocessEnv.ELEVENLABS_API_KEY).toBeUndefined(); + }); + + // ── Shell-inherited env preservation ─────────────────────────────────── + + it('preserves shell-inherited env that is not in CWD .env', () => { + // User has SSH_AUTH_SOCK and HTTP_PROXY in their shell — these must survive + // because they are not from the target repo's .env + process.env.SSH_AUTH_SOCK = '/tmp/ssh-agent.sock'; + process.env.HTTP_PROXY = 'http://proxy.corp:8080'; + + const subprocessEnv = simulateEntryPointFlow('ANTHROPIC_API_KEY=sk-leaked\n', ''); + + // CWD key stripped + expect(subprocessEnv.ANTHROPIC_API_KEY).toBeUndefined(); + // Shell-inherited env preserved (not in any CWD .env file) + expect(subprocessEnv.SSH_AUTH_SOCK).toBe('/tmp/ssh-agent.sock'); + expect(subprocessEnv.HTTP_PROXY).toBe('http://proxy.corp:8080'); + }); + + it('strips shell-inherited env if same key also appears in CWD .env', () => { + // If SSH_AUTH_SOCK is in both shell AND CWD .env, the CWD value is what + // Bun auto-loaded — stripping removes it. This is correct behavior: + // the CWD .env overwrote the shell value during auto-load. 
+ process.env.SSH_AUTH_SOCK = '/tmp/ssh-agent.sock'; + + const subprocessEnv = simulateEntryPointFlow('SSH_AUTH_SOCK=/tmp/repo-evil-agent.sock\n', ''); + + // Key was in CWD .env, so it gets stripped entirely + expect(subprocessEnv.SSH_AUTH_SOCK).toBeUndefined(); + }); + + // ── Bedrock/Vertex auth preservation ─────────────────────────────────── + + it('preserves CLAUDE_CODE_USE_BEDROCK and CLAUDE_CODE_USE_VERTEX', () => { + // These are CLAUDE_CODE_* vars but are auth-related — must survive marker strip + process.env.CLAUDECODE = '1'; + process.env.CLAUDE_CODE_ENTRYPOINT = 'cli'; + + const subprocessEnv = simulateEntryPointFlow( + '', + 'CLAUDE_CODE_USE_BEDROCK=1\nCLAUDE_CODE_USE_VERTEX=1\nCLAUDE_CODE_OAUTH_TOKEN=sk-token\n' + ); + + // Markers stripped + expect(subprocessEnv.CLAUDECODE).toBeUndefined(); + expect(subprocessEnv.CLAUDE_CODE_ENTRYPOINT).toBeUndefined(); + // Auth vars preserved + expect(subprocessEnv.CLAUDE_CODE_USE_BEDROCK).toBe('1'); + expect(subprocessEnv.CLAUDE_CODE_USE_VERTEX).toBe('1'); + expect(subprocessEnv.CLAUDE_CODE_OAUTH_TOKEN).toBe('sk-token'); + }); + + // ── Managed execution env (simulated) ────────────────────────────────── + + it('managed execution env merges on top of clean process.env', () => { + // After the entry point flow, the workflow executor merges managed env + // (from config.yaml env: + DB vars) on top of process.env. + // This simulates that final merge. + const subprocessEnv = simulateEntryPointFlow( + 'ANTHROPIC_API_KEY=sk-leaked\nDATABASE_URL=postgres://wrong\n', + 'CLAUDE_USE_GLOBAL_AUTH=true\n' + ); + + // Simulate managed env merge (what dag-executor does via requestOptions.env) + const managedEnv = { MANAGED_SECRET: 'from-db', ELEVENLABS_API_KEY: 'el-managed' }; + const finalEnv = { ...subprocessEnv, ...managedEnv }; + + // CWD keys still stripped + expect(finalEnv.ANTHROPIC_API_KEY).toBeUndefined(); + expect(finalEnv.DATABASE_URL).toBeUndefined(); + // Archon auth present + expect(finalEnv.CLAUDE_USE_GLOBAL_AUTH).toBe('true'); + // Managed env present + expect(finalEnv.MANAGED_SECRET).toBe('from-db'); + expect(finalEnv.ELEVENLABS_API_KEY).toBe('el-managed'); + // OS essentials present + expect(finalEnv.PATH ?? 
finalEnv.Path).toBeDefined(); + }); }); diff --git a/packages/providers/package.json b/packages/providers/package.json new file mode 100644 index 0000000000..2ef285486a --- /dev/null +++ b/packages/providers/package.json @@ -0,0 +1,33 @@ +{ + "name": "@archon/providers", + "version": "0.3.6", + "type": "module", + "main": "./src/index.ts", + "types": "./src/index.ts", + "exports": { + ".": "./src/index.ts", + "./types": "./src/types.ts", + "./claude/provider": "./src/claude/provider.ts", + "./claude/config": "./src/claude/config.ts", + "./codex/provider": "./src/codex/provider.ts", + "./codex/config": "./src/codex/config.ts", + "./codex/binary-resolver": "./src/codex/binary-resolver.ts", + "./errors": "./src/errors.ts", + "./factory": "./src/factory.ts" + }, + "scripts": { + "test": "bun test src/claude/provider.test.ts && bun test src/codex/provider.test.ts && bun test src/factory.test.ts && bun test src/codex/binary-guard.test.ts && bun test src/codex/binary-resolver.test.ts && bun test src/codex/binary-resolver-dev.test.ts", + "type-check": "bun x tsc --noEmit" + }, + "dependencies": { + "@anthropic-ai/claude-agent-sdk": "^0.2.89", + "@archon/paths": "workspace:*", + "@openai/codex-sdk": "^0.116.0" + }, + "devDependencies": { + "pino": "^9" + }, + "peerDependencies": { + "typescript": "^5.0.0" + } +} diff --git a/packages/providers/src/claude/config.ts b/packages/providers/src/claude/config.ts new file mode 100644 index 0000000000..3dca726e5f --- /dev/null +++ b/packages/providers/src/claude/config.ts @@ -0,0 +1,31 @@ +/** + * Typed config parsing for Claude provider defaults. + * Validates and narrows the opaque assistantConfig to typed fields. + */ +import type { ClaudeProviderDefaults } from '../types'; + +// Re-export so consumers can import the type from either location +export type { ClaudeProviderDefaults } from '../types'; + +/** + * Parse raw assistantConfig into typed Claude defaults. + * Defensive: invalid fields are silently dropped (not thrown). 
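+ *
+ * Illustrative example (assumed usage):
+ *   parseClaudeConfig({ model: 'sonnet', settingSources: ['project', 'oops'] })
+ *   // => { model: 'sonnet', settingSources: ['project'] } ('oops' is filtered out)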
+ */ +export function parseClaudeConfig(raw: Record): ClaudeProviderDefaults { + const result: ClaudeProviderDefaults = {}; + + if (typeof raw.model === 'string') { + result.model = raw.model; + } + + if (Array.isArray(raw.settingSources)) { + const valid = raw.settingSources.filter( + (s): s is 'project' | 'user' => s === 'project' || s === 'user' + ); + if (valid.length > 0) { + result.settingSources = valid; + } + } + + return result; +} diff --git a/packages/providers/src/claude/index.ts b/packages/providers/src/claude/index.ts new file mode 100644 index 0000000000..cc540542e4 --- /dev/null +++ b/packages/providers/src/claude/index.ts @@ -0,0 +1,8 @@ +export { ClaudeProvider } from './provider'; +export { parseClaudeConfig, type ClaudeProviderDefaults } from './config'; +export { + loadMcpConfig, + buildSDKHooksFromYAML, + withFirstMessageTimeout, + getProcessUid, +} from './provider'; diff --git a/packages/core/src/clients/claude.test.ts b/packages/providers/src/claude/provider.test.ts similarity index 75% rename from packages/core/src/clients/claude.test.ts rename to packages/providers/src/claude/provider.test.ts index e09c004822..1b9ed947dd 100644 --- a/packages/core/src/clients/claude.test.ts +++ b/packages/providers/src/claude/provider.test.ts @@ -1,4 +1,4 @@ -import { describe, test, expect, mock, beforeEach, afterEach, spyOn } from 'bun:test'; +import { describe, test, expect, mock, beforeEach, spyOn } from 'bun:test'; import { createMockLogger } from '../test/mocks/logger'; const mockLogger = createMockLogger(); @@ -16,17 +16,14 @@ mock.module('@anthropic-ai/claude-agent-sdk', () => ({ query: mockQuery, })); -import { ClaudeClient } from './claude'; -import * as claudeModule from './claude'; -import * as codebaseDb from '../db/codebases'; -import * as envLeakScanner from '../utils/env-leak-scanner'; -import * as configLoader from '../config/config-loader'; +import { ClaudeProvider } from './provider'; +import * as claudeModule from './provider'; -describe('ClaudeClient', () => { - let client: ClaudeClient; +describe('ClaudeProvider', () => { + let client: ClaudeProvider; beforeEach(() => { - client = new ClaudeClient({ retryBaseDelayMs: 1 }); + client = new ClaudeProvider({ retryBaseDelayMs: 1 }); mockQuery.mockClear(); mockLogger.info.mockClear(); mockLogger.warn.mockClear(); @@ -37,7 +34,7 @@ describe('ClaudeClient', () => { describe('constructor', () => { test('throws when running as root (UID 0)', () => { const spy = spyOn(claudeModule, 'getProcessUid').mockReturnValue(0); - expect(() => new ClaudeClient()).toThrow( + expect(() => new ClaudeProvider()).toThrow( 'does not support bypassPermissions when running as root' ); spy.mockRestore(); @@ -45,13 +42,13 @@ describe('ClaudeClient', () => { test('does not throw for non-root user', () => { const spy = spyOn(claudeModule, 'getProcessUid').mockReturnValue(1000); - expect(() => new ClaudeClient()).not.toThrow(); + expect(() => new ClaudeProvider()).not.toThrow(); spy.mockRestore(); }); test('does not throw when process.getuid is unavailable (Windows)', () => { const spy = spyOn(claudeModule, 'getProcessUid').mockReturnValue(undefined); - expect(() => new ClaudeClient()).not.toThrow(); + expect(() => new ClaudeProvider()).not.toThrow(); spy.mockRestore(); }); }); @@ -62,6 +59,26 @@ describe('ClaudeClient', () => { }); }); + describe('getCapabilities', () => { + test('returns full capability set for Claude provider', () => { + const caps = client.getCapabilities(); + expect(caps).toEqual({ + sessionResume: true, + mcp: true, + 
hooks: true, + skills: true, + toolRestrictions: true, + structuredOutput: true, + envInjection: true, + costControl: true, + effortControl: true, + thinkingControl: true, + fallbackModel: true, + sandbox: true, + }); + }); + }); + describe('sendQuery', () => { test('yields text events from assistant messages', async () => { mockQuery.mockImplementation(async function* () { @@ -306,7 +323,6 @@ describe('ClaudeClient', () => { }); // Consume the generator - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('my prompt', '/my/workspace', undefined, { model: 'sonnet', })) { @@ -328,7 +344,6 @@ describe('ClaudeClient', () => { // Empty generator }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/workspace')) { // consume } @@ -343,7 +358,6 @@ describe('ClaudeClient', () => { // Empty generator }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/workspace', undefined, { persistSession: true, })) { @@ -363,7 +377,6 @@ describe('ClaudeClient', () => { // Empty generator }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('prompt', '/workspace', 'session-to-resume')) { // consume } @@ -447,9 +460,6 @@ describe('ClaudeClient', () => { }); test('subprocess env passes through all process.env keys (no allowlist filtering)', async () => { - // With the allowlist removed, buildSubprocessEnv returns { ...process.env }. - // CWD .env leakage and CLAUDECODE markers are handled at entry point by - // stripCwdEnv(), not by buildSubprocessEnv(). See #1067, #1097. const originalKey = process.env.CUSTOM_USER_KEY; process.env.CUSTOM_USER_KEY = 'user-trusted-value'; @@ -457,15 +467,23 @@ describe('ClaudeClient', () => { // Empty generator }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/workspace')) { // consume } - const callArgs = mockQuery.mock.calls[0][0] as { options: { env: NodeJS.ProcessEnv } }; + const callArgs = mockQuery.mock.calls[0][0] as { + options: { env: NodeJS.ProcessEnv; executableArgs?: string[] }; + }; + // --no-env-file prevents Bun from auto-loading .env in subprocess CWD + expect(callArgs.options.executableArgs).toEqual(['--no-env-file']); expect(callArgs.options.env.CUSTOM_USER_KEY).toBe('user-trusted-value'); - expect(callArgs.options.env.PATH).toBe(process.env.PATH); - expect(callArgs.options.env.HOME).toBe(process.env.HOME); + // Windows uses "Path" casing in spread objects and USERPROFILE instead of HOME + const envPath = callArgs.options.env.PATH ?? callArgs.options.env.Path; + const processPath = process.env.PATH ?? process.env.Path; + expect(envPath).toBe(processPath); + const envHome = callArgs.options.env.HOME ?? callArgs.options.env.USERPROFILE; + const processHome = process.env.HOME ?? process.env.USERPROFILE; + expect(envHome).toBe(processHome); // Cleanup if (originalKey !== undefined) process.env.CUSTOM_USER_KEY = originalKey; @@ -549,35 +567,29 @@ describe('ClaudeClient', () => { }); test('classifies "Operation aborted" errors as crash and retries', async () => { - // Simulates the SDK cleanup race: PostToolUse hook writes to a closed pipe - // after a DAG node abort. Should be classified as 'crash' (not 'unknown') - // so the retry path is taken. 
const error = new Error('Operation aborted');
       mockQuery.mockImplementation(async function* () {
         throw error;
       });
 
       const consumeGenerator = async (): Promise<void> => {
-        // eslint-disable-next-line @typescript-eslint/no-unused-vars
         for await (const _ of client.sendQuery('test', '/workspace')) {
           // consume
         }
       };
 
-      // crash classification = retried up to 3 times → 4 total calls
+      // crash classification = retried up to 3 times -> 4 total calls
       await expect(consumeGenerator()).rejects.toThrow(/Claude Code crash/);
       expect(mockQuery).toHaveBeenCalledTimes(4);
     }, 5_000);
 
     test('classifies mixed-case "OPERATION ABORTED" errors as crash', async () => {
-      // Pattern matching uses .toLowerCase() — case must not matter
       const error = new Error('OPERATION ABORTED');
       mockQuery.mockImplementation(async function* () {
         throw error;
       });
 
       const consumeGenerator = async (): Promise<void> => {
-        // eslint-disable-next-line @typescript-eslint/no-unused-vars
         for await (const _ of client.sendQuery('test', '/workspace')) {
           // consume
         }
@@ -588,8 +600,6 @@
     }, 5_000);
 
     test('captures all stderr output for diagnostics', async () => {
-      // When the subprocess crashes, the enriched error should include all stderr,
-      // not just lines matching error keywords
       mockQuery.mockImplementation(async function* (args: {
         options: { stderr?: (data: string) => void };
       }) {
@@ -608,7 +618,7 @@
         }
       };
 
-      // Use rejects so assertions always execute — prevents vacuous pass when mock doesn't throw
+      // Use rejects so assertions always execute
       const err = await consumeGenerator().catch((e: unknown) => e as Error);
       expect(err).toBeInstanceOf(Error);
       // The error should contain stderr context from ALL captured lines
@@ -617,14 +627,13 @@
       expect(err.message).toContain('startup diagnostic');
     }, 5_000);
 
-    test('passes settingSources from request options', async () => {
+    test('passes settingSources from assistantConfig', async () => {
       mockQuery.mockImplementation(async function* () {
         yield { type: 'result', session_id: 'test-session' };
       });
 
-      // eslint-disable-next-line @typescript-eslint/no-unused-vars
       for await (const _ of client.sendQuery('test', '/tmp', undefined, {
-        settingSources: ['project', 'user'],
+        assistantConfig: { settingSources: ['project', 'user'] },
       })) {
         // consume
       }
@@ -639,7 +648,6 @@
       mockQuery.mockImplementation(async function* () {
         yield { type: 'result', session_id: 'test-session' };
       });
 
-      // eslint-disable-next-line @typescript-eslint/no-unused-vars
       for await (const _ of client.sendQuery('test', '/tmp')) {
         // consume
       }
@@ -654,7 +662,6 @@
       mockQuery.mockImplementation(async function* () {
         yield { type: 'result', session_id: 'sid' };
       });
 
-      // eslint-disable-next-line @typescript-eslint/no-unused-vars
       for await (const _ of client.sendQuery('test', '/tmp', undefined, {
         env: { MY_SECRET: 'abc123' },
       })) {
@@ -675,8 +682,7 @@
       mockQuery.mockImplementation(async function* () {
         yield { type: 'result', session_id: 'sid' };
       });
 
-      // HOME is always in process.env — override it to verify priority
-      // eslint-disable-next-line @typescript-eslint/no-unused-vars
+      // HOME is always in process.env -- override it to verify priority
       for await (const _ of client.sendQuery('test', '/tmp', undefined, {
         env: { HOME: '/custom/home' },
       })) {
@@ -689,13 +695,14 @@
       expect(env.HOME).toBe('/custom/home');
     });
 
-    test('passes effort to SDK when provided', async () => {
+    test('passes effort to SDK via nodeConfig', async () => {
       mockQuery.mockImplementation(async function* () {
         yield {
type: 'result', session_id: 'sid' }; }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars - for await (const _ of client.sendQuery('test', '/tmp', undefined, { effort: 'high' })) { + for await (const _ of client.sendQuery('test', '/tmp', undefined, { + nodeConfig: { effort: 'high' }, + })) { // consume } @@ -704,12 +711,11 @@ describe('ClaudeClient', () => { expect(callArgs.options.effort).toBe('high'); }); - test('omits effort from SDK when not provided', async () => { + test('omits effort from SDK when not provided in nodeConfig', async () => { mockQuery.mockImplementation(async function* () { yield { type: 'result', session_id: 'sid' }; }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/tmp')) { // consume } @@ -719,14 +725,13 @@ describe('ClaudeClient', () => { expect(callArgs.options).not.toHaveProperty('effort'); }); - test('passes thinking object to SDK', async () => { + test('passes thinking object to SDK via nodeConfig', async () => { mockQuery.mockImplementation(async function* () { yield { type: 'result', session_id: 'sid' }; }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/tmp', undefined, { - thinking: { type: 'enabled', budgetTokens: 8000 }, + nodeConfig: { thinking: { type: 'enabled', budgetTokens: 8000 } }, })) { // consume } @@ -741,7 +746,6 @@ describe('ClaudeClient', () => { yield { type: 'result', session_id: 'sid' }; }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/tmp', undefined, { maxBudgetUsd: 5.0 })) { // consume } @@ -756,7 +760,6 @@ describe('ClaudeClient', () => { yield { type: 'result', session_id: 'sid' }; }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/tmp', undefined, { systemPrompt: 'You are a security reviewer', })) { @@ -773,7 +776,6 @@ describe('ClaudeClient', () => { yield { type: 'result', session_id: 'sid' }; }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/tmp')) { // consume } @@ -788,7 +790,6 @@ describe('ClaudeClient', () => { yield { type: 'result', session_id: 'sid' }; }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/tmp', undefined, { fallbackModel: 'claude-haiku-4-5', })) { @@ -800,14 +801,13 @@ describe('ClaudeClient', () => { expect(callArgs.options.fallbackModel).toBe('claude-haiku-4-5'); }); - test('passes betas array to SDK', async () => { + test('passes betas array to SDK via nodeConfig', async () => { mockQuery.mockImplementation(async function* () { yield { type: 'result', session_id: 'sid' }; }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/tmp', undefined, { - betas: ['context-1m-2025-08-07'], + nodeConfig: { betas: ['context-1m-2025-08-07'] }, })) { // consume } @@ -817,15 +817,16 @@ describe('ClaudeClient', () => { expect(callArgs.options.betas).toEqual(['context-1m-2025-08-07']); }); - test('passes sandbox object to SDK', async () => { + test('passes sandbox object to SDK via nodeConfig', async () => { mockQuery.mockImplementation(async function* () { yield { type: 'result', session_id: 'sid' }; }); const sandbox = { enabled: true, network: { allowedDomains: [] } }; - // eslint-disable-next-line @typescript-eslint/no-unused-vars - for await (const _ 
of client.sendQuery('test', '/tmp', undefined, { sandbox })) {
+      for await (const _ of client.sendQuery('test', '/tmp', undefined, {
+        nodeConfig: { sandbox },
+      })) {
         // consume
       }
 
@@ -857,157 +858,6 @@
       expect(chunks[0]).toEqual({ type: 'assistant', content: 'Real content' });
     });
   });
-
-  describe('pre-spawn env leak gate', () => {
-    let spyFindByDefaultCwd: ReturnType<typeof spyOn>;
-    let spyFindByPathPrefix: ReturnType<typeof spyOn>;
-    let spyScan: ReturnType<typeof spyOn>;
-
-    beforeEach(() => {
-      spyFindByDefaultCwd = spyOn(codebaseDb, 'findCodebaseByDefaultCwd').mockResolvedValue(null);
-      spyFindByPathPrefix = spyOn(codebaseDb, 'findCodebaseByPathPrefix').mockResolvedValue(null);
-      spyScan = spyOn(envLeakScanner, 'scanPathForSensitiveKeys').mockReturnValue({
-        path: '/workspace',
-        findings: [],
-      });
-      mockQuery.mockImplementation(async function* () {
-        yield { type: 'result', session_id: 'sid-gate' };
-      });
-    });
-
-    afterEach(() => {
-      spyFindByDefaultCwd.mockRestore();
-      spyFindByPathPrefix.mockRestore();
-      spyScan.mockRestore();
-    });
-
-    test('throws EnvLeakError when .env contains sensitive keys and registered codebase has no consent', async () => {
-      spyFindByDefaultCwd.mockResolvedValueOnce({
-        id: 'codebase-1',
-        allow_env_keys: false,
-        default_cwd: '/workspace',
-      });
-      spyScan.mockReturnValueOnce({
-        path: '/workspace',
-        findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }],
-      });
-
-      await expect(async () => {
-        for await (const _ of client.sendQuery('test', '/workspace')) {
-          // consume
-        }
-      }).toThrow('Cannot run workflow');
-    });
-
-    test('skips scan entirely when cwd is not a registered codebase', async () => {
-      // Both lookups return null (default from beforeEach) → unregistered cwd.
-      // Even if sensitive keys would be present, the pre-spawn check must not run
-      // because the canonical gate is registerRepoAtPath, not sendQuery.
-      spyScan.mockReturnValue({
-        path: '/workspace',
-        findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }],
-      });
-
-      const chunks = [];
-      for await (const chunk of client.sendQuery('test', '/workspace')) {
-        chunks.push(chunk);
-      }
-
-      expect(spyScan).not.toHaveBeenCalled();
-      expect(chunks).toHaveLength(1);
-    });
-
-    test('skips scan when codebase has allow_env_keys: true', async () => {
-      spyFindByDefaultCwd.mockResolvedValueOnce({
-        id: 'codebase-1',
-        allow_env_keys: true,
-        default_cwd: '/workspace',
-      });
-
-      const chunks = [];
-      for await (const chunk of client.sendQuery('test', '/workspace')) {
-        chunks.push(chunk);
-      }
-
-      expect(spyScan).not.toHaveBeenCalled();
-      expect(chunks).toHaveLength(1);
-    });
-
-    test('proceeds without scanning when cwd has no registered codebase', async () => {
-      // Unregistered cwd — the pre-spawn safety net is out of scope.
-      const chunks = [];
-      for await (const chunk of client.sendQuery('test', '/workspace')) {
-        chunks.push(chunk);
-      }
-
-      expect(spyScan).not.toHaveBeenCalled();
-      expect(chunks).toHaveLength(1);
-    });
-
-    test('skips scan when allowTargetRepoKeys is true in merged config', async () => {
-      spyFindByDefaultCwd.mockResolvedValueOnce({
-        id: 'codebase-1',
-        allow_env_keys: false,
-        default_cwd: '/workspace',
-      });
-      const spyLoadConfig = spyOn(configLoader, 'loadConfig').mockResolvedValueOnce({
-        allowTargetRepoKeys: true,
-      } as Awaited<ReturnType<typeof configLoader.loadConfig>>);
-      // Even though scanner would return a finding, the config bypass must short-circuit
-      spyScan.mockReturnValueOnce({
-        path: '/workspace',
-        findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }],
-      });
-
-      const chunks = [];
-      for await (const chunk of client.sendQuery('test', '/workspace')) {
-        chunks.push(chunk);
-      }
-
-      expect(spyScan).not.toHaveBeenCalled();
-      expect(chunks).toHaveLength(1);
-      spyLoadConfig.mockRestore();
-    });
-
-    test('falls back to scanner when loadConfig throws (fail-closed)', async () => {
-      spyFindByDefaultCwd.mockResolvedValueOnce({
-        id: 'codebase-1',
-        allow_env_keys: false,
-        default_cwd: '/workspace',
-      });
-      const spyLoadConfig = spyOn(configLoader, 'loadConfig').mockRejectedValueOnce(
-        new Error('YAML parse error')
-      );
-      spyScan.mockReturnValueOnce({
-        path: '/workspace',
-        findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }],
-      });
-
-      await expect(async () => {
-        for await (const _ of client.sendQuery('test', '/workspace')) {
-          // consume
-        }
-      }).toThrow('Cannot run workflow');
-      expect(spyScan).toHaveBeenCalled();
-      spyLoadConfig.mockRestore();
-    });
-
-    test('uses prefix lookup for worktree paths when exact match returns null', async () => {
-      spyFindByPathPrefix.mockResolvedValueOnce({
-        id: 'codebase-1',
-        allow_env_keys: true,
-        default_cwd: '/workspace/source',
-      });
-
-      const chunks = [];
-      for await (const chunk of client.sendQuery('test', '/workspace/worktrees/feature')) {
-        chunks.push(chunk);
-      }
-
-      expect(spyFindByPathPrefix).toHaveBeenCalledWith('/workspace/worktrees/feature');
-      expect(spyScan).not.toHaveBeenCalled();
-    });
-  });
 });
 
 describe('withFirstMessageTimeout', () => {
@@ -1095,3 +945,194 @@
     );
   });
 });
+
+// ─── Behavioral regression tests (black-box via sendQuery) ───────────────
+// These cover specific fixes from the sendQuery decomposition review:
+// timeout preservation, one-time warnings, abort forwarding, error enrichment.
+
+describe('sendQuery decomposition behaviors', () => {
+  let client: ClaudeProvider;
+
+  beforeEach(() => {
+    client = new ClaudeProvider({ retryBaseDelayMs: 1 });
+    mockQuery.mockClear();
+    mockLogger.info.mockClear();
+    mockLogger.warn.mockClear();
+    mockLogger.error.mockClear();
+    mockLogger.debug.mockClear();
+  });
+
+  test('preserves first-event timeout error instead of generic abort', async () => {
+    // withFirstMessageTimeout aborts the controller then throws.
+    // classifyAndEnrichError must preserve the timeout message, not "Query aborted".
+    mockQuery.mockImplementation(async function* () {
+      await new Promise(() => {}); // hang forever
+      yield { type: 'result', session_id: 'never' };
+    });
+
+    const consumeGenerator = async (): Promise<void> => {
+      // Use env var to set a short timeout for the test
+      const original = process.env.ARCHON_CLAUDE_FIRST_EVENT_TIMEOUT_MS;
+      process.env.ARCHON_CLAUDE_FIRST_EVENT_TIMEOUT_MS = '50';
+      try {
+        for await (const _ of client.sendQuery('test', '/workspace')) {
+          // consume
+        }
+      } finally {
+        if (original !== undefined) process.env.ARCHON_CLAUDE_FIRST_EVENT_TIMEOUT_MS = original;
+        else delete process.env.ARCHON_CLAUDE_FIRST_EVENT_TIMEOUT_MS;
+      }
+    };
+
+    await expect(consumeGenerator()).rejects.toThrow('produced no output within');
+    // Must NOT be "Query aborted"
+    await expect(consumeGenerator()).rejects.not.toThrow('Query aborted');
+  });
+
+  test('emits nodeConfig warnings only once even when retries occur', async () => {
+    let callCount = 0;
+    mockQuery.mockImplementation(async function* () {
+      callCount++;
+      if (callCount <= 2) {
+        throw new Error('process exited with code 1'); // crash → retried
+      }
+      yield {
+        type: 'assistant',
+        message: { content: [{ type: 'text', text: 'ok' }] },
+      };
+    });
+
+    const chunks = [];
+    for await (const chunk of client.sendQuery('test', '/workspace', undefined, {
+      nodeConfig: { effort: 'high' },
+    })) {
+      chunks.push(chunk);
+    }
+
+    // An effort-only nodeConfig produces no warnings, so any system chunk here
+    // would be a duplicate leaked by a retry attempt.
+    // The point is: zero warning chunks means zero, not zero × 3 retries.
+    const systemChunks = chunks.filter(c => c.type === 'system');
+    expect(systemChunks).toHaveLength(0);
+    expect(callCount).toBe(3); // Confirms retries happened
+  }, 5_000);
+
+  test('abort signal cancels query across retries without listener leak', async () => {
+    const abortController = new AbortController();
+    let callCount = 0;
+
+    mockQuery.mockImplementation(async function* () {
+      callCount++;
+      if (callCount === 1) {
+        // First attempt crashes → triggers retry. Abort during the retry delay
+        // so the next iteration's abortSignal.aborted check catches it.
+        setTimeout(() => abortController.abort(), 0);
+        throw new Error('process exited with code 1');
+      }
+      // Should not reach here — abort fires before retry starts
+      yield {
+        type: 'assistant',
+        message: { content: [{ type: 'text', text: 'should not reach' }] },
+      };
+    });
+
+    const consumeGenerator = async (): Promise<void> => {
+      for await (const _ of client.sendQuery('test', '/workspace', undefined, {
+        abortSignal: abortController.signal,
+      })) {
+        // consume
+      }
+    };
+
+    await expect(consumeGenerator()).rejects.toThrow('Query aborted');
+    // Single abort listener registered (not per-retry)
+    expect(callCount).toBe(1);
+  }, 5_000);
+
+  test('enriched error (with stderr) is thrown at retry exhaustion, not raw error', async () => {
+    mockQuery.mockImplementation(async function* (args: {
+      options: { stderr?: (data: string) => void };
+    }) {
+      if (args.options.stderr) {
+        args.options.stderr('diagnostic: something broke');
+      }
+      throw new Error('process exited with code 1');
+    });
+
+    const consumeGenerator = async (): Promise<void> => {
+      for await (const _ of client.sendQuery('test', '/workspace')) {
+        // consume
+      }
+    };
+
+    const err = await consumeGenerator().catch((e: unknown) => e as Error);
+    expect(err).toBeInstanceOf(Error);
+    // Must contain stderr context, not just the raw error
+    expect(err.message).toContain('stderr:');
+    expect(err.message).toContain('diagnostic: something broke');
+  }, 5_000);
+
+  test('PostToolUse hook handles circular reference without crashing', async () => {
+    mockQuery.mockImplementation(async function* (args: {
+      options: {
+        hooks?: Record<string, Array<{ hooks?: Array<(input: unknown) => Promise<unknown>> }>>;
+      };
+    }) {
+      // Simulate a tool use that triggers the PostToolUse hook with circular data
+      const hooks = args.options.hooks?.PostToolUse;
+      if (hooks?.[0]?.hooks?.[0]) {
+        const circular: Record<string, unknown> = { key: 'val' };
+        circular.self = circular; // circular reference
+        await hooks[0].hooks[0]({
+          tool_name: 'TestTool',
+          tool_use_id: 'tc-circ',
+          tool_response: circular,
+        });
+      }
+      yield {
+        type: 'assistant',
+        message: { content: [{ type: 'text', text: 'done' }] },
+      };
+    });
+
+    // Should not throw — the try/catch in PostToolUse should handle the circular ref
+    const chunks = [];
+    for await (const chunk of client.sendQuery('test', '/workspace')) {
+      chunks.push(chunk);
+    }
+
+    // The assistant message should still come through
+    expect(chunks.some(c => c.type === 'assistant')).toBe(true);
+    // The error should be logged
+    expect(mockLogger.error).toHaveBeenCalledWith(
+      expect.objectContaining({ err: expect.any(Error) }),
+      'claude.post_tool_use_hook_error'
+    );
+  });
+
+  test('logs is_error result events at error level', async () => {
+    mockQuery.mockImplementation(async function* () {
+      yield {
+        type: 'result',
+        session_id: 'sid-err',
+        is_error: true,
+        subtype: 'max_turns',
+      };
+    });
+
+    const chunks = [];
+    for await (const chunk of client.sendQuery('test', '/workspace')) {
+      chunks.push(chunk);
+    }
+
+    expect(chunks[0]).toMatchObject({
+      type: 'result',
+      isError: true,
+      errorSubtype: 'max_turns',
+    });
+    expect(mockLogger.error).toHaveBeenCalledWith(
+      expect.objectContaining({ sessionId: 'sid-err', errorSubtype: 'max_turns' }),
+      'claude.result_is_error'
+    );
+  });
+});
diff --git a/packages/providers/src/claude/provider.ts b/packages/providers/src/claude/provider.ts
new file mode 100644
index 0000000000..57e430579b
--- /dev/null
+++ b/packages/providers/src/claude/provider.ts
@@ -0,0 +1,970 @@
+/**
+ * Claude Agent SDK wrapper
+ * Provides async generator interface for streaming Claude responses
* + * Type Safety Pattern: + * - Uses `Options` type from SDK for query configuration + * - SDK message types have strict type checking for content blocks + * - Content blocks are typed via inline assertions for clarity + * + * Authentication: + * - CLAUDE_USE_GLOBAL_AUTH=true: Use global auth from `claude /login`, filter env tokens + * - CLAUDE_USE_GLOBAL_AUTH=false: Use explicit tokens from env vars + * - Not set: Auto-detect - use tokens if present in env, otherwise global auth + */ +import { + query, + type Options, + type HookCallback, + type HookCallbackMatcher, +} from '@anthropic-ai/claude-agent-sdk'; +import cliPath from '@anthropic-ai/claude-agent-sdk/embed'; +import type { + IAgentProvider, + SendQueryOptions, + MessageChunk, + TokenUsage, + ProviderCapabilities, + NodeConfig, +} from '../types'; +import { parseClaudeConfig } from './config'; +import { createLogger } from '@archon/paths'; +import { readFile } from 'fs/promises'; +import { resolve, isAbsolute } from 'path'; + +/** Lazy-initialized logger (deferred so test mocks can intercept createLogger) */ +let cachedLog: ReturnType<typeof createLogger> | undefined; +function getLog(): ReturnType<typeof createLogger> { + if (!cachedLog) cachedLog = createLogger('provider.claude'); + return cachedLog; +} + +/** + * Content block type for assistant messages + */ +interface ContentBlock { + type: 'text' | 'tool_use'; + text?: string; + name?: string; + input?: Record<string, unknown>; + id?: string; +} + +function normalizeClaudeUsage(usage?: { + input_tokens?: number; + output_tokens?: number; + total_tokens?: number; +}): TokenUsage | undefined { + if (!usage) return undefined; + const input = usage.input_tokens; + const output = usage.output_tokens; + if (typeof input !== 'number' || typeof output !== 'number') return undefined; + const total = usage.total_tokens; + return { + input, + output, + ...(typeof total === 'number' ? { total } : {}), + }; +} + +/** + * Build environment for Claude subprocess. + * + * process.env is already clean at this point: + * - stripCwdEnv() at entry point removed CWD .env keys + CLAUDECODE markers + * - ~/.archon/.env loaded with override:true as the trusted source + */ +function buildSubprocessEnv(): NodeJS.ProcessEnv { + const hasExplicitTokens = Boolean( + process.env.CLAUDE_CODE_OAUTH_TOKEN ?? process.env.CLAUDE_API_KEY + ); + const authMode = hasExplicitTokens ? 'explicit' : 'global'; + getLog().info( + { authMode }, + authMode === 'global' ?
'using_global_auth' : 'using_explicit_tokens' + ); + return { ...process.env }; +} + +/** Max retries for transient subprocess failures */ +const MAX_SUBPROCESS_RETRIES = 3; +const RETRY_BASE_DELAY_MS = 2000; + +const RATE_LIMIT_PATTERNS = ['rate limit', 'too many requests', '429', 'overloaded']; +const AUTH_PATTERNS = [ + 'credit balance', + 'unauthorized', + 'authentication', + 'invalid token', + '401', + '403', +]; +const SUBPROCESS_CRASH_PATTERNS = ['exited with code', 'killed', 'signal', 'operation aborted']; + +function classifySubprocessError( + errorMessage: string, + stderrOutput: string +): 'rate_limit' | 'auth' | 'crash' | 'unknown' { + const combined = `${errorMessage} ${stderrOutput}`.toLowerCase(); + if (RATE_LIMIT_PATTERNS.some(p => combined.includes(p))) return 'rate_limit'; + if (AUTH_PATTERNS.some(p => combined.includes(p))) return 'auth'; + if (SUBPROCESS_CRASH_PATTERNS.some(p => combined.includes(p))) return 'crash'; + return 'unknown'; +} + +function getFirstEventTimeoutMs(): number { + const raw = process.env.ARCHON_CLAUDE_FIRST_EVENT_TIMEOUT_MS; + if (raw) { + const parsed = Number(raw); + if (Number.isFinite(parsed) && parsed > 0) return parsed; + } + return 60_000; +} + +function buildFirstEventHangDiagnostics( + subprocessEnv: Record<string, string | undefined>, + model: string | undefined +): Record<string, unknown> { + return { + subprocessEnvKeys: Object.keys(subprocessEnv), + parentClaudeKeys: Object.keys(process.env).filter( + k => k === 'CLAUDECODE' || k.startsWith('CLAUDE_CODE_') || k.startsWith('ANTHROPIC_') + ), + model, + platform: process.platform, + uid: getProcessUid(), + isTTY: process.stdout.isTTY ?? false, + claudeCode: process.env.CLAUDECODE, + claudeCodeEntrypoint: process.env.CLAUDE_CODE_ENTRYPOINT, + }; +} + +class FirstEventTimeoutError extends Error {} + +/** + * Wraps an async generator so that the first call to .next() must resolve + * within `timeoutMs`. If it doesn't, aborts the controller and throws. + */ +export async function* withFirstMessageTimeout<T>( + gen: AsyncGenerator<T>, + controller: AbortController, + timeoutMs: number, + diagnostics: Record<string, unknown> +): AsyncGenerator<T> { + let timerId: ReturnType<typeof setTimeout> | undefined; + let firstValue: IteratorResult<T>; + try { + firstValue = await Promise.race([ + gen.next(), + new Promise<never>((_, reject) => { + timerId = setTimeout(() => { + reject(new FirstEventTimeoutError()); + }, timeoutMs); + }), + ]); + } catch (err) { + if (err instanceof FirstEventTimeoutError) { + controller.abort(); + getLog().error({ ...diagnostics, timeoutMs }, 'claude.first_event_timeout'); + throw new Error( + 'Claude Code subprocess produced no output within ' + + timeoutMs + + 'ms. ' + + 'See logs for claude.first_event_timeout diagnostic dump. ' + + 'Details: https://github.com/coleam00/Archon/issues/1067' + ); + } + throw err; + } finally { + clearTimeout(timerId); + } + + if (firstValue.done) return; + yield firstValue.value; + yield* gen; +} + +/** + * Returns the current process UID, or undefined on platforms that don't support it. + */ +export function getProcessUid(): number | undefined { + return typeof process.getuid === 'function' ? process.getuid() : undefined; +} + +// ─── MCP Config Loading (absorbed from dag-executor) ─────────────────────── + +/** + * Expand $VAR_NAME references in string-valued records from process.env.
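+ * Example (illustrative): with process.env.GITHUB_TOKEN === 'abc', + *   { Authorization: 'Bearer $GITHUB_TOKEN' } becomes { Authorization: 'Bearer abc' }; + * unset variables expand to '' and are collected into missingVars.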
+ */ +function expandEnvVarsInRecord( + record: Record<string, unknown>, + missingVars: string[] +): Record<string, string> { + const result: Record<string, string> = {}; + for (const [key, val] of Object.entries(record)) { + if (typeof val !== 'string') { + getLog().warn({ key, valueType: typeof val }, 'mcp_env_value_coerced_to_string'); + result[key] = String(val); + continue; + } + result[key] = val.replace(/\$([A-Z_][A-Z0-9_]*)/g, (_, varName: string) => { + const envVal = process.env[varName]; + if (envVal === undefined) { + missingVars.push(varName); + } + return envVal ?? ''; + }); + } + return result; +} + +function expandEnvVars(config: Record<string, unknown>): { + expanded: Record<string, unknown>; + missingVars: string[]; +} { + const result: Record<string, unknown> = {}; + const missingVars: string[] = []; + for (const [serverName, serverConfig] of Object.entries(config)) { + if (typeof serverConfig !== 'object' || serverConfig === null) { + getLog().warn({ serverName, valueType: typeof serverConfig }, 'mcp_server_config_not_object'); + continue; + } + const server = { ...(serverConfig as Record<string, unknown>) }; + if (server.env && typeof server.env === 'object') { + server.env = expandEnvVarsInRecord(server.env as Record<string, unknown>, missingVars); + } + if (server.headers && typeof server.headers === 'object') { + server.headers = expandEnvVarsInRecord( + server.headers as Record<string, unknown>, + missingVars + ); + } + result[serverName] = server; + } + return { expanded: result, missingVars }; +} + +/** + * Load MCP server config from a JSON file and expand environment variables. + */ +export async function loadMcpConfig( + mcpPath: string, + cwd: string +): Promise<{ servers: Record<string, unknown>; serverNames: string[]; missingVars: string[] }> { + const fullPath = isAbsolute(mcpPath) ? mcpPath : resolve(cwd, mcpPath); + + let raw: string; + try { + raw = await readFile(fullPath, 'utf-8'); + } catch (err) { + const e = err as NodeJS.ErrnoException; + if (e.code === 'ENOENT') { + throw new Error(`MCP config file not found: ${mcpPath} (resolved to ${fullPath})`); + } + throw new Error(`Failed to read MCP config file: ${mcpPath} — ${e.message}`); + } + + let parsed: Record<string, unknown>; + try { + parsed = JSON.parse(raw) as Record<string, unknown>; + } catch (parseErr) { + const detail = (parseErr as SyntaxError).message; + throw new Error(`MCP config file is not valid JSON: ${mcpPath} — ${detail}`); + } + + if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) { + throw new Error(`MCP config must be a JSON object (Record<string, unknown>): ${mcpPath}`); + } + + const { expanded, missingVars } = expandEnvVars(parsed); + const serverNames = Object.keys(expanded); + return { servers: expanded, serverNames, missingVars }; +} + +// ─── SDK Hooks Building (absorbed from dag-executor) ─────────────────────── + +/** YAML hook matcher shape (matches @archon/workflows/schemas/dag-node WorkflowNodeHooks) */ +interface YAMLHookMatcher { + matcher?: string; + response: unknown; + timeout?: number; +} + +type SDKHooksMap = Partial< + Record< + string, + { + matcher?: string; + hooks: (( + input: unknown, + toolUseID: string | undefined, + options: { signal: AbortSignal } + ) => Promise<unknown>)[]; + timeout?: number; + }[] + > +>; + +/** + * Convert declarative YAML hook definitions to SDK HookCallbackMatcher arrays. + */ +export function buildSDKHooksFromYAML( + nodeHooks: Record<string, YAMLHookMatcher[] | undefined> +): SDKHooksMap { + const sdkHooks: SDKHooksMap = {}; + + for (const [event, matchers] of Object.entries(nodeHooks)) { + if (!matchers) continue; + sdkHooks[event] = matchers.map(m => ({ + ...(m.matcher ? { matcher: m.matcher } : {}), + hooks: [async (): Promise<unknown> => m.response], + ...(m.timeout ?
{ timeout: m.timeout } : {}), + })); + } + + if (Object.keys(sdkHooks).length === 0) { + getLog().warn( + { nodeHooksKeys: Object.keys(nodeHooks) }, + 'claude.hooks_build_produced_empty_map' + ); + } + + return sdkHooks; +} + +// ─── Provider Warning Type ─────────────────────────────────────────────── + +/** + * Structured provider warning. Providers collect these during translation; + * callers convert them to system chunks before streaming starts. + */ +interface ProviderWarning { + code: string; + message: string; +} + +// ─── NodeConfig → SDK Options Translation ────────────────────────────────── + +/** + * Translate nodeConfig into Claude SDK-specific options. + * Called inside sendQuery when nodeConfig is present (workflow path). + * Returns structured warnings that the caller should yield as system chunks. + */ +async function applyNodeConfig( + options: Options, + nodeConfig: NodeConfig, + cwd: string +): Promise<ProviderWarning[]> { + const warnings: ProviderWarning[] = []; + // allowed_tools → tools + if (nodeConfig.allowed_tools !== undefined) { + options.tools = nodeConfig.allowed_tools; + } + + // denied_tools → disallowedTools + if (nodeConfig.denied_tools !== undefined) { + options.disallowedTools = nodeConfig.denied_tools; + } + + // hooks → build SDK hooks + if (nodeConfig.hooks) { + const builtHooks = buildSDKHooksFromYAML( + nodeConfig.hooks as Record<string, YAMLHookMatcher[] | undefined> + ); + if (Object.keys(builtHooks).length > 0) { + // Merge with existing hooks (PostToolUse capture hook) + const existingHooks = options.hooks as SDKHooksMap | undefined; + for (const [event, matchers] of Object.entries(builtHooks)) { + if (!matchers) continue; + const existing = existingHooks?.[event] as HookCallbackMatcher[] | undefined; + if (existing) { + (options.hooks as Record<string, HookCallbackMatcher[]>)[event] = [ + ...(matchers as HookCallbackMatcher[]), + ...existing, + ]; + } else { + (options.hooks as Record<string, HookCallbackMatcher[]>)[event] = + matchers as HookCallbackMatcher[]; + } + } + } + } + + // mcp → load config and set mcpServers + allowedTools wildcards + if (nodeConfig.mcp) { + const mcpPath = nodeConfig.mcp; + const { servers, serverNames, missingVars } = await loadMcpConfig(mcpPath, cwd); + options.mcpServers = servers as Options['mcpServers']; + const mcpWildcards = serverNames.map(name => `mcp__${name}__*`); + options.allowedTools = [...(options.allowedTools ?? []), ...mcpWildcards]; + getLog().info({ serverNames, mcpPath }, 'claude.mcp_config_loaded'); + if (missingVars.length > 0) { + const uniqueVars = [...new Set(missingVars)]; + getLog().warn({ missingVars: uniqueVars }, 'claude.mcp_env_vars_missing'); + warnings.push({ + code: 'mcp_env_vars_missing', + message: `MCP config references undefined env vars: ${uniqueVars.join(', ')}. These will be empty strings — MCP servers may fail to authenticate.`, + }); + } + // Haiku models don't support tool search (lazy loading for many tools) + if (options.model?.toLowerCase().includes('haiku')) { + getLog().warn({ model: options.model }, 'claude.mcp_haiku_tool_search_unsupported'); + warnings.push({ + code: 'mcp_haiku_tool_search', + message: + 'Using Haiku model with MCP servers — tool search (lazy loading for many tools) is not supported on Haiku. Consider using Sonnet or Opus.', + }); + } + } + + // skills → AgentDefinition wrapping + if (nodeConfig.skills) { + const skills = nodeConfig.skills; + const agentId = 'dag-node-skills'; + const agentTools = options.tools ?
[...(options.tools as string[]), 'Skill'] : ['Skill']; + const agentDef: { + description: string; + prompt: string; + skills: string[]; + tools: string[]; + model?: string; + } = { + description: 'DAG node with skills', + prompt: `You have preloaded skills: ${skills.join(', ')}. Use them when relevant.`, + skills, + tools: agentTools, + }; + if (options.model) agentDef.model = options.model; + options.agents = { [agentId]: agentDef }; + options.agent = agentId; + if (!options.allowedTools?.includes('Skill')) { + options.allowedTools = [...(options.allowedTools ?? []), 'Skill']; + } + getLog().info({ skills, agentId }, 'claude.skills_agent_created'); + } + + // effort + if (nodeConfig.effort !== undefined) { + options.effort = nodeConfig.effort as Options['effort']; + } + + // thinking + if (nodeConfig.thinking !== undefined) { + options.thinking = nodeConfig.thinking as Options['thinking']; + } + + // sandbox + if (nodeConfig.sandbox !== undefined) { + options.sandbox = nodeConfig.sandbox as Options['sandbox']; + } + + // betas + if (nodeConfig.betas !== undefined) { + options.betas = nodeConfig.betas as Options['betas']; + } + + // output_format (from nodeConfig, overrides base outputFormat if present) + if (nodeConfig.output_format) { + options.outputFormat = { + type: 'json_schema', + schema: nodeConfig.output_format, + } as Options['outputFormat']; + } + + // maxBudgetUsd from nodeConfig + if (nodeConfig.maxBudgetUsd !== undefined) { + options.maxBudgetUsd = nodeConfig.maxBudgetUsd; + } + + // systemPrompt from nodeConfig + if (nodeConfig.systemPrompt !== undefined) { + options.systemPrompt = nodeConfig.systemPrompt; + } + + // fallbackModel from nodeConfig + if (nodeConfig.fallbackModel !== undefined) { + options.fallbackModel = nodeConfig.fallbackModel; + } + + return warnings; +} + +// ─── Base Options Builder ──────────────────────────────────────────────── + +/** Queued tool result from SDK hooks, consumed during stream normalization. */ +interface ToolResultEntry { + toolName: string; + toolOutput: string; + toolCallId?: string; +} + +/** + * Build base Claude SDK options from cwd, request options, and assistant defaults. + * Does not include nodeConfig translation — that is handled by applyNodeConfig. + */ +function buildBaseClaudeOptions( + cwd: string, + requestOptions: SendQueryOptions | undefined, + assistantDefaults: ReturnType<typeof parseClaudeConfig>, + controller: AbortController, + stderrLines: string[], + toolResultQueue: ToolResultEntry[], + env: NodeJS.ProcessEnv +): Options { + return { + cwd, + pathToClaudeCodeExecutable: cliPath, + // Prevent Bun from auto-loading .env from the target repo cwd. + // Without this, the Claude Code subprocess inherits repo secrets. + executableArgs: ['--no-env-file'], + env, + model: requestOptions?.model ?? assistantDefaults.model, + abortController: controller, + ...(requestOptions?.outputFormat !== undefined + ? { outputFormat: requestOptions.outputFormat } + : {}), + ...(requestOptions?.maxBudgetUsd !== undefined + ? { maxBudgetUsd: requestOptions.maxBudgetUsd } + : {}), + ...(requestOptions?.fallbackModel !== undefined + ? { fallbackModel: requestOptions.fallbackModel } + : {}), + ...(requestOptions?.persistSession !== undefined + ? { persistSession: requestOptions.persistSession } + : {}), + ...(requestOptions?.forkSession !== undefined + ? { forkSession: requestOptions.forkSession } + : {}), + permissionMode: 'bypassPermissions', + allowDangerouslySkipPermissions: true, + systemPrompt: requestOptions?.systemPrompt ??
{ type: 'preset', preset: 'claude_code' }, + settingSources: assistantDefaults.settingSources ?? ['project'], + hooks: buildToolCaptureHooks(toolResultQueue), + stderr: (data: string): void => { + const output = data.trim(); + if (!output) return; + stderrLines.push(output); + + const isError = + output.toLowerCase().includes('error') || + output.toLowerCase().includes('fatal') || + output.toLowerCase().includes('failed') || + output.toLowerCase().includes('exception') || + output.includes('at ') || + output.includes('Error:'); + + const isInfoMessage = + output.includes('Spawning Claude Code') || + output.includes('--output-format') || + output.includes('--permission-mode'); + + if (isError && !isInfoMessage) { + getLog().error({ stderr: output }, 'subprocess_error'); + } + }, + }; +} + +// ─── Tool Capture Hooks ────────────────────────────────────────────────── + +/** + * Build SDK hooks that capture tool use results into a shared queue. + * The queue is drained during stream normalization. + */ +function buildToolCaptureHooks(toolResultQueue: ToolResultEntry[]): Options['hooks'] { + return { + PostToolUse: [ + { + hooks: [ + (async (input: Record<string, unknown>): Promise<{ continue: true }> => { + try { + const toolName = (input as { tool_name?: string }).tool_name ?? 'unknown'; + const toolUseId = (input as { tool_use_id?: string }).tool_use_id; + const toolResponse = (input as { tool_response?: unknown }).tool_response; + const output = + typeof toolResponse === 'string' + ? toolResponse + : JSON.stringify(toolResponse ?? ''); + const maxLen = 10_000; + toolResultQueue.push({ + toolName, + toolOutput: output.length > maxLen ? output.slice(0, maxLen) + '...' : output, + ...(toolUseId !== undefined ? { toolCallId: toolUseId } : {}), + }); + } catch (e) { + getLog().error({ err: e, input }, 'claude.post_tool_use_hook_error'); + } + return { continue: true }; + }) as HookCallback, + ], + }, + ], + PostToolUseFailure: [ + { + hooks: [ + (async (input: Record<string, unknown>): Promise<{ continue: true }> => { + try { + const toolName = (input as { tool_name?: string }).tool_name ?? 'unknown'; + const toolUseId = (input as { tool_use_id?: string }).tool_use_id; + const rawError = (input as { error?: string }).error; + if (rawError === undefined) { + getLog().debug({ input }, 'claude.post_tool_use_failure_no_error_field'); + } + const errorText = rawError ?? 'tool failed'; + const isInterrupt = (input as { is_interrupt?: boolean }).is_interrupt === true; + const prefix = isInterrupt ? '⚠️ Interrupted' : '❌ Error'; + toolResultQueue.push({ + toolName, + toolOutput: `${prefix}: ${errorText}`, + ...(toolUseId !== undefined ? { toolCallId: toolUseId } : {}), + }); + } catch (e) { + getLog().error({ err: e, input }, 'claude.post_tool_use_failure_hook_error'); + } + return { continue: true }; + }) as HookCallback, + ], + }, + ], + }; +} + +// ─── Stream Normalizer ─────────────────────────────────────────────────── + +/** + * Normalize raw Claude SDK events into Archon MessageChunks. + * Drains the tool result queue between events (populated by SDK hooks). + */ +async function* streamClaudeMessages( + events: AsyncGenerator<unknown>, + toolResultQueue: ToolResultEntry[] +): AsyncGenerator<MessageChunk> { + for await (const msg of events) { + // Drain tool results captured by hooks before processing the next event + while (toolResultQueue.length > 0) { + const tr = toolResultQueue.shift(); + if (tr) { + yield { + type: 'tool_result', + toolName: tr.toolName, + toolOutput: tr.toolOutput, + ...(tr.toolCallId !== undefined ?
{ toolCallId: tr.toolCallId } : {}), + }; + } + } + + const event = msg as { type: string }; + + if (event.type === 'assistant') { + const message = msg as { message: { content: ContentBlock[] } }; + const content = message.message.content; + + for (const block of content) { + if (block.type === 'text' && block.text) { + yield { type: 'assistant', content: block.text }; + } else if (block.type === 'tool_use' && block.name) { + yield { + type: 'tool', + toolName: block.name, + toolInput: block.input ?? {}, + ...(block.id !== undefined ? { toolCallId: block.id } : {}), + }; + } + } + } else if (event.type === 'system') { + const sysMsg = msg as { + subtype?: string; + mcp_servers?: { name: string; status: string }[]; + }; + if (sysMsg.subtype === 'init' && sysMsg.mcp_servers) { + const failed = sysMsg.mcp_servers.filter(s => s.status !== 'connected'); + if (failed.length > 0) { + const names = failed.map(s => `${s.name} (${s.status})`).join(', '); + yield { type: 'system', content: `MCP server connection failed: ${names}` }; + } + } else { + getLog().debug({ subtype: sysMsg.subtype }, 'claude.system_message_unhandled'); + } + } else if (event.type === 'rate_limit_event') { + const rateLimitMsg = msg as { rate_limit_info?: Record<string, unknown> }; + getLog().warn({ rateLimitInfo: rateLimitMsg.rate_limit_info }, 'claude.rate_limit_event'); + yield { type: 'rate_limit', rateLimitInfo: rateLimitMsg.rate_limit_info ?? {} }; + } else if (event.type === 'result') { + const resultMsg = msg as { + session_id?: string; + is_error?: boolean; + subtype?: string; + usage?: { input_tokens?: number; output_tokens?: number; total_tokens?: number }; + structured_output?: unknown; + total_cost_usd?: number; + stop_reason?: string | null; + num_turns?: number; + model_usage?: Record< + string, + { + input_tokens: number; + output_tokens: number; + cache_read_input_tokens?: number; + cache_creation_input_tokens?: number; + } + >; + }; + const tokens = normalizeClaudeUsage(resultMsg.usage); + if (resultMsg.is_error) { + getLog().error( + { sessionId: resultMsg.session_id, errorSubtype: resultMsg.subtype }, + 'claude.result_is_error' + ); + } + yield { + type: 'result', + sessionId: resultMsg.session_id, + ...(tokens ? { tokens } : {}), + ...(resultMsg.structured_output !== undefined + ? { structuredOutput: resultMsg.structured_output } + : {}), + ...(resultMsg.is_error ? { isError: true, errorSubtype: resultMsg.subtype } : {}), + ...(resultMsg.total_cost_usd !== undefined ? { cost: resultMsg.total_cost_usd } : {}), + ...(resultMsg.stop_reason != null ? { stopReason: resultMsg.stop_reason } : {}), + ...(resultMsg.num_turns !== undefined ? { numTurns: resultMsg.num_turns } : {}), + ...(resultMsg.model_usage + ? { modelUsage: resultMsg.model_usage as Record<string, unknown> } + : {}), + }; + } + } + + // Drain any remaining tool results after the stream ends + while (toolResultQueue.length > 0) { + const tr = toolResultQueue.shift(); + if (tr) { + yield { + type: 'tool_result', + toolName: tr.toolName, + toolOutput: tr.toolOutput, + ...(tr.toolCallId !== undefined ? { toolCallId: tr.toolCallId } : {}), + }; + } + } +} + +// ─── Error Classification & Retry ──────────────────────────────────────── + +/** + * Classify a subprocess error and enrich with stderr context. + * Returns the enriched error, its classification, and whether the caller should retry.
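+ * Example (illustrative): 'process exited with code 1' with '429' in stderr + * classifies as rate_limit and is retried with exponential backoff; a 401 or + * 'unauthorized' message classifies as auth and fails fast with no retry.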
+ */ +function classifyAndEnrichError( + error: Error, + stderrLines: string[], + controller: AbortController +): { enrichedError: Error; errorClass: string; shouldRetry: boolean } { + // If the controller was aborted by withFirstMessageTimeout, the original + // timeout error carries the diagnostic message and #1067 breadcrumb. + // Preserve it instead of collapsing into a generic "Query aborted". + if (controller.signal.aborted) { + if (error.message.includes('produced no output within')) { + return { enrichedError: error, errorClass: 'timeout', shouldRetry: false }; + } + return { + enrichedError: new Error('Query aborted'), + errorClass: 'aborted', + shouldRetry: false, + }; + } + + const stderrContext = stderrLines.join('\n'); + const errorClass = classifySubprocessError(error.message, stderrContext); + + if (errorClass === 'auth') { + const enrichedError = new Error( + `Claude Code auth error: ${error.message}${stderrContext ? ` (${stderrContext})` : ''}` + ); + enrichedError.cause = error; + return { enrichedError, errorClass, shouldRetry: false }; + } + + const enrichedMessage = stderrContext + ? `Claude Code ${errorClass}: ${error.message} (stderr: ${stderrContext})` + : `Claude Code ${errorClass}: ${error.message}`; + const enrichedError = new Error(enrichedMessage); + enrichedError.cause = error; + const shouldRetry = errorClass === 'rate_limit' || errorClass === 'crash'; + return { enrichedError, errorClass, shouldRetry }; +} + +// ─── Claude Provider ─────────────────────────────────────────────────────── + +/** + * Claude AI agent provider. + * Implements IAgentProvider with full SDK integration. + * + * sendQuery orchestrates the following internal helpers: + * - buildBaseClaudeOptions: SDK option construction + * - applyNodeConfig: workflow nodeConfig → SDK option translation + warnings + * - streamClaudeMessages: raw SDK event normalization into MessageChunks + * - classifyAndEnrichError: error classification for retry decisions + */ +export class ClaudeProvider implements IAgentProvider { + private readonly retryBaseDelayMs: number; + + constructor(options?: { retryBaseDelayMs?: number }) { + if (getProcessUid() === 0 && process.env.IS_SANDBOX !== '1') { + throw new Error( + 'Claude Code SDK does not support bypassPermissions when running as root (UID 0). ' + + 'Run as a non-root user, set IS_SANDBOX=1, or use the Dockerfile which creates a non-root appuser.' + ); + } + this.retryBaseDelayMs = options?.retryBaseDelayMs ?? RETRY_BASE_DELAY_MS; + } + + getCapabilities(): ProviderCapabilities { + return { + sessionResume: true, + mcp: true, + hooks: true, + skills: true, + toolRestrictions: true, + structuredOutput: true, + envInjection: true, + costControl: true, + effortControl: true, + thinkingControl: true, + fallbackModel: true, + sandbox: true, + }; + } + + /** + * Send a query to Claude and stream responses. + * Orchestrates option building, nodeConfig translation, streaming, and retry. + */ + // TODO(#1135): Pre-spawn env-leak gate was removed during provider extraction. + // Caller-side enforcement (orchestrator, dag-executor) is tracked in #1135. + // Providers must NOT implement security gates — the platform guarantees safety + // before a provider runs. + async *sendQuery( + prompt: string, + cwd: string, + resumeSessionId?: string, + requestOptions?: SendQueryOptions + ): AsyncGenerator<MessageChunk> { + let lastError: Error | undefined; + const assistantDefaults = parseClaudeConfig(requestOptions?.assistantConfig ??
{}); + + // Build subprocess env once (avoids re-logging auth mode per retry) + const subprocessEnv = buildSubprocessEnv(); + const env = requestOptions?.env ? { ...subprocessEnv, ...requestOptions.env } : subprocessEnv; + + // Apply nodeConfig translation once (deterministic, not retry-dependent) + // We need a throwaway Options to extract warnings from applyNodeConfig, + // then re-apply per attempt. But nodeConfig warnings are deterministic, + // so we compute them once and yield them before the first attempt. + let nodeConfigWarnings: ProviderWarning[] = []; + if (requestOptions?.nodeConfig) { + const tempOptions: Options = {} as Options; + nodeConfigWarnings = await applyNodeConfig(tempOptions, requestOptions.nodeConfig, cwd); + } + + // Yield provider warnings once before retries + for (const warning of nodeConfigWarnings) { + yield { type: 'system' as const, content: `⚠️ ${warning.message}` }; + } + + // Track the current attempt's controller so a single abort listener + // can forward cancellation without accumulating per-retry listeners. + let currentController: AbortController | undefined; + const onAbort = (): void => { + currentController?.abort(); + }; + if (requestOptions?.abortSignal) { + requestOptions.abortSignal.addEventListener('abort', onAbort, { once: true }); + } + + for (let attempt = 0; attempt <= MAX_SUBPROCESS_RETRIES; attempt++) { + if (requestOptions?.abortSignal?.aborted) { + throw new Error('Query aborted'); + } + + const stderrLines: string[] = []; + const toolResultQueue: ToolResultEntry[] = []; + const controller = new AbortController(); + currentController = controller; + + // 1. Build SDK options (env pre-computed above) + const options = buildBaseClaudeOptions( + cwd, + requestOptions, + assistantDefaults, + controller, + stderrLines, + toolResultQueue, + env + ); + + // 2. Apply nodeConfig translation (re-applied per attempt since options are fresh) + if (requestOptions?.nodeConfig) { + await applyNodeConfig(options, requestOptions.nodeConfig, cwd); + } + + // 3. Set session resume + if (resumeSessionId) { + options.resume = resumeSessionId; + getLog().debug( + { sessionId: resumeSessionId, forkSession: requestOptions?.forkSession }, + 'resuming_session' + ); + } else { + getLog().debug({ cwd, attempt }, 'starting_new_session'); + } + + try { + // 4. Run query with first-event timeout protection + const rawEvents = query({ prompt, options }); + const timeoutMs = getFirstEventTimeoutMs(); + const diagnostics = buildFirstEventHangDiagnostics( + options.env as Record<string, string | undefined>, + options.model + ); + const events = withFirstMessageTimeout(rawEvents, controller, timeoutMs, diagnostics); + + // 5. Stream normalized events + yield* streamClaudeMessages(events, toolResultQueue); + return; + } catch (error) { + const err = error as Error; + const { enrichedError, errorClass, shouldRetry } = classifyAndEnrichError( + err, + stderrLines, + controller + ); + + getLog().error( + { + err, + stderrContext: stderrLines.join('\n'), + errorClass, + attempt, + maxRetries: MAX_SUBPROCESS_RETRIES, + }, + 'query_error' + ); + + if (!shouldRetry || attempt >= MAX_SUBPROCESS_RETRIES) { + throw enrichedError; + } + + const delayMs = this.retryBaseDelayMs * Math.pow(2, attempt); + getLog().info({ attempt, delayMs, errorClass }, 'retrying_subprocess'); + await new Promise(resolve => setTimeout(resolve, delayMs)); + lastError = enrichedError; + } + } + + throw lastError ??
new Error('Claude Code query failed after retries'); + } + + getType(): string { + return 'claude'; + } +} diff --git a/packages/core/src/clients/codex-binary-guard.test.ts b/packages/providers/src/codex/binary-guard.test.ts similarity index 73% rename from packages/core/src/clients/codex-binary-guard.test.ts rename to packages/providers/src/codex/binary-guard.test.ts index c235caf5fd..891262cf47 100644 --- a/packages/core/src/clients/codex-binary-guard.test.ts +++ b/packages/providers/src/codex/binary-guard.test.ts @@ -2,7 +2,7 @@ * Tests for Codex binary resolution in compiled binary mode. * * Separate file because mock.module('@archon/paths') with BUNDLED_IS_BINARY=true - * conflicts with codex.test.ts which mocks it without BUNDLED_IS_BINARY. + * conflicts with provider.test.ts which mocks it without BUNDLED_IS_BINARY. * Must run in its own bun test invocation (see package.json test script). */ import { describe, test, expect, mock, beforeEach } from 'bun:test'; @@ -45,63 +45,35 @@ mock.module('@openai/codex-sdk', () => ({ Codex: MockCodex, })); -// Mock resolver — controls binary resolution behavior per test +// Mock resolver -- controls binary resolution behavior per test const mockResolveCodexBinaryPath = mock( (_configPath?: string): Promise<string | undefined> => Promise.resolve('/tmp/test-archon/vendor/codex/codex') ); -mock.module('../utils/codex-binary-resolver', () => ({ +mock.module('./binary-resolver', () => ({ resolveCodexBinaryPath: mockResolveCodexBinaryPath, })); -// Config mock with configurable return value -const mockLoadConfig = mock(() => - Promise.resolve({ - allowTargetRepoKeys: false, - assistants: { codex: {} }, - }) -); - -// Mock db and config dependencies to prevent real DB access -mock.module('../db/codebases', () => ({ - findCodebaseByDefaultCwd: mock(() => Promise.resolve(null)), - findCodebaseByPathPrefix: mock(() => Promise.resolve(null)), -})); -mock.module('../config/config-loader', () => ({ - loadConfig: mockLoadConfig, -})); -mock.module('../utils/env-leak-scanner', () => ({ - scanPathForSensitiveKeys: mock(() => ({ findings: [] })), - EnvLeakError: class extends Error {}, -})); +import { CodexProvider, resetCodexSingleton } from './provider'; -import { CodexClient, resetCodexSingleton } from './codex'; - -describe('CodexClient binary mode resolution', () => { +describe('CodexProvider binary mode resolution', () => { beforeEach(() => { resetCodexSingleton(); MockCodex.mockClear(); mockStartThread.mockClear(); mockResolveCodexBinaryPath.mockClear(); - mockLoadConfig.mockClear(); capturedOptions = undefined; // Restore default mock implementations mockResolveCodexBinaryPath.mockImplementation(() => Promise.resolve('/tmp/test-archon/vendor/codex/codex') ); - mockLoadConfig.mockImplementation(() => - Promise.resolve({ - allowTargetRepoKeys: false, - assistants: { codex: {} }, - }) - ); }); test('passes resolved binary path to Codex constructor via codexPathOverride', async () => { mockResolveCodexBinaryPath.mockResolvedValueOnce('/custom/path/to/codex'); - const client = new CodexClient(); + const client = new CodexProvider(); const generator = client.sendQuery('test prompt', '/tmp/test'); // Consume events to trigger initialization @@ -118,7 +90,7 @@ describe('CodexClient binary mode resolution', () => { new Error('Codex native binary not found at /tmp/test-archon/vendor/codex/codex') ); - const client = new CodexClient(); + const client = new CodexProvider(); const generator = client.sendQuery('test prompt', '/tmp/test'); await
expect(generator.next()).rejects.toThrow('Codex native binary not found'); @@ -129,7 +101,7 @@ describe('CodexClient binary mode resolution', () => { .mockRejectedValueOnce(new Error('Codex CLI binary not found')) .mockResolvedValueOnce('/tmp/test-archon/vendor/codex/codex'); - const client = new CodexClient(); + const client = new CodexProvider(); // First call fails await expect(client.sendQuery('test prompt', '/tmp/test').next()).rejects.toThrow( @@ -150,7 +122,7 @@ describe('CodexClient binary mode resolution', () => { test('does not pass codexPathOverride when resolver returns undefined', async () => { mockResolveCodexBinaryPath.mockResolvedValueOnce(undefined); - const client = new CodexClient(); + const client = new CodexProvider(); const generator = client.sendQuery('test prompt', '/tmp/test'); for await (const _chunk of generator) { @@ -161,15 +133,12 @@ describe('CodexClient binary mode resolution', () => { expect(capturedOptions?.codexPathOverride).toBeUndefined(); }); - test('passes config codexBinaryPath to resolver', async () => { - mockLoadConfig.mockResolvedValueOnce({ - allowTargetRepoKeys: false, - assistants: { codex: { codexBinaryPath: '/user/custom/codex' } }, + test('passes config codexBinaryPath to resolver via assistantConfig', async () => { + const client = new CodexProvider(); + const generator = client.sendQuery('test prompt', '/tmp/test', undefined, { + assistantConfig: { codexBinaryPath: '/user/custom/codex' }, }); - const client = new CodexClient(); - const generator = client.sendQuery('test prompt', '/tmp/test'); - for await (const _chunk of generator) { // drain } diff --git a/packages/core/src/utils/codex-binary-resolver-dev.test.ts b/packages/providers/src/codex/binary-resolver-dev.test.ts similarity index 92% rename from packages/core/src/utils/codex-binary-resolver-dev.test.ts rename to packages/providers/src/codex/binary-resolver-dev.test.ts index ac8761ee02..9635d8d59c 100644 --- a/packages/core/src/utils/codex-binary-resolver-dev.test.ts +++ b/packages/providers/src/codex/binary-resolver-dev.test.ts @@ -11,7 +11,7 @@ mock.module('@archon/paths', () => ({ getArchonHome: mock(() => '/tmp/test-archon-home'), })); -import { resolveCodexBinaryPath } from './codex-binary-resolver'; +import { resolveCodexBinaryPath } from './binary-resolver'; describe('resolveCodexBinaryPath (dev mode)', () => { test('returns undefined when BUNDLED_IS_BINARY is false', async () => { diff --git a/packages/core/src/utils/codex-binary-resolver.test.ts b/packages/providers/src/codex/binary-resolver.test.ts similarity index 98% rename from packages/core/src/utils/codex-binary-resolver.test.ts rename to packages/providers/src/codex/binary-resolver.test.ts index 3425a6fa17..1df4e7c6f6 100644 --- a/packages/core/src/utils/codex-binary-resolver.test.ts +++ b/packages/providers/src/codex/binary-resolver.test.ts @@ -16,7 +16,7 @@ mock.module('@archon/paths', () => ({ getArchonHome: mock(() => '/tmp/test-archon-home'), })); -import * as resolver from './codex-binary-resolver'; +import * as resolver from './binary-resolver'; describe('resolveCodexBinaryPath (binary mode)', () => { const originalEnv = process.env.CODEX_BIN_PATH; diff --git a/packages/core/src/utils/codex-binary-resolver.ts b/packages/providers/src/codex/binary-resolver.ts similarity index 96% rename from packages/core/src/utils/codex-binary-resolver.ts rename to packages/providers/src/codex/binary-resolver.ts index e927918c95..a1e0f01a5b 100644 --- a/packages/core/src/utils/codex-binary-resolver.ts +++ 
b/packages/providers/src/codex/binary-resolver.ts @@ -5,9 +5,6 @@ * native Codex CLI binary, which breaks in compiled binaries where * `import.meta.url` is frozen to the build host's path. * - * This module resolves an alternative path and passes it to the SDK's - * `codexPathOverride` constructor option, bypassing the broken resolution. - * * Resolution order: * 1. `CODEX_BIN_PATH` environment variable * 2. `assistants.codex.codexBinaryPath` in config diff --git a/packages/providers/src/codex/config.ts b/packages/providers/src/codex/config.ts new file mode 100644 index 0000000000..f8d6f2d7e6 --- /dev/null +++ b/packages/providers/src/codex/config.ts @@ -0,0 +1,46 @@ +/** + * Typed config parsing for Codex provider defaults. + * Validates and narrows the opaque assistantConfig to typed fields. + */ +import type { CodexProviderDefaults } from '../types'; + +// Re-export so consumers can import the type from either location +export type { CodexProviderDefaults } from '../types'; + +/** + * Parse raw assistantConfig into typed Codex defaults. + * Defensive: invalid fields are silently dropped. + */ +export function parseCodexConfig(raw: Record<string, unknown>): CodexProviderDefaults { + const result: CodexProviderDefaults = {}; + + if (typeof raw.model === 'string') { + result.model = raw.model; + } + + const validEfforts = ['minimal', 'low', 'medium', 'high', 'xhigh']; + if ( + typeof raw.modelReasoningEffort === 'string' && + validEfforts.includes(raw.modelReasoningEffort) + ) { + result.modelReasoningEffort = + raw.modelReasoningEffort as CodexProviderDefaults['modelReasoningEffort']; + } + + const validSearchModes = ['disabled', 'cached', 'live']; + if (typeof raw.webSearchMode === 'string' && validSearchModes.includes(raw.webSearchMode)) { + result.webSearchMode = raw.webSearchMode as CodexProviderDefaults['webSearchMode']; + } + + if (Array.isArray(raw.additionalDirectories)) { + result.additionalDirectories = raw.additionalDirectories.filter( + (d): d is string => typeof d === 'string' + ); + } + + if (typeof raw.codexBinaryPath === 'string') { + result.codexBinaryPath = raw.codexBinaryPath; + } + + return result; +} diff --git a/packages/providers/src/codex/index.ts b/packages/providers/src/codex/index.ts new file mode 100644 index 0000000000..71302f6884 --- /dev/null +++ b/packages/providers/src/codex/index.ts @@ -0,0 +1,3 @@ +export { CodexProvider, resetCodexSingleton } from './provider'; +export { parseCodexConfig, type CodexProviderDefaults } from './config'; +export { resolveCodexBinaryPath, fileExists } from './binary-resolver'; diff --git a/packages/core/src/clients/codex.test.ts b/packages/providers/src/codex/provider.test.ts similarity index 74% rename from packages/core/src/clients/codex.test.ts rename to packages/providers/src/codex/provider.test.ts index cfa329e7c1..a92134dab6 100644 --- a/packages/core/src/clients/codex.test.ts +++ b/packages/providers/src/codex/provider.test.ts @@ -1,4 +1,4 @@ -import { describe, test, expect, mock, beforeEach, afterEach, spyOn } from 'bun:test'; +import { describe, test, expect, mock, beforeEach } from 'bun:test'; import { createMockLogger } from '../test/mocks/logger'; const mockLogger = createMockLogger(); @@ -39,15 +39,13 @@ mock.module('@openai/codex-sdk', () => ({ Codex: MockCodex, })); -import { CodexClient } from './codex'; -import * as codebaseDb from '../db/codebases'; -import * as envLeakScanner from '../utils/env-leak-scanner'; +import { CodexProvider } from './provider'; -describe('CodexClient', () => { - let client: CodexClient;
+describe('CodexProvider', () => { + let client: CodexProvider; beforeEach(() => { - client = new CodexClient({ retryBaseDelayMs: 1 }); + client = new CodexProvider({ retryBaseDelayMs: 1 }); mockStartThread.mockClear(); mockResumeThread.mockClear(); mockRunStreamed.mockClear(); @@ -67,6 +65,26 @@ describe('CodexClient', () => { }); }); + describe('getCapabilities', () => { + test('returns limited capability set for Codex provider', () => { + const caps = client.getCapabilities(); + expect(caps).toEqual({ + sessionResume: true, + mcp: false, + hooks: false, + skills: false, + toolRestrictions: false, + structuredOutput: true, + envInjection: false, + costControl: false, + effortControl: false, + thinkingControl: false, + fallbackModel: false, + sandbox: false, + }); + }); + }); + describe('sendQuery', () => { test('yields text events from agent_message items', async () => { mockRunStreamed.mockResolvedValue({ @@ -114,8 +132,6 @@ describe('CodexClient', () => { chunks.push(chunk); } - // Codex item.completed fires once the command is fully done, so we emit - // start + result back-to-back to close the UI tool card immediately. expect(chunks[0]).toEqual({ type: 'tool', toolName: 'npm test' }); expect(chunks[1]).toEqual({ type: 'tool_result', @@ -184,10 +200,10 @@ describe('CodexClient', () => { chunks.push(chunk); } - expect(chunks[0]).toEqual({ type: 'tool', toolName: '🔍 Searching: codex sdk' }); + expect(chunks[0]).toEqual({ type: 'tool', toolName: '\u{1F50D} Searching: codex sdk' }); expect(chunks[1]).toEqual({ type: 'tool_result', - toolName: '🔍 Searching: codex sdk', + toolName: '\u{1F50D} Searching: codex sdk', toolOutput: '', }); }); @@ -216,7 +232,7 @@ describe('CodexClient', () => { expect(chunks[0]).toEqual({ type: 'system', - content: '📋 Tasks:\n✅ Scan repo\n⬜ Add tests', + content: '\u{1F4CB} Tasks:\n\u2705 Scan repo\n\u2B1C Add tests', }); expect(chunks).toHaveLength(2); }); @@ -253,11 +269,11 @@ describe('CodexClient', () => { expect(chunks).toHaveLength(3); // todoV1 + todoV2 + result expect(chunks[0]).toEqual({ type: 'system', - content: '📋 Tasks:\n⬜ Scan repo\n⬜ Add tests', + content: '\u{1F4CB} Tasks:\n\u2B1C Scan repo\n\u2B1C Add tests', }); expect(chunks[1]).toEqual({ type: 'system', - content: '📋 Tasks:\n✅ Scan repo\n⬜ Add tests', + content: '\u{1F4CB} Tasks:\n\u2705 Scan repo\n\u2B1C Add tests', }); }); @@ -287,7 +303,7 @@ describe('CodexClient', () => { expect(chunks[0]).toEqual({ type: 'system', - content: '✅ File changes:\n➕ src/new.ts\n📝 src/app.ts\n➖ src/old.ts', + content: '\u2705 File changes:\n\u2795 src/new.ts\n\u{1F4DD} src/app.ts\n\u2796 src/old.ts', }); }); @@ -314,7 +330,7 @@ describe('CodexClient', () => { expect(chunks[0]).toEqual({ type: 'system', - content: '❌ File changes:\n📝 src/locked.ts\nPermission denied', + content: '\u274C File changes:\n\u{1F4DD} src/locked.ts\nPermission denied', }); }); @@ -340,7 +356,7 @@ describe('CodexClient', () => { expect(chunks[0]).toEqual({ type: 'system', - content: '❌ File change failed: Disk full', + content: '\u274C File change failed: Disk full', }); expect(mockLogger.warn).toHaveBeenCalledWith( expect.objectContaining({ status: 'failed' }), @@ -366,7 +382,7 @@ describe('CodexClient', () => { expect(chunks[0]).toEqual({ type: 'system', - content: '❌ File change failed', + content: '\u274C File change failed', }); }); @@ -397,18 +413,18 @@ describe('CodexClient', () => { } // First mcp call (in_progress on item.completed): start + empty result - expect(chunks[0]).toEqual({ type: 'tool', toolName: '🔌 MCP: 
fs/readFile' }); + expect(chunks[0]).toEqual({ type: 'tool', toolName: '\u{1F50C} MCP: fs/readFile' }); expect(chunks[1]).toEqual({ type: 'tool_result', - toolName: '🔌 MCP: fs/readFile', + toolName: '\u{1F50C} MCP: fs/readFile', toolOutput: '', }); // Second mcp call (failed): start + error result so the UI card closes - expect(chunks[2]).toEqual({ type: 'tool', toolName: '🔌 MCP: fs/readFile' }); + expect(chunks[2]).toEqual({ type: 'tool', toolName: '\u{1F50C} MCP: fs/readFile' }); expect(chunks[3]).toEqual({ type: 'tool_result', - toolName: '🔌 MCP: fs/readFile', - toolOutput: '❌ Error: Permission denied', + toolName: '\u{1F50C} MCP: fs/readFile', + toolOutput: '\u274C Error: Permission denied', }); expect(mockLogger.warn).toHaveBeenCalledWith( expect.objectContaining({ server: 'fs', tool: 'readFile' }), @@ -440,19 +456,22 @@ describe('CodexClient', () => { chunks.push(chunk); } - // Each item now emits start + empty result so the UI cards always close. - expect(chunks[0]).toEqual({ type: 'tool', toolName: '🔌 MCP: readFile' }); + expect(chunks[0]).toEqual({ type: 'tool', toolName: '\u{1F50C} MCP: readFile' }); expect(chunks[1]).toEqual({ type: 'tool_result', - toolName: '🔌 MCP: readFile', + toolName: '\u{1F50C} MCP: readFile', + toolOutput: '', + }); + expect(chunks[2]).toEqual({ type: 'tool', toolName: '\u{1F50C} MCP: fs' }); + expect(chunks[3]).toEqual({ + type: 'tool_result', + toolName: '\u{1F50C} MCP: fs', toolOutput: '', }); - expect(chunks[2]).toEqual({ type: 'tool', toolName: '🔌 MCP: fs' }); - expect(chunks[3]).toEqual({ type: 'tool_result', toolName: '🔌 MCP: fs', toolOutput: '' }); - expect(chunks[4]).toEqual({ type: 'tool', toolName: '🔌 MCP: MCP tool' }); + expect(chunks[4]).toEqual({ type: 'tool', toolName: '\u{1F50C} MCP: MCP tool' }); expect(chunks[5]).toEqual({ type: 'tool_result', - toolName: '🔌 MCP: MCP tool', + toolName: '\u{1F50C} MCP: MCP tool', toolOutput: '', }); }); @@ -473,11 +492,11 @@ describe('CodexClient', () => { chunks.push(chunk); } - expect(chunks[0]).toEqual({ type: 'tool', toolName: '🔌 MCP: db/query' }); + expect(chunks[0]).toEqual({ type: 'tool', toolName: '\u{1F50C} MCP: db/query' }); expect(chunks[1]).toEqual({ type: 'tool_result', - toolName: '🔌 MCP: db/query', - toolOutput: '❌ Error: MCP tool failed', + toolName: '\u{1F50C} MCP: db/query', + toolOutput: '\u274C Error: MCP tool failed', }); }); @@ -503,12 +522,11 @@ describe('CodexClient', () => { chunks.push(chunk); } - // Completed MCP calls now emit tool + tool_result so the UI card closes. 
expect(chunks).toHaveLength(3); - expect(chunks[0]).toEqual({ type: 'tool', toolName: '🔌 MCP: fs/readFile' }); + expect(chunks[0]).toEqual({ type: 'tool', toolName: '\u{1F50C} MCP: fs/readFile' }); expect(chunks[1]).toEqual({ type: 'tool_result', - toolName: '🔌 MCP: fs/readFile', + toolName: '\u{1F50C} MCP: fs/readFile', toolOutput: JSON.stringify([{ type: 'text', text: 'file contents' }]), }); expect(chunks[2]).toEqual({ @@ -525,7 +543,6 @@ describe('CodexClient', () => { })(), }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test prompt', '/my/workspace')) { // consume } @@ -548,7 +565,6 @@ describe('CodexClient', () => { })(), }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test prompt', '/workspace', 'existing-thread')) { // consume } @@ -585,7 +601,6 @@ describe('CodexClient', () => { } expect(mockResumeThread).toHaveBeenCalled(); - // Verify fallback startThread is called with correct config options expect(mockStartThread).toHaveBeenCalledWith( expect.objectContaining({ workingDirectory: '/workspace', @@ -595,7 +610,6 @@ describe('CodexClient', () => { approvalPolicy: 'never', }) ); - // Verify error was logged expect(mockLogger.error).toHaveBeenCalledWith( { err: resumeError, sessionId: 'bad-thread-id' }, 'resume_thread_failed' @@ -612,19 +626,20 @@ describe('CodexClient', () => { }); }); - test('passes model and codex options to thread options', async () => { + test('passes model and codex options via assistantConfig to thread options', async () => { mockRunStreamed.mockResolvedValue({ events: (async function* () { yield { type: 'turn.completed', usage: defaultUsage }; })(), }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test prompt', '/workspace', undefined, { model: 'gpt-5.2-codex', - modelReasoningEffort: 'medium', - webSearchMode: 'live', - additionalDirectories: ['/other/repo'], + assistantConfig: { + modelReasoningEffort: 'medium', + webSearchMode: 'live', + additionalDirectories: ['/other/repo'], + }, })) { // consume } @@ -740,13 +755,11 @@ describe('CodexClient', () => { chunks.push(chunk); } - // Verify item.started logging with correct format expect(mockLogger.debug).toHaveBeenCalledWith( { eventType: 'item.started', itemType: 'command_execution', itemId: 'item-1' }, 'item_started' ); - // Verify item.completed logging includes command context expect(mockLogger.debug).toHaveBeenCalledWith( { eventType: 'item.completed', @@ -771,7 +784,7 @@ describe('CodexClient', () => { chunks.push(chunk); } - expect(chunks[0]).toEqual({ type: 'system', content: '⚠️ Something went wrong' }); + expect(chunks[0]).toEqual({ type: 'system', content: '\u26A0\uFE0F Something went wrong' }); expect(mockLogger.error).toHaveBeenCalledWith( { message: 'Something went wrong' }, 'stream_error' @@ -818,7 +831,10 @@ describe('CodexClient', () => { chunks.push(chunk); } - expect(chunks[0]).toEqual({ type: 'system', content: '❌ Turn failed: Rate limit exceeded' }); + expect(chunks[0]).toEqual({ + type: 'system', + content: '\u274C Turn failed: Rate limit exceeded', + }); expect(mockLogger.error).toHaveBeenCalledWith( { errorMessage: 'Rate limit exceeded' }, 'turn_failed' @@ -837,7 +853,10 @@ describe('CodexClient', () => { chunks.push(chunk); } - expect(chunks[0]).toEqual({ type: 'system', content: '❌ Turn failed: Unknown error' }); + expect(chunks[0]).toEqual({ + type: 'system', + content: '\u274C Turn failed: Unknown 
error', + }); expect(mockLogger.error).toHaveBeenCalledWith( { errorMessage: 'Unknown error' }, 'turn_failed' @@ -1001,109 +1020,212 @@ describe('CodexClient', () => { expect(mockRunStreamed).toHaveBeenCalledTimes(1); }); }); - }); - describe('pre-spawn env leak gate', () => { - let spyFindByDefaultCwd: ReturnType<typeof spyOn>; - let spyFindByPathPrefix: ReturnType<typeof spyOn>; - let spyScan: ReturnType<typeof spyOn>; + describe('structured output normalization', () => { + test('populates structuredOutput on result when outputFormat is set and text is valid JSON', async () => { + const jsonPayload = { status: 'ok', count: 42 }; + mockRunStreamed.mockResolvedValueOnce({ + events: (async function* () { + yield { + type: 'item.completed', + item: { type: 'agent_message', id: 'msg-1', text: JSON.stringify(jsonPayload) }, + }; + yield { type: 'turn.completed', usage: defaultUsage }; + })(), + }); - beforeEach(() => { - // Restore a working runStreamed default so retry-test bleed doesn't break gate tests - mockRunStreamed.mockResolvedValue({ - events: (async function* () { - yield { type: 'turn.completed', usage: defaultUsage }; - })(), - }); - spyFindByDefaultCwd = spyOn(codebaseDb, 'findCodebaseByDefaultCwd').mockResolvedValue(null); - spyFindByPathPrefix = spyOn(codebaseDb, 'findCodebaseByPathPrefix').mockResolvedValue(null); - spyScan = spyOn(envLeakScanner, 'scanPathForSensitiveKeys').mockReturnValue({ - path: '/workspace', - findings: [], + const chunks = []; + for await (const chunk of client.sendQuery('test', '/tmp', undefined, { + outputFormat: { type: 'json_schema', schema: { type: 'object' } }, + })) { + chunks.push(chunk); + } + + const resultChunk = chunks.find(c => c.type === 'result'); + expect(resultChunk).toBeDefined(); + expect(resultChunk!.type === 'result' && resultChunk!.structuredOutput).toEqual( + jsonPayload + ); }); - }); - afterEach(() => { - spyFindByDefaultCwd.mockRestore(); - spyFindByPathPrefix.mockRestore(); - spyScan.mockRestore(); - }); + test('yields system warning when outputFormat is set but text is not valid JSON', async () => { + mockRunStreamed.mockResolvedValueOnce({ + events: (async function* () { + yield { + type: 'item.completed', + item: { type: 'agent_message', id: 'msg-1', text: 'not json at all' }, + }; + yield { type: 'turn.completed', usage: defaultUsage }; + })(), + }); + + const chunks = []; + for await (const chunk of client.sendQuery('test', '/tmp', undefined, { + outputFormat: { type: 'json_schema', schema: { type: 'object' } }, + })) { + chunks.push(chunk); + } + + const systemChunk = chunks.find(c => c.type === 'system'); + expect(systemChunk).toBeDefined(); + expect(systemChunk!.type === 'system' && systemChunk!.content).toContain( + 'Structured output requested but Codex returned non-JSON' + ); - test('throws EnvLeakError when .env contains sensitive keys and registered codebase has no consent', async () => { - spyFindByDefaultCwd.mockResolvedValueOnce({ - id: 'codebase-1', - allow_env_keys: false, - default_cwd: '/workspace', + const resultChunk = chunks.find(c => c.type === 'result'); + expect(resultChunk).toBeDefined(); + expect(resultChunk!.type === 'result' && resultChunk!.structuredOutput).toBeUndefined(); }); - spyScan.mockReturnValueOnce({ - path: '/workspace', - findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }], + + test('does not populate structuredOutput when outputFormat is not set', async () => { + mockRunStreamed.mockResolvedValueOnce({ + events: (async function* () { + yield { + type: 'item.completed', + item: { type: 'agent_message', id: 'msg-1', text:
'{"valid":"json"}' }, + }; + yield { type: 'turn.completed', usage: defaultUsage }; + })(), + }); + + const chunks = []; + for await (const chunk of client.sendQuery('test', '/tmp')) { + chunks.push(chunk); + } + + const resultChunk = chunks.find(c => c.type === 'result'); + expect(resultChunk).toBeDefined(); + expect(resultChunk!.type === 'result' && resultChunk!.structuredOutput).toBeUndefined(); }); - const consumeGenerator = async (): Promise<void> => { - for await (const _ of client.sendQuery('test', '/workspace')) { - // consume + test('handles nodeConfig.output_format path', async () => { + const jsonPayload = { key: 'value' }; + mockRunStreamed.mockResolvedValueOnce({ + events: (async function* () { + yield { + type: 'item.completed', + item: { type: 'agent_message', id: 'msg-1', text: JSON.stringify(jsonPayload) }, + }; + yield { type: 'turn.completed', usage: defaultUsage }; + })(), + }); + + const chunks = []; + for await (const chunk of client.sendQuery('test', '/tmp', undefined, { + nodeConfig: { output_format: { type: 'object' } }, + })) { + chunks.push(chunk); } - }; - await expect(consumeGenerator()).rejects.toThrow('Cannot run workflow'); + const resultChunk = chunks.find(c => c.type === 'result'); + expect(resultChunk).toBeDefined(); + expect(resultChunk!.type === 'result' && resultChunk!.structuredOutput).toEqual( + jsonPayload + ); + }); }); + }); +}); - test('skips scan entirely when cwd is not a registered codebase', async () => { - // Both lookups return null (default from beforeEach). Pre-spawn safety net - // is only for registered codebases; unregistered paths go through registerRepoAtPath. - spyScan.mockReturnValue({ - path: '/workspace', - findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }], - }); +// ─── Behavioral regression tests (black-box via sendQuery) ─────────────── - const chunks = []; - for await (const chunk of client.sendQuery('test', '/workspace')) { - chunks.push(chunk); - } +describe('sendQuery decomposition behaviors', () => { + let client: CodexProvider; - expect(spyScan).not.toHaveBeenCalled(); - }); + beforeEach(() => { + client = new CodexProvider({ retryBaseDelayMs: 1 }); + mockStartThread.mockClear(); + mockResumeThread.mockClear(); + mockRunStreamed.mockClear(); + mockLogger.info.mockClear(); + mockLogger.warn.mockClear(); + mockLogger.error.mockClear(); + mockLogger.debug.mockClear(); - test('skips scan when codebase has allow_env_keys: true', async () => { - spyFindByDefaultCwd.mockResolvedValueOnce({ - id: 'codebase-1', - allow_env_keys: true, - default_cwd: '/workspace', - }); + mockStartThread.mockReturnValue(createMockThread('new-thread-id')); + mockResumeThread.mockReturnValue(createMockThread('resumed-thread-id')); + }); - const chunks = []; - for await (const chunk of client.sendQuery('test', '/workspace')) { - chunks.push(chunk); - } + test('abort signal throws instead of silently truncating stream', async () => { + const abortController = new AbortController(); - expect(spyScan).not.toHaveBeenCalled(); + mockRunStreamed.mockResolvedValue({ + events: (async function* () { + yield { + type: 'item.completed', + item: { type: 'agent_message', text: 'partial', id: '1' }, + }; + // Abort mid-stream + abortController.abort(); + yield { + type: 'item.completed', + item: { type: 'agent_message', text: 'should not appear', id: '2' }, + }; + yield { type: 'turn.completed', usage: defaultUsage }; + })(), }); - test('proceeds without scanning when cwd has no registered codebase', async () => { - const chunks = []; - for await (const chunk of
client.sendQuery('test', '/workspace')) {
-      chunks.push(chunk);
+    const consumeGenerator = async (): Promise<void> => {
+      for await (const _ of client.sendQuery('test', '/workspace', undefined, {
+        abortSignal: abortController.signal,
+      })) {
+        // consume
       }
+    };

-    expect(spyScan).not.toHaveBeenCalled();
-  });
+    await expect(consumeGenerator()).rejects.toThrow('Query aborted');
+  });

-  test('uses prefix lookup for worktree paths when exact match returns null', async () => {
-    spyFindByDefaultCwd.mockResolvedValueOnce(null);
-    spyFindByPathPrefix.mockResolvedValueOnce({
-      id: 'codebase-1',
-      allow_env_keys: true,
-      default_cwd: '/workspace/source',
-    });
+  test('enriched error thrown at retry exhaustion, not raw error', async () => {
+    mockRunStreamed.mockRejectedValue(new Error('codex exec crashed'));

-    const chunks = [];
-    for await (const chunk of client.sendQuery('test', '/workspace/worktrees/feature')) {
-      chunks.push(chunk);
+    const consumeGenerator = async (): Promise<void> => {
+      for await (const _ of client.sendQuery('test', '/workspace')) {
+        // consume
       }
-
-    expect(spyFindByPathPrefix).toHaveBeenCalledWith('/workspace/worktrees/feature');
-    expect(spyScan).not.toHaveBeenCalled();
+    };
+
+    const err = await consumeGenerator().catch((e: unknown) => e as Error);
+    expect(err).toBeInstanceOf(Error);
+    // Must contain the enriched classification prefix
+    expect(err.message).toContain('Codex crash');
+  }, 5_000);
+
+  test('todo_list dedup state resets between retry attempts', async () => {
+    const todoItem = {
+      type: 'todo_list',
+      items: [{ text: 'Task 1', completed: false }],
+      id: 'todo-1',
+    };
+
+    let callCount = 0;
+    mockRunStreamed.mockImplementation(() => {
+      callCount++;
+      if (callCount === 1) {
+        return Promise.resolve({
+          events: (async function* () {
+            yield { type: 'item.completed', item: todoItem };
+            throw new Error('codex exec crashed');
+          })(),
+        });
+      }
+      // On retry, same todo should appear again (fresh state)
+      return Promise.resolve({
+        events: (async function* () {
+          yield { type: 'item.completed', item: todoItem };
+          yield { type: 'turn.completed', usage: defaultUsage };
+        })(),
+      });
    });
-  });
+
+    const chunks = [];
+    for await (const chunk of client.sendQuery('test', '/workspace')) {
+      chunks.push(chunk);
+    }
+
+    // The todo should appear on the retry attempt (not suppressed by dedup from attempt 1)
+    const systemChunks = chunks.filter(c => c.type === 'system');
+    expect(systemChunks.length).toBeGreaterThanOrEqual(1);
+    expect(systemChunks.some(c => c.type === 'system' && c.content.includes('Task 1'))).toBe(true);
+  }, 5_000);
});
diff --git a/packages/providers/src/codex/provider.ts b/packages/providers/src/codex/provider.ts
new file mode 100644
index 0000000000..de9ffd13f4
--- /dev/null
+++ b/packages/providers/src/codex/provider.ts
@@ -0,0 +1,613 @@
+/**
+ * Codex SDK wrapper
+ * Provides async generator interface for streaming Codex responses
+ */
+import {
+  Codex,
+  type ThreadOptions,
+  type TurnOptions,
+  type TurnCompletedEvent,
+} from '@openai/codex-sdk';
+import type {
+  IAgentProvider,
+  SendQueryOptions,
+  MessageChunk,
+  TokenUsage,
+  ProviderCapabilities,
+} from '../types';
+import { parseCodexConfig } from './config';
+import { resolveCodexBinaryPath } from './binary-resolver';
+import { createLogger } from '@archon/paths';
+
+/** Lazy-initialized logger (deferred so test mocks can intercept createLogger) */
+let cachedLog: ReturnType<typeof createLogger> | undefined;
+function getLog(): ReturnType<typeof createLogger> {
+  if (!cachedLog) cachedLog = createLogger('provider.codex');
+  return cachedLog;
+}
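The lazy `getLog` indirection exists so a test can swap the logger module before the provider ever calls `createLogger`. A minimal sketch of that interception, assuming bun:test's `mock.module` (the spy names are illustrative, not part of this diff):

```ts
import { mock } from 'bun:test';

// Install the module mock BEFORE importing the provider, so the first
// getLog() call inside provider.ts resolves createLogger to this stub.
const warnSpy = mock(() => undefined);
mock.module('@archon/paths', () => ({
  createLogger: () => ({
    info: mock(() => undefined),
    warn: warnSpy,
    error: mock(() => undefined),
    debug: mock(() => undefined),
  }),
}));

const { CodexProvider } = await import('./provider');
// ...run a query, then assert on warnSpy.mock.calls as usual.
```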
cachedLog; +} + +// Singleton Codex instance (async because binary path resolution is async) +let codexInstance: Codex | null = null; +let codexInitPromise: Promise | null = null; + +/** Reset singleton state. Exported for tests only. */ +export function resetCodexSingleton(): void { + codexInstance = null; + codexInitPromise = null; +} + +/** + * Get or create Codex SDK instance. + */ +async function getCodex(configCodexBinaryPath?: string): Promise { + if (codexInstance) return codexInstance; + + if (!codexInitPromise) { + codexInitPromise = (async (): Promise => { + const codexPathOverride = await resolveCodexBinaryPath(configCodexBinaryPath); + const instance = new Codex({ codexPathOverride }); + codexInstance = instance; + return instance; + })().catch(err => { + codexInitPromise = null; + throw err; + }); + } + return codexInitPromise; +} + +/** + * Build thread options for Codex SDK + */ +function buildThreadOptions( + cwd: string, + model?: string, + assistantConfig?: Record +): ThreadOptions { + const config = parseCodexConfig(assistantConfig ?? {}); + return { + workingDirectory: cwd, + skipGitRepoCheck: true, + sandboxMode: 'danger-full-access', + networkAccessEnabled: true, + approvalPolicy: 'never', + model: model ?? config.model, + modelReasoningEffort: config.modelReasoningEffort, + webSearchMode: config.webSearchMode, + additionalDirectories: config.additionalDirectories, + }; +} + +const CODEX_MODEL_FALLBACKS: Record = { + 'gpt-5.3-codex': 'gpt-5.2-codex', +}; + +function isModelAccessError(errorMessage: string): boolean { + const m = errorMessage.toLowerCase(); + const hasModel = m.includes('model'); + const hasAvailabilitySignal = + m.includes('not available') || m.includes('not found') || m.includes('access denied'); + return hasModel && hasAvailabilitySignal; +} + +function buildModelAccessMessage(model?: string): string { + const normalizedModel = model?.trim(); + const selectedModel = normalizedModel || 'the configured model'; + const suggested = normalizedModel ? CODEX_MODEL_FALLBACKS[normalizedModel] : undefined; + + const fixLine = suggested + ? `To fix: update your model in ~/.archon/config.yaml:\n assistants:\n codex:\n model: ${suggested}` + : 'To fix: update your model in ~/.archon/config.yaml to one your account can access.'; + + const workflowLine = suggested + ? 
`Or set it per-workflow with \`model: ${suggested}\` in workflow YAML.` + : 'Or set it per-workflow with a valid `model:` in workflow YAML.'; + + return `❌ Model "${selectedModel}" is not available for your account.\n\n${fixLine}\n\n${workflowLine}`; +} + +const MAX_SUBPROCESS_RETRIES = 3; +const RETRY_BASE_DELAY_MS = 2000; +const RATE_LIMIT_PATTERNS = ['rate limit', 'too many requests', '429', 'overloaded']; +const AUTH_PATTERNS = [ + 'credit balance', + 'unauthorized', + 'authentication', + 'invalid token', + '401', + '403', +]; +const SUBPROCESS_CRASH_PATTERNS = ['exited with code', 'killed', 'signal', 'codex exec']; + +function classifyCodexError( + errorMessage: string +): 'rate_limit' | 'auth' | 'crash' | 'model_access' | 'unknown' { + if (isModelAccessError(errorMessage)) return 'model_access'; + const m = errorMessage.toLowerCase(); + if (RATE_LIMIT_PATTERNS.some(p => m.includes(p))) return 'rate_limit'; + if (AUTH_PATTERNS.some(p => m.includes(p))) return 'auth'; + if (SUBPROCESS_CRASH_PATTERNS.some(p => m.includes(p))) return 'crash'; + return 'unknown'; +} + +function extractUsageFromCodexEvent(event: TurnCompletedEvent): TokenUsage { + if (!event.usage) { + getLog().warn({ eventType: event.type }, 'codex.usage_null_on_turn_completed'); + return { input: 0, output: 0 }; + } + return { + input: event.usage.input_tokens, + output: event.usage.output_tokens, + }; +} + +// ─── Turn Options Builder ──────────────────────────────────────────────── + +/** + * Build turn options for a single Codex turn. + * Handles output schema from both requestOptions and nodeConfig (workflow path). + */ +function buildTurnOptions(requestOptions?: SendQueryOptions): { + turnOptions: TurnOptions; + hasOutputFormat: boolean; +} { + const turnOptions: TurnOptions = {}; + const hasOutputFormat = !!( + requestOptions?.outputFormat ?? requestOptions?.nodeConfig?.output_format + ); + if (requestOptions?.outputFormat) { + turnOptions.outputSchema = requestOptions.outputFormat.schema; + } + if (requestOptions?.nodeConfig?.output_format && !requestOptions?.outputFormat) { + turnOptions.outputSchema = requestOptions.nodeConfig.output_format; + } + if (requestOptions?.abortSignal) { + turnOptions.signal = requestOptions.abortSignal; + } + return { turnOptions, hasOutputFormat }; +} + +// ─── Stream Normalizer ─────────────────────────────────────────────────── + +/** State maintained across Codex event stream normalization. */ +interface CodexStreamState { + lastTodoListSignature?: string; +} + +/** + * Normalize raw Codex SDK events into Archon MessageChunks. + * Handles structured output normalization (Codex returns JSON inline in text). 
+ */
+async function* streamCodexEvents(
+  events: AsyncIterable<Record<string, unknown>>,
+  hasOutputFormat: boolean,
+  threadId: string | null | undefined,
+  abortSignal?: AbortSignal
+): AsyncGenerator<MessageChunk> {
+  const state: CodexStreamState = {};
+  let accumulatedText = '';
+
+  for await (const event of events) {
+    if (abortSignal?.aborted) {
+      getLog().info('query_aborted_between_events');
+      throw new Error('Query aborted');
+    }
+
+    if (event.type === 'item.started') {
+      const item = event.item as { type: string; id: string };
+      getLog().debug(
+        { eventType: event.type, itemType: item.type, itemId: item.id },
+        'item_started'
+      );
+    }
+
+    if (event.type === 'error') {
+      const errorEvent = event as { message: string };
+      getLog().error({ message: errorEvent.message }, 'stream_error');
+      if (!errorEvent.message.includes('MCP client')) {
+        yield { type: 'system', content: `⚠️ ${errorEvent.message}` };
+      }
+      continue;
+    }
+
+    if (event.type === 'turn.failed') {
+      const errorObj = (event as { error?: { message?: string } }).error;
+      const errorMessage = errorObj?.message ?? 'Unknown error';
+      getLog().error({ errorMessage }, 'turn_failed');
+      yield { type: 'system', content: `❌ Turn failed: ${errorMessage}` };
+      break;
+    }
+
+    if (event.type === 'item.completed') {
+      const item = event.item as Record<string, unknown>;
+      const itemType = item.type as string;
+
+      const logContext: Record<string, unknown> = {
+        eventType: event.type,
+        itemType,
+        itemId: item.id,
+      };
+      if (itemType === 'command_execution' && item.command) {
+        logContext.command = item.command;
+      }
+      getLog().debug(logContext, 'item_completed');
+
+      switch (itemType) {
+        case 'agent_message':
+          if (item.text) {
+            if (hasOutputFormat) accumulatedText += item.text as string;
+            yield { type: 'assistant', content: item.text as string };
+          }
+          break;
+
+        case 'command_execution':
+          if (item.command) {
+            const cmd = item.command as string;
+            yield { type: 'tool', toolName: cmd };
+            const exitCode = item.exit_code as number | null | undefined;
+            const exitSuffix =
+              exitCode != null && exitCode !== 0 ? `\n[exit code: ${String(exitCode)}]` : '';
+            yield {
+              type: 'tool_result',
+              toolName: cmd,
+              toolOutput: ((item.aggregated_output as string) ?? '') + exitSuffix,
+            };
+          } else {
+            getLog().warn({ itemId: item.id }, 'command_execution_missing_command');
+          }
+          break;
+
+        case 'reasoning':
+          if (item.text) {
+            yield { type: 'thinking', content: item.text as string };
+          }
+          break;
+
+        case 'web_search':
+          if (item.query) {
+            const searchToolName = `🔍 Searching: ${item.query as string}`;
+            yield { type: 'tool', toolName: searchToolName };
+            yield { type: 'tool_result', toolName: searchToolName, toolOutput: '' };
+          } else {
+            getLog().debug({ itemId: item.id }, 'web_search_missing_query');
+          }
+          break;
+
+        case 'todo_list': {
+          const items = item.items as { text?: string; completed?: boolean }[] | undefined;
+          if (Array.isArray(items) && items.length > 0) {
+            const normalizedItems = items.map(t => ({
+              text: typeof t.text === 'string' ? t.text : '(unnamed task)',
+              completed: t.completed ?? false,
+            }));
+            const signature = JSON.stringify(normalizedItems);
+            if (signature !== state.lastTodoListSignature) {
+              state.lastTodoListSignature = signature;
+              const taskList = normalizedItems
+                .map(t => `${t.completed ? '✅' : '⬜'} ${t.text}`)
+                .join('\n');
+              yield { type: 'system', content: `📋 Tasks:\n${taskList}` };
+            }
+          } else {
+            getLog().debug({ itemId: item.id }, 'todo_list_empty_or_invalid');
+          }
+          break;
+        }
+
+        case 'file_change': {
+          const statusIcon = (item.status as string) === 'failed' ?
'❌' : '✅'; + const rawError = 'error' in item ? (item as { error?: unknown }).error : undefined; + const fileErrorMessage = + typeof rawError === 'string' + ? rawError + : typeof rawError === 'object' && rawError !== null && 'message' in rawError + ? String((rawError as { message: unknown }).message) + : undefined; + + const changes = item.changes as { kind: string; path?: string }[] | undefined; + if (Array.isArray(changes) && changes.length > 0) { + const changeList = changes + .map(c => { + const icon = c.kind === 'add' ? '➕' : c.kind === 'delete' ? '➖' : '📝'; + return `${icon} ${c.path ?? '(unknown file)'}`; + }) + .join('\n'); + const errorSuffix = + (item.status as string) === 'failed' && fileErrorMessage + ? `\n${fileErrorMessage}` + : ''; + yield { + type: 'system', + content: `${statusIcon} File changes:\n${changeList}${errorSuffix}`, + }; + } else if ((item.status as string) === 'failed') { + getLog().warn( + { itemId: item.id, status: item.status }, + 'file_change_failed_no_changes' + ); + const failMsg = fileErrorMessage + ? `❌ File change failed: ${fileErrorMessage}` + : '❌ File change failed'; + yield { type: 'system', content: failMsg }; + } else { + getLog().debug({ itemId: item.id, status: item.status }, 'file_change_no_changes'); + } + break; + } + + case 'mcp_tool_call': { + const server = item.server as string | undefined; + const tool = item.tool as string | undefined; + const toolInfo = server && tool ? `${server}/${tool}` : (tool ?? server ?? 'MCP tool'); + const mcpToolName = `🔌 MCP: ${toolInfo}`; + + yield { type: 'tool', toolName: mcpToolName }; + + if ((item.status as string) === 'failed') { + getLog().warn( + { server, tool, error: item.error, itemId: item.id }, + 'mcp_tool_call_failed' + ); + const mcpError = item.error as { message?: string } | undefined; + const errMsg = mcpError?.message + ? `❌ Error: ${mcpError.message}` + : '❌ Error: MCP tool failed'; + yield { type: 'tool_result', toolName: mcpToolName, toolOutput: errMsg }; + } else { + let toolOutput = ''; + const mcpResult = item.result as { content?: unknown } | undefined; + if (mcpResult?.content) { + if (Array.isArray(mcpResult.content)) { + toolOutput = JSON.stringify(mcpResult.content); + } else { + getLog().warn( + { + itemId: item.id, + server, + tool, + resultType: typeof mcpResult.content, + }, + 'mcp_tool_call_unexpected_result_shape' + ); + } + } + yield { type: 'tool_result', toolName: mcpToolName, toolOutput }; + } + break; + } + } + } + + if (event.type === 'turn.completed') { + getLog().debug('turn_completed'); + const usage = extractUsageFromCodexEvent(event as TurnCompletedEvent); + + // Codex returns structured output inline in agent_message text. + // Normalize: parse as JSON and put on structuredOutput so the + // dag-executor can handle all providers uniformly. + let structuredOutput: unknown; + if (hasOutputFormat && accumulatedText) { + try { + structuredOutput = JSON.parse(accumulatedText); + getLog().debug('codex.structured_output_parsed'); + } catch { + getLog().warn( + { outputPreview: accumulatedText.slice(0, 200) }, + 'codex.structured_output_not_json' + ); + yield { + type: 'system', + content: + '⚠️ Structured output requested but Codex returned non-JSON text. ' + + 'Downstream $nodeId.output.field references may not evaluate correctly.', + }; + } + } + + yield { + type: 'result', + sessionId: threadId ?? undefined, + tokens: usage, + ...(structuredOutput !== undefined ? 
{ structuredOutput } : {}),
+      };
+      break;
+    }
+  }
+}
+
+// ─── Error Classification & Retry ──────────────────────────────────────
+
+/**
+ * Classify a Codex error and determine retry eligibility.
+ */
+function classifyAndEnrichCodexError(
+  error: Error,
+  model?: string
+): { enrichedError: Error; errorClass: string; shouldRetry: boolean } {
+  const errorClass = classifyCodexError(error.message);
+
+  if (errorClass === 'model_access') {
+    return {
+      enrichedError: new Error(buildModelAccessMessage(model)),
+      errorClass,
+      shouldRetry: false,
+    };
+  }
+
+  if (errorClass === 'auth') {
+    const enrichedError = new Error(`Codex auth error: ${error.message}`);
+    enrichedError.cause = error;
+    return { enrichedError, errorClass, shouldRetry: false };
+  }
+
+  const enrichedError = new Error(`Codex ${errorClass}: ${error.message}`);
+  enrichedError.cause = error;
+  const shouldRetry = errorClass === 'rate_limit' || errorClass === 'crash';
+  return { enrichedError, errorClass, shouldRetry };
+}
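The `shouldRetry` flag feeds the retry loop in `sendQuery` below, which backs off exponentially from `retryBaseDelayMs`. A small sketch of the schedule this produces, using the constants from this file (base of 2000 ms, `MAX_SUBPROCESS_RETRIES` of 3):

```ts
// Failures on attempts 0..2 wait before re-spawning; a failure on
// attempt 3 throws the enriched error with no further retry.
const RETRY_BASE_DELAY_MS = 2000;
for (let attempt = 0; attempt < 3; attempt++) {
  const delayMs = RETRY_BASE_DELAY_MS * Math.pow(2, attempt);
  console.log(`after attempt ${attempt}: wait ${delayMs} ms`); // 2000, 4000, 8000
}
```

Tests shrink this schedule through the constructor option, as the test file above does with `new CodexProvider({ retryBaseDelayMs: 1 })`.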
+// ─── Codex Provider ────────────────────────────────────────────────────
+
+/**
+ * Codex AI agent provider.
+ * Implements IAgentProvider with Codex SDK integration.
+ *
+ * sendQuery orchestrates the following internal helpers:
+ * - buildThreadOptions: SDK thread configuration
+ * - buildTurnOptions: per-turn configuration (output schema, abort signal)
+ * - streamCodexEvents: raw SDK event normalization into MessageChunks
+ * - classifyAndEnrichCodexError: error classification for retry decisions
+ */
+export class CodexProvider implements IAgentProvider {
+  private readonly retryBaseDelayMs: number;
+
+  constructor(options?: { retryBaseDelayMs?: number }) {
+    this.retryBaseDelayMs = options?.retryBaseDelayMs ?? RETRY_BASE_DELAY_MS;
+  }
+
+  getCapabilities(): ProviderCapabilities {
+    return {
+      sessionResume: true,
+      mcp: false,
+      hooks: false,
+      skills: false,
+      toolRestrictions: false,
+      structuredOutput: true,
+      envInjection: false,
+      costControl: false,
+      effortControl: false,
+      thinkingControl: false,
+      fallbackModel: false,
+      sandbox: false,
+    };
+  }
+
+  // Env safety: Codex inherits cleaned parent env (stripCwdEnv at boot).
+  // Codex native binary does not auto-load .env from CWD (E2E verified).
+  // Managed env injection tracked in #1161.
+  async *sendQuery(
+    prompt: string,
+    cwd: string,
+    resumeSessionId?: string,
+    requestOptions?: SendQueryOptions
+  ): AsyncGenerator<MessageChunk> {
+    const assistantConfig = requestOptions?.assistantConfig ?? {};
+    const codexConfig = parseCodexConfig(assistantConfig);
+
+    // 1. Initialize SDK and build thread options
+    const codex = await getCodex(codexConfig.codexBinaryPath);
+    const threadOptions = buildThreadOptions(cwd, requestOptions?.model, assistantConfig);
+
+    if (requestOptions?.abortSignal?.aborted) {
+      throw new Error('Query aborted');
+    }
+
+    // 2. Create or resume thread
+    let sessionResumeFailed = false;
+    let thread;
+    if (resumeSessionId) {
+      getLog().debug({ sessionId: resumeSessionId }, 'resuming_thread');
+      try {
+        thread = codex.resumeThread(resumeSessionId, threadOptions);
+      } catch (error) {
+        getLog().error({ err: error, sessionId: resumeSessionId }, 'resume_thread_failed');
+        try {
+          thread = codex.startThread(threadOptions);
+        } catch (startError) {
+          const err = startError as Error;
+          if (isModelAccessError(err.message)) {
+            throw new Error(buildModelAccessMessage(requestOptions?.model));
+          }
+          throw new Error(`Codex query failed: ${err.message}`);
+        }
+        sessionResumeFailed = true;
+      }
+    } else {
+      getLog().debug({ cwd }, 'starting_new_thread');
+      try {
+        thread = codex.startThread(threadOptions);
+      } catch (error) {
+        const err = error as Error;
+        if (isModelAccessError(err.message)) {
+          throw new Error(buildModelAccessMessage(requestOptions?.model));
+        }
+        throw new Error(`Codex query failed: ${err.message}`);
+      }
+    }
+
+    if (sessionResumeFailed) {
+      yield {
+        type: 'system',
+        content: '⚠️ Could not resume previous session. Starting fresh conversation.',
+      };
+    }
+
+    // 3. Build turn options
+    const { turnOptions, hasOutputFormat } = buildTurnOptions(requestOptions);
+    let lastError: Error | undefined;
+
+    for (let attempt = 0; attempt <= MAX_SUBPROCESS_RETRIES; attempt++) {
+      if (requestOptions?.abortSignal?.aborted) {
+        throw new Error('Query aborted');
+      }
+
+      if (attempt > 0) {
+        getLog().debug({ cwd, attempt }, 'starting_new_thread');
+        try {
+          thread = codex.startThread(threadOptions);
+        } catch (startError) {
+          const err = startError as Error;
+          if (isModelAccessError(err.message)) {
+            throw new Error(buildModelAccessMessage(requestOptions?.model));
+          }
+          throw new Error(`Codex query failed: ${err.message}`);
+        }
+      }
+
+      try {
+        // 4. Run streamed turn
+        const result = await thread.runStreamed(prompt, turnOptions);
+
+        // 5. Stream normalized events (fresh state per attempt to avoid dedup leaks)
+        yield* streamCodexEvents(
+          result.events as AsyncIterable<Record<string, unknown>>,
+          hasOutputFormat,
+          thread.id,
+          requestOptions?.abortSignal
+        );
+        return;
+      } catch (error) {
+        const err = error as Error;
+
+        if (requestOptions?.abortSignal?.aborted) {
+          throw new Error('Query aborted');
+        }
+
+        const { enrichedError, errorClass, shouldRetry } = classifyAndEnrichCodexError(
+          err,
+          requestOptions?.model
+        );
+
+        getLog().error(
+          { err, errorClass, attempt, maxRetries: MAX_SUBPROCESS_RETRIES },
+          'query_error'
+        );
+
+        if (!shouldRetry || attempt >= MAX_SUBPROCESS_RETRIES) {
+          throw enrichedError;
+        }
+
+        const delayMs = this.retryBaseDelayMs * Math.pow(2, attempt);
+        getLog().info({ attempt, delayMs, errorClass }, 'retrying_query');
+        await new Promise(resolve => setTimeout(resolve, delayMs));
+        lastError = enrichedError;
+      }
+    }
+
+    throw lastError ?? new Error('Codex query failed after retries');
+  }
+
+  getType(): string {
+    return 'codex';
+  }
+}
diff --git a/packages/providers/src/errors.ts b/packages/providers/src/errors.ts
new file mode 100644
index 0000000000..15849d3c92
--- /dev/null
+++ b/packages/providers/src/errors.ts
@@ -0,0 +1,14 @@
+/**
+ * Standardized error for unknown provider types.
+ * Thrown by getAgentProvider() — all surfaces (CLI, server, orchestrator, workflows)
+ * get the same error shape and message format.
+ */ +export class UnknownProviderError extends Error { + constructor( + public readonly requestedProvider: string, + public readonly registeredProviders: string[] + ) { + super(`Unknown provider: '${requestedProvider}'. Available: ${registeredProviders.join(', ')}`); + this.name = 'UnknownProviderError'; + } +} diff --git a/packages/providers/src/factory.test.ts b/packages/providers/src/factory.test.ts new file mode 100644 index 0000000000..fcc62c09a6 --- /dev/null +++ b/packages/providers/src/factory.test.ts @@ -0,0 +1,65 @@ +import { describe, test, expect } from 'bun:test'; +import { getAgentProvider } from './factory'; +import { UnknownProviderError } from './errors'; + +describe('factory', () => { + describe('getAgentProvider', () => { + test('returns ClaudeProvider for claude type', () => { + const provider = getAgentProvider('claude'); + + expect(provider).toBeDefined(); + expect(provider.getType()).toBe('claude'); + expect(typeof provider.sendQuery).toBe('function'); + }); + + test('returns CodexProvider for codex type', () => { + const provider = getAgentProvider('codex'); + + expect(provider).toBeDefined(); + expect(provider.getType()).toBe('codex'); + expect(typeof provider.sendQuery).toBe('function'); + }); + + test('throws UnknownProviderError for unknown type', () => { + expect(() => getAgentProvider('unknown')).toThrow(UnknownProviderError); + expect(() => getAgentProvider('unknown')).toThrow( + "Unknown provider: 'unknown'. Available: claude, codex" + ); + }); + + test('throws UnknownProviderError for empty string', () => { + expect(() => getAgentProvider('')).toThrow(UnknownProviderError); + expect(() => getAgentProvider('')).toThrow("Unknown provider: ''"); + }); + + test('is case sensitive - Claude throws', () => { + expect(() => getAgentProvider('Claude')).toThrow(UnknownProviderError); + expect(() => getAgentProvider('Claude')).toThrow("Unknown provider: 'Claude'"); + }); + + test('each call returns new instance', () => { + const provider1 = getAgentProvider('claude'); + const provider2 = getAgentProvider('claude'); + + // Each call should return a new instance + expect(provider1).not.toBe(provider2); + }); + + test('providers expose getCapabilities', () => { + const claude = getAgentProvider('claude'); + const codex = getAgentProvider('codex'); + + expect(typeof claude.getCapabilities).toBe('function'); + expect(typeof codex.getCapabilities).toBe('function'); + + const claudeCaps = claude.getCapabilities(); + const codexCaps = codex.getCapabilities(); + + // Claude supports more features than Codex + expect(claudeCaps.mcp).toBe(true); + expect(codexCaps.mcp).toBe(false); + expect(claudeCaps.hooks).toBe(true); + expect(codexCaps.hooks).toBe(false); + }); + }); +}); diff --git a/packages/providers/src/factory.ts b/packages/providers/src/factory.ts new file mode 100644 index 0000000000..836f3edce5 --- /dev/null +++ b/packages/providers/src/factory.ts @@ -0,0 +1,41 @@ +/** + * Agent Provider Factory + * + * Dynamically instantiates the appropriate agent provider based on type string. + * Built-in providers only: Claude and Codex. + */ +import type { IAgentProvider } from './types'; +import { ClaudeProvider } from './claude/provider'; +import { CodexProvider } from './codex/provider'; +import { UnknownProviderError } from './errors'; +import { createLogger } from '@archon/paths'; + +/** Built-in provider types. 
+ */
+const REGISTERED_PROVIDERS = ['claude', 'codex'] as const;
+
+/** Lazy-initialized logger (deferred so test mocks can intercept createLogger) */
+let cachedLog: ReturnType<typeof createLogger> | undefined;
+function getLog(): ReturnType<typeof createLogger> {
+  if (!cachedLog) cachedLog = createLogger('provider.factory');
+  return cachedLog;
+}
+
+/**
+ * Get the appropriate agent provider based on type.
+ *
+ * @param type - Provider type identifier ('claude' or 'codex')
+ * @returns Instantiated agent provider
+ * @throws UnknownProviderError if provider type is not registered
+ */
+export function getAgentProvider(type: string): IAgentProvider {
+  switch (type) {
+    case 'claude':
+      getLog().debug({ provider: 'claude' }, 'provider_selected');
+      return new ClaudeProvider();
+    case 'codex':
+      getLog().debug({ provider: 'codex' }, 'provider_selected');
+      return new CodexProvider();
+    default:
+      throw new UnknownProviderError(type, [...REGISTERED_PROVIDERS]);
+  }
+}
diff --git a/packages/providers/src/index.ts b/packages/providers/src/index.ts
new file mode 100644
index 0000000000..b46cb84111
--- /dev/null
+++ b/packages/providers/src/index.ts
@@ -0,0 +1,31 @@
+// Types (contract layer — re-exported for convenience)
+export type {
+  IAgentProvider,
+  AgentRequestOptions,
+  SendQueryOptions,
+  NodeConfig,
+  ProviderCapabilities,
+  MessageChunk,
+  TokenUsage,
+} from './types';
+
+// Provider config types (canonical definitions in ./types, re-exported via config modules)
+// Import from ./types directly or from the config modules — both work.
+
+// Factory
+export { getAgentProvider } from './factory';
+
+// Error
+export { UnknownProviderError } from './errors';
+
+// Provider classes
+export { ClaudeProvider } from './claude/provider';
+export { CodexProvider } from './codex/provider';
+
+// Config parsers
+export { parseClaudeConfig, type ClaudeProviderDefaults } from './claude/config';
+export { parseCodexConfig, type CodexProviderDefaults } from './codex/config';
+
+// Utilities (needed by consumers)
+export { resetCodexSingleton } from './codex/provider';
+export { resolveCodexBinaryPath, fileExists } from './codex/binary-resolver';
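With the factory and error class exported from the package root, a consumer needs no provider-specific imports. A minimal consumer sketch against the API shown above (the prompt and cwd are illustrative):

```ts
import { getAgentProvider, UnknownProviderError } from '@archon/providers';

async function run(providerType: string): Promise<void> {
  try {
    const provider = getAgentProvider(providerType);
    for await (const chunk of provider.sendQuery('Summarize HEAD', process.cwd())) {
      // The discriminated union narrows each chunk by its `type` field.
      if (chunk.type === 'assistant') process.stdout.write(chunk.content);
      if (chunk.type === 'result') console.log('\ntokens:', chunk.tokens);
    }
  } catch (err) {
    if (err instanceof UnknownProviderError) {
      // err.registeredProviders lists the valid types: ['claude', 'codex']
      console.error(err.message);
      return;
    }
    throw err;
  }
}
```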
diff --git a/packages/providers/src/test/mocks/logger.ts b/packages/providers/src/test/mocks/logger.ts
new file mode 100644
index 0000000000..79e1198b8a
--- /dev/null
+++ b/packages/providers/src/test/mocks/logger.ts
@@ -0,0 +1,28 @@
+import { mock } from 'bun:test';
+import type { Logger } from 'pino';
+
+export interface MockLogger extends Logger {
+  fatal: ReturnType<typeof mock>;
+  error: ReturnType<typeof mock>;
+  warn: ReturnType<typeof mock>;
+  info: ReturnType<typeof mock>;
+  debug: ReturnType<typeof mock>;
+  trace: ReturnType<typeof mock>;
+  child: ReturnType<typeof mock>;
+}
+
+export function createMockLogger(): MockLogger {
+  const logger = {
+    fatal: mock(() => undefined),
+    error: mock(() => undefined),
+    warn: mock(() => undefined),
+    info: mock(() => undefined),
+    debug: mock(() => undefined),
+    trace: mock(() => undefined),
+    child: mock(() => logger),
+    bindings: mock(() => ({ module: 'test' })),
+    isLevelEnabled: mock(() => true),
+    level: 'info',
+  } as unknown as MockLogger;
+  return logger;
+}
diff --git a/packages/providers/src/types.ts b/packages/providers/src/types.ts
new file mode 100644
index 0000000000..e0f196a500
--- /dev/null
+++ b/packages/providers/src/types.ts
@@ -0,0 +1,178 @@
+// CONTRACT LAYER — no SDK imports, no runtime deps.
+// @archon/workflows and @archon/core import from this subpath (@archon/providers/types).
+// HARD RULE: This file must never import SDK packages or other @archon/* packages.
+
+// ─── Provider Config Defaults ──────────────────────────────────────────────
+// Canonical definitions — @archon/core/config/config-types.ts imports from here.
+// Single source of truth for provider-specific config shapes.
+
+export interface ClaudeProviderDefaults {
+  model?: string;
+  /** Claude Code settingSources — controls which CLAUDE.md files are loaded.
+   * @default ['project']
+   */
+  settingSources?: ('project' | 'user')[];
+}
+
+export interface CodexProviderDefaults {
+  model?: string;
+  /** Structurally matches @archon/workflows ModelReasoningEffort */
+  modelReasoningEffort?: 'minimal' | 'low' | 'medium' | 'high' | 'xhigh';
+  /** Structurally matches @archon/workflows WebSearchMode */
+  webSearchMode?: 'disabled' | 'cached' | 'live';
+  additionalDirectories?: string[];
+  /** Path to the Codex CLI binary. Overrides auto-detection in compiled Archon builds. */
+  codexBinaryPath?: string;
+}
+
+/**
+ * Token usage statistics from AI provider responses.
+ */
+export interface TokenUsage {
+  input: number;
+  output: number;
+  total?: number;
+  cost?: number;
+}
+
+/**
+ * Message chunk from AI assistant.
+ * Discriminated union with per-type required fields for type safety.
+ */
+export type MessageChunk =
+  | { type: 'assistant'; content: string }
+  | { type: 'system'; content: string }
+  | { type: 'thinking'; content: string }
+  | {
+      type: 'result';
+      sessionId?: string;
+      tokens?: TokenUsage;
+      structuredOutput?: unknown;
+      isError?: boolean;
+      errorSubtype?: string;
+      cost?: number;
+      stopReason?: string;
+      numTurns?: number;
+      modelUsage?: Record<string, unknown>;
+    }
+  | { type: 'rate_limit'; rateLimitInfo: Record<string, unknown> }
+  | {
+      type: 'tool';
+      toolName: string;
+      toolInput?: Record<string, unknown>;
+      /** Stable per-call ID from the underlying SDK (e.g. Claude `tool_use_id`).
+       * When present, the platform adapter uses it directly instead of generating
+       * one — guarantees `tool_call`/`tool_result` pair correctly even when
+       * multiple tools with the same name run concurrently. */
+      toolCallId?: string;
+    }
+  | {
+      type: 'tool_result';
+      toolName: string;
+      toolOutput: string;
+      /** Matching ID for the originating `tool` chunk. See `tool` variant above. */
+      toolCallId?: string;
+    }
+  | { type: 'workflow_dispatch'; workerConversationId: string; workflowName: string };
+
+/**
+ * Universal request options accepted by all providers.
+ * Provider-specific fields go through `nodeConfig` and `assistantConfig` in SendQueryOptions.
+ */
+export interface AgentRequestOptions {
+  model?: string;
+  abortSignal?: AbortSignal;
+  systemPrompt?: string;
+  outputFormat?: { type: 'json_schema'; schema: Record<string, unknown> };
+  env?: Record<string, string>;
+  maxBudgetUsd?: number;
+  fallbackModel?: string;
+  /** Session fork flag — when true, copies prior session history before appending. */
+  forkSession?: boolean;
+  /** When false, skip writing session transcript to disk. */
+  persistSession?: boolean;
+}
+
+/**
+ * Raw node configuration from workflow YAML.
+ * Providers translate fields they understand; unknown fields are ignored.
+ */
+export interface NodeConfig {
+  mcp?: string;
+  hooks?: unknown;
+  skills?: string[];
+  allowed_tools?: string[];
+  denied_tools?: string[];
+  effort?: string;
+  thinking?: unknown;
+  sandbox?: unknown;
+  betas?: string[];
+  output_format?: Record<string, unknown>;
+  maxBudgetUsd?: number;
+  systemPrompt?: string;
+  fallbackModel?: string;
+  idle_timeout?: number;
+  [key: string]: unknown;
+}
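To make the shape concrete: a sketch of the `NodeConfig` a provider receives for a typical workflow node. The YAML keys are illustrative; the mapping mirrors how the dag-executor tests later in this diff exercise `allowed_tools` and `output_format`:

```ts
// A YAML node such as:
//   - id: classify
//     allowed_tools: [Read, Grep]
//     output_format: { type: object }
// arrives at the provider as a NodeConfig value:
const nodeConfig: NodeConfig = {
  allowed_tools: ['Read', 'Grep'],
  output_format: { type: 'object' },
  customField: 'kept but ignored', // unknown keys flow through the index signature
};
```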
+
+/**
+ * Extended options for sendQuery, adding workflow-specific context.
+ * The orchestrator path uses base AgentRequestOptions fields only.
+ * The workflow path additionally passes nodeConfig and assistantConfig.
+ */
+export interface SendQueryOptions extends AgentRequestOptions {
+  /** Raw YAML node config — provider translates internally to SDK-specific options. */
+  nodeConfig?: NodeConfig;
+  /** Per-provider defaults from .archon/config.yaml assistants section. */
+  assistantConfig?: Record<string, unknown>;
+}
+
+/**
+ * Provider capability flags. The dag-executor uses these for capability warnings
+ * when a node specifies features the target provider doesn't support.
+ */
+export interface ProviderCapabilities {
+  sessionResume: boolean;
+  mcp: boolean;
+  hooks: boolean;
+  skills: boolean;
+  toolRestrictions: boolean;
+  structuredOutput: boolean;
+  envInjection: boolean;
+  costControl: boolean;
+  effortControl: boolean;
+  thinkingControl: boolean;
+  fallbackModel: boolean;
+  sandbox: boolean;
+}
+
+/**
+ * Generic agent provider interface.
+ * Allows supporting multiple agent providers (Claude, Codex, etc.)
+ */
+export interface IAgentProvider {
+  /**
+   * Send a message and get streaming response.
+   * @param prompt - User message or prompt
+   * @param cwd - Working directory for the provider
+   * @param resumeSessionId - Optional session ID to resume
+   * @param options - Optional request options (universal + nodeConfig + assistantConfig)
+   */
+  sendQuery(
+    prompt: string,
+    cwd: string,
+    resumeSessionId?: string,
+    options?: SendQueryOptions
+  ): AsyncGenerator<MessageChunk>;
+
+  /**
+   * Get the provider type identifier (e.g. 'claude', 'codex').
+   */
+  getType(): string;
+
+  /**
+   * Get the provider's capability flags.
+   * Used by the dag-executor to warn when nodes specify unsupported features.
+   */
+  getCapabilities(): ProviderCapabilities;
+}
diff --git a/packages/providers/tsconfig.json b/packages/providers/tsconfig.json
new file mode 100644
index 0000000000..144d879a1c
--- /dev/null
+++ b/packages/providers/tsconfig.json
@@ -0,0 +1,8 @@
+{
+  "extends": "../../tsconfig.json",
+  "compilerOptions": {
+    "noEmit": true
+  },
+  "include": ["src/**/*"],
+  "exclude": ["node_modules", "dist", "**/*.test.ts"]
+}
diff --git a/packages/server/package.json b/packages/server/package.json
index 58fd364c6f..ac5c4b7187 100644
--- a/packages/server/package.json
+++ b/packages/server/package.json
@@ -15,6 +15,7 @@
     "@archon/core": "workspace:*",
     "@archon/git": "workspace:*",
     "@archon/paths": "workspace:*",
+    "@archon/providers": "workspace:*",
     "@archon/workflows": "workspace:*",
     "@hono/zod-openapi": "^0.19.6",
     "dotenv": "^17.2.3",
diff --git a/packages/server/src/adapters/web.ts b/packages/server/src/adapters/web.ts
index 20570824e3..50d3c0e5f3 100644
--- a/packages/server/src/adapters/web.ts
+++ b/packages/server/src/adapters/web.ts
@@ -2,7 +2,8 @@
  * Web platform adapter implementing IPlatformAdapter with SSE stream management.
  * Bridge between the orchestrator and the React frontend via Server-Sent Events.
*/ -import type { IWebPlatformAdapter, MessageChunk, MessageMetadata } from '@archon/core'; +import type { IWebPlatformAdapter, MessageMetadata } from '@archon/core'; +import type { MessageChunk } from '@archon/providers/types'; import { createLogger } from '@archon/paths'; import { MessagePersistence } from './web/persistence'; import { SSETransport, type SSEWriter } from './web/transport'; diff --git a/packages/server/src/index.ts b/packages/server/src/index.ts index 0b502008d6..8099a8a9bd 100644 --- a/packages/server/src/index.ts +++ b/packages/server/src/index.ts @@ -73,9 +73,7 @@ import { logConfig, getPort, createWorkflowStore, - scanPathForSensitiveKeys, } from '@archon/core'; -import * as codebaseDb from '@archon/core/db/codebases'; import type { IPlatformAdapter } from '@archon/core'; import { createLogger, logArchonPaths, validateAppDefaultsPaths } from '@archon/paths'; @@ -199,58 +197,9 @@ export async function startServer(opts: ServerOptions = {}): Promise { process.exit(1); } - // Load configuration early so the startup env-leak scan can honor the - // global bypass. Without this, users who set `allow_target_repo_keys: true` - // would get a per-codebase warn spam on every boot even though the gate - // is intentionally disabled. const config = await loadConfig(); logConfig(config); - // Startup env-leak scan: warn for codebases that would be blocked at next - // spawn by the env-leak-gate. Skipped entirely when the global bypass is - // active. Best-effort — failures are surfaced but never block startup. - if (config.allowTargetRepoKeys) { - getLog().info('startup_env_leak_scan_skipped — allow_target_repo_keys is true'); - } else { - try { - const codebases = await codebaseDb.listCodebases(); - for (const cb of codebases) { - if (cb.allow_env_keys) continue; - try { - const report = scanPathForSensitiveKeys(cb.default_cwd); - if (report.findings.length > 0) { - const files = report.findings.map(f => f.file); - const keys = Array.from(new Set(report.findings.flatMap(f => f.keys))); - getLog().warn( - { - codebaseId: cb.id, - name: cb.name, - path: cb.default_cwd, - files, - keys, - }, - 'startup_env_leak_gate_will_block' - ); - } - } catch (scanErr) { - // Path may no longer exist (codebase moved/deleted on disk) — - // log at debug, do not abort the loop. This is the only quiet path. - getLog().debug( - { err: scanErr, codebaseId: cb.id, path: cb.default_cwd }, - 'startup_env_leak_scan_path_unavailable' - ); - } - } - } catch (error) { - // listCodebases() failed — the entire startup safety net is silently - // absent. Surface at error level so operators see it. 
- getLog().error( - { err: error }, - 'startup_env_leak_scan_failed — startup migration warnings suppressed' - ); - } - } - // Start cleanup scheduler startCleanupScheduler(); diff --git a/packages/server/src/routes/api.codebases.test.ts b/packages/server/src/routes/api.codebases.test.ts index 0265a359e1..d06615968b 100644 --- a/packages/server/src/routes/api.codebases.test.ts +++ b/packages/server/src/routes/api.codebases.test.ts @@ -48,15 +48,6 @@ mock.module('@archon/core', () => ({ this.name = 'ConversationNotFoundError'; } }, - scanPathForSensitiveKeys: mock((_p: string) => ({ path: _p, findings: [] })), - EnvLeakError: class EnvLeakError extends Error { - constructor(public report: { path: string; findings: { file: string; keys: string[] }[] }) { - super( - `Cannot add codebase — ${report.path} contains keys that will leak into AI subprocesses` - ); - this.name = 'EnvLeakError'; - } - }, getArchonWorkspacesPath: () => '/tmp/.archon/workspaces', generateAndSetTitle: mock(async () => {}), createLogger: () => ({ @@ -123,12 +114,10 @@ mock.module('@archon/core/db/conversations', () => ({ getConversationById: mock(async () => null), })); -const mockUpdateCodebaseAllowEnvKeys = mock(async (_id: string, _v: boolean) => {}); mock.module('@archon/core/db/codebases', () => ({ listCodebases: mockListCodebases, getCodebase: mockGetCodebase, deleteCodebase: mockDeleteCodebase, - updateCodebaseAllowEnvKeys: mockUpdateCodebaseAllowEnvKeys, })); mock.module('@archon/core/db/isolation-environments', () => ({ @@ -181,7 +170,6 @@ const MOCK_CODEBASE = { repository_url: 'https://github.com/user/repo', default_cwd: '/home/user/projects/my-project', ai_assistant_type: 'claude', - allow_env_keys: false, commands: {}, created_at: new Date().toISOString(), updated_at: new Date().toISOString(), @@ -399,7 +387,7 @@ describe('POST /api/codebases', () => { const body = (await response.json()) as { id: string }; expect(body.id).toBe('codebase-uuid-1'); - expect(mockCloneRepository).toHaveBeenCalledWith('https://github.com/user/repo', undefined); + expect(mockCloneRepository).toHaveBeenCalledWith('https://github.com/user/repo'); }); test('registers existing URL codebase with 200', async () => { @@ -436,7 +424,7 @@ describe('POST /api/codebases', () => { body: JSON.stringify({ path: '/home/user/my-repo' }), }); expect(response.status).toBe(201); - expect(mockRegisterRepository).toHaveBeenCalledWith('/home/user/my-repo', undefined); + expect(mockRegisterRepository).toHaveBeenCalledWith('/home/user/my-repo'); }); test('returns 400 when both url and path are provided', async () => { @@ -508,101 +496,6 @@ describe('POST /api/codebases', () => { const body = (await response.json()) as { error: string }; expect(body.error).toContain('authentication required'); }); - - test('returns 422 when cloneRepository throws EnvLeakError', async () => { - const { EnvLeakError } = await import('@archon/core'); - mockCloneRepository.mockImplementationOnce(async () => { - throw new EnvLeakError({ - path: '/repo/path', - findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }], - }); - }); - - const app = makeApp(); - const response = await app.request('/api/codebases', { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ url: 'https://github.com/user/repo' }), - }); - expect(response.status).toBe(422); - - const body = (await response.json()) as { error: string }; - expect(body.error).toContain('Cannot add codebase'); - }); - - test('passes allowEnvKeys=true to cloneRepository when body 
includes it', async () => { - mockCloneRepository.mockImplementationOnce(async () => ({ - codebaseId: 'clone-uuid-2', - alreadyExisted: false, - })); - mockGetCodebase.mockImplementationOnce(async () => MOCK_CODEBASE); - - const app = makeApp(); - const response = await app.request('/api/codebases', { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ url: 'https://github.com/user/repo', allowEnvKeys: true }), - }); - expect(response.status).toBe(201); - expect(mockCloneRepository).toHaveBeenCalledWith('https://github.com/user/repo', true); - }); -}); - -// --------------------------------------------------------------------------- -// Tests: PATCH /api/codebases/:id -// --------------------------------------------------------------------------- - -describe('PATCH /api/codebases/:id', () => { - beforeEach(() => { - mockGetCodebase.mockReset(); - mockUpdateCodebaseAllowEnvKeys.mockReset(); - }); - - test('grants consent and returns updated codebase', async () => { - mockGetCodebase - .mockImplementationOnce(async () => MOCK_CODEBASE) - .mockImplementationOnce(async () => ({ ...MOCK_CODEBASE, allow_env_keys: true })); - mockUpdateCodebaseAllowEnvKeys.mockImplementationOnce(async () => {}); - - const app = makeApp(); - const response = await app.request('/api/codebases/codebase-uuid-1', { - method: 'PATCH', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ allowEnvKeys: true }), - }); - expect(response.status).toBe(200); - const body = (await response.json()) as { allow_env_keys: boolean }; - expect(body.allow_env_keys).toBe(true); - expect(mockUpdateCodebaseAllowEnvKeys).toHaveBeenCalledWith('codebase-uuid-1', true); - }); - - test('revokes consent', async () => { - mockGetCodebase - .mockImplementationOnce(async () => ({ ...MOCK_CODEBASE, allow_env_keys: true })) - .mockImplementationOnce(async () => MOCK_CODEBASE); - mockUpdateCodebaseAllowEnvKeys.mockImplementationOnce(async () => {}); - - const app = makeApp(); - const response = await app.request('/api/codebases/codebase-uuid-1', { - method: 'PATCH', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ allowEnvKeys: false }), - }); - expect(response.status).toBe(200); - expect(mockUpdateCodebaseAllowEnvKeys).toHaveBeenCalledWith('codebase-uuid-1', false); - }); - - test('returns 404 when codebase not found', async () => { - mockGetCodebase.mockImplementationOnce(async () => null); - - const app = makeApp(); - const response = await app.request('/api/codebases/missing', { - method: 'PATCH', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ allowEnvKeys: true }), - }); - expect(response.status).toBe(404); - }); }); // --------------------------------------------------------------------------- diff --git a/packages/server/src/routes/api.ts b/packages/server/src/routes/api.ts index ed267c1d41..4bc814f685 100644 --- a/packages/server/src/routes/api.ts +++ b/packages/server/src/routes/api.ts @@ -27,8 +27,6 @@ import { registerRepository, ConversationNotFoundError, generateAndSetTitle, - EnvLeakError, - scanPathForSensitiveKeys, } from '@archon/core'; import { removeWorktree, toRepoPath, toWorktreePath } from '@archon/git'; import { @@ -109,7 +107,6 @@ import { codebaseSchema, codebaseIdParamsSchema, addCodebaseBodySchema, - updateCodebaseBodySchema, deleteCodebaseResponseSchema, codebaseEnvVarsResponseSchema, setEnvVarBodySchema, @@ -467,28 +464,6 @@ const addCodebaseRoute = createRoute({ }, }); -const updateCodebaseRoute = 
createRoute({ - method: 'patch', - path: '/api/codebases/{id}', - tags: ['Codebases'], - summary: 'Update codebase consent flags (e.g. allow_env_keys)', - request: { - params: codebaseIdParamsSchema, - body: { - content: { 'application/json': { schema: updateCodebaseBodySchema } }, - required: true, - }, - }, - responses: { - 200: { - content: { 'application/json': { schema: codebaseSchema } }, - description: 'Updated codebase', - }, - 404: jsonError('Not found'), - 500: jsonError('Server error'), - }, -}); - const deleteCodebaseRoute = createRoute({ method: 'delete', path: '/api/codebases/{id}', @@ -1531,8 +1506,8 @@ export function registerApiRoutes( try { // .refine() guarantees exactly one of url/path is present const result = body.url - ? await cloneRepository(body.url, body.allowEnvKeys) - : await registerRepository(body.path ?? '', body.allowEnvKeys); + ? await cloneRepository(body.url) + : await registerRepository(body.path ?? ''); // Fetch the full codebase record for a consistent response const codebase = await codebaseDb.getCodebase(result.codebaseId); @@ -1542,12 +1517,6 @@ export function registerApiRoutes( return c.json(codebase, result.alreadyExisted ? 200 : 201); } catch (error) { - if (error instanceof EnvLeakError) { - const path = body.url ?? body.path ?? ''; - const files = error.report.findings.map(f => f.file); - getLog().warn({ path, files }, 'add_codebase_env_leak_refused'); - return apiError(c, 422, error.message); - } getLog().error({ err: error }, 'add_codebase_failed'); return apiError( c, @@ -1557,71 +1526,6 @@ export function registerApiRoutes( } }); - // PATCH /api/codebases/:id - Update consent flags - registerOpenApiRoute(updateCodebaseRoute, async c => { - const id = c.req.param('id') ?? ''; - const body = getValidatedBody(c, updateCodebaseBodySchema); - try { - const codebase = await codebaseDb.getCodebase(id); - if (!codebase) { - return apiError(c, 404, 'Codebase not found'); - } - - // Capture scanner findings for the audit log (best-effort — path may be gone) - let files: string[] = []; - let keys: string[] = []; - let scanStatus: 'ok' | 'skipped' = 'ok'; - try { - const report = scanPathForSensitiveKeys(codebase.default_cwd); - files = report.findings.map(f => f.file); - keys = Array.from(new Set(report.findings.flatMap(f => f.keys))); - } catch (scanErr) { - scanStatus = 'skipped'; - getLog().warn( - { err: scanErr, codebaseId: id, path: codebase.default_cwd }, - 'env_leak_consent_scan_skipped' - ); - } - - await codebaseDb.updateCodebaseAllowEnvKeys(id, body.allowEnvKeys); - - // Audit log: emitted unconditionally on every grant/revoke. `scanStatus` - // distinguishes "scanned and these are the findings" from "could not - // scan, files/keys are empty for that reason" — important for later - // security review of the audit trail. - getLog().warn( - { - codebaseId: id, - name: codebase.name, - path: codebase.default_cwd, - files, - keys, - scanStatus, - actor: 'user-ui', - }, - body.allowEnvKeys ? 
'env_leak_consent_granted' : 'env_leak_consent_revoked' - ); - - const updated = await codebaseDb.getCodebase(id); - if (!updated) { - return apiError(c, 500, 'Codebase updated but not found'); - } - let commands = updated.commands; - if (typeof commands === 'string') { - try { - commands = JSON.parse(commands); - } catch (parseErr) { - getLog().error({ err: parseErr, codebaseId: id }, 'corrupted_commands_json'); - commands = {}; - } - } - return c.json({ ...updated, commands }); - } catch (error) { - getLog().error({ err: error, codebaseId: id }, 'update_codebase_failed'); - return apiError(c, 500, 'Failed to update codebase'); - } - }); - // DELETE /api/codebases/:id - Delete a project and clean up registerOpenApiRoute(deleteCodebaseRoute, async c => { const id = c.req.param('id') ?? ''; diff --git a/packages/server/src/routes/schemas/codebase.schemas.ts b/packages/server/src/routes/schemas/codebase.schemas.ts index e8a6dea887..d2880a6be1 100644 --- a/packages/server/src/routes/schemas/codebase.schemas.ts +++ b/packages/server/src/routes/schemas/codebase.schemas.ts @@ -16,7 +16,6 @@ export const codebaseSchema = z repository_url: z.string().nullable(), default_cwd: z.string(), ai_assistant_type: z.string(), - allow_env_keys: z.boolean(), commands: z.record(codebaseCommandSchema), created_at: z.string(), updated_at: z.string(), @@ -34,20 +33,12 @@ export const addCodebaseBodySchema = z .object({ url: z.string().min(1).optional(), path: z.string().min(1).optional(), - allowEnvKeys: z.boolean().optional(), }) .refine(b => (b.url !== undefined) !== (b.path !== undefined), { message: 'Provide either "url" or "path", not both and not neither', }) .openapi('AddCodebaseBody'); -/** PATCH /api/codebases/:id request body. */ -export const updateCodebaseBodySchema = z - .object({ - allowEnvKeys: z.boolean(), - }) - .openapi('UpdateCodebaseBody'); - /** DELETE /api/codebases/:id response. */ export const deleteCodebaseResponseSchema = z .object({ success: z.boolean() }) diff --git a/packages/web/src/lib/api.generated.d.ts b/packages/web/src/lib/api.generated.d.ts index 193c619588..bb2ed58aef 100644 --- a/packages/web/src/lib/api.generated.d.ts +++ b/packages/web/src/lib/api.generated.d.ts @@ -549,51 +549,7 @@ export interface paths { }; options?: never; head?: never; - /** Update codebase consent flags (e.g. 
allow_env_keys) */ - patch: { - parameters: { - query?: never; - header?: never; - path: { - id: string; - }; - cookie?: never; - }; - requestBody: { - content: { - 'application/json': components['schemas']['UpdateCodebaseBody']; - }; - }; - responses: { - /** @description Updated codebase */ - 200: { - headers: { - [name: string]: unknown; - }; - content: { - 'application/json': components['schemas']['Codebase']; - }; - }; - /** @description Not found */ - 404: { - headers: { - [name: string]: unknown; - }; - content: { - 'application/json': components['schemas']['Error']; - }; - }; - /** @description Server error */ - 500: { - headers: { - [name: string]: unknown; - }; - content: { - 'application/json': components['schemas']['Error']; - }; - }; - }; - }; + patch?: never; trace?: never; }; '/api/codebases/{id}/env': { @@ -2010,7 +1966,6 @@ export interface components { repository_url: string | null; default_cwd: string; ai_assistant_type: string; - allow_env_keys: boolean; commands: { [key: string]: components['schemas']['CodebaseCommand']; }; @@ -2021,10 +1976,6 @@ export interface components { AddCodebaseBody: { url?: string; path?: string; - allowEnvKeys?: boolean; - }; - UpdateCodebaseBody: { - allowEnvKeys: boolean; }; DeleteCodebaseResponse: { success: boolean; diff --git a/packages/web/src/lib/api.ts b/packages/web/src/lib/api.ts index 6c81aa66b1..81a3529833 100644 --- a/packages/web/src/lib/api.ts +++ b/packages/web/src/lib/api.ts @@ -38,7 +38,6 @@ export interface CodebaseResponse { repository_url: string | null; default_cwd: string; ai_assistant_type: string; - allow_env_keys: boolean; commands: Record; created_at: string; updated_at: string; @@ -158,7 +157,7 @@ export async function getCodebase(id: string): Promise { } export async function addCodebase( - input: { url: string; allowEnvKeys?: boolean } | { path: string; allowEnvKeys?: boolean } + input: { url: string } | { path: string } ): Promise { return fetchJSON('/api/codebases', { method: 'POST', @@ -167,17 +166,6 @@ export async function addCodebase( }); } -export async function updateCodebase( - id: string, - input: { allowEnvKeys: boolean } -): Promise { - return fetchJSON(`/api/codebases/${id}`, { - method: 'PATCH', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify(input), - }); -} - export async function deleteCodebase(id: string): Promise<{ success: boolean }> { return fetchJSON<{ success: boolean }>(`/api/codebases/${id}`, { method: 'DELETE' }); } diff --git a/packages/web/src/routes/SettingsPage.tsx b/packages/web/src/routes/SettingsPage.tsx index 07a07690fc..0b9c7b6e60 100644 --- a/packages/web/src/routes/SettingsPage.tsx +++ b/packages/web/src/routes/SettingsPage.tsx @@ -10,7 +10,6 @@ import { getHealth, listCodebases, addCodebase, - updateCodebase, deleteCodebase, updateAssistantConfig, getCodebaseEnvVars, @@ -251,22 +250,11 @@ function EnvVarsPanel({ codebaseId }: { codebaseId: string }): React.ReactElemen ); } -function isEnvLeakError(error: unknown): boolean { - return ( - error instanceof Error && - 'status' in error && - (error as Error & { status: number }).status === 422 && - error.message.startsWith('Cannot add codebase') - ); -} - function ProjectsSection(): React.ReactElement { const queryClient = useQueryClient(); const [addPath, setAddPath] = useState(''); const [showAdd, setShowAdd] = useState(false); - const [allowEnvKeys, setAllowEnvKeys] = useState(false); const [expandedEnvVars, setExpandedEnvVars] = useState(null); - const [toggleError, setToggleError] = useState(null); 
const { data: codebases } = useQuery({ queryKey: ['codebases'], @@ -274,13 +262,11 @@ function ProjectsSection(): React.ReactElement { }); const addMutation = useMutation({ - mutationFn: ({ path, allowEnvKeys }: { path: string; allowEnvKeys?: boolean }) => - addCodebase({ path, allowEnvKeys }), + mutationFn: ({ path }: { path: string }) => addCodebase({ path }), onSuccess: () => { void queryClient.invalidateQueries({ queryKey: ['codebases'] }); setAddPath(''); setShowAdd(false); - setAllowEnvKeys(false); }, }); @@ -291,24 +277,10 @@ function ProjectsSection(): React.ReactElement { }, }); - const toggleEnvKeysMutation = useMutation({ - mutationFn: ({ id, allowEnvKeys }: { id: string; allowEnvKeys: boolean }) => - updateCodebase(id, { allowEnvKeys }), - onSuccess: () => { - setToggleError(null); - void queryClient.invalidateQueries({ queryKey: ['codebases'] }); - }, - onError: (err: Error) => { - // Without this the user clicks "Revoke env keys", confirms the - // destructive dialog, and gets no feedback if the PATCH fails. - setToggleError(err.message); - }, - }); - function handleAddSubmit(e: React.FormEvent): void { e.preventDefault(); if (addPath.trim()) { - addMutation.mutate({ path: addPath.trim(), allowEnvKeys: allowEnvKeys || undefined }); + addMutation.mutate({ path: addPath.trim() }); } } @@ -318,11 +290,6 @@ function ProjectsSection(): React.ReactElement { Projects - {toggleError && ( -
- Failed to update env-key consent: {toggleError} -
- )} {!codebases || codebases.length === 0 ? (
No projects registered.
) : ( @@ -331,40 +298,10 @@ function ProjectsSection(): React.ReactElement {
-
-
{cb.name}
- {cb.allow_env_keys && ( - - env keys allowed - - )} -
+
{cb.name}
{cb.default_cwd}
-
)} diff --git a/packages/workflows/package.json b/packages/workflows/package.json index 7126c5ffff..1c0e89514c 100644 --- a/packages/workflows/package.json +++ b/packages/workflows/package.json @@ -25,6 +25,7 @@ "dependencies": { "@archon/git": "workspace:*", "@archon/paths": "workspace:*", + "@archon/providers": "workspace:*", "@hono/zod-openapi": "^0.19.6", "zod": "^3.25.28" }, diff --git a/packages/workflows/src/dag-executor.test.ts b/packages/workflows/src/dag-executor.test.ts index 150ea4eeb7..0df80c93df 100644 --- a/packages/workflows/src/dag-executor.test.ts +++ b/packages/workflows/src/dag-executor.test.ts @@ -31,8 +31,8 @@ import { checkTriggerRule, substituteNodeOutputRefs, executeDagWorkflow, - loadMcpConfig, } from './dag-executor'; +import { loadMcpConfig } from '@archon/providers/claude/provider'; import type { DagNode, BashNode, ScriptNode, NodeOutput, WorkflowRun } from './schemas'; import { discoverWorkflows } from './workflow-discovery'; import { parseWorkflow } from './loader'; @@ -93,22 +93,54 @@ function createMockStore(): IWorkflowStore { }; } +/** All-true capabilities for Claude mock */ +const mockClaudeCapabilities = () => ({ + sessionResume: true, + mcp: true, + hooks: true, + skills: true, + toolRestrictions: true, + structuredOutput: true, + envInjection: true, + costControl: true, + effortControl: true, + thinkingControl: true, + fallbackModel: true, + sandbox: true, +}); +/** Limited capabilities for Codex mock */ +const mockCodexCapabilities = () => ({ + sessionResume: true, + mcp: false, + hooks: false, + skills: false, + toolRestrictions: false, + structuredOutput: true, + envInjection: false, + costControl: false, + effortControl: false, + thinkingControl: false, + fallbackModel: false, + sandbox: false, +}); + /** Mock AI sendQuery generator */ const mockSendQueryDag = mock(function* () { yield { type: 'assistant', content: 'DAG AI response' }; yield { type: 'result', sessionId: 'dag-session-id' }; }); -const mockGetAssistantClientDag = mock(() => ({ +const mockGetAgentProviderDag = mock(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); function createMockDeps(storeOverride?: IWorkflowStore): WorkflowDeps { const store = storeOverride ?? 
createMockStore(); return { store, - getAssistantClient: mockGetAssistantClientDag, + getAgentProvider: mockGetAgentProviderDag, loadConfig: mock(() => Promise.resolve({ assistant: 'claude' as const, @@ -749,7 +781,7 @@ describe('executeDagWorkflow -- tool restrictions', () => { await writeFile(join(commandsDir, 'my-cmd.md'), 'My command prompt for $USER_MESSAGE'); mockSendQueryDag.mockClear(); - mockGetAssistantClientDag.mockClear(); + mockGetAgentProviderDag.mockClear(); mockSendQueryDag.mockImplementation(function* () { yield { type: 'assistant', content: 'DAG AI response' }; @@ -759,9 +791,10 @@ describe('executeDagWorkflow -- tool restrictions', () => { afterEach(async () => { // Restore default claude client - mockGetAssistantClientDag.mockImplementation(() => ({ + mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); try { await rm(testDir, { recursive: true, force: true }); @@ -796,13 +829,15 @@ describe('executeDagWorkflow -- tool restrictions', () => { expect(mockSendQueryDag.mock.calls.length).toBeGreaterThan(0); const optionsArg = mockSendQueryDag.mock.calls[0][3] as Record; - expect(optionsArg?.tools).toEqual(['Read', 'Grep']); + const nodeConfig = optionsArg?.nodeConfig as Record; + expect(nodeConfig?.allowed_tools).toEqual(['Read', 'Grep']); }); it('warns user when Codex DAG node has denied_tools only', async () => { - mockGetAssistantClientDag.mockReturnValue({ + mockGetAgentProviderDag.mockReturnValue({ sendQuery: mockSendQueryDag, getType: () => 'codex', + getCapabilities: mockCodexCapabilities, }); const mockDeps = createMockDeps(); @@ -832,7 +867,9 @@ describe('executeDagWorkflow -- tool restrictions', () => { const sendMessage = platform.sendMessage as ReturnType; const messages = sendMessage.mock.calls.map((call: unknown[]) => call[1] as string); - const warning = messages.find(m => m.includes('denied_tools') && m.includes('Codex')); + const warning = messages.find( + m => m.includes('allowed_tools/denied_tools') && m.includes('codex') + ); expect(warning).toBeDefined(); }); @@ -859,7 +896,8 @@ describe('executeDagWorkflow -- tool restrictions', () => { expect(mockSendQueryDag.mock.calls.length).toBeGreaterThan(0); const optionsArg = mockSendQueryDag.mock.calls[0][3] as Record; - expect(optionsArg?.tools).toEqual([]); + const nodeConfig = optionsArg?.nodeConfig as Record; + expect(nodeConfig?.allowed_tools).toEqual([]); }); it('passes hooks to sendQuery options for Claude node', async () => { @@ -896,15 +934,17 @@ describe('executeDagWorkflow -- tool restrictions', () => { expect(mockSendQueryDag.mock.calls.length).toBeGreaterThan(0); const optionsArg = mockSendQueryDag.mock.calls[0][3] as Record; - expect(optionsArg?.hooks).toBeDefined(); - const hooks = optionsArg?.hooks as Record; + const nodeConfig = optionsArg?.nodeConfig as Record; + expect(nodeConfig?.hooks).toBeDefined(); + const hooks = nodeConfig?.hooks as Record; expect(hooks.PreToolUse).toHaveLength(1); }); it('warns user when Codex DAG node has hooks', async () => { - mockGetAssistantClientDag.mockReturnValue({ + mockGetAgentProviderDag.mockReturnValue({ sendQuery: mockSendQueryDag, getType: () => 'codex', + getCapabilities: mockCodexCapabilities, }); const mockDeps = createMockDeps(); @@ -941,7 +981,7 @@ describe('executeDagWorkflow -- tool restrictions', () => { const sendMessage = platform.sendMessage as ReturnType; const messages = sendMessage.mock.calls.map((call: unknown[]) => call[1] as string); - 
const warning = messages.find(m => m.includes('hooks') && m.includes('Codex')); + const warning = messages.find(m => m.includes('hooks') && m.includes('codex')); expect(warning).toBeDefined(); }); }); @@ -954,16 +994,17 @@ describe('executeDagWorkflow -- bash nodes', () => { await mkdir(testDir, { recursive: true }); mockSendQueryDag.mockClear(); - mockGetAssistantClientDag.mockClear(); + mockGetAgentProviderDag.mockClear(); mockSendQueryDag.mockImplementation(function* () { yield { type: 'assistant', content: 'DAG AI response' }; yield { type: 'result', sessionId: 'dag-session-id' }; }); - mockGetAssistantClientDag.mockImplementation(() => ({ + mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); }); @@ -1221,13 +1262,14 @@ describe('executeDagWorkflow -- output_format structured output', () => { await writeFile(join(commandsDir, 'classify.md'), 'Classify this: $USER_MESSAGE'); mockSendQueryDag.mockClear(); - mockGetAssistantClientDag.mockClear(); + mockGetAgentProviderDag.mockClear(); }); afterEach(async () => { - mockGetAssistantClientDag.mockImplementation(() => ({ + mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); try { await rm(testDir, { recursive: true, force: true }); @@ -1393,15 +1435,16 @@ describe('executeDagWorkflow -- output_format structured output', () => { }); it('passes outputFormat to Codex nodes and uses inline JSON response', async () => { - // Codex returns structured output inline as agent_message text (no structuredOutput field) + // Codex provider normalizes inline JSON into structuredOutput on the result chunk const classifyJson = { run_code_review: 'true', run_tests: 'false' }; - mockGetAssistantClientDag.mockImplementation(() => ({ + mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'codex', + getCapabilities: mockCodexCapabilities, })); mockSendQueryDag.mockImplementation(function* () { yield { type: 'assistant', content: JSON.stringify(classifyJson) }; - yield { type: 'result', sessionId: 'codex-sid-1' }; + yield { type: 'result', sessionId: 'codex-sid-1', structuredOutput: classifyJson }; }); const mockDeps = createMockDeps(); @@ -1464,14 +1507,15 @@ describe('executeDagWorkflow -- output_format structured output', () => { }); it('does not warn about missing structuredOutput for Codex nodes', async () => { - // Codex returns structured output inline — no structuredOutput field on result - mockGetAssistantClientDag.mockImplementation(() => ({ + // Codex provider normalizes inline JSON into structuredOutput on the result chunk + mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'codex', + getCapabilities: mockCodexCapabilities, })); mockSendQueryDag.mockImplementation(function* () { yield { type: 'assistant', content: '{"status":"ok"}' }; - yield { type: 'result', sessionId: 'codex-sid-2' }; + yield { type: 'result', sessionId: 'codex-sid-2', structuredOutput: { status: 'ok' } }; }); const mockDeps = createMockDeps(); @@ -1524,10 +1568,11 @@ describe('executeDagWorkflow -- when condition parse errors (fail-closed)', () = await writeFile(join(commandsDir, 'my-cmd.md'), 'Do something for $USER_MESSAGE'); mockSendQueryDag.mockClear(); - mockGetAssistantClientDag.mockClear(); - mockGetAssistantClientDag.mockImplementation(() => ({ + mockGetAgentProviderDag.mockClear(); + 
mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); mockSendQueryDag.mockImplementation(function* () { yield { type: 'assistant', content: 'AI response' }; @@ -1536,9 +1581,10 @@ describe('executeDagWorkflow -- when condition parse errors (fail-closed)', () = }); afterEach(async () => { - mockGetAssistantClientDag.mockImplementation(() => ({ + mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); try { await rm(testDir, { recursive: true, force: true }); @@ -1652,10 +1698,11 @@ describe('executeDagWorkflow -- node-level retry for transient errors', () => { await writeFile(join(commandsDir, 'my-cmd.md'), 'Do something for $USER_MESSAGE'); mockSendQueryDag.mockClear(); - mockGetAssistantClientDag.mockClear(); - mockGetAssistantClientDag.mockImplementation(() => ({ + mockGetAgentProviderDag.mockClear(); + mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); mockSendQueryDag.mockImplementation(function* () { yield { type: 'assistant', content: 'DAG AI response' }; @@ -1664,9 +1711,10 @@ describe('executeDagWorkflow -- node-level retry for transient errors', () => { }); afterEach(async () => { - mockGetAssistantClientDag.mockImplementation(() => ({ + mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); try { await rm(testDir, { recursive: true, force: true }); @@ -1841,10 +1889,11 @@ describe('executeDagWorkflow -- tool_called event persistence', () => { await writeFile(join(commandsDir, 'my-cmd.md'), 'My command prompt for $USER_MESSAGE'); mockSendQueryDag.mockClear(); - mockGetAssistantClientDag.mockClear(); - mockGetAssistantClientDag.mockImplementation(() => ({ + mockGetAgentProviderDag.mockClear(); + mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); }); @@ -1949,10 +1998,11 @@ describe('executeDagWorkflow -- tool_completed event emission', () => { await writeFile(join(commandsDir, 'my-cmd.md'), 'My command prompt for $USER_MESSAGE'); mockSendQueryDag.mockClear(); - mockGetAssistantClientDag.mockClear(); - mockGetAssistantClientDag.mockImplementation(() => ({ + mockGetAgentProviderDag.mockClear(); + mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); }); @@ -2210,7 +2260,7 @@ describe('executeDagWorkflow -- skills options', () => { await writeFile(join(commandsDir, 'my-cmd.md'), 'My command prompt for $USER_MESSAGE'); mockSendQueryDag.mockClear(); - mockGetAssistantClientDag.mockClear(); + mockGetAgentProviderDag.mockClear(); mockSendQueryDag.mockImplementation(function* () { yield { type: 'assistant', content: 'DAG AI response' }; @@ -2219,9 +2269,10 @@ describe('executeDagWorkflow -- skills options', () => { }); afterEach(async () => { - mockGetAssistantClientDag.mockImplementation(() => ({ + mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); try { await rm(testDir, { recursive: true, force: true }); @@ -2256,17 +2307,9 @@ describe('executeDagWorkflow -- skills options', () => { 
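// Aside — a minimal sketch of what "provider translates to agents internally"
// means for skills, based on the executor logic this refactor removes below.
// Names (skillsToSdkOptions, cfg) are illustrative, not the shipped provider
// code; it assumes the Claude SDK's `agents`/`agent` options and keeps 'Skill'
// in the tool list so preloaded skills stay invocable:
function skillsToSdkOptions(
  nodeId: string,
  cfg: { skills?: string[]; allowed_tools?: string[] }
): Record<string, unknown> {
  if (!cfg.skills?.length) return {};
  const agentId = `dag-node-${nodeId}`;
  return {
    agent: agentId,
    agents: {
      [agentId]: {
        description: `DAG node '${nodeId}'`,
        prompt: `You have preloaded skills: ${cfg.skills.join(', ')}. Use them when relevant.`,
        skills: cfg.skills,
        // allowed_tools plus 'Skill', mirroring the pre-refactor executor behavior
        tools: [...(cfg.allowed_tools ?? []), 'Skill'],
      },
    },
  };
}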
expect(mockSendQueryDag.mock.calls.length).toBeGreaterThan(0); const optionsArg = mockSendQueryDag.mock.calls[0][3] as Record; - // agents contains the agent definition - const agents = optionsArg?.agents as Record>; - expect(agents).toBeDefined(); - expect(agents['dag-node-review']).toBeDefined(); - expect(agents['dag-node-review'].skills).toEqual(['codebase-search', 'test-runner']); - // tools always includes 'Skill' explicitly - expect(agents['dag-node-review'].tools).toEqual(['Skill']); - // agent references the key - expect(optionsArg?.agent).toBe('dag-node-review'); - // allowedTools includes 'Skill' for the parent session - expect(optionsArg?.allowedTools).toContain('Skill'); + const nodeConfig = optionsArg?.nodeConfig as Record; + // skills are passed in nodeConfig — provider translates to agents internally + expect(nodeConfig?.skills).toEqual(['codebase-search', 'test-runner']); }); it('appends Skill to existing allowed_tools list when node has both', async () => { @@ -2302,17 +2345,17 @@ describe('executeDagWorkflow -- skills options', () => { expect(mockSendQueryDag.mock.calls.length).toBeGreaterThan(0); const optionsArg = mockSendQueryDag.mock.calls[0][3] as Record; - const agents = optionsArg?.agents as Record>; - // Agent tools = allowed_tools + Skill - expect(agents['dag-node-review'].tools).toEqual(['Read', 'Grep', 'Skill']); - // Parent session also gets Skill - expect(optionsArg?.allowedTools).toContain('Skill'); + const nodeConfig = optionsArg?.nodeConfig as Record; + // skills and allowed_tools are both in nodeConfig — provider merges internally + expect(nodeConfig?.skills).toEqual(['codebase-search']); + expect(nodeConfig?.allowed_tools).toEqual(['Read', 'Grep']); }); it('warns user when Codex DAG node has skills and does not pass agents', async () => { - mockGetAssistantClientDag.mockReturnValue({ + mockGetAgentProviderDag.mockReturnValue({ sendQuery: mockSendQueryDag, getType: () => 'codex', + getCapabilities: mockCodexCapabilities, }); const mockDeps = createMockDeps(); @@ -2343,15 +2386,8 @@ describe('executeDagWorkflow -- skills options', () => { // Warning sent to user const sendMessage = platform.sendMessage as ReturnType; const messages = sendMessage.mock.calls.map((call: unknown[]) => call[1] as string); - const warning = messages.find(m => m.includes('skills') && m.includes('Codex')); + const warning = messages.find(m => m.includes('skills') && m.includes('codex')); expect(warning).toBeDefined(); - - // No agents/agent passed to Codex sendQuery - if (mockSendQueryDag.mock.calls.length > 0) { - const optionsArg = mockSendQueryDag.mock.calls[0][3] as Record; - expect(optionsArg?.agents).toBeUndefined(); - expect(optionsArg?.agent).toBeUndefined(); - } }); }); @@ -2457,7 +2493,7 @@ describe('executeDagWorkflow -- resume with priorCompletedNodes', () => { await writeFile(join(commandsDir, 'step2.md'), 'Step 2 prompt using $step1.output'); mockSendQueryDag.mockClear(); - mockGetAssistantClientDag.mockClear(); + mockGetAgentProviderDag.mockClear(); mockSendQueryDag.mockImplementation(function* () { yield { type: 'assistant', content: 'AI response' }; @@ -2466,9 +2502,10 @@ describe('executeDagWorkflow -- resume with priorCompletedNodes', () => { }); afterEach(async () => { - mockGetAssistantClientDag.mockImplementation(() => ({ + mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); try { await rm(testDir, { recursive: true, force: true }); @@ -3578,11 +3615,12 @@ 
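// Aside — the consumption pattern the "break after result" suite below
// exercises, sketched with illustrative types (not the executor's real
// signature): stop iterating the chunk stream as soon as the result chunk
// arrives instead of waiting for the provider subprocess to exit.
async function consumeUntilResult(
  chunks: AsyncGenerator<{ type: string; sessionId?: string }>
): Promise<string | undefined> {
  for await (const chunk of chunks) {
    if (chunk.type === 'result') {
      return chunk.sessionId; // result is the "I'm done" signal; don't block on subprocess exit
    }
  }
  return undefined;
}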
describe('executeDagWorkflow -- break after result (no hang on subprocess exit)' await writeFile(join(commandsDir, 'my-cmd.md'), 'Command prompt $ARGUMENTS'); mockSendQueryDag.mockClear(); - mockGetAssistantClientDag.mockClear(); + mockGetAgentProviderDag.mockClear(); - mockGetAssistantClientDag.mockImplementation(() => ({ + mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); }); @@ -3592,9 +3630,10 @@ describe('executeDagWorkflow -- break after result (no hang on subprocess exit)' yield { type: 'assistant', content: 'DAG AI response' }; yield { type: 'result', sessionId: 'dag-session-id' }; }); - mockGetAssistantClientDag.mockImplementation(() => ({ + mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); try { await rm(testDir, { recursive: true, force: true }); @@ -3700,11 +3739,12 @@ describe('executeDagWorkflow -- terminal node output selection', () => { await writeFile(join(commandsDir, 'my-cmd.md'), 'Command prompt $ARGUMENTS'); mockSendQueryDag.mockClear(); - mockGetAssistantClientDag.mockClear(); + mockGetAgentProviderDag.mockClear(); - mockGetAssistantClientDag.mockImplementation(() => ({ + mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); }); @@ -3713,9 +3753,10 @@ describe('executeDagWorkflow -- terminal node output selection', () => { yield { type: 'assistant', content: 'DAG AI response' }; yield { type: 'result', sessionId: 'dag-session-id' }; }); - mockGetAssistantClientDag.mockImplementation(() => ({ + mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); try { await rm(testDir, { recursive: true, force: true }); @@ -3951,13 +3992,14 @@ describe('executeDagWorkflow -- credit exhaustion', () => { await mkdir(commandsDir, { recursive: true }); mockSendQueryDag.mockClear(); - mockGetAssistantClientDag.mockClear(); + mockGetAgentProviderDag.mockClear(); }); afterEach(async () => { - mockGetAssistantClientDag.mockImplementation(() => ({ + mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); mockSendQueryDag.mockImplementation(function* () { yield { type: 'assistant', content: 'DAG AI response' }; @@ -3975,9 +4017,10 @@ describe('executeDagWorkflow -- credit exhaustion', () => { yield { type: 'assistant', content: "You're out of extra usage · resets in 2h" }; yield { type: 'result', sessionId: 'dag-session-credit' }; }); - mockGetAssistantClientDag.mockReturnValue({ + mockGetAgentProviderDag.mockReturnValue({ sendQuery: creditExhaustedQuery, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, }); const store = createMockStore(); @@ -4025,17 +4068,19 @@ describe('executeDagWorkflow -- approval node', () => { ); await mkdir(join(testDir, '.archon', 'commands'), { recursive: true }); mockSendQueryDag.mockClear(); - mockGetAssistantClientDag.mockClear(); - mockGetAssistantClientDag.mockImplementation(() => ({ + mockGetAgentProviderDag.mockClear(); + mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); }); afterEach(async () => { - 
mockGetAssistantClientDag.mockImplementation(() => ({ + mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); try { await rm(testDir, { recursive: true, force: true }); @@ -4333,16 +4378,18 @@ describe('executeDagWorkflow -- env var injection', () => { await writeFile(join(testDir, '.archon', 'commands', 'my-cmd.md'), '# Test'); }); mockSendQueryDag.mockClear(); - mockGetAssistantClientDag.mockImplementation(() => ({ + mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); }); afterEach(async () => { - mockGetAssistantClientDag.mockImplementation(() => ({ + mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); try { await rm(testDir, { recursive: true, force: true }); @@ -4417,16 +4464,17 @@ describe('executeDagWorkflow -- Claude SDK advanced options', () => { await writeFile(join(commandsDir, 'my-cmd.md'), 'My command prompt'); mockSendQueryDag.mockClear(); - mockGetAssistantClientDag.mockClear(); + mockGetAgentProviderDag.mockClear(); mockLogFn.mockClear(); mockSendQueryDag.mockImplementation(function* () { yield { type: 'assistant', content: 'DAG AI response' }; yield { type: 'result', sessionId: 'dag-session-id' }; }); - mockGetAssistantClientDag.mockImplementation(() => ({ + mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); }); @@ -4558,7 +4606,8 @@ describe('executeDagWorkflow -- Claude SDK advanced options', () => { expect(mockSendQueryDag.mock.calls.length).toBeGreaterThan(0); const optionsArg = mockSendQueryDag.mock.calls[0][3] as Record; - expect(optionsArg?.effort).toBe('high'); + const nodeConfig = optionsArg?.nodeConfig as Record; + expect(nodeConfig?.effort).toBe('high'); }); it('per-node effort overrides workflow-level effort', async () => { @@ -4588,13 +4637,15 @@ describe('executeDagWorkflow -- Claude SDK advanced options', () => { expect(mockSendQueryDag.mock.calls.length).toBeGreaterThan(0); const optionsArg = mockSendQueryDag.mock.calls[0][3] as Record; - expect(optionsArg?.effort).toBe('max'); + const nodeConfig = optionsArg?.nodeConfig as Record; + expect(nodeConfig?.effort).toBe('max'); }); it('warns user when Codex node has Claude-only options (effort)', async () => { - mockGetAssistantClientDag.mockImplementation(() => ({ + mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'codex', + getCapabilities: mockCodexCapabilities, })); const mockDeps = createMockDeps(); @@ -4637,12 +4688,13 @@ describe('executeDagWorkflow -- cost tracking', () => { await writeFile(join(commandsDir, 'my-cmd.md'), 'My command prompt'); mockSendQueryDag.mockClear(); - mockGetAssistantClientDag.mockClear(); + mockGetAgentProviderDag.mockClear(); mockLogFn.mockClear(); - mockGetAssistantClientDag.mockImplementation(() => ({ + mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); }); @@ -4835,16 +4887,17 @@ describe('executeDagWorkflow -- script nodes', () => { await mkdir(testDir, { recursive: true }); mockSendQueryDag.mockClear(); - mockGetAssistantClientDag.mockClear(); + mockGetAgentProviderDag.mockClear(); mockSendQueryDag.mockImplementation(function* 
() { yield { type: 'assistant', content: 'DAG AI response' }; yield { type: 'result', sessionId: 'dag-session-id' }; }); - mockGetAssistantClientDag.mockImplementation(() => ({ + mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); }); @@ -5209,4 +5262,53 @@ describe('executeDagWorkflow -- script nodes', () => { const notFoundMsg = messages.find((m: string) => m.includes('not found in .archon/scripts/')); expect(notFoundMsg).toBeDefined(); }); + + it('bun script node does not leak repo .env from execution cwd (#1135)', async () => { + // Regression test: place a .env with a marker in the execution cwd. + // The bun script must NOT see it because --no-env-file is passed. + const mockDeps = createMockDeps(); + const platform = createMockPlatform(); + const workflowRun = makeWorkflowRun('env-leak-run-id', { + workflow_name: 'env-leak-test', + conversation_id: 'conv-env-leak', + user_message: 'env leak test', + }); + + // Write a .env with a marker in the script execution cwd + await writeFile(join(testDir, '.env'), 'LEAKED_REPO_SECRET=should_not_appear\n'); + + const scriptNode: ScriptNode = { + id: 'env-check', + script: 'console.log(process.env.LEAKED_REPO_SECRET ?? "CLEAN")', + runtime: 'bun', + }; + + await executeDagWorkflow( + mockDeps, + platform, + 'conv-env-leak', + testDir, + { name: 'env-leak-test', nodes: [scriptNode] }, + workflowRun, + 'claude', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + minimalConfig + ); + + // The node output should be "CLEAN" — the repo .env was not loaded + const eventCalls = (mockDeps.store.createWorkflowEvent as ReturnType).mock.calls; + const completedEvent = eventCalls.find( + (call: unknown[]) => + (call[0] as { event_type: string }).event_type === 'node_completed' && + (call[0] as { step_name: string }).step_name === 'env-check' + ); + expect(completedEvent).toBeDefined(); + expect((completedEvent![0] as { data: { node_output: string } }).data.node_output).toBe( + 'CLEAN' + ); + }); }); diff --git a/packages/workflows/src/dag-executor.ts b/packages/workflows/src/dag-executor.ts index facfbd1068..c0af88a140 100644 --- a/packages/workflows/src/dag-executor.ts +++ b/packages/workflows/src/dag-executor.ts @@ -5,18 +5,21 @@ * Independent nodes within the same layer run concurrently via Promise.allSettled. * Captures all assistant output regardless of streaming mode for $node_id.output substitution. */ -import { readFile } from 'fs/promises'; -import { resolve, isAbsolute } from 'path'; +import { resolve } from 'path'; import { execFileAsync } from '@archon/git'; import { discoverScripts } from './script-discovery'; import type { - WorkflowAssistantOptions, IWorkflowPlatform, WorkflowMessageMetadata, - WorkflowTokenUsage, WorkflowConfig, WorkflowDeps, } from './deps'; +import type { + SendQueryOptions, + NodeConfig, + ProviderCapabilities, + TokenUsage, +} from '@archon/providers/types'; import type { DagNode, ApprovalNode, @@ -28,7 +31,6 @@ import type { NodeOutput, TriggerRule, WorkflowRun, - WorkflowNodeHooks, EffortLevel, ThinkingConfig, SandboxSettings, @@ -228,137 +230,16 @@ export function substituteNodeOutputRefs( ); } -/** SDK-compatible hook structure returned by buildSDKHooksFromYAML */ -type SDKHooksMap = NonNullable; - -/** - * Convert declarative YAML hook definitions to SDK HookCallbackMatcher arrays. - * Each YAML matcher's `response` is wrapped in `async () => response`. 
- */ -export function buildSDKHooksFromYAML(nodeHooks: WorkflowNodeHooks): SDKHooksMap { - const sdkHooks: SDKHooksMap = {}; - - for (const [event, matchers] of Object.entries(nodeHooks)) { - if (!matchers) continue; - sdkHooks[event] = matchers.map(m => ({ - ...(m.matcher ? { matcher: m.matcher } : {}), - hooks: [async (): Promise => m.response], - ...(m.timeout ? { timeout: m.timeout } : {}), - })); - } - - if (Object.keys(sdkHooks).length === 0) { - getLog().warn({ nodeHooksKeys: Object.keys(nodeHooks) }, 'dag.hooks_build_produced_empty_map'); - } - - return sdkHooks; -} - -/** - * Load MCP server config from a JSON file and expand environment variables. - * Format: Record matching the SDK's expected shape. - * $VAR_NAME references in env/headers values are expanded from process.env. - * Secrets are NEVER logged. - */ -export async function loadMcpConfig( - mcpPath: string, - cwd: string -): Promise<{ servers: Record; serverNames: string[]; missingVars: string[] }> { - const fullPath = isAbsolute(mcpPath) ? mcpPath : resolve(cwd, mcpPath); - - let raw: string; - try { - raw = await readFile(fullPath, 'utf-8'); - } catch (err) { - const e = err as NodeJS.ErrnoException; - if (e.code === 'ENOENT') { - throw new Error(`MCP config file not found: ${mcpPath} (resolved to ${fullPath})`); - } - throw new Error(`Failed to read MCP config file: ${mcpPath} — ${e.message}`); - } - - let parsed: Record; - try { - parsed = JSON.parse(raw) as Record; - } catch (parseErr) { - const detail = (parseErr as SyntaxError).message; - throw new Error(`MCP config file is not valid JSON: ${mcpPath} — ${detail}`); - } - - if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) { - throw new Error(`MCP config must be a JSON object (Record): ${mcpPath}`); - } - - const { expanded, missingVars } = expandEnvVars(parsed); - const serverNames = Object.keys(expanded); - - return { servers: expanded, serverNames, missingVars }; -} - -/** - * Expand $VAR_NAME references in a string-valued record from process.env. - * Undefined env vars are replaced with empty string; their names are collected in missingVars. - * Non-string values are coerced to string with a warning. - */ -function expandEnvVarsInRecord( - record: Record, - missingVars: string[] -): Record { - const result: Record = {}; - for (const [key, val] of Object.entries(record)) { - if (typeof val !== 'string') { - getLog().warn({ key, valueType: typeof val }, 'dag.mcp_env_value_coerced_to_string'); - result[key] = String(val); - continue; - } - result[key] = val.replace(/\$([A-Z_][A-Z0-9_]*)/g, (_, varName: string) => { - const envVal = process.env[varName]; - if (envVal === undefined) { - missingVars.push(varName); - } - return envVal ?? ''; - }); - } - return result; -} - -/** - * Expand $VAR_NAME references in 'env' and 'headers' string values from process.env. - * Other fields (command, args, url) are left untouched. - * Undefined env vars are replaced with empty string and collected in missingVars. 
- */ -function expandEnvVars(config: Record): { - expanded: Record; - missingVars: string[]; -} { - const result: Record = {}; - const missingVars: string[] = []; - for (const [serverName, serverConfig] of Object.entries(config)) { - if (typeof serverConfig !== 'object' || serverConfig === null) { - getLog().warn( - { serverName, valueType: typeof serverConfig }, - 'dag.mcp_server_config_not_object' - ); - continue; - } - const server = { ...(serverConfig as Record) }; - if (server.env && typeof server.env === 'object') { - server.env = expandEnvVarsInRecord(server.env as Record, missingVars); - } - if (server.headers && typeof server.headers === 'object') { - server.headers = expandEnvVarsInRecord( - server.headers as Record, - missingVars - ); - } - result[serverName] = server; - } - return { expanded: result, missingVars }; -} +// buildSDKHooksFromYAML moved to @archon/providers/src/claude/provider.ts +// loadMcpConfig moved to @archon/providers/src/claude/provider.ts /** * Resolve per-node provider and model. * Node-level overrides take precedence over workflow defaults. + * + * Provider-agnostic: builds universal base options + raw nodeConfig. + * The provider internally translates nodeConfig to SDK-specific options. + * Capability warnings inform users when features are unsupported. */ async function resolveNodeProviderAndModel( node: DagNode, @@ -368,12 +249,13 @@ async function resolveNodeProviderAndModel( platform: IWorkflowPlatform, conversationId: string, workflowRunId: string, - cwd: string, - workflowLevelOptions: WorkflowLevelOptions + _cwd: string, + workflowLevelOptions: WorkflowLevelOptions, + deps: WorkflowDeps ): Promise<{ provider: 'claude' | 'codex'; model: string | undefined; - options: WorkflowAssistantOptions | undefined; + options: SendQueryOptions | undefined; }> { let provider: 'claude' | 'codex'; @@ -397,225 +279,90 @@ async function resolveNodeProviderAndModel( ); } - // Warn if Codex node has allowed_tools or denied_tools (unsupported per-call) - if ( - provider === 'codex' && - (node.allowed_tools !== undefined || node.denied_tools !== undefined) - ) { - getLog().warn({ nodeId: node.id }, 'dag_node_tool_restrictions_ignored_codex'); - const delivered = await safeSendMessage( - platform, - conversationId, - `Warning: Node '${node.id}' has allowed_tools/denied_tools set but uses Codex — per-node tool restrictions are not supported for Codex. Configure MCP servers globally in the Codex CLI config instead.`, - { workflowId: workflowRunId, nodeName: node.id } - ); - if (!delivered) { - getLog().error({ nodeId: node.id, workflowRunId }, 'dag_node_codex_warning_delivery_failed'); + // Get provider capabilities for capability warnings + const aiClient = deps.getAgentProvider(provider); + const caps = aiClient.getCapabilities(); + + // Capability warnings — inform users when features are unsupported + const capChecks: [string, keyof ProviderCapabilities, boolean][] = [ + [ + 'allowed_tools/denied_tools', + 'toolRestrictions', + node.allowed_tools !== undefined || node.denied_tools !== undefined, + ], + ['hooks', 'hooks', node.hooks !== undefined], + ['mcp', 'mcp', node.mcp !== undefined], + ['skills', 'skills', node.skills !== undefined && node.skills.length > 0], + ['effort', 'effortControl', (node.effort ?? workflowLevelOptions.effort) !== undefined], + ['thinking', 'thinkingControl', (node.thinking ?? 
workflowLevelOptions.thinking) !== undefined], + ['maxBudgetUsd', 'costControl', node.maxBudgetUsd !== undefined], + [ + 'fallbackModel', + 'fallbackModel', + (node.fallbackModel ?? workflowLevelOptions.fallbackModel) !== undefined, + ], + ['sandbox', 'sandbox', (node.sandbox ?? workflowLevelOptions.sandbox) !== undefined], + ]; + + const unsupported: string[] = []; + for (const [field, cap, isSet] of capChecks) { + if (isSet && !caps[cap]) { + unsupported.push(field); } } - // Warn if Codex node has hooks (unsupported) - if (provider === 'codex' && node.hooks) { - getLog().warn({ nodeId: node.id }, 'dag_node_hooks_ignored_codex'); + if (unsupported.length > 0) { + getLog().warn({ nodeId: node.id, provider, unsupported }, 'dag.unsupported_capabilities'); const delivered = await safeSendMessage( platform, conversationId, - `Warning: Node '${node.id}' has hooks set but uses Codex provider — hooks are Claude-only and will be ignored.`, + `Warning: Node '${node.id}' uses ${unsupported.join(', ')} but ${provider} doesn't support ${unsupported.length === 1 ? 'it' : 'them'} — ${unsupported.length === 1 ? 'this will be' : 'these will be'} ignored.`, { workflowId: workflowRunId, nodeName: node.id } ); if (!delivered) { - getLog().error({ nodeId: node.id, workflowRunId }, 'dag_node_hooks_warning_delivery_failed'); + getLog().error({ nodeId: node.id, workflowRunId }, 'dag.capability_warning_delivery_failed'); } } - // Warn if Codex node has mcp (unsupported per-call) - if (provider === 'codex' && node.mcp) { - getLog().warn({ nodeId: node.id }, 'dag.mcp_ignored_codex'); - const delivered = await safeSendMessage( - platform, - conversationId, - `Warning: Node '${node.id}' has mcp config but uses Codex — per-node MCP servers are not supported for Codex. Configure MCP servers globally in the Codex CLI config instead.`, - { workflowId: workflowRunId, nodeName: node.id } - ); - if (!delivered) { - getLog().error({ nodeId: node.id, workflowRunId }, 'dag.mcp_warning_delivery_failed'); - } + // Build universal base options + const baseOptions: SendQueryOptions = {}; + if (model) baseOptions.model = model; + if (config.envVars && Object.keys(config.envVars).length > 0) { + baseOptions.env = config.envVars; } - - // Warn if Codex node has skills (unsupported) - if (provider === 'codex' && node.skills) { - getLog().warn({ nodeId: node.id }, 'dag.skills_ignored_codex'); - const delivered = await safeSendMessage( - platform, - conversationId, - `Warning: Node '${node.id}' has skills set but uses Codex — per-node skills are not supported for Codex.`, - { workflowId: workflowRunId, nodeName: node.id } - ); - if (!delivered) { - getLog().error({ nodeId: node.id, workflowRunId }, 'dag.skills_warning_delivery_failed'); - } + if (node.systemPrompt !== undefined) baseOptions.systemPrompt = node.systemPrompt; + if (node.maxBudgetUsd !== undefined) baseOptions.maxBudgetUsd = node.maxBudgetUsd; + const fb = node.fallbackModel ?? workflowLevelOptions.fallbackModel; + if (fb) baseOptions.fallbackModel = fb; + if (node.output_format) { + baseOptions.outputFormat = { type: 'json_schema', schema: node.output_format }; } - // Warn if Codex node has Claude-only SDK options (effort, thinking, maxBudgetUsd, systemPrompt, fallbackModel, betas, sandbox) - if (provider === 'codex') { - const claudeOnlyFields = [ - ['effort', node.effort ?? workflowLevelOptions.effort], - ['thinking', node.thinking ?? 
workflowLevelOptions.thinking], - ['maxBudgetUsd', node.maxBudgetUsd], - ['systemPrompt', node.systemPrompt], - ['fallbackModel', node.fallbackModel ?? workflowLevelOptions.fallbackModel], - ['betas', node.betas ?? workflowLevelOptions.betas], - ['sandbox', node.sandbox ?? workflowLevelOptions.sandbox], - ] as const; - const present = claudeOnlyFields.filter(([, val]) => val !== undefined).map(([name]) => name); - if (present.length > 0) { - getLog().warn({ nodeId: node.id, fields: present }, 'dag.claude_options_ignored_codex'); - const delivered = await safeSendMessage( - platform, - conversationId, - `Warning: Node '${node.id}' has Claude-only options (${present.join(', ')}) but uses Codex — these will be ignored.`, - { workflowId: workflowRunId, nodeName: node.id } - ); - if (!delivered) { - getLog().error( - { nodeId: node.id, workflowRunId }, - 'dag.claude_options_warning_delivery_failed' - ); - } - } - } - - let options: WorkflowAssistantOptions | undefined; - if (provider === 'codex') { - options = { - model, - modelReasoningEffort: config.assistants.codex.modelReasoningEffort, - webSearchMode: config.assistants.codex.webSearchMode, - additionalDirectories: config.assistants.codex.additionalDirectories, - }; - if (node.output_format) { - options.outputFormat = { type: 'json_schema', schema: node.output_format }; - } - } else { - const claudeOptions: WorkflowAssistantOptions = {}; - if (model) claudeOptions.model = model; - // Propagate settingSources from config (controls which CLAUDE.md files the SDK loads) - if (config.assistants.claude.settingSources) { - claudeOptions.settingSources = config.assistants.claude.settingSources; - } - if (provider === 'claude' && node.output_format) { - claudeOptions.outputFormat = { - type: 'json_schema', - schema: node.output_format, - }; - } - if (node.allowed_tools !== undefined) claudeOptions.tools = node.allowed_tools; - if (node.denied_tools !== undefined) claudeOptions.disallowedTools = node.denied_tools; - if (node.hooks) { - const builtHooks = buildSDKHooksFromYAML(node.hooks); - if (Object.keys(builtHooks).length > 0) claudeOptions.hooks = builtHooks; - } - // Load MCP config if specified - if (node.mcp) { - try { - const { servers, serverNames, missingVars } = await loadMcpConfig(node.mcp, cwd); - // loadMcpConfig returns Record from JSON; cast to the structural - // union type — the SDK validates server configs at connection time - claudeOptions.mcpServers = servers as unknown as WorkflowAssistantOptions['mcpServers']; - // Auto-allow all MCP tools via wildcards - const mcpWildcards = serverNames.map(name => `mcp__${name}__*`); - claudeOptions.allowedTools = [...(claudeOptions.allowedTools ?? []), ...mcpWildcards]; - getLog().info({ nodeId: node.id, serverNames, mcpPath: node.mcp }, 'dag.mcp_config_loaded'); - // Warn user about missing env vars (likely secrets that will cause auth failures) - if (missingVars.length > 0) { - const uniqueVars = [...new Set(missingVars)]; - getLog().warn({ nodeId: node.id, missingVars: uniqueVars }, 'dag.mcp_env_vars_missing'); - const delivered = await safeSendMessage( - platform, - conversationId, - `Warning: Node '${node.id}' MCP config references undefined env vars: ${uniqueVars.join(', ')}. 
These will be empty strings — MCP servers may fail to authenticate.`, - { workflowId: workflowRunId, nodeName: node.id } - ); - if (!delivered) { - getLog().error( - { nodeId: node.id, workflowRunId }, - 'dag.mcp_env_vars_warning_delivery_failed' - ); - } - } - // Warn if Haiku model is used with MCP (tool search not supported) - if (model?.toLowerCase().includes('haiku')) { - getLog().warn({ nodeId: node.id, model }, 'dag.mcp_haiku_tool_search_unsupported'); - const haikuDelivered = await safeSendMessage( - platform, - conversationId, - `Warning: Node '${node.id}' uses Haiku model with MCP servers — tool search (lazy loading for many tools) is not supported on Haiku. Consider using Sonnet or Opus.`, - { workflowId: workflowRunId, nodeName: node.id } - ); - if (!haikuDelivered) { - getLog().error( - { nodeId: node.id, workflowRunId }, - 'dag.mcp_haiku_warning_delivery_failed' - ); - } - } - } catch (mcpErr) { - const errMsg = (mcpErr as Error).message; - getLog().error( - { nodeId: node.id, mcpPath: node.mcp, error: errMsg }, - 'dag.mcp_config_load_failed' - ); - throw new Error(`Node '${node.id}': ${errMsg}`); - } - } - // Wrap node in AgentDefinition when skills are specified - if (node.skills) { - const agentId = `dag-node-${node.id}`; - // Always include 'Skill' explicitly — SDK behavior for undefined tools is undocumented - const agentTools = claudeOptions.tools ? [...claudeOptions.tools, 'Skill'] : ['Skill']; - const agentDef: { - description: string; - prompt: string; - skills: string[]; - tools: string[]; - model?: string; - } = { - description: `DAG node '${node.id}'`, - prompt: `You have preloaded skills: ${node.skills.join(', ')}. Use them when relevant.`, - skills: node.skills, - tools: agentTools, - }; - if (claudeOptions.model) agentDef.model = claudeOptions.model; + // Build raw nodeConfig — provider translates internally + const nodeConfig: NodeConfig = { + mcp: node.mcp, + hooks: node.hooks, + skills: node.skills, + allowed_tools: node.allowed_tools, + denied_tools: node.denied_tools, + effort: node.effort ?? workflowLevelOptions.effort, + thinking: node.thinking ?? workflowLevelOptions.thinking, + sandbox: node.sandbox ?? workflowLevelOptions.sandbox, + betas: node.betas ?? workflowLevelOptions.betas, + output_format: node.output_format, + maxBudgetUsd: node.maxBudgetUsd, + systemPrompt: node.systemPrompt, + fallbackModel: fb, + }; - claudeOptions.agents = { [agentId]: agentDef }; - claudeOptions.agent = agentId; - // Ensure 'Skill' is in allowedTools for the parent session - if (!claudeOptions.allowedTools?.includes('Skill')) { - claudeOptions.allowedTools = [...(claudeOptions.allowedTools ?? []), 'Skill']; - } - getLog().info({ nodeId: node.id, skills: node.skills, agentId }, 'dag.skills_agent_created'); - } - // Inject per-project env vars (config file + DB) into subprocess env - if (config.envVars && Object.keys(config.envVars).length > 0) { - claudeOptions.env = config.envVars; - } + // Pass assistantConfig from config — provider parses internally + const assistantConfig = config.assistants[provider] ?? {}; - // Per-node overrides take precedence over workflow-level defaults; maxBudgetUsd and systemPrompt are per-node only - const effort = node.effort ?? workflowLevelOptions.effort; - if (effort !== undefined) claudeOptions.effort = effort; - const thinking = node.thinking ?? 
workflowLevelOptions.thinking; - if (thinking !== undefined) claudeOptions.thinking = thinking; - if (node.maxBudgetUsd !== undefined) claudeOptions.maxBudgetUsd = node.maxBudgetUsd; - if (node.systemPrompt !== undefined) claudeOptions.systemPrompt = node.systemPrompt; - const fallbackModel = node.fallbackModel ?? workflowLevelOptions.fallbackModel; - if (fallbackModel !== undefined) claudeOptions.fallbackModel = fallbackModel; - const betas = node.betas ?? workflowLevelOptions.betas; - if (betas !== undefined) claudeOptions.betas = betas; - const sandbox = node.sandbox ?? workflowLevelOptions.sandbox; - if (sandbox !== undefined) claudeOptions.sandbox = sandbox; - - options = Object.keys(claudeOptions).length > 0 ? claudeOptions : undefined; - } + const options: SendQueryOptions = { + ...baseOptions, + nodeConfig, + assistantConfig: assistantConfig as Record, + }; return { provider, model, options }; } @@ -717,7 +464,7 @@ async function executeNodeInternal( workflowRun: WorkflowRun, node: CommandNode | PromptNode, provider: 'claude' | 'codex', - nodeOptions: WorkflowAssistantOptions | undefined, + nodeOptions: SendQueryOptions | undefined, artifactsDir: string, logDir: string, baseBranch: string, @@ -819,13 +566,13 @@ async function executeNodeInternal( // Substitute upstream node output references const finalPrompt = substituteNodeOutputRefs(substitutedPrompt, nodeOutputs); - const aiClient = deps.getAssistantClient(provider); + const aiClient = deps.getAgentProvider(provider); const streamingMode = platform.getStreamingMode(); let nodeOutputText = ''; // Always accumulate regardless of streaming mode let structuredOutput: unknown; let newSessionId: string | undefined; - let nodeTokens: WorkflowTokenUsage | undefined; + let nodeTokens: TokenUsage | undefined; let nodeCostUsd: number | undefined; let nodeStopReason: string | undefined; let nodeNumTurns: number | undefined; @@ -836,7 +583,7 @@ async function executeNodeInternal( const nodeAbortController = new AbortController(); // Fork when resuming — leaves the source session untouched so retries are safe. const shouldForkSession = resumeSessionId !== undefined; - const nodeOptionsWithAbort: WorkflowAssistantOptions | undefined = { + const nodeOptionsWithAbort: SendQueryOptions | undefined = { ...nodeOptions, abortSignal: nodeAbortController.signal, ...(shouldForkSession ? { forkSession: true } : {}), @@ -1026,11 +773,16 @@ async function executeNodeInternal( } break; // Result is the "I'm done" signal — don't wait for subprocess to exit } else if (msg.type === 'system' && msg.content) { - // Surface MCP connection failures to the user - if (msg.content.startsWith('MCP server connection failed:')) { + // Forward provider warnings (⚠️) and MCP connection failures to the user. + // Providers yield system chunks for user-actionable issues (missing env vars, + // Haiku+MCP, structured output failures, etc.) 
+ if ( + msg.content.startsWith('MCP server connection failed:') || + msg.content.startsWith('⚠️') + ) { getLog().warn( - { nodeId: node.id, mcpStatus: msg.content }, - 'dag.mcp_server_connection_failed' + { nodeId: node.id, systemContent: msg.content }, + 'dag.provider_warning_forwarded' ); const delivered = await safeSendMessage( platform, @@ -1040,8 +792,8 @@ async function executeNodeInternal( ); if (!delivered) { getLog().error( - { nodeId: node.id, mcpStatus: msg.content, workflowRunId: workflowRun.id }, - 'dag.mcp_connection_failure_delivery_failed' + { nodeId: node.id, workflowRunId: workflowRun.id }, + 'dag.provider_warning_delivery_failed' ); } } else { @@ -1054,8 +806,10 @@ async function executeNodeInternal( // rate_limit chunks: already log.warn'd in claude.ts; not surfaced to SSE per design } - // When output_format is set and the SDK returned structured_output, - // use it instead of the concatenated assistant text (which includes prose) + // When output_format is set and the provider returned structured_output, + // use it instead of the concatenated assistant text (which includes prose). + // Each provider normalizes its own structured output onto the result chunk — + // no provider-specific branching here. if (nodeOptions?.outputFormat) { if (structuredOutput !== undefined) { try { @@ -1070,26 +824,9 @@ async function executeNodeInternal( ); } getLog().debug({ nodeId: node.id, streamingMode }, 'dag.structured_output_override'); - } else if (provider === 'codex') { - // Codex returns structured output inline in agent_message text - // (already accumulated in nodeOutputText). Validate it is valid JSON - // so downstream $nodeId.output.field references can parse it. - try { - JSON.parse(nodeOutputText); - getLog().debug({ nodeId: node.id }, 'dag.codex_structured_output_valid_json'); - } catch { - getLog().warn( - { nodeId: node.id, outputPreview: nodeOutputText.slice(0, 200) }, - 'dag.codex_structured_output_not_json' - ); - await safeSendMessage( - platform, - conversationId, - `Warning: Node '${node.id}' requested output_format but Codex returned non-JSON output. Downstream conditions referencing \`$${node.id}.output.field\` may not evaluate correctly.`, - nodeContext - ); - } } else { + // Provider did not populate structuredOutput — warn the user. + // If the provider detected invalid output, it already yielded a system warning. getLog().warn( { nodeId: node.id, workflowRunId: workflowRun.id }, 'dag.structured_output_missing' @@ -1097,7 +834,7 @@ async function executeNodeInternal( await safeSendMessage( platform, conversationId, - `Warning: Node '${node.id}' requested output_format but the SDK did not return structured output. Downstream conditions may not evaluate correctly.`, + `Warning: Node '${node.id}' requested output_format but the provider did not return structured output. Downstream conditions may not evaluate correctly.`, nodeContext ); } @@ -1519,7 +1256,10 @@ async function executeScriptNode( // Inline code execution if (node.runtime === 'bun') { cmd = 'bun'; - args = ['-e', finalScript]; + // --no-env-file prevents Bun from auto-loading .env from the execution + // cwd (the target repo). Without this, repo .env leaks into the script + // subprocess despite Archon's parent process cleanup. 
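+      // A quick shell check of the flag's effect (illustrative):
+      //   echo 'SECRET=x' > .env
+      //   bun -e 'console.log(process.env.SECRET)'                # prints "x"
+      //   bun --no-env-file -e 'console.log(process.env.SECRET)'  # prints "undefined"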
+ args = ['--no-env-file', '-e', finalScript]; } else { // uv run --with dep1 --with dep2 python -c cmd = 'uv'; @@ -1569,7 +1309,7 @@ async function executeScriptNode( args = ['run', ...withFlags, scriptDef.path]; } else { cmd = 'bun'; - args = ['run', scriptDef.path]; + args = ['--no-env-file', 'run', scriptDef.path]; } } @@ -1663,30 +1403,32 @@ async function executeScriptNode( } /** - * Build WorkflowAssistantOptions from resolved provider, model, and config. - * Caller is responsible for resolving per-node overrides before passing model. + * Build SendQueryOptions from resolved provider, model, and config. + * Uses the same nodeConfig + assistantConfig pattern as resolveNodeProviderAndModel. */ function buildLoopNodeOptions( provider: 'claude' | 'codex', model: string | undefined, - config: WorkflowConfig -): WorkflowAssistantOptions | undefined { - const codexOptions = - provider === 'codex' - ? { - modelReasoningEffort: config.assistants.codex.modelReasoningEffort, - webSearchMode: config.assistants.codex.webSearchMode, - additionalDirectories: config.assistants.codex.additionalDirectories, - } - : undefined; - - const claudeOptions = - provider === 'claude' && config.assistants.claude.settingSources - ? { settingSources: config.assistants.claude.settingSources } - : undefined; - - if (!model && !codexOptions && !claudeOptions) return undefined; - return { ...(model ? { model } : {}), ...codexOptions, ...claudeOptions }; + config: WorkflowConfig, + workflowLevelOptions?: WorkflowLevelOptions +): SendQueryOptions { + const options: SendQueryOptions = {}; + if (model) options.model = model; + if (config.envVars && Object.keys(config.envVars).length > 0) { + options.env = config.envVars; + } + options.assistantConfig = (config.assistants[provider] ?? {}) as Record; + // Pass workflow-level options as nodeConfig so providers can apply them + if (workflowLevelOptions) { + options.nodeConfig = { + effort: workflowLevelOptions.effort, + thinking: workflowLevelOptions.thinking, + sandbox: workflowLevelOptions.sandbox, + betas: workflowLevelOptions.betas, + fallbackModel: workflowLevelOptions.fallbackModel, + }; + } + return options; } /** @@ -1712,15 +1454,16 @@ async function executeLoopNode( docsDir: string, nodeOutputs: Map, config: WorkflowConfig, - issueContext?: string + issueContext?: string, + workflowLevelOptions?: WorkflowLevelOptions ): Promise { const loop = node.loop; const msgContext = { workflowId: workflowRun.id, nodeName: node.id }; // Resolve AI client — fail fast with descriptive error - let aiClient: ReturnType; + let aiClient: ReturnType; try { - aiClient = deps.getAssistantClient(workflowProvider); + aiClient = deps.getAgentProvider(workflowProvider); } catch (error) { const err = error as Error; const errorMsg = `Invalid provider '${workflowProvider}' for loop node '${node.id}'. Check workflow YAML or .archon/config.yaml. 
Original: ${err.message}`; @@ -1745,7 +1488,12 @@ async function executeLoopNode( let loopTotalCostUsd: number | undefined; let loopFinalStopReason: string | undefined; let loopTotalNumTurns: number | undefined; - const resolvedOptions = buildLoopNodeOptions(workflowProvider, workflowModel, config); + const resolvedOptions = buildLoopNodeOptions( + workflowProvider, + workflowModel, + config, + workflowLevelOptions + ); // Helper to log event store errors consistently const logEventStoreError = (err: Error, iteration: number): void => { @@ -1817,7 +1565,7 @@ async function executeLoopNode( ); const finalPrompt = substituteNodeOutputRefs(substitutedPrompt, nodeOutputs); - const iterationOptions: WorkflowAssistantOptions | undefined = { + const iterationOptions: SendQueryOptions | undefined = { ...resolvedOptions, abortSignal: iterationAbortController.signal, }; @@ -2283,7 +2031,8 @@ async function executeApprovalNode( conversationId, workflowRun.id, cwd, - workflowLevelOptions + workflowLevelOptions, + deps ); const output = await executeNodeInternal( @@ -2643,7 +2392,8 @@ export async function executeDagWorkflow( docsDir, nodeOutputs, config, - issueContext + issueContext, + workflowLevelOptions ); return { nodeId: node.id, output }; } @@ -2733,7 +2483,8 @@ export async function executeDagWorkflow( conversationId, workflowRun.id, cwd, - workflowLevelOptions + workflowLevelOptions, + deps ); // 5. Determine session — parallel or context:fresh → always fresh diff --git a/packages/workflows/src/deps.ts b/packages/workflows/src/deps.ts index ce586a177b..171c653be7 100644 --- a/packages/workflows/src/deps.ts +++ b/packages/workflows/src/deps.ts @@ -3,50 +3,37 @@ * * Defines narrow interfaces for what the workflow engine needs from external systems. * Callers in @archon/core satisfy these structurally — no adapter wrappers needed. + * + * Provider types are imported directly from @archon/providers/types (contract layer). + * No more mirror copies — single source of truth for IAgentProvider, MessageChunk, etc. */ import type { IWorkflowStore } from './store'; +import type { ModelReasoningEffort, WebSearchMode } from './schemas'; import type { - ModelReasoningEffort, - WebSearchMode, - EffortLevel, - ThinkingConfig, - SandboxSettings, -} from './schemas'; + IAgentProvider, + MessageChunk, + TokenUsage, + SendQueryOptions, + NodeConfig, + ProviderCapabilities, +} from '@archon/providers/types'; -// --------------------------------------------------------------------------- -// Workflow-local type copies — structurally identical to the originals in -// @archon/core/types, but duplicated here to avoid a circular dependency -// (@archon/workflows must not depend on @archon/core). -// Keep these in sync with their counterparts if the originals change. 
-// --------------------------------------------------------------------------- +// Re-export provider types so existing workflow engine consumers don't break +export type { + IAgentProvider, + MessageChunk, + TokenUsage, + SendQueryOptions, + NodeConfig, + ProviderCapabilities, +}; -export interface WorkflowTokenUsage { - input: number; - output: number; - total?: number; - cost?: number; -} +// Backwards compat alias — deprecated, prefer direct import from @archon/providers/types +export type WorkflowTokenUsage = TokenUsage; -export type WorkflowMessageChunk = - | { type: 'assistant'; content: string } - | { type: 'system'; content: string } - | { type: 'thinking'; content: string } - | { - type: 'result'; - sessionId?: string; - tokens?: WorkflowTokenUsage; - structuredOutput?: unknown; - isError?: boolean; - errorSubtype?: string; - cost?: number; - stopReason?: string; - numTurns?: number; - modelUsage?: Record; - } - | { type: 'rate_limit'; rateLimitInfo: Record } - | { type: 'tool'; toolName: string; toolInput?: Record } - | { type: 'tool_result'; toolName: string; toolOutput: string } - | { type: 'workflow_dispatch'; workerConversationId: string; workflowName: string }; +// --------------------------------------------------------------------------- +// Platform-specific types (NOT mirrors — unique to workflow engine) +// --------------------------------------------------------------------------- export interface WorkflowMessageMetadata { category?: @@ -60,144 +47,8 @@ export interface WorkflowMessageMetadata { workflowResult?: { workflowName: string; runId: string }; } -export interface WorkflowAssistantOptions { - model?: string; - modelReasoningEffort?: ModelReasoningEffort; - webSearchMode?: WebSearchMode; - additionalDirectories?: string[]; - /** - * Controls which CLAUDE.md files the SDK loads. - * Mirrors Claude Agent SDK Options.settingSources. - * Claude only — ignored for Codex. - */ - settingSources?: ('project' | 'user')[]; - tools?: string[]; - disallowedTools?: string[]; - outputFormat?: { type: 'json_schema'; schema: Record }; - /** - * SDK hooks callbacks. Structural match for Partial>. - * Inline type avoids @archon/workflows depending on @anthropic-ai/claude-agent-sdk. - * Claude only — ignored for Codex. - */ - hooks?: Partial< - Record< - string, - { - matcher?: string; - hooks: (( - input: unknown, - toolUseID: string | undefined, - options: { signal: AbortSignal } - ) => Promise)[]; - timeout?: number; - }[] - > - >; - /** - * MCP server configuration. Structural match for Record. - * Discriminated union mirrors the SDK types so that WorkflowAssistantOptions is - * assignable to AssistantRequestOptions without casts. - * @archon/workflows must not depend on @anthropic-ai/claude-agent-sdk. - * Claude only — ignored for Codex. - */ - mcpServers?: Record< - string, - | { type?: 'stdio'; command: string; args?: string[]; env?: Record } - | { type: 'sse'; url: string; headers?: Record } - | { type: 'http'; url: string; headers?: Record } - >; - /** - * Tools to auto-allow without permission prompts. - * Used for MCP tool wildcards (e.g., 'mcp__github__*'). - * Claude only — ignored for Codex. - */ - allowedTools?: string[]; - /** - * Custom subagent definitions. Structural match for Record. - * Used when a DAG node has skills — the node is wrapped in an AgentDefinition. - * @archon/workflows must not depend on @anthropic-ai/claude-agent-sdk. - * Claude only — ignored for Codex. 
- */ - agents?: Record< - string, - { - description: string; - prompt: string; - tools?: string[]; - model?: string; - skills?: string[]; - } - >; - /** - * Name of the agent definition to use for the main thread. - * References a key in `agents`. Claude only. - */ - agent?: string; - /** - * Additional env vars to merge into the Claude subprocess environment. - * Merged after buildSubprocessEnv() (auth tokens conditionally filtered): { ...buildSubprocessEnv(), ...env }. - * Claude only — ignored for Codex (Codex SDK does not expose env injection). - */ - env?: Record; - abortSignal?: AbortSignal; - /** - * When false (default), skips writing session transcript to ~/.claude/projects/. - * Claude Agent SDK v0.2.74+. The SDK default is true, but Archon overrides it to false - * to avoid disk pollution. Set to true only when session persistence is explicitly needed. - */ - persistSession?: boolean; - /** - * When true, the SDK copies the prior session's history into a new session file - * before appending, leaving the original untouched. Use with `resume` to safely - * preserve conversation context without risk of corrupting the source session. - * Claude only — ignored for Codex. - */ - forkSession?: boolean; - /** - * Controls reasoning depth for Claude. Claude only — ignored for Codex. - * Maps to SDK Options.effort. - */ - effort?: EffortLevel; - /** - * Controls Claude's thinking/reasoning behavior. Claude only — ignored for Codex. - * Maps to SDK Options.thinking (ThinkingConfig). - * String shorthand is resolved at the schema level before reaching here. - */ - thinking?: ThinkingConfig; - /** - * Maximum USD cost for this node. SDK returns error_max_budget_usd if exceeded. - * Claude only — ignored for Codex. - */ - maxBudgetUsd?: number; - /** - * Per-node system prompt override. Replaces the default claude_code preset. - * Claude only — ignored for Codex. - */ - systemPrompt?: string; - /** - * Fallback model if primary model fails. Claude only — ignored for Codex. - */ - fallbackModel?: string; - /** - * SDK beta features to enable (e.g., 'context-1m-2025-08-07'). - * Claude only — ignored for Codex. - */ - betas?: string[]; - /** - * OS-level sandbox restrictions for the Claude subprocess. - * Layers on top of worktree isolation — NOT a replacement for it. - * Claude only — ignored for Codex. - * Structural match for SDK SandboxSettings. - */ - sandbox?: SandboxSettings; -} - // --------------------------------------------------------------------------- // Narrow platform interface (subset of IPlatformAdapter) -// -// Intentionally excludes ensureThread(), start(), and stop() — the workflow -// engine operates within an already-established conversation context and -// never manages platform lifecycle or threading itself. 
// ---------------------------------------------------------------------------
// Narrow platform interface (subset of IPlatformAdapter)
-//
-// Intentionally excludes ensureThread(), start(), and stop() — the workflow
-// engine operates within an already-established conversation context and
-// never manages platform lifecycle or threading itself.
// ---------------------------------------------------------------------------

export interface IWorkflowPlatform {
@@ -208,32 +59,12 @@ export interface IWorkflowPlatform {
  ): Promise<void>;
  getStreamingMode(): 'stream' | 'batch';
  getPlatformType(): string;
-  sendStructuredEvent?(conversationId: string, event: WorkflowMessageChunk): Promise<void>;
+  sendStructuredEvent?(conversationId: string, event: MessageChunk): Promise<void>;
  emitRetract?(conversationId: string): Promise<void>;
}

-// ---------------------------------------------------------------------------
-// Narrow assistant client interface (subset of IAssistantClient)
-// ---------------------------------------------------------------------------
-
-export interface IWorkflowAssistantClient {
-  sendQuery(
-    prompt: string,
-    cwd: string,
-    resumeSessionId?: string,
-    options?: WorkflowAssistantOptions
-  ): AsyncGenerator<WorkflowMessageChunk>;
-  getType(): string;
-}
-
-export type AssistantClientFactory = (provider: 'claude' | 'codex') => IWorkflowAssistantClient;
-
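The deleted factory's replacement, AgentProviderFactory, appears further down and returns a full IAgentProvider rather than the narrow client. Wiring it into WorkflowDeps might look like the sketch below; the provider constructors are hypothetical, only the deps shape comes from this diff:

```ts
import type { IAgentProvider } from '@archon/providers/types';
import type { WorkflowDeps } from './types';

// Hypothetical constructors for illustration; the diff does not show how
// @archon/providers actually instantiates its providers.
declare function createClaudeProvider(): IAgentProvider;
declare function createCodexProvider(): IAgentProvider;

const providerDeps: Pick<WorkflowDeps, 'getAgentProvider'> = {
  getAgentProvider: (provider) =>
    provider === 'claude' ? createClaudeProvider() : createCodexProvider(),
};
```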
// ---------------------------------------------------------------------------
// Narrow config interface (subset of MergedConfig)
-//
-// Only includes fields the workflow engine actually reads. Platform-level
-// concerns (streaming modes, concurrency, botName, paths, copyDefaults) are
-// deliberately excluded — those are @archon/core's responsibility.
// ---------------------------------------------------------------------------

export interface WorkflowConfig {
@@ -241,10 +72,6 @@ export interface WorkflowConfig {
  assistant: 'claude' | 'codex';
  baseBranch?: string;
  docsPath?: string;
-  /**
-   * Merged per-project env vars (config file + DB). Injected into Options.env on Claude SDK calls.
-   * Populated by executeWorkflow — loadConfig returns file-based vars; DB vars merged on top after.
-   */
  envVars?: Record<string, string>;
  commands: { folder?: string };
  defaults?: {
@@ -254,7 +81,6 @@
  assistants: {
    claude: {
      model?: string;
-      /** Controls which CLAUDE.md files are loaded by the SDK. Claude only. */
      settingSources?: ('project' | 'user')[];
    };
    codex: {
@@ -266,12 +92,18 @@
  };
}

+// ---------------------------------------------------------------------------
+// Agent provider factory type
+// ---------------------------------------------------------------------------
+
+export type AgentProviderFactory = (provider: 'claude' | 'codex') => IAgentProvider;
+
// ---------------------------------------------------------------------------
// WorkflowDeps — the single injection point
// ---------------------------------------------------------------------------

export interface WorkflowDeps {
  store: IWorkflowStore;
-  getAssistantClient: AssistantClientFactory;
+  getAgentProvider: AgentProviderFactory;
  loadConfig: (cwd: string) => Promise<WorkflowConfig>;
}
diff --git a/packages/workflows/src/executor-preamble.test.ts b/packages/workflows/src/executor-preamble.test.ts
index fd2b44ec3b..822759040f 100644
--- a/packages/workflows/src/executor-preamble.test.ts
+++ b/packages/workflows/src/executor-preamble.test.ts
@@ -114,7 +114,7 @@ function makeDeps(store?: IWorkflowStore): WorkflowDeps {
        commands: { folder: '' },
      })
    ),
-    createAssistantClient: mock(() => ({
+    getAgentProvider: mock(() => ({
      run: mock(async () => {}),
    })),
  } as unknown as WorkflowDeps;
diff --git a/packages/workflows/src/executor-shared.ts b/packages/workflows/src/executor-shared.ts
index 0537609417..e1978ae106 100644
--- a/packages/workflows/src/executor-shared.ts
+++ b/packages/workflows/src/executor-shared.ts
@@ -67,13 +67,8 @@ export function matchesPattern(message: string, patterns: string[]): boolean {
 * Classify an error to determine if it's transient (can retry) or fatal (should fail).
 * FATAL patterns take priority over TRANSIENT patterns to prevent an error message
 * containing both (e.g. "unauthorized: process exited with code 1") from being retried.
- *
- * First-party named error types are checked by name (immune to message rewording).
*/ export function classifyError(error: Error): ErrorType { - // Named first-party errors checked by name — immune to message rewording - if (error.name === 'EnvLeakError') return 'FATAL'; - const message = error.message.toLowerCase(); if (matchesPattern(message, FATAL_PATTERNS)) { diff --git a/packages/workflows/src/executor.test.ts b/packages/workflows/src/executor.test.ts index 0a91ac8299..e3acb784b2 100644 --- a/packages/workflows/src/executor.test.ts +++ b/packages/workflows/src/executor.test.ts @@ -101,7 +101,7 @@ function makeDeps(store?: IWorkflowStore): WorkflowDeps { commands: { folder: '' }, }) ), - createAssistantClient: mock(() => ({ + getAgentProvider: mock(() => ({ run: mock(async () => {}), })), } as unknown as WorkflowDeps; @@ -291,7 +291,7 @@ describe('executeWorkflow', () => { docsPath: 'packages/docs-web/src/content/docs', }) ), - createAssistantClient: mock(() => ({ + getAgentProvider: mock(() => ({ run: mock(async () => {}), })), } as unknown as WorkflowDeps; diff --git a/packages/workflows/src/hooks.test.ts b/packages/workflows/src/hooks.test.ts index 6bdaa6085a..eac6076bac 100644 --- a/packages/workflows/src/hooks.test.ts +++ b/packages/workflows/src/hooks.test.ts @@ -1,6 +1,6 @@ import { describe, test, expect } from 'bun:test'; import { parseNodeHooks } from './loader'; -import { buildSDKHooksFromYAML } from './dag-executor'; +import { buildSDKHooksFromYAML } from '@archon/providers/claude/provider'; import type { WorkflowNodeHooks } from './schemas'; import { parseWorkflow } from './loader'; diff --git a/packages/workflows/src/script-node-deps.test.ts b/packages/workflows/src/script-node-deps.test.ts index 5387daf029..1c1fbf5a81 100644 --- a/packages/workflows/src/script-node-deps.test.ts +++ b/packages/workflows/src/script-node-deps.test.ts @@ -109,7 +109,7 @@ const mockSendQuery = mock(function* () { yield { type: 'result', sessionId: 'session-id' }; }); -const mockGetAssistantClient = mock(() => ({ +const mockGetAgentProvider = mock(() => ({ sendQuery: mockSendQuery, getType: () => 'claude', })); @@ -117,7 +117,7 @@ const mockGetAssistantClient = mock(() => ({ function createMockDeps(): WorkflowDeps { return { store: createMockStore(), - getAssistantClient: mockGetAssistantClient, + getAgentProvider: mockGetAgentProvider, loadConfig: mock(() => Promise.resolve({ assistant: 'claude' as const, @@ -173,7 +173,7 @@ describe('script node deps field — command construction', () => { await mkdir(testDir, { recursive: true }); mockExecFileAsync.mockClear(); mockSendQuery.mockClear(); - mockGetAssistantClient.mockClear(); + mockGetAgentProvider.mockClear(); }); afterEach(async () => { @@ -287,7 +287,7 @@ describe('script node deps field — command construction', () => { expect(args).toEqual(['run', 'python', '-c', 'print("no deps")']); }); - it('bun inline with deps uses bun -e (no extra flags — bun auto-installs)', async () => { + it('bun inline with deps uses bun --no-env-file -e (no extra dep flags — bun auto-installs)', async () => { const node: ScriptNode = { id: 'bun-with-deps', script: 'import { z } from "zod"; console.log(z.string().parse("hello"))', @@ -316,13 +316,13 @@ describe('script node deps field — command construction', () => { expect(scriptCall).toBeDefined(); const [cmd, args] = scriptCall as [string, string[]]; expect(cmd).toBe('bun'); - // No --packages or extra flags — bun auto-installs at runtime - expect(args).toEqual(['-e', node.script]); + // --no-env-file prevents repo .env auto-load; no dep flags — bun auto-installs + 
expect(args).toEqual(['--no-env-file', '-e', node.script]); expect(args).not.toContain('--packages'); expect(args).not.toContain('--with'); }); - it('bun inline without deps uses bun -e (no extra flags)', async () => { + it('bun inline without deps uses bun --no-env-file -e', async () => { const node: ScriptNode = { id: 'bun-no-deps', script: 'console.log("hello")', @@ -350,7 +350,7 @@ describe('script node deps field — command construction', () => { expect(scriptCall).toBeDefined(); const [cmd, args] = scriptCall as [string, string[]]; expect(cmd).toBe('bun'); - expect(args).toEqual(['-e', 'console.log("hello")']); + expect(args).toEqual(['--no-env-file', '-e', 'console.log("hello")']); }); it('uv named script with deps uses uv run --with flags', async () => { diff --git a/scripts/pr-maintenance-cron.sh b/scripts/pr-maintenance-cron.sh new file mode 100755 index 0000000000..27f01ec8a2 --- /dev/null +++ b/scripts/pr-maintenance-cron.sh @@ -0,0 +1,90 @@ +#!/usr/bin/env bash +# pr-maintenance-cron.sh — Run from cron every 15 minutes. +# Zero AI cost when nothing to do. Processes one PR per project per run. +# +# Usage: +# ./scripts/pr-maintenance-cron.sh # all projects +# ./scripts/pr-maintenance-cron.sh cosmic-match reli # specific projects +# +# Crontab entry: +# */15 * * * * /mnt/ext-fast/archon/scripts/pr-maintenance-cron.sh >> /tmp/pr-maintenance.log 2>&1 + +set -euo pipefail + +# Cron runs with a minimal PATH (/usr/bin:/bin). archon, gh, bun, git +# often live in user-local bins; prepend them so the script works from +# both cron and an interactive shell. +export PATH="$HOME/.bun/bin:$HOME/.local/bin:/usr/local/bin:$PATH" + +# --- Configuration --- +source /home/asiri/gt/mayor/scripts/lib/archon-projects.sh +load_archon_projects DEFAULT_PROJECTS +BASE_DIR="/mnt/ext-fast" +LOG_PREFIX="[pr-maintenance]" + +# Use arguments if provided, otherwise all projects +if [ $# -gt 0 ]; then + PROJECTS=("$@") +else + PROJECTS=("${DEFAULT_PROJECTS[@]}") +fi + +log() { echo "$(date -Is) $LOG_PREFIX $*"; } + +for PROJECT in "${PROJECTS[@]}"; do + REPO_DIR="$BASE_DIR/$PROJECT" + + if [ ! -d "$REPO_DIR/.git" ]; then + log "$PROJECT: not a git repo, skipping" + continue + fi + + cd "$REPO_DIR" + + # --- Phase 0: Promote CLEAN draft PRs to ready-for-review --- + # Archon workflows create PRs as drafts by default. When CI is green the + # draft has nothing left to gate on, but the Phase 1 merge filter skips + # drafts — so left alone a green draft sits forever. Flip it to ready so + # Phase 1 can merge it on this same tick. + GREEN_DRAFTS=$(gh pr list --state open --json number,mergeStateStatus,isDraft \ + --jq '[.[] | select(.isDraft == true and .mergeStateStatus == "CLEAN")] | .[].number' 2>/dev/null || true) + + for PR in $GREEN_DRAFTS; do + log "$PROJECT: promoting draft PR #$PR to ready (CI CLEAN)" + if ! gh pr ready "$PR" 2>>"/tmp/pr-maintenance-errors.log"; then + log "$PROJECT: PR #$PR — could not mark ready (see /tmp/pr-maintenance-errors.log)" + fi + done + + # --- Phase 1: Merge CLEAN PRs directly (bash only, zero AI cost) --- + CLEAN_PRS=$(gh pr list --state open --json number,mergeStateStatus,isDraft \ + --jq '[.[] | select(.isDraft == false and .mergeStateStatus == "CLEAN")] | .[].number' 2>/dev/null || true) + + for PR in $CLEAN_PRS; do + log "$PROJECT: PR #$PR is CLEAN — merging directly" + # Surface stderr to the cron log so actual failures (permissions, branch + # protection, etc.) are diagnosable on the next tick instead of vanishing. + if ! 
gh pr merge "$PR" --squash --auto --delete-branch 2>&1; then + if ! gh pr merge "$PR" --squash --delete-branch 2>&1; then + log "$PROJECT: PR #$PR — could not merge, skipping" + fi + fi + done + + # --- Phase 2: Check for one PR needing AI attention --- + ACTIONABLE=$(gh pr list --state open --json number,mergeStateStatus,isDraft \ + --jq '[.[] | select(.isDraft == false and (.mergeStateStatus == "BEHIND" or .mergeStateStatus == "DIRTY" or .mergeStateStatus == "UNSTABLE" or .mergeStateStatus == "UNKNOWN"))] | .[0].number // empty' 2>/dev/null || true) + + if [ -z "$ACTIONABLE" ]; then + log "$PROJECT: no PRs need AI maintenance" + continue + fi + + log "$PROJECT: PR #$ACTIONABLE needs maintenance — launching archon" + archon workflow run archon-pr-maintenance --cwd "$REPO_DIR" "PR #$ACTIONABLE" & + +done + +# Wait for any background archon runs to complete +wait +log "Done"
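Taken together, the script's three phases reduce to a small decision table over isDraft and mergeStateStatus. A TypeScript restatement of that table, illustrative only and not part of the patch (BLOCKED stands in for any status the script deliberately leaves alone):

```ts
type MergeStateStatus = 'CLEAN' | 'BEHIND' | 'DIRTY' | 'UNSTABLE' | 'UNKNOWN' | 'BLOCKED';
type Action = 'promote-then-merge' | 'merge' | 'ai-maintenance' | 'leave';

// Phase 0: green drafts get promoted (and then merged by Phase 1).
// Phase 1: clean non-drafts merge directly, zero AI cost.
// Phase 2: one broken non-draft per project goes to the AI workflow.
function triage(pr: { isDraft: boolean; mergeStateStatus: MergeStateStatus }): Action {
  if (pr.mergeStateStatus === 'CLEAN') return pr.isDraft ? 'promote-then-merge' : 'merge';
  if (pr.isDraft) return 'leave'; // non-clean drafts stay untouched
  switch (pr.mergeStateStatus) {
    case 'BEHIND':
    case 'DIRTY':
    case 'UNSTABLE':
    case 'UNKNOWN':
      return 'ai-maintenance';
    default:
      return 'leave'; // e.g. BLOCKED: waiting on review, nothing to automate
  }
}
```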