diff --git a/.archon/workflows/defaults/archon-architect.yaml b/.archon/workflows/defaults/archon-architect.yaml index a41a75cd33..3c1c42dcdf 100644 --- a/.archon/workflows/defaults/archon-architect.yaml +++ b/.archon/workflows/defaults/archon-architect.yaml @@ -98,7 +98,7 @@ nodes: - For each finding: file, what's wrong, why it matters, estimated effort depends_on: [scan-metrics] context: fresh - denied_tools: [Write, Edit, Bash] + denied_tools: [Edit, Bash] hooks: PostToolUse: - matcher: "Read" diff --git a/.gitignore b/.gitignore index 4b225843ea..07268da412 100644 --- a/.gitignore +++ b/.gitignore @@ -109,3 +109,6 @@ packages/server/.env skills-lock.json test-results/ .archon/ralph/ + +# Unrelated local project +md-quick-view/ diff --git a/eslint.config.mjs b/eslint.config.mjs index 152c4245dd..33e1d1b3fd 100644 --- a/eslint.config.mjs +++ b/eslint.config.mjs @@ -28,6 +28,7 @@ export default tseslint.config( 'packages/web/components.json', 'packages/web/src/components/ui/**', // shadcn/ui auto-generated components 'packages/web/src/lib/utils.ts', // shadcn/ui utility file + 'md-quick-view/**', // Unrelated local project ], }, diff --git a/packages/workflows/src/defaults/bundled-defaults.generated.ts b/packages/workflows/src/defaults/bundled-defaults.generated.ts index 3c74c57b04..7c7e41db58 100644 --- a/packages/workflows/src/defaults/bundled-defaults.generated.ts +++ b/packages/workflows/src/defaults/bundled-defaults.generated.ts @@ -56,7 +56,7 @@ export const BUNDLED_COMMANDS: Record = { // Bundled default workflows (20 total) export const BUNDLED_WORKFLOWS: Record = { "archon-adversarial-dev": "name: archon-adversarial-dev\ndescription: |\n Use when: User wants to build a complete application from scratch using adversarial development.\n Triggers: \"adversarial dev\", \"adversarial development\", \"build with adversarial\", \"gan dev\",\n \"adversarial build\", \"build app adversarially\", \"adversarial coding\".\n Does: Three-role GAN-inspired development — Planner creates spec with sprints, then a state-machine\n loop alternates between Generator (builds code) and Evaluator (attacks it) with hard pass/fail\n thresholds. The evaluator's job is to BREAK what the generator builds. If any criterion scores\n below 7/10, the sprint goes back to the generator with adversarial feedback. Stops on sprint\n failure after max retries.\n NOT for: Bug fixes, PR reviews, refactoring existing code, simple one-off tasks.\n\n Based on Anthropic's harness design article for long-running application development.\n Separates planning, building, and evaluation into distinct roles with adversarial tension.\nprovider: claude\nmodel: sonnet\n\nnodes:\n # ─── Phase 1: Planning ───────────────────────────────────────────────\n - id: plan\n prompt: |\n You are a product planning expert. Your job is to take a short user prompt and expand it\n into a comprehensive product specification.\n\n ## User Request\n\n $ARGUMENTS\n\n ## Your Task\n\n Write a comprehensive product specification to the file `$ARTIFACTS_DIR/spec.md` using the Write tool.\n\n The spec MUST include ALL of the following sections:\n\n ### 1. Product Overview\n What the product does, who it's for, core value proposition.\n\n ### 2. Tech Stack\n Specific technologies, frameworks, and libraries. Be opinionated — pick concrete choices,\n not \"a modern framework.\" Include exact package names and versions where relevant.\n\n ### 3. Design Language\n Visual style, specific color hex codes, typography choices, component patterns, spacing system.\n\n ### 4. Feature List\n Every feature organized by priority. Be exhaustive.\n\n ### 5. Sprint Plan\n Features broken into 3-6 sprints, ordered by dependency and importance:\n - **Sprint 1** should establish the foundation (project setup, core data models, basic UI shell)\n - Each subsequent sprint builds on the previous\n - Label each sprint clearly: \"Sprint 1: Foundation\", \"Sprint 2: Core Features\", etc.\n - List the specific features/deliverables for each sprint\n\n Be specific and opinionated. The more concrete the spec (exact API paths, specific color codes,\n named libraries), the better the generator can build and the evaluator can test.\n\n IMPORTANT: Write the spec to `$ARTIFACTS_DIR/spec.md` using the Write tool. Do NOT just output\n it as conversation text.\n allowed_tools: [Read, Write, Glob, Grep]\n\n # ─── Phase 2: Workspace Initialization ───────────────────────────────\n - id: init-workspace\n depends_on: [plan]\n bash: |\n ARTIFACTS=\"$ARTIFACTS_DIR\"\n\n # Create directory structure for harness communication\n mkdir -p \"$ARTIFACTS/contracts\"\n mkdir -p \"$ARTIFACTS/feedback\"\n mkdir -p \"$ARTIFACTS/app\"\n\n # Initialize isolated git repo in app directory\n cd \"$ARTIFACTS/app\"\n git init -q\n git commit --allow-empty -m \"Initial commit: adversarial-dev workspace\" -q\n\n # Extract sprint count from spec (find highest \"Sprint N\" reference)\n SPEC=\"$ARTIFACTS/spec.md\"\n SPRINT_COUNT=3\n if [ -f \"$SPEC\" ]; then\n FOUND=$(grep -ioE 'sprint\\s+[0-9]+' \"$SPEC\" | grep -oE '[0-9]+' | sort -n | tail -1)\n if [ -n \"$FOUND\" ] && [ \"$FOUND\" -ge 1 ] 2>/dev/null; then\n SPRINT_COUNT=$FOUND\n fi\n if [ \"$SPRINT_COUNT\" -gt 10 ]; then\n SPRINT_COUNT=10\n fi\n fi\n\n # Write initial state machine file\n cat > \"$ARTIFACTS/state.json\" << 'STATEEOF'\n {\n \"phase\": \"negotiating\",\n \"sprint\": 1,\n \"totalSprints\": SPRINT_COUNT_PLACEHOLDER,\n \"retry\": 0,\n \"maxRetries\": 3,\n \"passThreshold\": 7,\n \"completedSprints\": [],\n \"status\": \"running\"\n }\n STATEEOF\n sed -i \"s/SPRINT_COUNT_PLACEHOLDER/$SPRINT_COUNT/\" \"$ARTIFACTS/state.json\"\n\n echo \"{\\\"totalSprints\\\": $SPRINT_COUNT, \\\"appDir\\\": \\\"$ARTIFACTS/app\\\", \\\"artifactsDir\\\": \\\"$ARTIFACTS\\\"}\"\n timeout: 30000\n\n # ─── Phase 3: Adversarial Sprint Loop ────────────────────────────────\n #\n # State machine driven by $ARTIFACTS_DIR/state.json\n # Each iteration plays ONE role: negotiator, generator, or evaluator\n # fresh_context ensures genuine separation between roles\n #\n - id: adversarial-sprint\n depends_on: [init-workspace]\n idle_timeout: 600000\n model: claude-opus-4-6[1m]\n loop:\n prompt: |\n # Adversarial Development — Sprint Loop\n\n You are part of a GAN-inspired adversarial development system with three distinct roles.\n Each iteration you play ONE role, determined by the current phase in the state file.\n\n ## FIRST: Read State\n\n Read `$ARTIFACTS_DIR/state.json` to determine:\n - `phase` — which role you play this iteration\n - `sprint` — current sprint number\n - `totalSprints` — how many sprints total\n - `retry` — current retry attempt (0 = first try)\n - `maxRetries` — max retries before hard failure (default 3)\n - `passThreshold` — minimum score to pass (default 7)\n\n Then read `$ARTIFACTS_DIR/spec.md` for product context.\n\n ## Directory Layout\n\n - App source code: `$ARTIFACTS_DIR/app/`\n - Sprint contracts: `$ARTIFACTS_DIR/contracts/sprint-{N}.json`\n - Evaluation feedback: `$ARTIFACTS_DIR/feedback/sprint-{N}-round-{R}.json`\n - State machine: `$ARTIFACTS_DIR/state.json`\n\n ---\n\n ## ROLE: CONTRACT NEGOTIATOR (phase = \"negotiating\")\n\n You negotiate the success criteria for the current sprint. Play BOTH sides sequentially:\n\n **Step 1 — Generator's Proposal:**\n Read the spec carefully. Identify what Sprint {N} should deliver based on the sprint plan.\n Propose a sprint contract with 5-15 specific, testable criteria.\n\n Each criterion MUST be concrete and verifiable. Examples:\n - GOOD: \"GET /api/tasks returns 200 with JSON array; each item has id (number), title (string), status (string), createdAt (ISO date)\"\n - GOOD: \"Clicking the Add Task button opens a modal with title input, priority dropdown (low/medium/high), and due date picker\"\n - BAD: \"The API works well\"\n - BAD: \"Tasks can be managed\"\n\n **Step 2 — Evaluator's Tightening:**\n Now review your proposal as an adversary. For EACH criterion ask:\n - Is it specific enough to test programmatically?\n - What edge cases are missing? (empty inputs, special characters, concurrent requests)\n - Is the bar high enough, or would sloppy code pass?\n\n Tighten vague criteria. Add edge cases. Raise the bar.\n\n **Write the final contract** to `$ARTIFACTS_DIR/contracts/sprint-{N}.json`:\n ```json\n {\n \"sprintNumber\": ,\n \"features\": [\"feature1\", \"feature2\", ...],\n \"criteria\": [\n {\n \"name\": \"short-kebab-name\",\n \"description\": \"Specific, testable description of what must be true\",\n \"threshold\": 7\n }\n ]\n }\n ```\n\n **Update state.json**: Set `\"phase\": \"building\"`. Keep all other fields unchanged.\n\n ---\n\n ## ROLE: GENERATOR (phase = \"building\")\n\n You are a software engineer. Build features that MUST survive an adversarial evaluator\n who will actively try to break your code.\n\n **Read these files:**\n 1. `$ARTIFACTS_DIR/spec.md` — full product spec (design language, tech stack, all features)\n 2. `$ARTIFACTS_DIR/contracts/sprint-{N}.json` — the contract you must satisfy\n 3. If `retry` > 0: read `$ARTIFACTS_DIR/feedback/sprint-{N}-round-{R-1}.json` for the\n evaluator's previous feedback\n\n **If this is a RETRY (retry > 0):**\n Read the feedback CAREFULLY. Every failed criterion must be addressed.\n - If scores were close (5-6) and trending up: REFINE your approach\n - If scores were low (1-4) or the approach is fundamentally broken: PIVOT to a new strategy\n - Address EVERY feedback item — the evaluator WILL check\n - Re-verify each fix by running the code before committing\n\n **Build rules:**\n - All code goes in `$ARTIFACTS_DIR/app/`\n - Build ONE feature at a time, verify it works, then commit:\n ```bash\n cd $ARTIFACTS_DIR/app && git add -A && git commit -m \"feat: description of what was built\"\n ```\n - Install dependencies as needed (npm/bun/pip/etc)\n - Test your code — start the server, hit the endpoints, verify the UI renders\n - Think about what the evaluator will attack: edge cases, error handling, input validation\n - Build defensively — the evaluator's job is to break you\n\n **Update state.json**: Set `\"phase\": \"evaluating\"`. Keep all other fields unchanged.\n\n ---\n\n ## ROLE: EVALUATOR (phase = \"evaluating\")\n\n You are an ADVERSARIAL QA agent. Your mandate is to BREAK what the generator built.\n You are not helpful. You are not generous. You are an attacker.\n\n **CRITICAL CONSTRAINTS:**\n - You are READ-ONLY for source code. NEVER use Write or Edit on files in `$ARTIFACTS_DIR/app/`.\n - You MAY use Bash to run the app, curl endpoints, run test scripts, check behavior.\n - You MUST kill any background processes (servers, watchers) you start BEFORE finishing.\n Use: `pkill -f \"node\\|bun\\|python\\|npm\" 2>/dev/null || true`\n - You MUST score EVERY criterion in the contract. No skipping.\n\n **Scoring guidelines:**\n - **9-10**: Exceptional. Works perfectly including edge cases the contract didn't mention.\n - **7-8**: Solid. Meets the criterion as stated. Minor polish issues at most.\n - **5-6**: Partial. Core functionality exists but fails important edge cases or has bugs.\n - **3-4**: Weak. Barely functional. Major gaps.\n - **1-2**: Broken. Does not work or is not implemented.\n\n Do NOT grade on a curve. Do NOT give benefit of the doubt. A 7 means \"genuinely meets the bar.\"\n If something is broken, say it's broken.\n\n **Read**: `$ARTIFACTS_DIR/contracts/sprint-{N}.json` for the criteria.\n\n **For each criterion:**\n 1. Read the relevant source code\n 2. Run the application (start server, test endpoints, check rendered UI)\n 3. Try to BREAK it — invalid inputs, missing fields, edge cases, error handling gaps\n 4. Score it honestly\n\n **Write evaluation** to `$ARTIFACTS_DIR/feedback/sprint-{N}-round-{R}.json`:\n ```json\n {\n \"passed\": = passThreshold, false otherwise>,\n \"scores\": {\n \"criterion-name\": ,\n ...\n },\n \"feedback\": [\n {\n \"criterion\": \"criterion-name\",\n \"score\": <1-10>,\n \"details\": \"Specific findings. Include file paths, line numbers, exact error messages, curl commands that failed.\"\n }\n ],\n \"overallSummary\": \"What worked, what didn't, what the generator must fix.\"\n }\n ```\n\n **Determine pass/fail** — `passed` is `true` ONLY if every single score >= `passThreshold`.\n\n **Update state.json based on result:**\n\n **If PASSED (all criteria >= threshold):**\n - Add current sprint number to `completedSprints` array\n - If `sprint` < `totalSprints`: set `\"phase\": \"negotiating\"`, increment `\"sprint\"` by 1, set `\"retry\": 0`\n - If `sprint` == `totalSprints`: set `\"phase\": \"complete\"`, set `\"status\": \"complete\"`\n\n **If FAILED:**\n - If `retry` < `maxRetries`: set `\"phase\": \"building\"`, increment `\"retry\"` by 1\n - If `retry` >= `maxRetries`: set `\"phase\": \"failed\"`, set `\"status\": \"failed\"`\n\n **IMPORTANT**: Kill all background processes before finishing:\n ```bash\n pkill -f \"node|bun|python|npm|next|vite|webpack\" 2>/dev/null || true\n ```\n\n ---\n\n ## COMPLETION\n\n After updating state.json, check the `status` field:\n - If `\"status\": \"complete\"` → all sprints passed! Output: `ALL_SPRINTS_COMPLETE`\n - If `\"status\": \"failed\"` → sprint failed after max retries. Output: `ALL_SPRINTS_COMPLETE`\n - If `\"status\": \"running\"` → more work to do. Do NOT output any completion signal.\n\n until: ALL_SPRINTS_COMPLETE\n max_iterations: 60\n fresh_context: true\n until_bash: |\n grep -qE '\"status\"\\s*:\\s*\"(complete|failed)\"' \"$ARTIFACTS_DIR/state.json\"\n\n # ─── Phase 4: Report ─────────────────────────────────────────────────\n - id: report\n depends_on: [adversarial-sprint]\n trigger_rule: all_done\n context: fresh\n model: haiku\n prompt: |\n You are a project reporter. Generate a comprehensive summary of the adversarial development run.\n\n ## Read ALL of these files:\n 1. `$ARTIFACTS_DIR/state.json` — final state (tells you success/failure, sprint count)\n 2. `$ARTIFACTS_DIR/spec.md` — the original product spec\n 3. All files in `$ARTIFACTS_DIR/contracts/` — sprint contracts (use Glob to find them)\n 4. All files in `$ARTIFACTS_DIR/feedback/` — evaluation results (use Glob to find them)\n\n ## Generate a report covering:\n\n ### Build Summary\n - What application was built (from the spec)\n - Final status: did all sprints pass or did it fail? On which sprint?\n - Total sprints completed vs planned\n\n ### Per-Sprint Breakdown\n For each sprint that was attempted:\n - What the contract required (features + key criteria)\n - How many attempts were needed (retry count)\n - Final scores for each criterion\n - Key feedback that drove retries and improvements\n\n ### Quality Metrics\n - Average score across all final-round criteria\n - Which criteria required the most retries\n - Where the adversarial evaluator pushed quality the highest\n\n ### How to Run\n - The application code lives in: `$ARTIFACTS_DIR/app/`\n - Include the tech stack and how to start the app (from the spec)\n - Include any setup steps (install deps, env vars, etc.)\n\n Write this report to `$ARTIFACTS_DIR/report.md` AND output it as your response so the user\n sees it directly.\n allowed_tools: [Read, Write, Glob, Grep]\n", - "archon-architect": "name: archon-architect\ndescription: |\n Use when: User wants an architectural sweep, complexity reduction, or codebase health improvement.\n Triggers: \"architect\", \"simplify codebase\", \"reduce complexity\", \"architectural sweep\",\n \"clean up architecture\", \"codebase health\", \"fix architecture\".\n Does: Scans codebase metrics -> analyzes architecture with principled lens -> plans targeted\n simplifications -> executes fixes with self-review loops (hooks) -> validates -> creates PR.\n NOT for: Single-file fixes, feature development, bug fixes, PR reviews.\n\n DAG workflow showcasing per-node hooks:\n - PostToolUse hooks create organic quality loops (lint after write, self-review)\n - PreToolUse hooks inject architectural principles before changes\n - Different nodes have different trust levels and steering\n\nprovider: claude\n\nnodes:\n # ═══════════════════════════════════════════════════════════════\n # PHASE 1: MEASURE\n # Gather raw metrics — file sizes, complexity hotspots, dependency fan-out\n # ═══════════════════════════════════════════════════════════════\n\n - id: scan-metrics\n bash: |\n echo \"=== FILE SIZE HOTSPOTS (top 30 largest source files) ===\"\n find . -name '*.ts' -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*' \\\n -exec wc -l {} + 2>/dev/null | sort -rn | head -30\n\n echo \"\"\n echo \"=== IMPORT FAN-OUT (files with most imports) ===\"\n for f in $(find . -name '*.ts' -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*'); do\n count=$(grep -c \"^import \" \"$f\" 2>/dev/null) || count=0\n if [ \"$count\" -gt 8 ]; then\n echo \"$count imports: $f\"\n fi\n done | sort -rn | head -20\n\n echo \"\"\n echo \"=== EXPORT FAN-OUT (files with most exports) ===\"\n for f in $(find . -name '*.ts' -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*'); do\n count=$(grep -c \"^export \" \"$f\" 2>/dev/null) || count=0\n if [ \"$count\" -gt 5 ]; then\n echo \"$count exports: $f\"\n fi\n done | sort -rn | head -20\n\n echo \"\"\n echo \"=== FUNCTION LENGTH HOTSPOTS (functions over 50 lines) ===\"\n grep -rn \"^\\(export \\)\\?\\(async \\)\\?function \\|=> {$\" \\\n --include='*.ts' --exclude-dir=node_modules --exclude-dir=.git --exclude-dir=dist . 2>/dev/null \\\n | head -30\n\n echo \"\"\n echo \"=== TYPE SAFETY GAPS ===\"\n echo \"any usage:\"\n grep -rn \": any\\b\\|as any\\b\" --include='*.ts' --exclude-dir=node_modules --exclude-dir=.git --exclude-dir=dist . 2>/dev/null | wc -l\n echo \"eslint-disable comments:\"\n grep -rn \"eslint-disable\" --include='*.ts' --exclude-dir=node_modules --exclude-dir=.git --exclude-dir=dist . 2>/dev/null | wc -l\n timeout: 60000\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 2: ANALYZE\n # Read through hotspots with an architectural lens\n # Hooks inject assessment criteria after every file read\n # ═══════════════════════════════════════════════════════════════\n\n - id: analyze\n prompt: |\n You are a senior software architect performing a codebase health assessment.\n\n ## Codebase Metrics\n\n $scan-metrics.output\n\n ## User Focus\n\n $ARGUMENTS\n\n ## Instructions\n\n 1. Read the top 10-15 files flagged by the metrics above (largest, most imports, most exports)\n 2. For each file, assess the criteria injected after you read it (you'll see them)\n 3. Build a running list of architectural concerns\n 4. Focus on:\n - Modules doing too many things (SRP violations)\n - Abstractions that don't earn their complexity\n - Duplicated patterns that should be consolidated (Rule of Three)\n - God files or god functions\n - Leaky abstractions or tight coupling between layers\n - Dead code or unused exports\n 5. Do NOT suggest changes yet — only diagnose\n\n ## Output\n\n Write a structured assessment to $ARTIFACTS_DIR/architecture-assessment.md with:\n - Executive summary (3-5 sentences)\n - Top findings ranked by impact\n - For each finding: file, what's wrong, why it matters, estimated effort\n depends_on: [scan-metrics]\n context: fresh\n denied_tools: [Write, Edit, Bash]\n hooks:\n PostToolUse:\n - matcher: \"Read\"\n response:\n hookSpecificOutput:\n hookEventName: PostToolUse\n additionalContext: >\n For the file you just read, assess:\n (1) Single responsibility — does this module do exactly one thing?\n (2) Cognitive load — could a new team member understand this in 5 minutes?\n (3) Abstraction value — does every abstraction earn its complexity, or is it premature?\n (4) Dependency direction — does this file depend on things at its own level or below, not above?\n Add any concerns to your running list. Be specific — cite line ranges and function names.\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 3: PLAN\n # Prioritize and scope the changes — pure reasoning, no tools\n # ═══════════════════════════════════════════════════════════════\n\n - id: plan\n prompt: |\n You are planning targeted architectural improvements.\n\n ## Assessment\n\n $analyze.output\n\n ## Principles\n\n - KISS: prefer straightforward over clever\n - YAGNI: remove speculative abstractions\n - Rule of Three: only extract when a pattern appears 3+ times\n - Each change must be independently revertable\n - Do NOT mix refactoring with behavior changes\n - Scope to what can be done safely in one pass (max 5-7 files)\n\n ## Instructions\n\n 1. From the assessment, select the top 3-5 highest-impact, lowest-risk improvements\n 2. For each, write a precise plan: which file, what to change, why\n 3. Order them so each change is independent (no cascading dependencies between changes)\n 4. Estimate blast radius — how many other files are affected\n\n ## Output\n\n Write the plan as a numbered list. Be specific about exactly what code to change.\n Keep it concise — the implement node will follow this literally.\n depends_on: [analyze]\n allowed_tools: [Read]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 4: EXECUTE\n # Make the changes with hooks creating quality feedback loops\n # ═══════════════════════════════════════════════════════════════\n\n - id: simplify\n prompt: |\n You are implementing targeted architectural simplifications.\n\n ## Plan\n\n $plan.output\n\n ## Rules\n\n - Follow the plan exactly — do not add extra improvements you notice along the way\n - Each change must preserve existing behavior (refactor only, no feature changes)\n - After each file edit, you'll be prompted to validate — follow those instructions\n - If a change turns out to be harder than expected, skip it and move on\n - Commit each logical change separately with a clear commit message\n\n ## Instructions\n\n 1. Work through the plan items in order\n 2. For each item: read the file, make the change, follow the post-edit checklist\n 3. After all changes, do a final `git diff --stat` to verify scope\n depends_on: [plan]\n context: fresh\n hooks:\n PreToolUse:\n - matcher: \"Write|Edit\"\n response:\n hookSpecificOutput:\n hookEventName: PreToolUse\n additionalContext: >\n Before writing: Is this file in your plan? If not, explain why you're\n touching it. Check how many files import from this module — changes to\n widely-imported modules need extra scrutiny.\n PostToolUse:\n - matcher: \"Write|Edit\"\n response:\n systemMessage: >\n You just modified a file. Do these things NOW before moving on:\n 1. Run the type checker to verify your change compiles\n 2. Re-read the file you changed — is it ACTUALLY simpler, or did you just move complexity around?\n 3. State in ONE sentence why this change reduces complexity. If you cannot justify it, revert it.\n - matcher: \"Read\"\n response:\n hookSpecificOutput:\n hookEventName: PostToolUse\n additionalContext: >\n Before modifying this file, consider: will your change reduce or increase\n the number of concepts a reader needs to hold in their head?\n - matcher: \"Bash\"\n response:\n hookSpecificOutput:\n hookEventName: PostToolUse\n additionalContext: >\n Check the exit code. If the command failed, diagnose the root cause\n before attempting a fix. Do not blindly retry.\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 5: VALIDATE\n # Run full validation suite — bash only, cannot edit to \"fix\" failures\n # ═══════════════════════════════════════════════════════════════\n\n - id: validate\n bash: |\n echo \"=== TYPE CHECK ===\"\n bun run type-check 2>&1\n TC_EXIT=$?\n\n echo \"\"\n echo \"=== LINT ===\"\n bun run lint 2>&1\n LINT_EXIT=$?\n\n echo \"\"\n echo \"=== TESTS ===\"\n bun run test 2>&1\n TEST_EXIT=$?\n\n echo \"\"\n echo \"=== RESULTS ===\"\n echo \"Type check: $([ $TC_EXIT -eq 0 ] && echo 'PASS' || echo 'FAIL')\"\n echo \"Lint: $([ $LINT_EXIT -eq 0 ] && echo 'PASS' || echo 'FAIL')\"\n echo \"Tests: $([ $TEST_EXIT -eq 0 ] && echo 'PASS' || echo 'FAIL')\"\n\n # Always exit 0 so downstream nodes can read output and decide\n if [ $TC_EXIT -eq 0 ] && [ $LINT_EXIT -eq 0 ] && [ $TEST_EXIT -eq 0 ]; then\n echo \"VALIDATION_STATUS: PASS\"\n else\n echo \"VALIDATION_STATUS: FAIL\"\n fi\n depends_on: [simplify]\n timeout: 300000\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 6: FIX VALIDATION FAILURES (if any)\n # Only runs if validate failed — focused fix with same quality hooks\n # ═══════════════════════════════════════════════════════════════\n\n - id: fix-failures\n prompt: |\n Review the validation output below.\n\n ## Validation Output\n\n $validate.output\n\n ## Instructions\n\n If the output ends with \"VALIDATION_STATUS: PASS\", respond with\n \"All checks passed — no fixes needed.\" and stop.\n\n If there are failures:\n\n 1. Read the validation failures carefully\n 2. Fix ONLY what's broken — do not make additional improvements\n 3. If a fix requires changing behavior (not just fixing a type/lint error),\n revert the original change instead\n 4. Run the specific failing check after each fix to confirm it passes\n 5. After all fixes, run the full validation suite: `bun run validate`\n depends_on: [validate]\n context: fresh\n hooks:\n PostToolUse:\n - matcher: \"Write|Edit\"\n response:\n systemMessage: >\n You just made a fix. Run the specific failing validation check NOW\n to verify your fix works. Do not batch fixes — verify each one.\n PreToolUse:\n - matcher: \"Write|Edit\"\n response:\n hookSpecificOutput:\n hookEventName: PreToolUse\n additionalContext: >\n You are fixing validation failures only. Do not make any changes\n beyond what's needed to pass the failing checks. If in doubt, revert\n the original change that caused the failure.\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 7: CREATE PR\n # Hooks ensure this node only does git operations\n # ═══════════════════════════════════════════════════════════════\n\n - id: create-pr\n prompt: |\n Create a pull request for the architectural improvements.\n\n ## Context\n\n - Architecture assessment: $analyze.output\n - Plan: $plan.output\n - Validation: $validate.output\n\n ## Instructions\n\n 1. Stage all changes and create a single commit (or verify existing commits)\n 2. Push the branch: `git push -u origin HEAD`\n 3. Check if a PR already exists: `gh pr list --head $(git branch --show-current)`\n 4. Create the PR with:\n - Title: concise description of what was simplified (under 70 chars)\n - Body: use the format below\n 5. Save the PR URL to `$ARTIFACTS_DIR/.pr-url`\n\n ## PR Body Format\n\n ```markdown\n ## Architectural Sweep\n\n **Focus**: $ARGUMENTS\n\n ### Assessment\n\n [3-5 sentence summary from the architecture assessment]\n\n ### Changes\n\n [For each change: what file, what was simplified, why]\n\n ### Validation\n\n - [x] Type check passes\n - [x] Lint passes\n - [x] Tests pass\n - [x] Each change preserves existing behavior\n ```\n depends_on: [fix-failures]\n context: fresh\n hooks:\n PreToolUse:\n - matcher: \"Write|Edit\"\n response:\n hookSpecificOutput:\n hookEventName: PreToolUse\n permissionDecision: deny\n permissionDecisionReason: \"PR creation node — do not modify source files. Use only git and gh commands.\"\n PostToolUse:\n - matcher: \"Bash\"\n response:\n hookSpecificOutput:\n hookEventName: PostToolUse\n additionalContext: >\n Verify this command succeeded. If git push or gh pr create failed,\n read the error message carefully before retrying.\n", + "archon-architect": "name: archon-architect\ndescription: |\n Use when: User wants an architectural sweep, complexity reduction, or codebase health improvement.\n Triggers: \"architect\", \"simplify codebase\", \"reduce complexity\", \"architectural sweep\",\n \"clean up architecture\", \"codebase health\", \"fix architecture\".\n Does: Scans codebase metrics -> analyzes architecture with principled lens -> plans targeted\n simplifications -> executes fixes with self-review loops (hooks) -> validates -> creates PR.\n NOT for: Single-file fixes, feature development, bug fixes, PR reviews.\n\n DAG workflow showcasing per-node hooks:\n - PostToolUse hooks create organic quality loops (lint after write, self-review)\n - PreToolUse hooks inject architectural principles before changes\n - Different nodes have different trust levels and steering\n\nprovider: claude\n\nnodes:\n # ═══════════════════════════════════════════════════════════════\n # PHASE 1: MEASURE\n # Gather raw metrics — file sizes, complexity hotspots, dependency fan-out\n # ═══════════════════════════════════════════════════════════════\n\n - id: scan-metrics\n bash: |\n echo \"=== FILE SIZE HOTSPOTS (top 30 largest source files) ===\"\n find . -name '*.ts' -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*' \\\n -exec wc -l {} + 2>/dev/null | sort -rn | head -30\n\n echo \"\"\n echo \"=== IMPORT FAN-OUT (files with most imports) ===\"\n for f in $(find . -name '*.ts' -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*'); do\n count=$(grep -c \"^import \" \"$f\" 2>/dev/null) || count=0\n if [ \"$count\" -gt 8 ]; then\n echo \"$count imports: $f\"\n fi\n done | sort -rn | head -20\n\n echo \"\"\n echo \"=== EXPORT FAN-OUT (files with most exports) ===\"\n for f in $(find . -name '*.ts' -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*'); do\n count=$(grep -c \"^export \" \"$f\" 2>/dev/null) || count=0\n if [ \"$count\" -gt 5 ]; then\n echo \"$count exports: $f\"\n fi\n done | sort -rn | head -20\n\n echo \"\"\n echo \"=== FUNCTION LENGTH HOTSPOTS (functions over 50 lines) ===\"\n grep -rn \"^\\(export \\)\\?\\(async \\)\\?function \\|=> {$\" \\\n --include='*.ts' --exclude-dir=node_modules --exclude-dir=.git --exclude-dir=dist . 2>/dev/null \\\n | head -30\n\n echo \"\"\n echo \"=== TYPE SAFETY GAPS ===\"\n echo \"any usage:\"\n grep -rn \": any\\b\\|as any\\b\" --include='*.ts' --exclude-dir=node_modules --exclude-dir=.git --exclude-dir=dist . 2>/dev/null | wc -l\n echo \"eslint-disable comments:\"\n grep -rn \"eslint-disable\" --include='*.ts' --exclude-dir=node_modules --exclude-dir=.git --exclude-dir=dist . 2>/dev/null | wc -l\n timeout: 60000\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 2: ANALYZE\n # Read through hotspots with an architectural lens\n # Hooks inject assessment criteria after every file read\n # ═══════════════════════════════════════════════════════════════\n\n - id: analyze\n prompt: |\n You are a senior software architect performing a codebase health assessment.\n\n ## Codebase Metrics\n\n $scan-metrics.output\n\n ## User Focus\n\n $ARGUMENTS\n\n ## Instructions\n\n 1. Read the top 10-15 files flagged by the metrics above (largest, most imports, most exports)\n 2. For each file, assess the criteria injected after you read it (you'll see them)\n 3. Build a running list of architectural concerns\n 4. Focus on:\n - Modules doing too many things (SRP violations)\n - Abstractions that don't earn their complexity\n - Duplicated patterns that should be consolidated (Rule of Three)\n - God files or god functions\n - Leaky abstractions or tight coupling between layers\n - Dead code or unused exports\n 5. Do NOT suggest changes yet — only diagnose\n\n ## Output\n\n Write a structured assessment to $ARTIFACTS_DIR/architecture-assessment.md with:\n - Executive summary (3-5 sentences)\n - Top findings ranked by impact\n - For each finding: file, what's wrong, why it matters, estimated effort\n depends_on: [scan-metrics]\n context: fresh\n denied_tools: [Edit, Bash]\n hooks:\n PostToolUse:\n - matcher: \"Read\"\n response:\n hookSpecificOutput:\n hookEventName: PostToolUse\n additionalContext: >\n For the file you just read, assess:\n (1) Single responsibility — does this module do exactly one thing?\n (2) Cognitive load — could a new team member understand this in 5 minutes?\n (3) Abstraction value — does every abstraction earn its complexity, or is it premature?\n (4) Dependency direction — does this file depend on things at its own level or below, not above?\n Add any concerns to your running list. Be specific — cite line ranges and function names.\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 3: PLAN\n # Prioritize and scope the changes — pure reasoning, no tools\n # ═══════════════════════════════════════════════════════════════\n\n - id: plan\n prompt: |\n You are planning targeted architectural improvements.\n\n ## Assessment\n\n $analyze.output\n\n ## Principles\n\n - KISS: prefer straightforward over clever\n - YAGNI: remove speculative abstractions\n - Rule of Three: only extract when a pattern appears 3+ times\n - Each change must be independently revertable\n - Do NOT mix refactoring with behavior changes\n - Scope to what can be done safely in one pass (max 5-7 files)\n\n ## Instructions\n\n 1. From the assessment, select the top 3-5 highest-impact, lowest-risk improvements\n 2. For each, write a precise plan: which file, what to change, why\n 3. Order them so each change is independent (no cascading dependencies between changes)\n 4. Estimate blast radius — how many other files are affected\n\n ## Output\n\n Write the plan as a numbered list. Be specific about exactly what code to change.\n Keep it concise — the implement node will follow this literally.\n depends_on: [analyze]\n allowed_tools: [Read]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 4: EXECUTE\n # Make the changes with hooks creating quality feedback loops\n # ═══════════════════════════════════════════════════════════════\n\n - id: simplify\n prompt: |\n You are implementing targeted architectural simplifications.\n\n ## Plan\n\n $plan.output\n\n ## Rules\n\n - Follow the plan exactly — do not add extra improvements you notice along the way\n - Each change must preserve existing behavior (refactor only, no feature changes)\n - After each file edit, you'll be prompted to validate — follow those instructions\n - If a change turns out to be harder than expected, skip it and move on\n - Commit each logical change separately with a clear commit message\n\n ## Instructions\n\n 1. Work through the plan items in order\n 2. For each item: read the file, make the change, follow the post-edit checklist\n 3. After all changes, do a final `git diff --stat` to verify scope\n depends_on: [plan]\n context: fresh\n hooks:\n PreToolUse:\n - matcher: \"Write|Edit\"\n response:\n hookSpecificOutput:\n hookEventName: PreToolUse\n additionalContext: >\n Before writing: Is this file in your plan? If not, explain why you're\n touching it. Check how many files import from this module — changes to\n widely-imported modules need extra scrutiny.\n PostToolUse:\n - matcher: \"Write|Edit\"\n response:\n systemMessage: >\n You just modified a file. Do these things NOW before moving on:\n 1. Run the type checker to verify your change compiles\n 2. Re-read the file you changed — is it ACTUALLY simpler, or did you just move complexity around?\n 3. State in ONE sentence why this change reduces complexity. If you cannot justify it, revert it.\n - matcher: \"Read\"\n response:\n hookSpecificOutput:\n hookEventName: PostToolUse\n additionalContext: >\n Before modifying this file, consider: will your change reduce or increase\n the number of concepts a reader needs to hold in their head?\n - matcher: \"Bash\"\n response:\n hookSpecificOutput:\n hookEventName: PostToolUse\n additionalContext: >\n Check the exit code. If the command failed, diagnose the root cause\n before attempting a fix. Do not blindly retry.\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 5: VALIDATE\n # Run full validation suite — bash only, cannot edit to \"fix\" failures\n # ═══════════════════════════════════════════════════════════════\n\n - id: validate\n bash: |\n echo \"=== TYPE CHECK ===\"\n bun run type-check 2>&1\n TC_EXIT=$?\n\n echo \"\"\n echo \"=== LINT ===\"\n bun run lint 2>&1\n LINT_EXIT=$?\n\n echo \"\"\n echo \"=== TESTS ===\"\n bun run test 2>&1\n TEST_EXIT=$?\n\n echo \"\"\n echo \"=== RESULTS ===\"\n echo \"Type check: $([ $TC_EXIT -eq 0 ] && echo 'PASS' || echo 'FAIL')\"\n echo \"Lint: $([ $LINT_EXIT -eq 0 ] && echo 'PASS' || echo 'FAIL')\"\n echo \"Tests: $([ $TEST_EXIT -eq 0 ] && echo 'PASS' || echo 'FAIL')\"\n\n # Always exit 0 so downstream nodes can read output and decide\n if [ $TC_EXIT -eq 0 ] && [ $LINT_EXIT -eq 0 ] && [ $TEST_EXIT -eq 0 ]; then\n echo \"VALIDATION_STATUS: PASS\"\n else\n echo \"VALIDATION_STATUS: FAIL\"\n fi\n depends_on: [simplify]\n timeout: 300000\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 6: FIX VALIDATION FAILURES (if any)\n # Only runs if validate failed — focused fix with same quality hooks\n # ═══════════════════════════════════════════════════════════════\n\n - id: fix-failures\n prompt: |\n Review the validation output below.\n\n ## Validation Output\n\n $validate.output\n\n ## Instructions\n\n If the output ends with \"VALIDATION_STATUS: PASS\", respond with\n \"All checks passed — no fixes needed.\" and stop.\n\n If there are failures:\n\n 1. Read the validation failures carefully\n 2. Fix ONLY what's broken — do not make additional improvements\n 3. If a fix requires changing behavior (not just fixing a type/lint error),\n revert the original change instead\n 4. Run the specific failing check after each fix to confirm it passes\n 5. After all fixes, run the full validation suite: `bun run validate`\n depends_on: [validate]\n context: fresh\n hooks:\n PostToolUse:\n - matcher: \"Write|Edit\"\n response:\n systemMessage: >\n You just made a fix. Run the specific failing validation check NOW\n to verify your fix works. Do not batch fixes — verify each one.\n PreToolUse:\n - matcher: \"Write|Edit\"\n response:\n hookSpecificOutput:\n hookEventName: PreToolUse\n additionalContext: >\n You are fixing validation failures only. Do not make any changes\n beyond what's needed to pass the failing checks. If in doubt, revert\n the original change that caused the failure.\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 7: CREATE PR\n # Hooks ensure this node only does git operations\n # ═══════════════════════════════════════════════════════════════\n\n - id: create-pr\n prompt: |\n Create a pull request for the architectural improvements.\n\n ## Context\n\n - Architecture assessment: $analyze.output\n - Plan: $plan.output\n - Validation: $validate.output\n\n ## Instructions\n\n 1. Stage all changes and create a single commit (or verify existing commits)\n 2. Push the branch: `git push -u origin HEAD`\n 3. Check if a PR already exists: `gh pr list --head $(git branch --show-current)`\n 4. Create the PR with:\n - Title: concise description of what was simplified (under 70 chars)\n - Body: use the format below\n 5. Save the PR URL to `$ARTIFACTS_DIR/.pr-url`\n\n ## PR Body Format\n\n ```markdown\n ## Architectural Sweep\n\n **Focus**: $ARGUMENTS\n\n ### Assessment\n\n [3-5 sentence summary from the architecture assessment]\n\n ### Changes\n\n [For each change: what file, what was simplified, why]\n\n ### Validation\n\n - [x] Type check passes\n - [x] Lint passes\n - [x] Tests pass\n - [x] Each change preserves existing behavior\n ```\n depends_on: [fix-failures]\n context: fresh\n hooks:\n PreToolUse:\n - matcher: \"Write|Edit\"\n response:\n hookSpecificOutput:\n hookEventName: PreToolUse\n permissionDecision: deny\n permissionDecisionReason: \"PR creation node — do not modify source files. Use only git and gh commands.\"\n PostToolUse:\n - matcher: \"Bash\"\n response:\n hookSpecificOutput:\n hookEventName: PostToolUse\n additionalContext: >\n Verify this command succeeded. If git push or gh pr create failed,\n read the error message carefully before retrying.\n", "archon-assist": "name: archon-assist\ndescription: |\n Use when: No other workflow matches the request.\n Handles: Questions, debugging, exploration, one-off tasks, explanations, CI failures, general help.\n Capability: Full Claude Code agent with all tools available.\n Note: Will inform user when assist mode is used for tracking.\n\nnodes:\n - id: assist\n command: archon-assist\n", "archon-comprehensive-pr-review": "name: archon-comprehensive-pr-review\ndescription: |\n Use when: User wants a comprehensive code review of a pull request with automatic fixes.\n Triggers: \"review this PR\", \"review PR #123\", \"comprehensive review\", \"full PR review\",\n \"review and fix\", \"check this PR\", \"code review\".\n Does: Syncs PR with main (rebase if needed) -> runs 5 specialized review agents in parallel ->\n synthesizes findings -> auto-fixes CRITICAL/HIGH issues -> reports remaining issues.\n NOT for: Quick questions about a PR, checking CI status, simple \"what changed\" queries.\n\n This workflow produces artifacts in $ARTIFACTS_DIR/../reviews/pr-{number}/ and posts\n a comprehensive review comment to the GitHub PR.\n\nnodes:\n - id: scope\n command: archon-pr-review-scope\n\n - id: sync\n command: archon-sync-pr-with-main\n depends_on: [scope]\n\n - id: code-review\n command: archon-code-review-agent\n depends_on: [sync]\n\n - id: error-handling\n command: archon-error-handling-agent\n depends_on: [sync]\n\n - id: test-coverage\n command: archon-test-coverage-agent\n depends_on: [sync]\n\n - id: comment-quality\n command: archon-comment-quality-agent\n depends_on: [sync]\n\n - id: docs-impact\n command: archon-docs-impact-agent\n depends_on: [sync]\n\n - id: synthesize\n command: archon-synthesize-review\n depends_on: [code-review, error-handling, test-coverage, comment-quality, docs-impact]\n trigger_rule: one_success\n\n - id: implement-fixes\n command: archon-implement-review-fixes\n depends_on: [synthesize]\n", "archon-create-issue": "name: archon-create-issue\ndescription: |\n Use when: User wants to report a bug or problem as a GitHub issue with automated reproduction.\n Triggers: \"create issue\", \"file a bug\", \"report this bug\", \"open an issue for\",\n \"create github issue\", \"report issue\", \"log this bug\".\n Does: Classifies problem area (haiku) -> gathers context in parallel (templates, git state, duplicates) ->\n investigates relevant code -> reproduces the issue using area-specific tools (agent-browser, CLI, DB queries) ->\n gates on reproduction success -> creates issue with full evidence OR reports back if cannot reproduce.\n NOT for: Feature requests, enhancements, or non-bug work. Only for bugs/problems.\n\n Reproduction gating: If the issue cannot be reproduced, the workflow does NOT create an issue.\n Instead, it reports what was tried and suggests next steps to the user.\n\nnodes:\n # ═══════════════════════════════════════════════════════════════\n # PHASE 1: CLASSIFY — Haiku classification of user's problem\n # ═══════════════════════════════════════════════════════════════\n\n - id: classify\n prompt: |\n You are a problem classifier for the Archon codebase. Analyze the user's\n description and determine the issue type and which area of the system is affected.\n\n ## User's Description\n $ARGUMENTS\n\n ## Area Definitions\n | Area | Packages | Indicators |\n |------|----------|------------|\n | web-ui | @archon/web, @archon/server (routes, web adapter) | UI rendering, SSE streaming, React components, browser behavior |\n | api-server | @archon/server (routes, middleware) | HTTP endpoints, response codes, request handling |\n | cli | @archon/cli | CLI commands, workflow invocation from terminal, output formatting |\n | isolation | @archon/isolation, @archon/git | Worktrees, branch operations, cleanup, environment lifecycle |\n | workflows | @archon/workflows | YAML parsing, DAG execution, variable substitution, node types |\n | database | @archon/core (db/) | SQLite/PostgreSQL queries, schema, data integrity, migrations |\n | adapters | @archon/adapters | Slack/Telegram/GitHub/Discord message handling, auth, polling |\n | core | @archon/core (orchestrator, handlers, clients) | Message routing, session management, AI client streaming |\n | other | Any package not covered above | Cross-cutting concerns, build tooling, config, unknown area |\n\n ## Classification Rules\n - Choose the MOST SPECIFIC area. \"SSE disconnects\" = web-ui (not api-server).\n - If ambiguous between two areas, pick the one closer to the user-facing symptom.\n - Use \"other\" only when the problem genuinely doesn't fit any specific area.\n - needs_server: Set to \"true\" if reproducing requires a running Archon server.\n Typically true for: web-ui, api-server, core, adapters.\n Typically false for: cli, isolation, workflows, database.\n For \"other\": use your judgment based on the description.\n - repro_hint: Extract the user's reproduction steps into a concise instruction.\n If no explicit steps given, infer the most likely way to trigger the issue.\n\n Provide reasoning for your classification.\n model: haiku\n allowed_tools: []\n output_format:\n type: object\n properties:\n type:\n type: string\n enum: [\"bug\", \"regression\", \"crash\", \"performance\", \"configuration\"]\n area:\n type: string\n enum: [\"web-ui\", \"api-server\", \"cli\", \"isolation\", \"workflows\", \"database\", \"adapters\", \"core\", \"other\"]\n title:\n type: string\n keywords:\n type: string\n repro_hint:\n type: string\n needs_server:\n type: string\n enum: [\"true\", \"false\"]\n required: [type, area, title, keywords, repro_hint, needs_server]\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 2: PARALLEL CONTEXT GATHERING\n # ═══════════════════════════════════════════════════════════════\n\n - id: fetch-template\n bash: |\n # Search for GitHub issue templates in standard locations\n TEMPLATES_FOUND=0\n\n # Check for issue template directory (YAML-based templates)\n if [ -d \".github/ISSUE_TEMPLATE\" ]; then\n echo \"=== Issue Templates Found ===\"\n for f in .github/ISSUE_TEMPLATE/*.md .github/ISSUE_TEMPLATE/*.yaml .github/ISSUE_TEMPLATE/*.yml; do\n if [ -f \"$f\" ]; then\n TEMPLATES_FOUND=$((TEMPLATES_FOUND + 1))\n echo \"--- Template: $f ---\"\n cat \"$f\"\n echo \"\"\n fi\n done\n fi\n\n # Check for single issue template\n for f in .github/ISSUE_TEMPLATE.md docs/ISSUE_TEMPLATE.md; do\n if [ -f \"$f\" ]; then\n TEMPLATES_FOUND=$((TEMPLATES_FOUND + 1))\n echo \"--- Template: $f ---\"\n cat \"$f\"\n fi\n done\n\n if [ \"$TEMPLATES_FOUND\" -eq 0 ]; then\n echo \"No issue templates found — will use standard format\"\n fi\n depends_on: [classify]\n\n - id: git-context\n bash: |\n echo \"=== Branch ===\"\n git branch --show-current\n\n echo \"=== Recent Commits (last 15) ===\"\n git log --oneline -15\n\n echo \"=== Working Tree Status ===\"\n git status --short\n\n echo \"=== Modified Files (last 3 commits) ===\"\n git diff --name-only HEAD~3..HEAD 2>/dev/null || echo \"(fewer than 3 commits)\"\n\n echo \"=== Environment ===\"\n echo \"Node: $(node --version 2>/dev/null || echo 'N/A')\"\n echo \"Bun: $(bun --version 2>/dev/null || echo 'N/A')\"\n echo \"OS: $(uname -s 2>/dev/null || echo 'Windows') $(uname -r 2>/dev/null || ver 2>/dev/null || echo '')\"\n echo \"Platform: $(uname -m 2>/dev/null || echo 'unknown')\"\n depends_on: [classify]\n\n - id: dedup-check\n bash: |\n KEYWORDS=$classify.output.keywords\n echo \"=== Searching for duplicates: $KEYWORDS ===\"\n\n echo \"--- Open Issues ---\"\n gh issue list --search \"$KEYWORDS\" --state open --limit 5 --json number,title,url,labels 2>/dev/null || echo \"No open matches\"\n\n echo \"--- Recently Closed ---\"\n gh issue list --search \"$KEYWORDS\" --state closed --limit 3 --json number,title,url,labels 2>/dev/null || echo \"No closed matches\"\n depends_on: [classify]\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 3: INVESTIGATE — Search codebase for related code\n # ═══════════════════════════════════════════════════════════════\n\n - id: investigate\n prompt: |\n You are a codebase investigator. Search for code related to the reported problem.\n\n ## Problem\n - **Area**: $classify.output.area\n - **Type**: $classify.output.type\n - **Title**: $classify.output.title\n - **Reproduction hint**: $classify.output.repro_hint\n\n ## Git Context\n $git-context.output\n\n ## Instructions\n\n 1. Based on the area, search the relevant packages:\n - web-ui: `packages/web/src/`, `packages/server/src/adapters/web/`, `packages/server/src/routes/`\n - api-server: `packages/server/src/routes/`, `packages/server/src/`\n - cli: `packages/cli/src/`\n - isolation: `packages/isolation/src/`, `packages/git/src/`\n - workflows: `packages/workflows/src/`\n - database: `packages/core/src/db/`\n - adapters: `packages/adapters/src/`\n - core: `packages/core/src/orchestrator/`, `packages/core/src/handlers/`\n - other: search broadly based on keywords — check `packages/*/src/`, config files, build scripts\n\n 2. Find: entry points, error handling paths, related type definitions, recent changes\n to the affected area (check git log for the specific files).\n\n 3. Write your findings to `$ARTIFACTS_DIR/issue-context.md` with this structure:\n ```\n # Codebase Investigation\n ## Relevant Files\n - `file:line` — description of what's there\n ## Error Handling\n - How errors are currently handled in this area\n ## Recent Changes\n - Any recent commits touching this code\n ## Suspected Root Cause\n - Based on code analysis, where the bug likely is\n ```\n\n Be thorough but focused. Only include files directly relevant to the reported problem.\n depends_on: [classify, git-context]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 4: REPRODUCE — Area-specific issue reproduction\n # ═══════════════════════════════════════════════════════════════\n\n - id: start-server\n bash: |\n # Allocate a free port using Bun's OS assignment\n PORT=$(bun -e \"const s = Bun.serve({port: 0, fetch: () => new Response('')}); console.log(s.port); s.stop()\")\n echo \"$PORT\" > \"$ARTIFACTS_DIR/.server-port\"\n\n # Start dev server in background\n PORT=$PORT bun run dev:server > \"$ARTIFACTS_DIR/.server-log\" 2>&1 &\n SERVER_PID=$!\n echo \"$SERVER_PID\" > \"$ARTIFACTS_DIR/.server-pid\"\n\n # Wait for server to be ready (up to 30s)\n for i in $(seq 1 30); do\n if curl -s \"http://localhost:$PORT/api/health\" > /dev/null 2>&1; then\n echo \"Server ready on port $PORT (PID: $SERVER_PID)\"\n exit 0\n fi\n sleep 1\n done\n\n echo \"WARNING: Server may not be fully ready after 30s (port $PORT, PID $SERVER_PID)\"\n echo \"Continuing anyway — reproduce node will handle connection errors\"\n depends_on: [classify]\n when: \"$classify.output.needs_server == 'true'\"\n timeout: 45000\n\n - id: reproduce\n prompt: |\n You are an issue reproduction specialist. Your job is to reproduce the reported\n problem and capture evidence (screenshots, command output, error messages).\n\n ## Problem Context\n - **Area**: $classify.output.area\n - **Type**: $classify.output.type\n - **Title**: $classify.output.title\n - **Reproduction hint**: $classify.output.repro_hint\n\n ## Investigation Findings\n $investigate.output\n\n ## Server Info\n If a server was started, read the port from: `cat \"$ARTIFACTS_DIR/.server-port\"`\n If the file doesn't exist, no server is running (area doesn't need one).\n\n ---\n\n ## Reproduction Playbooks\n\n Follow the playbook matching the area. Capture ALL evidence to `$ARTIFACTS_DIR/`.\n\n ### web-ui\n 1. Read the server port: `PORT=$(cat \"$ARTIFACTS_DIR/.server-port\" | tr -d '\\n')`\n 2. Open the app: `agent-browser open http://localhost:$PORT`\n 3. Take a baseline screenshot: `agent-browser screenshot \"$ARTIFACTS_DIR/repro-01-baseline.png\"`\n 4. Get interactive elements: `agent-browser snapshot -i`\n 5. Navigate to the area related to the issue (use @refs from snapshot)\n 6. Perform the actions described in the repro_hint\n 7. Screenshot each significant state: `agent-browser screenshot \"$ARTIFACTS_DIR/repro-02-action.png\"`\n 8. If an error appears, capture it: `agent-browser get text @errorElement`\n 9. Check browser console: `agent-browser console`\n 10. Check for JS errors: `agent-browser errors`\n 11. Final screenshot: `agent-browser screenshot \"$ARTIFACTS_DIR/repro-03-result.png\"`\n 12. Close browser: `agent-browser close`\n\n ### api-server\n 1. Read the server port: `PORT=$(cat \"$ARTIFACTS_DIR/.server-port\" | tr -d '\\n')`\n 2. Create a test conversation: `curl -s -X POST http://localhost:$PORT/api/conversations -H \"Content-Type: application/json\" -d '{}'`\n 3. Hit the problematic endpoint based on the repro_hint\n 4. Capture response codes and bodies: `curl -s -w \"\\nHTTP_CODE: %{http_code}\\n\" ...`\n 5. For SSE issues: `curl -s -N http://localhost:$PORT/api/stream/` (timeout after 10s)\n 6. Check server logs: `cat \"$ARTIFACTS_DIR/.server-log\" | tail -50`\n 7. Save all curl output to `$ARTIFACTS_DIR/repro-api-responses.txt`\n\n ### cli\n 1. Run the CLI command that should trigger the issue\n 2. Capture stdout and stderr separately:\n `bun run cli > \"$ARTIFACTS_DIR/repro-cli-stdout.txt\" 2> \"$ARTIFACTS_DIR/repro-cli-stderr.txt\"; echo \"EXIT_CODE: $?\" >> \"$ARTIFACTS_DIR/repro-cli-stdout.txt\"`\n 3. If workflow-related: `bun run cli workflow list --json > \"$ARTIFACTS_DIR/repro-workflow-list.json\" 2>&1`\n 4. If the command hangs, use timeout: `timeout 30 bun run cli `\n 5. Check for error messages in output\n\n ### isolation\n 1. Check current state: `bun run cli isolation list > \"$ARTIFACTS_DIR/repro-isolation-list.txt\" 2>&1`\n 2. Check git worktrees: `git worktree list > \"$ARTIFACTS_DIR/repro-worktree-list.txt\"`\n 3. Check branches: `git branch -a > \"$ARTIFACTS_DIR/repro-branches.txt\"`\n 4. Try the operation that should fail (based on repro_hint)\n 5. Capture the error output\n 6. Query isolation DB: `sqlite3 ~/.archon/archon.db \"SELECT * FROM remote_agent_isolation_environments ORDER BY created_at DESC LIMIT 10\" > \"$ARTIFACTS_DIR/repro-isolation-db.txt\" 2>&1`\n\n ### workflows\n 1. List workflows: `bun run cli workflow list --json > \"$ARTIFACTS_DIR/repro-workflow-list.json\" 2>&1`\n 2. If a specific workflow is mentioned, try running it:\n `bun run cli workflow run --no-worktree \"test input\" > \"$ARTIFACTS_DIR/repro-workflow-run.txt\" 2>&1`\n 3. If YAML parsing is the issue, try loading the definition directly\n 4. Check for error messages in execution output\n\n ### database\n 1. Check DB exists: `ls -la ~/.archon/archon.db 2>/dev/null`\n 2. Run targeted queries against affected tables:\n - `sqlite3 ~/.archon/archon.db \".schema \" > \"$ARTIFACTS_DIR/repro-db-schema.txt\"`\n - `sqlite3 ~/.archon/archon.db \"SELECT COUNT(*) FROM
\" > \"$ARTIFACTS_DIR/repro-db-counts.txt\"`\n 3. Check for the specific data condition described in the repro_hint\n 4. If PostgreSQL: use `psql $DATABASE_URL -c \"...\"` instead\n\n ### adapters\n 1. Read the server port: `PORT=$(cat \"$ARTIFACTS_DIR/.server-port\" | tr -d '\\n')`\n 2. Check adapter configuration: look for relevant env vars in `.env`\n 3. Check server startup logs: `cat \"$ARTIFACTS_DIR/.server-log\" | grep -i \"adapter\\|slack\\|telegram\\|github\\|discord\" | head -20`\n 4. If the adapter fails to initialize, capture the error\n 5. Test message routing via web API as a proxy:\n `curl -s -X POST http://localhost:$PORT/api/conversations//message -H \"Content-Type: application/json\" -d '{\"message\":\"/status\"}'`\n\n ### core\n 1. Read the server port: `PORT=$(cat \"$ARTIFACTS_DIR/.server-port\" | tr -d '\\n')`\n 2. Create a conversation: `curl -s -X POST http://localhost:$PORT/api/conversations -H \"Content-Type: application/json\" -d '{}'`\n 3. Send a message that triggers the issue:\n `curl -s -X POST http://localhost:$PORT/api/conversations//message -H \"Content-Type: application/json\" -d '{\"message\":\"\"}'`\n 4. Poll for responses: `curl -s http://localhost:$PORT/api/conversations//messages`\n 5. Check session state in DB: `sqlite3 ~/.archon/archon.db \"SELECT * FROM remote_agent_sessions WHERE conversation_id=''\" 2>/dev/null`\n 6. Check server logs: `cat \"$ARTIFACTS_DIR/.server-log\" | tail -50`\n\n ### other\n 1. Run `bun run validate` to check for any obvious failures — capture output:\n `bun run validate > \"$ARTIFACTS_DIR/repro-validate.txt\" 2>&1; echo \"EXIT_CODE: $?\" >> \"$ARTIFACTS_DIR/repro-validate.txt\"`\n 2. Search the codebase for keywords from the repro_hint:\n - Use Grep/Glob to find related files\n - Check recent git log for relevant changes\n 3. If the description implies a build or config issue:\n - Check `package.json` scripts, `tsconfig.json`, `.env.example`\n - Try running the relevant build/dev command\n 4. If the description implies a runtime issue:\n - Start the server (if `.server-port` file exists) and try to trigger the behavior\n - Check logs for errors\n 5. Document everything you tried, even if nothing reproduces clearly\n\n ---\n\n ## Output\n\n After following the playbook, write your findings to `$ARTIFACTS_DIR/reproduction-results.md`:\n\n ```markdown\n # Reproduction Results\n\n ## Status: [REPRODUCED | NOT_REPRODUCED | PARTIAL]\n\n ## Steps Taken\n 1. [step]\n 2. [step]\n\n ## Expected Behavior\n [what should happen]\n\n ## Actual Behavior\n [what actually happened — or \"could not trigger the reported behavior\"]\n\n ## Evidence Files\n - `$ARTIFACTS_DIR/repro-*.png` — screenshots (if web-ui)\n - `$ARTIFACTS_DIR/repro-*.txt` — command output\n - `$ARTIFACTS_DIR/repro-*.json` — structured data\n\n ## Environment\n [OS, versions, relevant config]\n\n ## Notes\n [any additional observations, suspected root cause refinements]\n ```\n\n CRITICAL: The Status line MUST be exactly one of: REPRODUCED, NOT_REPRODUCED, PARTIAL.\n This value is read by a downstream bash node to decide whether to create the issue.\n\n Even if you cannot fully reproduce the issue, document what you tried\n and what you observed. Partial reproduction is still valuable evidence.\n depends_on: [classify, git-context, investigate, start-server]\n context: fresh\n skills:\n - agent-browser\n trigger_rule: one_success\n idle_timeout: 300000\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 5: CLEANUP + GATE\n # ═══════════════════════════════════════════════════════════════\n\n - id: cleanup-server\n bash: |\n SERVER_PID=$(cat \"$ARTIFACTS_DIR/.server-pid\" 2>/dev/null | tr -d '\\n')\n SERVER_PORT=$(cat \"$ARTIFACTS_DIR/.server-port\" 2>/dev/null | tr -d '\\n')\n\n if [ -z \"$SERVER_PID\" ]; then\n echo \"No server was started — skipping cleanup\"\n exit 0\n fi\n\n echo \"Cleaning up server PID $SERVER_PID on port $SERVER_PORT...\"\n\n # Kill by PID (cross-platform)\n kill \"$SERVER_PID\" 2>/dev/null || taskkill //F //T //PID \"$SERVER_PID\" 2>/dev/null || true\n\n # Kill by port (fallback)\n if [ -n \"$SERVER_PORT\" ]; then\n fuser -k \"$SERVER_PORT/tcp\" 2>/dev/null || true\n lsof -ti:\"$SERVER_PORT\" 2>/dev/null | xargs kill -9 2>/dev/null || true\n netstat -ano 2>/dev/null | grep \":$SERVER_PORT \" | grep LISTENING | awk '{print $5}' | sort -u | while read pid; do\n taskkill //F //T //PID \"$pid\" 2>/dev/null || true\n done\n fi\n\n # Close any agent-browser session\n agent-browser close 2>/dev/null || true\n\n sleep 1\n echo \"Cleanup complete\"\n depends_on: [reproduce]\n trigger_rule: all_done\n\n - id: check-reproduction\n bash: |\n # Read the reproduction status from the results file\n if [ ! -f \"$ARTIFACTS_DIR/reproduction-results.md\" ]; then\n echo \"NOT_REPRODUCED\"\n exit 0\n fi\n\n STATUS=$(grep -oE '(NOT_REPRODUCED|REPRODUCED|PARTIAL)' \"$ARTIFACTS_DIR/reproduction-results.md\" | head -1)\n\n if [ -z \"$STATUS\" ]; then\n echo \"NOT_REPRODUCED\"\n else\n echo \"$STATUS\"\n fi\n depends_on: [cleanup-server]\n trigger_rule: all_done\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 6: BRANCH ON REPRODUCTION RESULT\n # ═══════════════════════════════════════════════════════════════\n\n - id: report-failure\n prompt: |\n The issue could not be reproduced. Report this to the user with actionable detail.\n\n ## Problem Description\n - **Title**: $classify.output.title\n - **Area**: $classify.output.area\n - **Type**: $classify.output.type\n - **Reproduction hint**: $classify.output.repro_hint\n\n ## What Was Tried\n $reproduce.output\n\n ## Investigation Findings\n $investigate.output\n\n ## Instructions\n\n Report to the user clearly:\n\n 1. **State upfront**: \"Could not reproduce the reported issue. No GitHub issue was created.\"\n\n 2. **Summarize what was tried**: List the specific steps the reproduce node took,\n based on the area playbook. Be concrete — \"Started server on port X, navigated to Y,\n clicked Z — no error appeared.\"\n\n 3. **Share what was found**: Include relevant findings from the investigation\n (code references, recent changes, suspected areas).\n\n 4. **Suggest next steps**:\n - Ask the user to provide more specific reproduction steps\n - Mention any environment-specific factors that might matter\n (OS, browser, database state, specific data conditions)\n - If the investigation found suspicious code, mention it as a lead\n - Suggest running with debug logging: `LOG_LEVEL=debug bun run dev`\n\n 5. **Offer to retry**: \"If you can provide more specific steps, run the workflow\n again with those details.\"\n\n Do NOT create a GitHub issue. The purpose of this node is to communicate back to the\n user so they can provide better information or investigate manually.\n depends_on: [check-reproduction]\n when: \"$check-reproduction.output == 'NOT_REPRODUCED'\"\n context: fresh\n\n - id: draft-issue\n prompt: |\n You are a technical writer drafting a GitHub issue. Assemble all gathered\n context into a clear, well-structured issue body.\n\n ## Classification\n - **Type**: $classify.output.type\n - **Area**: $classify.output.area\n - **Title**: $classify.output.title\n\n ## Issue Template\n If templates were found, use the most appropriate one as the structure:\n $fetch-template.output\n\n ## Duplicate Check Results\n $dedup-check.output\n\n ## Codebase Investigation\n $investigate.output\n\n ## Reproduction Results\n $reproduce.output\n\n ## Instructions\n\n 1. **Check duplicates first**: If the dedup-check found a clearly matching open issue,\n note this prominently at the top. Still draft the issue but add a note suggesting\n it may be a duplicate of #XYZ.\n\n 2. **Use the template** if one was found for bug reports. Fill every section with real data.\n\n 3. **Structure** (if no template):\n ```markdown\n ## Description\n [Clear 1-2 sentence description]\n\n ## Steps to Reproduce\n [Numbered steps from reproduction results]\n\n ## Expected Behavior\n [What should happen]\n\n ## Actual Behavior\n [What actually happened, with evidence]\n\n ## Environment\n - OS: [from git-context]\n - Bun: [version]\n - Node: [version]\n - Branch: [current branch]\n\n ## Relevant Code\n [Key file:line references from investigation]\n\n ## Additional Context\n [Screenshots, logs, database state — reference artifact files]\n ```\n\n 4. **Include reproduction evidence**:\n - If REPRODUCED: include full steps and all evidence\n - If PARTIAL: include what was observed, note incomplete reproduction\n\n 5. **Suggest labels** based on classification:\n - Area label: `area: web`, `area: cli`, `area: workflows`, etc.\n - Type label: `bug`, `regression`, `performance`, etc.\n\n 6. Write the complete issue body to `$ARTIFACTS_DIR/issue-draft.md`\n\n 7. Write a one-line suggested title to `$ARTIFACTS_DIR/.issue-title`\n\n 8. Write suggested labels (comma-separated) to `$ARTIFACTS_DIR/.issue-labels`\n depends_on: [check-reproduction, fetch-template, dedup-check, investigate]\n when: \"$check-reproduction.output != 'NOT_REPRODUCED'\"\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 7: CREATE ISSUE\n # ═══════════════════════════════════════════════════════════════\n\n - id: create-issue\n prompt: |\n Create the GitHub issue using the drafted content.\n\n ## Instructions\n\n 1. Read the draft: `cat \"$ARTIFACTS_DIR/issue-draft.md\"`\n 2. Read the title: `cat \"$ARTIFACTS_DIR/.issue-title\"`\n 3. Read suggested labels: `cat \"$ARTIFACTS_DIR/.issue-labels\"`\n\n 4. Check which labels actually exist in the repo:\n ```bash\n gh label list --json name -q '.[].name' | head -50\n ```\n Only use labels that exist. Skip any suggested label that doesn't match.\n\n 5. Create the issue:\n ```bash\n gh issue create \\\n --title \"$(cat \"$ARTIFACTS_DIR/.issue-title\")\" \\\n --body-file \"$ARTIFACTS_DIR/issue-draft.md\" \\\n --label \"label1,label2\"\n ```\n\n 6. Capture the result:\n ```bash\n ISSUE_URL=$(gh issue list --limit 1 --json url -q '.[0].url')\n echo \"$ISSUE_URL\" > \"$ARTIFACTS_DIR/.issue-url\"\n ```\n\n 7. Report to the user:\n - Issue URL\n - Title\n - Labels applied\n - Whether duplicates were found\n - Summary of reproduction results (reproduced/partial)\n depends_on: [draft-issue]\n context: fresh\n",