diff --git a/.archon/workflows/defaults/archon-adversarial-dev.yaml b/.archon/workflows/defaults/archon-adversarial-dev.yaml index 68722c8b1a..bea7117f4a 100644 --- a/.archon/workflows/defaults/archon-adversarial-dev.yaml +++ b/.archon/workflows/defaults/archon-adversarial-dev.yaml @@ -117,7 +117,7 @@ nodes: - id: adversarial-sprint depends_on: [init-workspace] idle_timeout: 600000 - model: claude-opus-4-6[1m] + model: opus[1m] loop: prompt: | # Adversarial Development — Sprint Loop diff --git a/.archon/workflows/defaults/archon-feature-development.yaml b/.archon/workflows/defaults/archon-feature-development.yaml index 6d0747700d..a2ab7da87d 100644 --- a/.archon/workflows/defaults/archon-feature-development.yaml +++ b/.archon/workflows/defaults/archon-feature-development.yaml @@ -8,7 +8,7 @@ description: | nodes: - id: implement command: archon-implement - model: claude-opus-4-6[1m] + model: opus[1m] - id: create-pr command: archon-create-pr diff --git a/.archon/workflows/defaults/archon-fix-github-issue.yaml b/.archon/workflows/defaults/archon-fix-github-issue.yaml index 12ad675de9..a6fd0d235c 100644 --- a/.archon/workflows/defaults/archon-fix-github-issue.yaml +++ b/.archon/workflows/defaults/archon-fix-github-issue.yaml @@ -133,7 +133,7 @@ nodes: command: archon-fix-issue depends_on: [bridge-artifacts] context: fresh - model: claude-opus-4-6[1m] + model: opus[1m] # ═══════════════════════════════════════════════════════════════ # PHASE 5: VALIDATE diff --git a/.archon/workflows/defaults/archon-idea-to-pr.yaml b/.archon/workflows/defaults/archon-idea-to-pr.yaml index 9329c55021..1c2fe738d3 100644 --- a/.archon/workflows/defaults/archon-idea-to-pr.yaml +++ b/.archon/workflows/defaults/archon-idea-to-pr.yaml @@ -52,7 +52,7 @@ nodes: command: archon-implement-tasks depends_on: [confirm-plan] context: fresh - model: claude-opus-4-6[1m] + model: opus[1m] # ═══════════════════════════════════════════════════════════════════ # PHASE 4: VALIDATE diff --git 
a/.archon/workflows/defaults/archon-piv-loop.yaml b/.archon/workflows/defaults/archon-piv-loop.yaml index 7227900c2f..c232762b89 100644 --- a/.archon/workflows/defaults/archon-piv-loop.yaml +++ b/.archon/workflows/defaults/archon-piv-loop.yaml @@ -415,7 +415,7 @@ nodes: - id: implement depends_on: [implement-setup] idle_timeout: 600000 - model: claude-opus-4-6[1m] + model: opus[1m] loop: prompt: | # PIV Loop — Implementation Agent diff --git a/.archon/workflows/defaults/archon-plan-to-pr.yaml b/.archon/workflows/defaults/archon-plan-to-pr.yaml index 067c1a818e..83dbbebd88 100644 --- a/.archon/workflows/defaults/archon-plan-to-pr.yaml +++ b/.archon/workflows/defaults/archon-plan-to-pr.yaml @@ -42,7 +42,7 @@ nodes: command: archon-implement-tasks depends_on: [confirm-plan] context: fresh - model: claude-opus-4-6[1m] + model: opus[1m] # ═══════════════════════════════════════════════════════════════════ # PHASE 4: VALIDATE diff --git a/.archon/workflows/defaults/archon-ralph-dag.yaml b/.archon/workflows/defaults/archon-ralph-dag.yaml index 5c0d7c9099..5482fd5a15 100644 --- a/.archon/workflows/defaults/archon-ralph-dag.yaml +++ b/.archon/workflows/defaults/archon-ralph-dag.yaml @@ -189,7 +189,7 @@ nodes: - id: implement depends_on: [validate-prd] idle_timeout: 600000 - model: claude-opus-4-6[1m] + model: opus[1m] loop: prompt: | # Ralph Agent — Autonomous Story Implementation diff --git a/.archon/workflows/defaults/archon-refactor-safely.yaml b/.archon/workflows/defaults/archon-refactor-safely.yaml index 56bc96ac36..81e4cb5f09 100644 --- a/.archon/workflows/defaults/archon-refactor-safely.yaml +++ b/.archon/workflows/defaults/archon-refactor-safely.yaml @@ -207,7 +207,7 @@ nodes: # ═══════════════════════════════════════════════════════════════ - id: execute-refactor - model: claude-opus-4-6[1m] + model: opus[1m] prompt: | You are executing a refactoring plan with strict safety guardrails. 
diff --git a/.archon/workflows/defaults/archon-workflow-builder.yaml b/.archon/workflows/defaults/archon-workflow-builder.yaml index a311b8d970..66ce915de1 100644 --- a/.archon/workflows/defaults/archon-workflow-builder.yaml +++ b/.archon/workflows/defaults/archon-workflow-builder.yaml @@ -61,7 +61,8 @@ nodes: 5. Whether this should be a simple DAG or include a loop node Be specific and concrete. Each proposed node should have a clear type - (bash, prompt, command, or loop) and a one-line description of what it does. + (bash, prompt, command, script, loop, or approval) and a one-line + description of what it does. model: haiku allowed_tools: [] output_format: @@ -115,7 +116,7 @@ nodes: nodes: - id: node-id-kebab-case - # Choose ONE of: prompt, bash, command, loop + # Choose ONE of: prompt, bash, command, script, loop, approval # --- prompt node (AI-executed) --- prompt: | @@ -131,6 +132,17 @@ nodes: # --- command node (references a .archon/commands/ file) --- command: command-name + # --- script node (TypeScript via bun, or Python via uv — no AI, stdout = $.output) --- + # Use for deterministic data transforms the shell would mangle (JSON parsing, etc.) + script: | + const raw = String.raw`$other-node.output`; + const data = JSON.parse(raw); + console.log(JSON.stringify({ count: data.items.length })); + runtime: bun # required: 'bun' (.ts/.js) or 'uv' (.py) + # deps: [requests] # uv only + # Or reference a named script in .archon/scripts/: + # script: extract-labels # no extension; bun resolves .ts/.js, uv resolves .py + # --- loop node (iterative AI execution) --- loop: prompt: | @@ -139,17 +151,22 @@ nodes: max_iterations: 10 fresh_context: true # optional: reset context each iteration + # --- approval node (human gate — pauses workflow) --- + approval: + message: "Review the plan above. Approve to continue." 
+ # capture_response: true # store reviewer comment as $.output + # Common options for all node types: depends_on: [other-node-id] # DAG edges when: "$.output == 'value'" # conditional execution trigger_rule: all_success # all_success | one_success | all_done - timeout: 120000 # ms, for bash nodes + timeout: 120000 # ms, for bash and script nodes ``` ## Variable Reference - `$ARGUMENTS` — user's input text - `$ARTIFACTS_DIR` — pre-created directory for workflow artifacts - - `$.output` — stdout from a bash node or AI response from a prompt node + - `$.output` — stdout from a bash/script node or AI response from a prompt node - `$.output.field` — JSON field from a node with output_format - `$BASE_BRANCH` — base git branch @@ -158,12 +175,14 @@ nodes: 2. The `description:` MUST follow the "Use when / Triggers / Does / NOT for" pattern 3. Every node MUST have a unique kebab-case `id` 4. Use `depends_on` to define execution order - 5. Use `bash` nodes for deterministic operations (file checks, git commands, installs) - 6. Use `prompt` nodes for AI reasoning tasks - 7. Use `output_format` on prompt nodes when downstream nodes need structured data - 8. Use `allowed_tools: []` on classification/analysis nodes that don't need tools - 9. Use `denied_tools: [Edit, Bash]` when a node should only use Write (not edit existing files) - 10. Prefer `model: haiku` for simple classification tasks to save cost + 5. Use `bash` nodes for deterministic shell operations (file checks, git commands, installs) + 6. Use `script` nodes for typed data transforms (TypeScript JSON parsing, Python with deps) — stdout is captured as output, stderr is forwarded as a warning. $nodeId.output is NOT shell-quoted in script bodies — parse with JSON.parse / json.loads, not shell interpolation + 7. Use `prompt` nodes for AI reasoning tasks + 8. Use `approval` nodes to pause for human review at risky gates (plan→execute boundary, destructive actions) + 9. 
Use `output_format` on prompt nodes when downstream nodes need structured data + 10. Use `allowed_tools: []` on classification/analysis nodes that don't need tools + 11. Use `denied_tools: [Edit, Bash]` when a node should only use Write (not edit existing files) + 12. Prefer `model: haiku` for simple classification tasks to save cost ## Output diff --git a/.claude/skills/archon/SKILL.md b/.claude/skills/archon/SKILL.md index f36e7391b8..9a9a2f7c0b 100644 --- a/.claude/skills/archon/SKILL.md +++ b/.claude/skills/archon/SKILL.md @@ -42,12 +42,54 @@ Determine the user's intent and dispatch to the appropriate guide: | **Variable substitution reference** | Read `references/variables.md` | | **CLI command reference** | Read `references/cli-commands.md` | | **Run an interactive workflow** | Read `references/interactive-workflows.md` — transparent relay protocol | +| **Workflow good practices / anti-patterns** | Read `references/good-practices.md` — read before designing a non-trivial workflow | +| **Troubleshoot a failing / stuck workflow** | Read `references/troubleshooting.md` — log locations, common failure modes | | **Run a workflow (default)** | Continue with "Running Workflows" below | If the intent is ambiguous, ask the user to clarify. --- +## Richer Context: [archon.diy](https://archon.diy) + +The references in this skill are a distilled subset. The full, canonical docs live at **[archon.diy](https://archon.diy)** (Starlight site from `packages/docs-web/`). If the skill's reference pages don't cover what you need — an edge case, a worked example, a diagram, a deeper section on a feature — fetch the matching page from archon.diy. + +### When to reach for the live docs + +- You need an end-to-end example that's longer than what the skill shows (e.g. 
full patterns for hooks, MCP config, sandbox schema, approval flows) +- You're explaining a concept to the user and want the most readable framing (the `book/` series is written as a tutorial, not a reference) +- You hit a feature the skill only mentions in passing (e.g. `agents:` inline sub-agents, advanced Codex options, the full SyncHookJSONOutput schema) +- The user asks "where is this documented?" — point them at the archon.diy URL, not a skill file path + +### URL map + +| Topic | URL | +|-------|-----| +| Landing + install | [archon.diy](https://archon.diy) | +| Getting started (installation, quick start, concepts) | [archon.diy/getting-started/](https://archon.diy/getting-started/overview/) | +| The book (tutorial-style walkthrough) | [archon.diy/book/](https://archon.diy/book/) | +| Workflow authoring guide | [archon.diy/guides/authoring-workflows/](https://archon.diy/guides/authoring-workflows/) | +| Command authoring guide | [archon.diy/guides/authoring-commands/](https://archon.diy/guides/authoring-commands/) | +| Node type guides | [archon.diy/guides/loop-nodes/](https://archon.diy/guides/loop-nodes/), [/approval-nodes/](https://archon.diy/guides/approval-nodes/), [/script-nodes/](https://archon.diy/guides/script-nodes/) | +| Per-node features (Claude only) | [/hooks/](https://archon.diy/guides/hooks/), [/mcp-servers/](https://archon.diy/guides/mcp-servers/), [/skills/](https://archon.diy/guides/skills/) | +| Global workflows/commands/scripts | [archon.diy/guides/global-workflows/](https://archon.diy/guides/global-workflows/) | +| Variables reference | [archon.diy/reference/variables/](https://archon.diy/reference/variables/) | +| CLI reference | [archon.diy/reference/cli/](https://archon.diy/reference/cli/) | +| Security model (env, sandbox, target-repo `.env` stripping) | [archon.diy/reference/security/](https://archon.diy/reference/security/) | +| Architecture | [archon.diy/reference/architecture/](https://archon.diy/reference/architecture/) | +| 
Configuration (`.archon/config.yaml` full schema) | [archon.diy/reference/configuration/](https://archon.diy/reference/configuration/) | +| Troubleshooting | [archon.diy/reference/troubleshooting/](https://archon.diy/reference/troubleshooting/) | +| Adapter setup (Slack/Telegram/GitHub/Web/Discord/Gitea/GitLab) | [archon.diy/adapters/](https://archon.diy/adapters/) | +| Deployment (Docker, cloud, Windows) | [archon.diy/deployment/](https://archon.diy/deployment/) | + +URL shape is `archon.diy/
//` — the paths mirror the filenames under `packages/docs-web/src/content/docs/`. + +### Precedence + +This skill's reference pages are the primary source for routine workflow authoring, CLI use, and setup. Reach for archon.diy when the skill is incomplete for your case — don't go to the live docs first by default (skill refs load into context faster and are tuned for agents). + +--- + ## Running Workflows ### Core Command @@ -152,9 +194,9 @@ nodes: depends_on: [first-node] ``` -### Four Node Types +### Node Types -Each node has exactly ONE of: `command`, `prompt`, `bash`, or `loop`. +Each node has exactly ONE of: `command`, `prompt`, `bash`, `script`, `loop`, `approval`, or `cancel`. **Command node** — runs a `.archon/commands/*.md` file: ```yaml @@ -177,6 +219,22 @@ Each node has exactly ONE of: `command`, `prompt`, `bash`, or `loop`. timeout: 15000 ``` +**Script node** — TypeScript/JavaScript (via `bun`) or Python (via `uv`), no AI, stdout captured as output: +```yaml +- id: transform + script: | + const raw = process.argv.slice(2).join(' ') || '{}'; + console.log(JSON.stringify({ parsed: JSON.parse(raw) })); + runtime: bun # 'bun' (.ts/.js) or 'uv' (.py) — REQUIRED + timeout: 30000 # Optional, ms, default 120000 + +# Or reference a named script from .archon/scripts/ or ~/.archon/scripts/ +- id: analyze + script: analyze-metrics # loads .archon/scripts/analyze-metrics.py + runtime: uv + deps: ["pandas>=2.0"] # Optional, uv only — 'uv run --with ' +``` + **Loop node** — iterates AI prompt until completion: ```yaml - id: implement @@ -188,6 +246,29 @@ Each node has exactly ONE of: `command`, `prompt`, `bash`, or `loop`. until_bash: "bun run test" # Optional: exit 0 = done ``` +**Approval node** — pauses the workflow for human review. Requires `interactive: true` at the workflow level for Web UI delivery: +```yaml +interactive: true # workflow level — required for web UI + +nodes: + - id: review-gate + approval: + message: "Review the plan above before proceeding." 
+ capture_response: true # Optional: user's comment → $review-gate.output + on_reject: # Optional: AI rework on rejection instead of cancel + prompt: "Revise based on feedback: $REJECTION_REASON" + max_attempts: 3 # Range 1-10, default 3 + depends_on: [plan] +``` + +**Cancel node** — terminates the workflow with a reason. Typically gated with `when:`: +```yaml +- id: stop-if-unsafe + cancel: "Refusing to proceed: input flagged UNSAFE." + depends_on: [classify] + when: "$classify.output != 'SAFE'" +``` + For the full authoring guide with all fields, conditions, trigger rules, and patterns: Read `references/workflow-dag.md` ### Creating a Command File @@ -230,7 +311,7 @@ For details: Read `references/dag-advanced.md` ### Example Files -- `examples/dag-workflow.yaml` — workflow with conditions, bash nodes, structured output +- `examples/dag-workflow.yaml` — workflow with conditions, bash + script + loop nodes, structured output - `examples/command-template.md` — Command file skeleton with all variables --- diff --git a/.claude/skills/archon/examples/dag-workflow.yaml b/.claude/skills/archon/examples/dag-workflow.yaml index 5e15f4c77c..676e08161e 100644 --- a/.claude/skills/archon/examples/dag-workflow.yaml +++ b/.claude/skills/archon/examples/dag-workflow.yaml @@ -1,7 +1,8 @@ -# Example: Workflow with all four node types +# Example: Workflow demonstrating multiple node types # -# Demonstrates: bash nodes, structured output, when: conditions, -# trigger_rule, per-node model, context: fresh, loop nodes, and output substitution. +# Demonstrates: bash nodes, script nodes (TypeScript via bun), structured output, +# when: conditions, trigger_rule, per-node model, context: fresh, loop nodes, +# and output substitution. # # IMPORTANT: This is a reference example. 
Design your actual workflow # around the user's specific needs — the number of nodes, their types, @@ -42,6 +43,27 @@ nodes: fi timeout: 5000 + # ── SCRIPT NODE: TypeScript (bun runtime), no AI, stdout captured as output ── + # Deterministic parsing the shell would mangle — extracts labels cleanly as JSON. + # + # NOTE: `$fetch-issue.output` is substituted *raw* into the script body (no shell + # quoting — see reference/variables.md). Wrapping it in a String.raw template + # preserves backslashes and newlines in the JSON payload without needing any + # escaping. Safe here because gh issue view --json emits clean JSON. + - id: extract-labels + script: | + const raw = String.raw`$fetch-issue.output`; + try { + const issue = JSON.parse(raw); + const labels = (issue.labels ?? []).map((l) => l.name); + console.log(JSON.stringify({ labels, count: labels.length })); + } catch { + console.log(JSON.stringify({ labels: [], count: 0 })); + } + runtime: bun + depends_on: [fetch-issue] + timeout: 10000 + # ── PROMPT NODE: Inline AI prompt with structured output ── - id: classify prompt: | diff --git a/.claude/skills/archon/references/authoring-commands.md b/.claude/skills/archon/references/authoring-commands.md index 0b1240da6b..603dd3e4a3 100644 --- a/.claude/skills/archon/references/authoring-commands.md +++ b/.claude/skills/archon/references/authoring-commands.md @@ -4,14 +4,29 @@ Commands are plain Markdown files containing AI prompt templates. They are the a ## File Location +Commands are discovered from three scopes, highest-precedence first: + ``` -.archon/commands/ -├── my-command.md # Custom command -├── review-code.md # Another custom command -└── defaults/ # Optional: override bundled defaults - └── archon-assist.md # Overrides the bundled archon-assist +/.archon/commands/ # 1. 
Repo-scoped (wins) +├── my-command.md # Custom command for this repo +├── archon-assist.md # Overrides the bundled archon-assist +└── triage/ # Subfolders allowed, 1 level deep + └── review.md # Resolves as 'review', not 'triage/review' + +~/.archon/commands/ # 2. Home-scoped (user-level, shared across all repos) +├── review-checklist.md # Personal helper available in every repo +└── pr-style-guide.md + + # 3. Shipped with Archon (archon-assist, etc.) ``` +**Resolution rules:** + +- Filename-without-extension is the command name (e.g. `my-command.md` → `my-command`). +- 1-level subfolders are supported for grouping; resolution is still by filename (`triage/review.md` → `review`). +- Repo scope overrides home scope overrides bundled, by name. +- Duplicate basenames **within a scope** (e.g. two different `review.md` files in `triage/` and `security/`) are a user error — keep names unique within each scope. + Commands are referenced by name (without `.md`) in workflow YAML files. ## File Format @@ -78,11 +93,14 @@ Command names must: ## Discovery and Priority When a workflow references `command: my-command`, Archon searches in this order: -1. `.archon/commands/my-command.md` (repo custom) -2. `.archon/commands/defaults/my-command.md` (repo default overrides) + +1. `/.archon/commands/my-command.md` (repo scope) +2. `~/.archon/commands/my-command.md` (home scope — shared across every repo on the machine) 3. Bundled defaults (shipped with Archon) -First match wins. To override a bundled command, create a file with the same name in your repo. +First match wins. To override a bundled command, drop a file with the same name at either scope. To override a home-scoped command for a specific repo, drop a file with the same name in that repo's `.archon/commands/`. + +> **Web UI note**: Home-scoped commands appear in the workflow builder's node palette under a dedicated "Global (~/.archon/commands/)" section, distinct from project and bundled entries. 
## Referencing Commands from Workflows diff --git a/.claude/skills/archon/references/cli-commands.md b/.claude/skills/archon/references/cli-commands.md index 157eacb713..0cc1a0ee06 100644 --- a/.claude/skills/archon/references/cli-commands.md +++ b/.claude/skills/archon/references/cli-commands.md @@ -32,7 +32,7 @@ archon workflow run archon-fix-github-issue --resume | `--branch ` / `-b` | Branch name for worktree. Reuses existing worktree if healthy | | `--from ` / `--from-branch ` | Start-point branch for new worktree (default: repo default branch) | | `--no-worktree` | Skip isolation — run in the live checkout | -| `--resume` | Resume the last failed run of this workflow (skips completed steps/nodes) | +| `--resume` | Resume the last failed run of this workflow at this cwd (skips completed nodes) | | `--cwd ` | Working directory override | **Flag conflicts** (errors): @@ -42,6 +42,87 @@ archon workflow run archon-fix-github-issue --resume **Default behavior** (no flags): Auto-creates a worktree with branch name `{workflow-name}-{timestamp}`. +**Auto-resume without `--resume`**: If a prior invocation of the same workflow at the same cwd failed, the next invocation automatically skips completed nodes. `--resume` is only needed when you want to force resume a specific failed run or to reuse the worktree from that run. + +### `archon workflow status` + +Show the currently running workflow (if any) with its run ID, state, and last activity. + +```bash +archon workflow status +archon workflow status --json # Machine-readable output +``` + +### `archon workflow approve [comment]` + +Approve a paused approval-node workflow. Auto-resumes the workflow. + +```bash +archon workflow approve abc123 +archon workflow approve abc123 --comment "Plan looks good" +archon workflow approve abc123 "Plan looks good" # positional form +``` + +For interactive loop nodes, the comment becomes `$LOOP_USER_INPUT` on the next iteration. 
For approval nodes with `capture_response: true`, the comment becomes `$.output` for downstream nodes. + +### `archon workflow reject [reason]` + +Reject a paused approval gate. Without `on_reject` on the node, cancels the workflow. With `on_reject`, runs the rework prompt with `$REJECTION_REASON` substituted and re-pauses. + +```bash +archon workflow reject abc123 +archon workflow reject abc123 --reason "Plan misses test coverage" +archon workflow reject abc123 "Plan misses test coverage" +``` + +### `archon workflow abandon ` + +Mark a non-terminal workflow run as cancelled. Use when a `running` row is stuck after a server crash or when you want to discard a paused run without rejecting. This does NOT kill an in-flight subprocess — it only transitions the DB row. + +```bash +archon workflow abandon abc123 +``` + +> **There is no `archon workflow cancel` CLI subcommand.** To actively cancel a running workflow (terminate its subprocess), use the chat slash command `/workflow cancel ` on the platform that started it (Web UI, Slack, Telegram, etc.), or the Cancel button on the Web UI dashboard. The CLI only offers `abandon`, which is the right tool for orphan cleanup but does not interrupt a live subprocess. + +### `archon workflow resume [message]` + +Explicitly re-run a failed run. Most workflows auto-resume without this — use it when you want to force a specific run ID. + +```bash +archon workflow resume abc123 +archon workflow resume abc123 "continue with the plan" +``` + +### `archon workflow cleanup [days]` + +**Deletes** old terminal workflow runs (`completed`/`failed`/`cancelled`) from the database for disk hygiene. Does NOT transition `running` rows — use `abandon`/`cancel` for those. + +```bash +archon workflow cleanup # Default: 7 days +archon workflow cleanup 30 # Custom: 30 days +``` + +### `archon workflow event emit --run-id --type [--data ]` + +Emit a workflow event to a running workflow. Used inside loop prompts to signal state (e.g. 
"checkpoint written") for observability. Rarely invoked from the shell directly. + +```bash +archon workflow event emit --run-id abc123 --type checkpoint --data '{"step":"plan"}' +``` + +### `archon continue [flags] [message]` + +Continue work on a branch with prior context. Defaults to `archon-assist`; use `--workflow` to pick a different workflow. Useful for iterative sessions on the same worktree without typing the full `workflow run` incantation. + +```bash +archon continue feat/auth "Add password reset" +archon continue feat/auth --workflow archon-feature-development "Continue from step 3" +archon continue feat/auth --no-context "Start fresh without loading prior artifacts" +``` + +Flags: `--workflow `, `--no-context`. + ## Isolation Commands ### `archon isolation list` @@ -59,11 +140,20 @@ Outputs: branch name, path, workflow type, platform, last activity age. Ghost en Remove stale worktree environments. ```bash -archon isolation cleanup # Default: 7 days -archon isolation cleanup 14 # Custom: 14 days -archon isolation cleanup --merged # Remove branches merged into main (+ remote branches) +archon isolation cleanup # Default: 7 days +archon isolation cleanup 14 # Custom: 14 days +archon isolation cleanup --merged # Also remove worktrees whose branches merged into main (deletes remote branches too) +archon isolation cleanup --merged --include-closed # Also remove worktrees whose PRs were closed without merging ``` +**Flags:** + +| Flag | Description | +|------|-------------| +| `[days]` | Positional — age threshold in days. Environments untouched for longer than this are removed. 
Default: 7 | +| `--merged` | Union of three signals — ancestry (`git branch --merged`), patch equivalence (`git cherry`), and PR state (`gh`) — safely catches squash-merges | +| `--include-closed` | With `--merged`, also remove worktrees whose PRs were closed (abandoned, not merged) | + ## Validate Commands ### `archon validate workflows [name]` diff --git a/.claude/skills/archon/references/dag-advanced.md b/.claude/skills/archon/references/dag-advanced.md index 4add35d8f7..63a83e9101 100644 --- a/.claude/skills/archon/references/dag-advanced.md +++ b/.claude/skills/archon/references/dag-advanced.md @@ -1,6 +1,6 @@ # Advanced Features: Hooks, MCP, Skills, Retry -These features are available on **command and prompt nodes** (hooks, MCP, skills, tool restrictions) and **command, prompt, and bash nodes** (retry, output_format). Loop nodes do not support these features (`retry` on loop nodes is a hard error; others are silently ignored). +These features are available on **command and prompt nodes** (hooks, MCP, skills, tool restrictions, `output_format`, `agents`, Claude SDK options) and **command, prompt, bash, and script nodes** (retry). Loop nodes do not support these features (`retry` on loop nodes is a hard error; others are silently ignored). Bash and script nodes silently ignore AI-specific fields (a loader warning lists the ignored fields). ## Provider Compatibility diff --git a/.claude/skills/archon/references/good-practices.md b/.claude/skills/archon/references/good-practices.md new file mode 100644 index 0000000000..e731a2583d --- /dev/null +++ b/.claude/skills/archon/references/good-practices.md @@ -0,0 +1,241 @@ +# Workflow Good Practices and Anti-Patterns + +Guidance for authoring workflows that survive first contact with a real codebase. Written for an agent or human writing their first non-trivial workflow. + +## Good Practices + +### 1. Use deterministic nodes for deterministic work + +AI nodes are expensive, non-reproducible, and can hallucinate. 
Use `bash:` or `script:` for anything that has a right answer a computer can produce. + +- **Run tests** with `bash: "bun run test"`, not `prompt: "run the tests and tell me if they passed"`. +- **Parse JSON** with `script:` (bun/uv), not a `prompt:` that re-derives structure from free text. +- **Read files with known paths** via `bash: "cat path/to/file"` or `Read` in an AI node where the agent actually needs to reason about the content. +- **Git state checks** (current branch, uncommitted changes, merge-base) → `bash:`. + +### 2. Use `output_format` for every node whose output downstream `when:` reads + +`when:` conditions do best-effort JSON parsing on `$nodeId.output` for `.field` access. If the upstream node doesn't enforce a shape, you're pattern-matching free-form AI text — fragile. + +```yaml +# GOOD +- id: classify + prompt: "Classify as BUG or FEATURE" + output_format: # enforces the JSON shape + type: object + properties: + type: { type: string, enum: [BUG, FEATURE] } + required: [type] + +- id: investigate + command: investigate-bug + depends_on: [classify] + when: "$classify.output.type == 'BUG'" # safe field access + +# BAD +- id: classify + prompt: "Is this a bug or a feature?" + # no output_format; AI might reply "it looks like a bug", "BUG", or "This is a bug.\n\n..." + +- id: investigate + command: investigate-bug + depends_on: [classify] + when: "$classify.output == 'BUG'" # fragile string match +``` + +### 3. `trigger_rule: none_failed_min_one_success` after conditional branches + +After `when:`-gated branches, the downstream merge node will see one or more **skipped** dependencies. Skipped ≠ success. Default `all_success` fails. 
+ +```yaml +- id: investigate + command: investigate-bug + depends_on: [classify] + when: "$classify.output.type == 'BUG'" + +- id: plan + command: plan-feature + depends_on: [classify] + when: "$classify.output.type == 'FEATURE'" + +- id: implement + command: implement + depends_on: [investigate, plan] + trigger_rule: none_failed_min_one_success # CORRECT — exactly one ran + # trigger_rule: all_success ← would fail here (one dep skipped) +``` + +Use `one_success` when any dep succeeding is enough; `none_failed_min_one_success` when no dep should have failed AND at least one must have succeeded; `all_done` for "run cleanup regardless" patterns with `cancel:` or notification nodes. + +### 4. `context: fresh` requires artifacts for state passing + +A node with `context: fresh` starts with no memory of prior nodes in the same workflow. The only way state moves is via files. Default is `fresh` for parallel layers and `shared` for sequential — explicit `context: fresh` is common when you want cost isolation. + +```yaml +- id: investigate + command: investigate-bug + # Investigator WRITES to $ARTIFACTS_DIR/investigation.md + +- id: implement + command: implement-fix + depends_on: [investigate] + context: fresh + # Implementer MUST read $ARTIFACTS_DIR/investigation.md — it has no memory + # of what the investigator found. +``` + +Command files should lead with "read artifacts from `$ARTIFACTS_DIR/...`" when they're downstream of a fresh node. This is the single biggest quality lever on multi-node workflows. + +### 5. Cheap models for glue, strong models for substance + +Classification, routing, formatting, and short summaries don't need Opus. Use `model: haiku` for these and reserve `sonnet`/`opus` for the nodes that actually produce code or long-form analysis. Combined with `allowed_tools: []` on pure-text nodes, this cuts cost dramatically. 
+ +```yaml +- id: classify + prompt: "Classify this issue" + model: haiku # fast + cheap + allowed_tools: [] # no tool overhead + output_format: { ... } + +- id: implement + command: implement-fix + model: sonnet # where the thinking happens +``` + +### 6. Write the workflow description for routing + +Archon's orchestrator routes user intent to workflows by description. Write descriptions that make routing obvious. + +- Start with the imperative action: "Fix a GitHub issue end-to-end", "Generate a Remotion video composition". +- Mention triggers: "Use when the user asks to review a PR", "Use when there's a failing test run". +- Mention what it does NOT do: "Does not create a PR — use `archon-plan-to-pr` for that". + +### 7. Validate before shipping + +Never declare a workflow "done" without: + +```bash +archon validate workflows # YAML + DAG structure + resource refs +``` + +This checks: YAML syntax, node ID uniqueness, no cycles, all `depends_on` exist, all `$nodeId.output` refs point to known nodes, all `command:` files exist, all `mcp:` configs parse, all `skills:` directories exist, provider/model compatibility, named script existence, runtime availability. Fix everything it reports before first run. + +For brand-new workflows, also: +1. Run once against a trivial input (`archon workflow run my-workflow --branch test/sanity "hello"`) +2. Check the run log at `~/.archon/workspaces///logs/.jsonl` +3. Check artifacts at `~/.archon/workspaces///artifacts/runs//` + +See `references/troubleshooting.md` for how to read those. + +### 8. Design the artifact chain before writing command files + +In a multi-node workflow, each node's artifact IS the specification for the next node. 
Before writing any command body, map out: + +| Node | Reads | Writes | +|------|-------|--------| +| `investigate-issue` | GitHub issue via `gh` | `$ARTIFACTS_DIR/issues/issue-{n}.md` | +| `implement-issue` | Artifact from `investigate-issue` | Code files, tests | +| `create-pr` | Git diff | GitHub PR, `$ARTIFACTS_DIR/pr-body.md` | + +If a downstream agent can't execute from just its artifact, the artifact is incomplete. This is the single most common failure mode in multi-node workflows. + +### 9. Keep workflows reversible + +Use `worktree.enabled: true` at the workflow level for anything that modifies the codebase. The CLI `--no-worktree` flag will hard-error, forcing users into isolation. The cost is a one-time cp of the worktree; the benefit is never having a failed workflow corrupt a live checkout. + +For read-only workflows (triage, reporting, code analysis), pin `worktree.enabled: false` instead — saves the worktree setup cost. + +--- + +## Anti-Patterns + +### ❌ Asking AI to run deterministic checks + +```yaml +# BAD +- id: test + prompt: "Run bun run test and tell me if it passed" + +# GOOD +- id: test + bash: "bun run test 2>&1" + +- id: react-to-tests + prompt: "Fix any failures: $test.output" + depends_on: [test] + trigger_rule: all_done # run even if tests failed +``` + +### ❌ Pattern-matching free-form AI output in `when:` + +```yaml +# BAD — brittle +- id: decide + prompt: "Should we proceed? Answer yes or no." +- id: do-thing + depends_on: [decide] + when: "$decide.output == 'yes'" # AI says "Yes!" or "Yes, because..." — no match + +# GOOD +- id: decide + prompt: "Should we proceed?" + output_format: + type: object + properties: { proceed: { type: boolean } } + required: [proceed] +- id: do-thing + depends_on: [decide] + when: "$decide.output.proceed == 'true'" +``` + +### ❌ Commands that assume prior-node memory in a `context: fresh` chain + +```markdown + +Fix the bug we discussed in the investigation phase. 
+ + +Read the investigation at `$ARTIFACTS_DIR/issues/issue-{n}.md`. +Extract the root cause, affected files, and implementation plan. +Implement the changes exactly as specified in the plan. +``` + +### ❌ Long flat layers of AI nodes + +Ten sibling `prompt:` nodes in one layer all depending on one upstream is a $N/run cost bomb and a latency trap. If the work is parallel and similar, use the `agents:` inline sub-agent map-reduce pattern with a cheap model per item and a single stronger reducer. See `references/dag-advanced.md` and the [Inline sub-agents section on archon.diy](https://archon.diy/guides/authoring-workflows/#inline-sub-agents) for a worked example. + +### ❌ Hardcoding secrets in YAML or MCP configs + +Use `$ENV_VAR` expansion in MCP configs and the `env:` block in `.archon/config.yaml` (or Web UI Settings → Projects → Env Vars). See `references/repo-init.md` §Per-Project Env Injection. + +### ❌ `retry` on a loop node + +Loop nodes manage their own iteration via `max_iterations`. Setting `retry:` on a loop is a **hard parse error** — the workflow fails to load. If a loop iteration is flaky, handle it inside the loop prompt (the AI can retry tool calls) or use `until_bash` to gate completion on a deterministic check. + +### ❌ Tiny `max_iterations` on open-ended loops + +A loop with `max_iterations: 3` that's supposed to implement N stories from a PRD will silently stop after 3 iterations and leave the work half-done. Think about the worst case — multi-story PRDs need 10–20, fix-iterate cycles need 5–8, refinement loops need 3–5. + +### ❌ Missing `interactive: true` at workflow level for approval/loop gates on web + +Web UI dispatches non-interactive workflows to a background worker that cannot deliver chat messages. Approval-gate messages and loop `gate_message` prompts will never reach the user. If the workflow has `approval:` nodes OR `loop.interactive: true`, set workflow-level `interactive: true`. 
+ +### ❌ Tool-restricted nodes without the MCP wildcard + +```yaml +# BAD — no tools available, including MCP +- id: analyze + prompt: "Use the Postgres MCP to query users" + mcp: .archon/mcp/postgres.json + allowed_tools: [] # OOPS — disables EVERYTHING, including MCP tools + +# FIXED — Archon auto-adds mcp__<server>__* wildcards when mcp: is set, +# so this actually works out of the box. The anti-pattern is forgetting +# and manually adding Read/Write/Bash/etc. when you only want MCP. +- id: analyze + prompt: "Use Postgres MCP to query users" + mcp: .archon/mcp/postgres.json + allowed_tools: [] # correct — MCP tools auto-attached +``` + +Caveat: this only helps Claude. Codex gets MCP config from `~/.codex/config.toml` globally, not per-node. diff --git a/.claude/skills/archon/references/interactive-workflows.md b/.claude/skills/archon/references/interactive-workflows.md index 243cfdb7b0..856d50afd1 100644 --- a/.claude/skills/archon/references/interactive-workflows.md +++ b/.claude/skills/archon/references/interactive-workflows.md @@ -103,4 +103,4 @@ archon workflow reject "reason for rejection" - **Workflow shows `running` for a long time**: The AI is doing research/implementation. Be patient — check again in a few minutes. - **Log file not found**: The log is at `~/.archon/workspaces/<codebase>/<workflow>/logs/<run-id>.jsonl` -- **User wants to cancel**: Run `archon workflow reject <run-id>` or `archon workflow cancel <run-id>` +- **User wants to cancel**: Run `archon workflow reject <run-id>` to stop at an approval gate, or `archon workflow abandon <run-id>` to mark the run cancelled without killing any subprocess.
To actively terminate a still-live subprocess, use the chat slash command `/workflow cancel <run-id>` on the platform that started it — there is no `archon workflow cancel` CLI subcommand. diff --git a/.claude/skills/archon/references/repo-init.md b/.claude/skills/archon/references/repo-init.md index 66be6375f5..e44907fd2e 100644 --- a/.claude/skills/archon/references/repo-init.md +++ b/.claude/skills/archon/references/repo-init.md @@ -10,14 +10,27 @@ Create the following in your repository root: .archon/ ├── commands/ # Custom command files (.md) ├── workflows/ # Workflow definitions (.yaml) +├── scripts/ # Named scripts for script: nodes (.ts/.js for bun, .py for uv) — optional ├── mcp/ # MCP server config files (.json) — optional -└── config.yaml # Repo-specific configuration — optional +├── state/ # Cross-run workflow state — gitignored, never committed +├── config.yaml # Repo-specific configuration — optional +└── .env # Repo-scoped Archon env (optional; do NOT commit) ``` ```bash -mkdir -p .archon/commands .archon/workflows +mkdir -p .archon/commands .archon/workflows .archon/scripts ``` +**What each directory is for:** + +- `commands/` — Reusable prompt templates used by `command:` workflow nodes. Committed to git. +- `workflows/` — YAML workflow definitions. Committed to git. +- `scripts/` — Named TypeScript/JavaScript (bun) or Python (uv) scripts referenced by `script:` nodes. Extension determines runtime: `.ts`/`.js` → bun, `.py` → uv. Committed to git. +- `mcp/` — MCP server JSON configs. Usually checked in with `$ENV_VAR` references; avoid hardcoding secrets. Some teams gitignore this and rely entirely on env expansion. +- `state/` — Workflow-written cross-run state (e.g. the `repo-triage` dedup log). **Always gitignore** — these are runtime artifacts, not source. +- `config.yaml` — Repo-specific defaults (assistant, worktree settings, etc.). Committed to git. +- `.env` — Repo-scoped Archon env (loaded with `override: true` at boot).
**Do NOT commit.** This is different from the target repo's top-level `.env` — that file belongs to the target project, and Archon strips its auto-loaded keys from subprocess env before spawning AI to prevent leakage. See **Three-Path Env Model** below. + ## Minimal config.yaml Create `.archon/config.yaml` only if you need to override defaults: @@ -52,11 +65,59 @@ Archon ships with built-in commands and workflows (like `archon-assist`, `archon Add to your `.gitignore`: ```gitignore -# Archon runtime artifacts (never commit) -.archon/mcp/ # May contain env var references +# Archon runtime artifacts — NEVER commit +.archon/state/ # Cross-run workflow state, runtime-only +.archon/.env # Repo-scoped Archon env (secrets) + +# Optional — gitignore if your MCP configs hardcode secrets +.archon/mcp/ +``` + +`.archon/commands/`, `.archon/workflows/`, and `.archon/scripts/` **should be committed** — they are part of your project's workflow definitions. `.archon/config.yaml` should be committed unless it contains secrets (use `.archon/.env` for those instead). + +## Three-Path Env Model + +Archon loads env from three distinct paths at boot, with different trust levels and precedence: + +| Path | Scope | Trust | Loaded? 
| +|------|-------|-------|---------| +| `~/.archon/.env` | User (home) | Trusted — user owns it | Yes, with `override: true` | +| `<repo>/.archon/.env` | Repo (per-project, Archon-owned) | Trusted — user owns it | Yes, with `override: true` (overrides home) | +| `<repo>/.env` | Target repo | **Untrusted** — belongs to the project being worked on | **Stripped from `process.env`** before subprocess spawn to prevent secret leakage (see [archon.diy/reference/security/](https://archon.diy/reference/security/#target-repo-env-isolation) for the full trust model) | + +Boot behavior emits observable log lines: + +``` +[archon] loaded N keys from ~/.archon/.env +[archon] loaded M keys from /path/to/repo/.archon/.env +[archon] stripped K keys from /path/to/repo (ANTHROPIC_API_KEY, OPENAI_API_KEY, ...) ``` -The `.archon/commands/` and `.archon/workflows/` directories should be committed — they are part of your project's workflow definitions. +**Where should you put what?** + +- **API keys for Archon itself** (`ANTHROPIC_API_KEY`, `CLAUDE_CODE_OAUTH_TOKEN`, `DATABASE_URL`, `SLACK_BOT_TOKEN`, etc.) → `~/.archon/.env` (shared across all repos) or `<repo>/.archon/.env` (per-repo override). +- **Target-project env that a workflow needs** (`GH_TOKEN`, `DOTENV_PRIVATE_KEY`, etc.) → see [Per-Project Env Injection](#per-project-env-injection) below. +- **Target-project env that Archon should NOT touch** → leave it in `<repo>/.env` where the project already expects it. Archon strips it from subprocess env but doesn't delete the file. + +The `archon setup --scope home|project [--force]` wizard writes to the right file for you and produces a timestamped backup on every rewrite.
+ +## Per-Project Env Injection + +For env vars a workflow's `bash:` and `script:` subprocesses need (`GH_TOKEN` for `gh` calls, `DATABASE_URL` for a migration script, etc.), use one of the two **managed injection** surfaces — both inject into subprocess env at workflow execution time, after the target-repo `.env` strip: + +**Option 1: `.archon/config.yaml` `env:` block** (checked into git; values can be `$REF_NAME` expansions from Archon env): + +```yaml +env: + GH_TOKEN: $GH_TOKEN # expanded from ~/.archon/.env at runtime + BUILD_TARGET: production # literal value +``` + +**Option 2: Web UI Settings → Projects → Env Vars** — per-codebase, stored in the Archon DB, values never returned over the API (only keys are listed). Use this for values that should NOT appear in git. + +Both surfaces inject into: Claude/Codex/Pi subprocess env, `bash:` node subprocess env, `script:` node subprocess env, and direct chat messages that run against the codebase. The worktree isolation layer propagates them as well. + +> **About keys in the target repo's `/.env`**: Archon unconditionally strips the keys auto-loaded from `/.env` out of `process.env` at boot (see the Three-Path Env Model above) and the Bun subprocess is invoked with `--no-env-file`, so those values do NOT reach AI / bash / script subprocesses. If a workflow needs a value that currently lives in the target repo's `.env`, surface it through one of the two managed injection options above — don't expect the target `.env` to leak through. ## Global Configuration diff --git a/.claude/skills/archon/references/troubleshooting.md b/.claude/skills/archon/references/troubleshooting.md new file mode 100644 index 0000000000..099cccd928 --- /dev/null +++ b/.claude/skills/archon/references/troubleshooting.md @@ -0,0 +1,162 @@ +# Troubleshooting Workflows + +Where to look when a workflow fails, hangs, or does the wrong thing. 
+ +## Log Locations + +Workflow run logs are written as JSONL per run: + +``` +~/.archon/workspaces///logs/.jsonl +``` + +Each line is a structured event. The discriminator is the `type` field. Values (see `packages/workflows/src/logger.ts` for the canonical list): + +| `type` | Meaning | +|--------|---------| +| `workflow_start` / `workflow_complete` / `workflow_error` | Run lifecycle | +| `node_start` / `node_complete` / `node_error` / `node_skipped` | Node lifecycle | +| `assistant` | AI assistant message — has `content` field with the full AI output | +| `tool` | SDK tool invocation — has `tool_name`, `tool_input`, `duration_ms`, and optionally `tokens` | +| `validation` | Workflow-level validation event — has `check` and `result` (`pass` / `fail` / `warn` / `unknown`) | + +> **Loop iterations and per-attempt retry events are NOT in the JSONL file.** They go through the workflow event emitter (WebSocket / `workflow_events` DB table) under `loop_iteration_started` / `loop_iteration_completed` etc. To see them, query the DB or the Web UI dashboard — not the JSONL log. + +Find the run ID from `archon workflow status` (most recent run). Then: + +```bash +# Last assistant message (what the AI said before failure) +jq 'select(.type == "assistant") | .content' | tail -1 + +# All error events (node failures + workflow-level failures) +jq 'select(.type == "node_error" or .type == "workflow_error")' + +# Full event stream +cat | jq . +``` + +Adapter logs (Slack / Telegram / Web / GitHub) are emitted to stderr when `LOG_LEVEL=debug` is set on the server. + +## Artifact Locations + +``` +~/.archon/workspaces///artifacts/runs// +``` + +Inspect artifacts when a multi-node workflow produces wrong output. The failing node's upstream artifact is usually where the problem originated. 
+ +```bash +ls ~/.archon/workspaces///artifacts/runs// +cat ~/.archon/workspaces///artifacts/runs//issues/issue-42.md +``` + +Artifacts are **external** to the repo on purpose — they don't pollute git. + +## Common Failure Modes + +### "No base branch could be resolved" + +A node references `$BASE_BRANCH` in its prompt, but neither git auto-detection nor `worktree.baseBranch` in `.archon/config.yaml` produced a branch. + +**Fix:** +1. Set `worktree.baseBranch: main` (or `dev`, or whatever) in `.archon/config.yaml`. +2. Or pass `--from ` on `archon workflow run`. +3. Or remove the `$BASE_BRANCH` reference if the node doesn't actually need it. + +### "Claude Code not found" / "Codex CLI binary not found" + +Compiled-binary builds of Archon no longer embed Claude Code / Codex — you install them separately and Archon resolves the binary via env var or config. + +**Fix (Claude):** +- Install: `curl -fsSL https://claude.ai/install.sh | bash` (or `npm install -g @anthropic-ai/claude-code`) +- Set `CLAUDE_BIN_PATH=/path/to/claude` in `~/.archon/.env`, OR +- Set `assistants.claude.claudeBinaryPath: /absolute/path` in `.archon/config.yaml` +- Autodetect covers `$HOME/.local/bin/claude` (native installer) — no config needed if you used that path + +**Fix (Codex):** +- Install: `npm install -g @openai/codex` (or platform-specific instructions) +- Set `CODEX_CLI_PATH=/path/to/codex` or `assistants.codex.codexBinaryPath` in config +- Autodetect covers the standard npm / Homebrew locations per platform + +See [archon.diy/getting-started/installation/](https://archon.diy/getting-started/installation/) for full platform-specific install paths. + +### Workflow shows `running` for a long time but nothing happens + +Three possibilities: + +1. **The AI is actually working.** Check `~/.archon/workspaces///logs/.jsonl` — if you see recent `tool` or `assistant` events in the tail, it's fine. Wait. +2. 
**The server crashed and left an orphan row.** Server startup no longer auto-fails orphaned `running` rows (per the "No Autonomous Lifecycle Mutation" rule — `CLAUDE.md`). Transition it manually: + - Web UI: Dashboard → Abandon or Cancel button on the run card + - CLI: `archon workflow abandon ` — marks the DB row cancelled without killing any subprocess. Right tool for orphans since the subprocess is already gone + - Chat (Slack / Telegram / Web): `/workflow cancel ` — actively terminates the subprocess. Use for a still-live run that needs to be interrupted (there is no `archon workflow cancel` CLI subcommand) +3. **A node is past its `idle_timeout`.** The default is 5 minutes. Override with per-node `idle_timeout: 600000` (10 min) for long-running nodes. + +### Workflow fails mid-way; how do I resume? + +Auto-resume is default — just re-invoke the same workflow at the same cwd: + +```bash +archon workflow run my-workflow "original message" +# → "Resuming workflow — skipping N already-completed node(s)" +``` + +Use `--resume` only when you want to force-reuse the same worktree from a specific failed run. Use `archon workflow resume ` to force a specific run ID. + +**Caveat:** AI session context from prior nodes is NOT restored on resume. If a `context: shared` node depended on in-session memory, re-running it will have fresh context. Artifact-based handoff survives; in-context memory does not. + +### Approval gate not appearing on web UI + +You set `interactive: true` on the approval node but the workflow still runs in the background and no chat message appears. + +**Fix:** Set `interactive: true` at the **workflow level** too. Node-level `interactive` is ignored on web without workflow-level `interactive`. See `references/workflow-dag.md` §Approval Nodes and §Interactive Loops. + +### `MCP server connection failed: ` noise in chat + +User-level Claude plugin MCPs (e.g. `telegram`, `notion`) inherited from `~/.claude/` fail to connect in the headless subprocess. 
This is normal — they're not configured for Archon's worktree context. Archon filters these to debug logs (`dag.mcp_plugin_connection_suppressed`) and surfaces only workflow-configured MCP failures. + +If you see a failure for an MCP you DID configure via `mcp:` in the workflow: check the config JSON path, the MCP server's `command`/`args`, and any referenced env vars. + +### Node output is empty / `$nodeId.output.field` resolves to empty string + +Common causes: + +1. Upstream node is an AI node without `output_format` — the output is free-form text, JSON parsing fails, field access returns empty. +2. Upstream node was **skipped** (its `when:` evaluated false). Downstream `when:` with `==` comparisons against a specific value will fail-closed. +3. Bash/script node printed to stderr, not stdout. Only stdout is captured. +4. For script nodes, non-zero exit on a non-existent file / missing import silently drops the output. Check the run log for `node_error` entries. + +## Useful Diagnostic Commands + +```bash +# All active runs as JSON (running / paused / recently finished, depending on retention) +archon workflow status --json | jq '.runs[]' + +# Human-readable status of any active runs +archon workflow status + +# Active worktrees and their last activity +archon isolation list + +# Validate a specific workflow before running +archon validate workflows my-workflow + +# Validate a specific command +archon validate commands my-command + +# Dump the last 50 lines of a workflow's log +tail -n 50 ~/.archon/workspaces///logs/.jsonl | jq . + +# Increase log verbosity (workflow run) +archon workflow run my-workflow --verbose "..." + +# Increase server log verbosity +LOG_LEVEL=debug bun run start +``` + +## Escalation: when nothing makes sense + +1. Run `archon version` and note the version. +2. Run `archon validate workflows ` and capture the output. +3. Grab the last ~50 lines of the run's JSONL log. +4. 
Check the `CHANGELOG.md` for known issues / recent changes to the subsystem you're hitting. +5. File an issue at https://github.com/coleam00/Archon/issues with version, validate output, log tail, and the YAML. diff --git a/.claude/skills/archon/references/variables.md b/.claude/skills/archon/references/variables.md index 8f3d2dc57f..0275aa7d91 100644 --- a/.claude/skills/archon/references/variables.md +++ b/.claude/skills/archon/references/variables.md @@ -26,6 +26,7 @@ All variables are available in all workflows. The only exception is `$nodeId.out - **Command files** (`.archon/commands/*.md`) — all variables except `$nodeId.output` - **Inline `prompt:` fields** — in DAG prompt nodes and loop node prompts - **`bash:` scripts in DAG nodes** — `$nodeId.output` references are automatically shell-quoted (single-quoted with `'` escaped) +- **`script:` bodies in DAG nodes** — same substitution as bash, but `$nodeId.output` values are **NOT** shell-quoted. Parse with `JSON.parse` / `json.loads` rather than interpolating into shell syntax ## Substitution Order diff --git a/.claude/skills/archon/references/workflow-dag.md b/.claude/skills/archon/references/workflow-dag.md index eefb380646..817d7e9db0 100644 --- a/.claude/skills/archon/references/workflow-dag.md +++ b/.claude/skills/archon/references/workflow-dag.md @@ -20,9 +20,91 @@ nodes: depends_on: [other-node] # Node IDs that must complete first ``` -## Four Node Types (Mutually Exclusive) +## Workflow-Level Fields -Each node must have exactly ONE of these fields: +Top-level YAML fields on a workflow object. Per-node overrides (same name under a node) win over workflow-level defaults. + +### Core + +| Field | Type | Description | +|-------|------|-------------| +| `name` | string (required) | Workflow identifier (used in `archon workflow run `) | +| `description` | string (required) | Human-readable summary. 
Used for routing; see [Workflow Description Best Practices](https://archon.diy/guides/authoring-workflows/#workflow-description-best-practices) | +| `provider` | string | AI provider (e.g. `claude`, `codex`, `pi`). Default: from `.archon/config.yaml` | +| `model` | string | Model override. Claude: `sonnet` \| `opus` \| `haiku` \| `claude-*` \| `inherit`. Codex: any non-Claude model ID | +| `interactive` | boolean | **Required for web UI** when the workflow has approval gates or `loop.interactive` nodes. Forces foreground execution so gate messages reach the user's chat. Default: `false` (background on web) | + +### Isolation + +| Field | Type | Description | +|-------|------|-------------| +| `worktree.enabled` | boolean | Pin isolation regardless of caller. `false` = always live checkout (CLI `--branch`/`--from` hard-error). `true` = always worktree (CLI `--no-worktree` hard-errors). Omit = caller decides. Use `false` for read-only workflows (triage, reporting) | + +Other worktree config (`baseBranch`, `copyFiles`, `initSubmodules`, `path`) lives in `.archon/config.yaml`, not the workflow YAML — see `references/repo-init.md`. + +### Claude SDK Advanced Options + +These fields apply to Claude nodes workflow-wide; each can be overridden per-node. Codex nodes ignore them with a warning. + +| Field | Type | Description | +|-------|------|-------------| +| `effort` | `'low'` \| `'medium'` \| `'high'` \| `'max'` | Claude Agent SDK reasoning depth. Different from Codex `modelReasoningEffort` below | +| `thinking` | string \| object | Extended thinking. String shorthand: `'adaptive'` \| `'enabled'` \| `'disabled'`. Object form: `{ type: 'enabled', budgetTokens: 8000 }` | +| `fallbackModel` | string | Model to use if the primary model fails (e.g. `claude-haiku-4-5-20251001`) | +| `betas` | string[] | SDK beta feature flags (non-empty array). Example: `['context-1m-2025-08-07']` for 1M-context Claude | +| `sandbox` | object | OS-level filesystem/network restrictions. 
Nested `network` / `filesystem` sub-objects — see [archon.diy/guides/authoring-workflows/#claude-sdk-advanced-options](https://archon.diy/guides/authoring-workflows/#claude-sdk-advanced-options) for the full schema. Layers on top of worktree isolation | + +Per-node-only (NOT valid at workflow level): `maxBudgetUsd`, `systemPrompt`. + +### Codex-Specific Options + +| Field | Type | Description | +|-------|------|-------------| +| `modelReasoningEffort` | `'minimal'` \| `'low'` \| `'medium'` \| `'high'` \| `'xhigh'` | Codex reasoning depth. Separate field from Claude's `effort` | +| `webSearchMode` | `'disabled'` \| `'cached'` \| `'live'` | Codex web search behavior. Default: `disabled` | +| `additionalDirectories` | string[] | Absolute paths Codex can read outside the codebase (shared libraries, docs repos) | + +### Complete workflow-level example + +```yaml +name: careful-migration +description: | + Plan a migration, get explicit approval, then implement under strict + sandbox and cost limits. Used by the ops team before destructive work. +provider: claude +model: sonnet +interactive: true # required — this workflow has an approval gate + +worktree: + enabled: true # always isolate; reject --no-worktree + +effort: high +thinking: adaptive +fallbackModel: claude-haiku-4-5-20251001 +betas: ['context-1m-2025-08-07'] +sandbox: + enabled: true + network: + allowedDomains: ['api.github.com'] + allowManagedDomainsOnly: true + filesystem: + denyWrite: ['/etc', '/usr'] + +nodes: + - id: plan + command: plan-migration + - id: review + approval: + message: "Review the migration plan above." + depends_on: [plan] + - id: implement + command: implement-migration + depends_on: [review] +``` + +## Node Types (Mutually Exclusive) + +Each node must have exactly ONE of these fields: `command`, `prompt`, `bash`, `script`, `loop`, `approval`, or `cancel`. 
### Command Node Runs a command file from `.archon/commands/`: @@ -54,6 +136,54 @@ Runs a shell script without AI: - **stderr** forwarded as warning, does not fail the node - No AI invoked — AI-specific fields are ignored - Use `timeout:` (milliseconds) for execution time limit +- `$nodeId.output` substitutions are **auto shell-quoted** (safe to embed) + +### Script Node +Runs TypeScript/JavaScript (via `bun`) or Python (via `uv`) without AI. Same stdout/stderr contract as bash nodes. + +**Inline script (TypeScript):** +```yaml +- id: parse + script: | + const raw = process.argv.slice(2).join(' ') || '{}'; + const data = JSON.parse(raw); + console.log(JSON.stringify({ items: data.items?.length ?? 0 })); + runtime: bun # REQUIRED: 'bun' or 'uv' + timeout: 30000 # ms, default: 120000 +``` + +**Inline script (Python) with uv dependencies:** +```yaml +- id: fetch + script: | + import httpx, json + r = httpx.get("https://api.github.com/repos/anthropics/anthropic-cookbook") + print(json.dumps({ "stars": r.json()["stargazers_count"] })) + runtime: uv + deps: ["httpx>=0.27"] # Optional — 'uv run --with '. Ignored for bun. +``` + +**Named script from `.archon/scripts/`:** +```yaml +- id: analyze + script: analyze-metrics # Resolves .archon/scripts/analyze-metrics.py + runtime: uv # Must match file extension (.ts/.js → bun, .py → uv) + deps: ["pandas>=2.0"] +``` + +- **Inline vs named**: a `script` value is treated as inline code if it contains a newline or any shell metacharacter (space, or any of: `;` `(` `)` `{` `}` `&` `|` `<` `>` `$` `` ` `` `"` `'`). Otherwise it's a named-script lookup (bare identifier). +- **Named script resolution**: `/.archon/scripts/` (wins) → `~/.archon/scripts/`. 1-level subfolder grouping allowed. 
Extension determines runtime (`.ts`/`.js` → `bun`, `.py` → `uv`) and MUST match the declared `runtime:` +- **Dispatch**: + - `bun` + inline → `bun --no-env-file -e ''` + - `bun` + named → `bun --no-env-file run ` + - `uv` + inline → `uv run [--with dep ...] python -c ''` + - `uv` + named → `uv run [--with dep ...] ` +- **`deps`** is uv-only. Bun auto-installs on import; `deps` with `runtime: bun` emits a validator warning +- **stdout** captured as `$nodeId.output` (trailing newline trimmed) +- **stderr** forwarded as warning, does NOT fail the node. Non-zero exit DOES fail it. +- **`bun --no-env-file`** prevents target repo `.env` from leaking into the subprocess +- `$nodeId.output` substitutions are **NOT shell-quoted** in script bodies — parse with `JSON.parse` / `json.loads`, don't interpolate into shell syntax +- AI-specific fields (`model`, `provider`, `hooks`, `mcp`, `skills`, `output_format`, `allowed_tools`, `denied_tools`, `agents`, `effort`, `thinking`, `maxBudgetUsd`, `systemPrompt`, `fallbackModel`, `betas`, `sandbox`) emit a loader warning and are ignored ### Loop Node Iterates an AI prompt until a completion signal or max iterations: @@ -83,7 +213,7 @@ All node types share these fields: | `depends_on` | string[] | `[]` | Node IDs that must settle before this node runs | | `when` | string | — | Condition expression. Node **skipped** when false | | `trigger_rule` | string | `all_success` | Join semantics for multiple dependencies | -| `idle_timeout` | number (ms) | 300000 | Per-node idle timeout. On loop nodes, applies per-iteration | +| `idle_timeout` | number (ms) | 300000 | Idle timeout for AI streaming (`command`, `prompt`) and per-iteration idle for `loop`. Accepted but ignored on `bash` and `script` — use `timeout` there | **Command, prompt, and bash nodes** (silently ignored on loop nodes, except `retry` which is a hard error): @@ -129,14 +259,53 @@ nodes: ## Conditions (`when:`) +Gate whether a node runs based on upstream output. 
A condition that evaluates to `false` skips the node (fail-closed — skipped nodes propagate their skipped state to dependants). + +### Operators + +**String comparison** (literal string equality): ```yaml -- id: investigate - command: investigate-bug - depends_on: [classify] - when: "$classify.output.issue_type == 'bug'" +when: "$nodeId.output == 'VALUE'" +when: "$nodeId.output != 'VALUE'" +when: "$nodeId.output.field == 'VALUE'" # JSON dot notation (requires output_format) ``` -**Syntax**: `$nodeId.output OPERATOR 'value'` — operators: `==`, `!=` only. Values single-quoted. Invalid expressions skip the node (fail-closed). +**Numeric comparison** (both sides auto-parsed as numbers; fail-closed if either side is not finite): +```yaml +when: "$score.output > '80'" +when: "$score.output >= '0.9'" +when: "$score.output < '100'" +when: "$score.output <= '5'" +when: "$score.output.confidence >= '0.9'" +``` + +All six operators — `==`, `!=`, `<`, `>`, `<=`, `>=` — are supported. Values are single-quoted strings (even for numeric comparisons). + +### Compound Expressions + +Combine conditions with `&&` (AND) and `||` (OR). **`&&` binds tighter than `||`.** No parentheses supported — structure expressions with that precedence in mind. + +```yaml +when: "$a.output == 'X' && $b.output != 'Y'" +when: "$a.output == 'X' || $b.output == 'Y'" +when: "$score.output > '80' && $flag.output == 'true'" + +# Precedence: (A && B) || C +when: "$a.output == 'X' && $b.output == 'Y' || $c.output == 'Z'" +``` + +Short-circuit evaluation: `&&` stops at the first false, `||` stops at the first true. + +### Dot Notation (JSON Field Access) + +`$nodeId.output.field` parses the upstream output as JSON and extracts the named field. Returns empty string if parsing fails or the field is absent — which then fails-closed against any literal value. Requires the upstream node to have `output_format` set (for AI nodes) or to print valid JSON (for bash/script nodes). 
+ +### Fail-Closed Rules + +- Invalid or unparseable expression → node skipped, warning logged +- Numeric operator with a non-numeric side → node skipped +- `$nodeId.output.field` on non-JSON output → field is empty → comparison fails +- Referenced node did not run (skipped upstream) → substitution is empty → comparison fails ## Node Output Substitution @@ -211,15 +380,53 @@ Loop nodes iterate an AI prompt until a completion condition is met. Use them fo max_iterations: 10 # Required. Integer >= 1. Fails if exceeded fresh_context: true # Optional. Default: false until_bash: "..." # Optional. Exit 0 = complete + interactive: true # Optional. Pauses between iterations for user input + gate_message: "..." # Required when interactive: true ``` | Field | Type | Required | Description | |-------|------|----------|-------------| -| `prompt` | string | Yes | Prompt template. Supports all variable substitution (`$ARGUMENTS`, `$nodeId.output`, etc.) | +| `prompt` | string | Yes | Prompt template. Supports all variable substitution (`$ARGUMENTS`, `$nodeId.output`, `$LOOP_USER_INPUT`, etc.) | | `until` | string | Yes | Completion signal to detect in AI output | | `max_iterations` | number | Yes | Hard limit. Node **fails** if exceeded | | `fresh_context` | boolean | No | Default `false`. `true` = fresh AI session each iteration | -| `until_bash` | string | No | Shell script run after each iteration. Exit 0 = complete | +| `until_bash` | string | No | Shell script run after each iteration. Exit 0 = complete. Variable substitution applies; `$nodeId.output` IS shell-quoted here | +| `interactive` | boolean | No | Default `false`. `true` = pause after each non-completing iteration for user feedback via `/workflow approve ` | +| `gate_message` | string | **Required when `interactive: true`** | Message shown to the user at each pause. 
Validated at parse time — a loop with `interactive: true` and no `gate_message` fails to load | + +### Interactive Loops + +Interactive loops pause between iterations so a human can provide feedback that feeds the next iteration. Use them for guided writing/refinement (e.g. PRD co-authoring, iterative design). + +```yaml +name: guided-refine +description: Refine an output with human feedback between iterations +interactive: true # REQUIRED at the workflow level for web UI + +nodes: + - id: refine + loop: + prompt: | + Review the current draft and improve it based on this feedback: + $LOOP_USER_INPUT + + When the output is satisfactory, output: DONE + until: DONE + max_iterations: 5 + interactive: true # node level — enables the pause + gate_message: | + Review the output above. Reply with feedback, or type DONE to finish. +``` + +The flow: +1. Iteration N runs. AI produces output. +2. If AI signalled completion (`DONE`) or `until_bash` exited 0, loop ends. +3. Otherwise: `gate_message` is sent to the user, workflow pauses (status = `paused`). +4. User runs `archon workflow approve ""` (or replies naturally in chat platforms). +5. Iteration N+1 runs with `$LOOP_USER_INPUT` substituted to the user's feedback — but **only on that first resumed iteration**. Subsequent iterations in the same resumed session see `$LOOP_USER_INPUT` as empty string. +6. Repeat. + +**Workflow-level `interactive: true` is required** for the gate message to reach the user on the web UI (otherwise the workflow dispatches to a background worker that can't deliver chat messages). The loader emits a warning if a node has `interactive: true` without workflow-level `interactive: true`. ### Completion Detection @@ -279,6 +486,148 @@ First iteration is always fresh regardless. --- +## Approval Nodes + +Approval nodes **pause the workflow** until a human approves or rejects the gate. 
Use them to insert review steps between AI-driven nodes — for example, reviewing a generated plan before committing to expensive implementation work. + +### Configuration + +```yaml +- id: review-gate + approval: + message: "Review the plan above before proceeding with implementation." + capture_response: false # Optional. true = user's comment stored as $review-gate.output + on_reject: # Optional. AI rework on rejection instead of cancel + prompt: "Revise based on feedback: $REJECTION_REASON" + max_attempts: 3 # Range 1–10, default 3. After max, workflow is cancelled. + depends_on: [plan] +``` + +### Fields + +| Field | Required | Description | +|-------|----------|-------------| +| `approval.message` | **Yes** | The message shown to the user when the workflow pauses | +| `approval.capture_response` | No | `true` = user's approval comment stored as `$.output` for downstream nodes. Default: `false` (downstream `$.output` is empty string) | +| `approval.on_reject.prompt` | No | Prompt run via AI when the user rejects. `$REJECTION_REASON` is substituted with the reject reason. After running, the workflow re-pauses at the same gate | +| `approval.on_reject.max_attempts` | No | Max times the on_reject prompt runs before the workflow is cancelled. Range: 1–10. Default: 3 | + +### Web UI Requirement + +Approval gates delivered on the Web UI require `interactive: true` at the **workflow level** — otherwise the workflow dispatches to a background worker and the gate message never reaches the user's chat window. + +```yaml +name: plan-approve-implement +interactive: true # REQUIRED for approval gates on web UI +nodes: + - id: plan + command: plan-feature + - id: review-gate + approval: + message: "Approve the plan to proceed." 
+ depends_on: [plan] + - id: implement + command: implement + depends_on: [review-gate] +``` + +### Approve and Reject Commands + +```bash +# From the CLI +archon workflow approve +archon workflow approve --comment "looks good" +archon workflow reject +archon workflow reject --reason "plan needs more test coverage" + +# Cross-platform (Slack / Telegram / Web / GitHub chat) +/workflow approve +/workflow reject + +# Natural language (all platforms except CLI — auto-detects paused workflow) +User: "Looks good, proceed" +# → auto-approves. With capture_response: true, the message becomes $review-gate.output +``` + +### What Does NOT Work on Approval Nodes + +AI-specific fields (`model`, `provider`, `hooks`, `mcp`, `skills`, `output_format`, `allowed_tools`, `denied_tools`, `context`, `effort`, `thinking`, etc.) are accepted by the parser but emit a loader warning and are ignored — no AI runs during the pause. (Note: `on_reject.prompt` DOES run AI, using the workflow's default provider/model.) + +`retry`, `when`, `trigger_rule`, `depends_on`, `idle_timeout` all work. + +--- + +## Cancel Nodes + +Cancel nodes **terminate the workflow run** with a reason string. Useful for guarded exits — a `cancel:` node with a `when:` condition stops the workflow cleanly when preconditions aren't met. + +### Configuration + +```yaml +- id: gate-branch + cancel: "Refusing to run on main — this workflow modifies files." 
+ when: "$check-branch.output == 'main'" + depends_on: [check-branch] +``` + +When a cancel node runs, Archon: +- Marks the workflow run as `cancelled` (not `failed`) +- Stops in-flight parallel nodes via the existing cancellation plumbing +- Records the reason string in the run's metadata +- Emits a `node_completed` event for the cancel node itself + +### Fields + +| Field | Required | Description | +|-------|----------|-------------| +| `cancel` | **Yes** | Non-empty reason string shown to the user and recorded in metadata | + +Standard DAG fields (`id`, `depends_on`, `when`, `trigger_rule`, `idle_timeout`) all work. AI-specific fields emit a loader warning and are ignored — cancel nodes don't invoke AI. + +### When to use `cancel` vs failing a `bash:` check + +- **Use `cancel:`** when the precondition failure is **expected** (e.g., wrong branch, required file missing, feature flag disabled). The run shows as `cancelled`, which doesn't trigger the DAG auto-resume path. +- **Use a `bash:` node that exits non-zero** when the check itself fails (e.g., network error, tool missing). The run shows as `failed`, which auto-resumes on the next invocation. + +### Typical Patterns + +**Gate on upstream classification:** +```yaml +- id: classify + prompt: "Is the input safe to proceed? Output 'SAFE' or 'UNSAFE'." + allowed_tools: [] + +- id: stop-if-unsafe + cancel: "Refusing to proceed: input flagged UNSAFE by classifier." + depends_on: [classify] + when: "$classify.output != 'SAFE'" + +- id: do-work + command: the-work + depends_on: [classify] + when: "$classify.output == 'SAFE'" +``` + +**Stop before expensive step unless precondition met:** +```yaml +- id: check-budget + bash: | + spent=$(gh api /meta --jq '.rate.used // 0') + echo "$spent" + +- id: abort-if-over + cancel: "Aborting — GH API quota exhausted." 
+ depends_on: [check-budget] + when: "$check-budget.output > '4500'" + +- id: run-api-heavy-work + command: heavy-work + depends_on: [check-budget] + when: "$check-budget.output <= '4500'" +``` + +--- + ## Validate Before Finishing Before declaring a workflow complete, validate it: @@ -302,8 +651,13 @@ Use `--json` for machine-readable output. Use `archon validate commands ` - All `depends_on` reference existing IDs - No cycles - `$nodeId.output` refs in `when:`, `prompt:`, `loop.prompt:` must point to known IDs -- Exactly one of `command`, `prompt`, `bash`, `loop` per node +- Exactly one of `command`, `prompt`, `bash`, `script`, `loop`, `approval`, `cancel` per node +- Script nodes require `runtime: bun` or `runtime: uv` +- Named scripts must exist in `.archon/scripts/` or `~/.archon/scripts/` with extension matching declared runtime - `retry` on loop node = hard error +- `approval.message` required and non-empty +- `cancel` reason required and non-empty +- Approval `on_reject.max_attempts` must be 1–10 if set - `steps:` format rejected (deprecated — use `nodes:` only) ## Complete Example diff --git a/.claude/skills/release/SKILL.md b/.claude/skills/release/SKILL.md index 4f90f70978..1844336f2f 100644 --- a/.claude/skills/release/SKILL.md +++ b/.claude/skills/release/SKILL.md @@ -64,9 +64,15 @@ if [ -f scripts/build-binaries.sh ] && [ -f packages/cli/src/cli.ts ]; then packages/cli/src/cli.ts # Smoke test: the binary must start and exit 0 on a safe, non-interactive command. - # `version` or `--help` are both acceptable — pick one that does NOT touch the - # network, database, or require env vars. - if ! "$TMP_BINARY" version > /tmp/archon-preflight.log 2>&1; then + # Use `--help` (NOT `version`). The `version` command's compiled-binary code + # path depends on BUNDLED_IS_BINARY=true, which is set by scripts/build-binaries.sh + # — but we're doing a bare `bun build --compile` here to keep the smoke fast, + # so BUNDLED_IS_BINARY is still `false`. 
That sends `version` down the dev + # branch of version.ts which tries to read package.json from a path that only + # exists in node_modules, producing a false-positive ENOENT. `--help` has no + # such dev/binary branch and exercises the same module-init graph we're + # actually testing. Must NOT touch network, database, or require env vars. + if ! "$TMP_BINARY" --help > /tmp/archon-preflight.log 2>&1; then echo "ERROR: compiled binary crashed at startup" cat /tmp/archon-preflight.log echo "" diff --git a/.claude/skills/test-release/SKILL.md b/.claude/skills/test-release/SKILL.md index 31029014ea..c93d0c5bee 100644 --- a/.claude/skills/test-release/SKILL.md +++ b/.claude/skills/test-release/SKILL.md @@ -79,6 +79,8 @@ About to test: Path: brew (Homebrew tap on macOS) Version: 0.3.1 (expected) Cleanup: will uninstall after tests (brew uninstall + untap) + If `archon-stable` symlink is detected in Phase 2, it will be + restored at the end of Phase 5 by reinstalling the tap formula. Proceed? (y/N) ``` @@ -112,6 +114,18 @@ gh release view v --repo coleam00/Archon --json tagName,assets --jq '{t If the release does not exist or has no assets, abort with a clear message. Do not proceed to install a non-existent release. +4. **Detect persistent `archon-stable` install (brew path only).** If the user has renamed a prior brew install to `archon-stable` (the dual-homebrew pattern — see `~/.config/fish/functions/brew-upgrade-archon.fish`), Phase 5's `brew uninstall` will wipe it. Capture the state so Phase 5b can restore it: + +```bash +ARCHON_STABLE_WAS_INSTALLED="" +if [ -L /opt/homebrew/bin/archon-stable ] || [ -L /usr/local/bin/archon-stable ]; then + ARCHON_STABLE_WAS_INSTALLED="yes" + echo "Detected persistent archon-stable — will restore after Phase 5 uninstall." +fi +``` + +Export `ARCHON_STABLE_WAS_INSTALLED` into the environment used by Phase 5b. Only applies to the `brew` path — `curl-mac` and `curl-vps` don't go through brew and don't disturb `archon-stable`. 
+ ## Phase 3 — Install ### Path: brew @@ -352,6 +366,25 @@ archon version | head -1 # should match the dev version captured in Phase 2 ``` +**Restore `archon-stable` if it existed before the test** (dual-homebrew pattern — see Phase 2 item 4): + +```bash +if [ -n "$ARCHON_STABLE_WAS_INSTALLED" ]; then + echo "Restoring archon-stable (detected before test)..." + brew tap coleam00/archon + brew install coleam00/archon/archon + BREW_BIN="$(brew --prefix)/bin" + if [ -e "$BREW_BIN/archon" ]; then + mv "$BREW_BIN/archon" "$BREW_BIN/archon-stable" + echo "archon-stable restored: $(archon-stable version 2>/dev/null | head -1)" + else + echo "WARNING: brew install succeeded but $BREW_BIN/archon missing — check formula" + fi +fi +``` + +> **Note on the restored version**: this reinstalls from whatever the tap currently ships, which is typically the release you just tested (so `archon-stable` ends up at the newly-tested version). That's usually what the operator wants — you just verified the new release works, and you want `archon-stable` pointed at it. If you were testing an older version for back-version QA, the restored `archon-stable` will be the *current* tap formula, not the pre-test version. For that rare case, the operator should re-run `brew-upgrade-archon` manually after the test. 
+ ### Path: curl-mac ```bash diff --git a/homebrew/archon.rb b/homebrew/archon.rb index 0bac58a339..d8f4c45c18 100644 --- a/homebrew/archon.rb +++ b/homebrew/archon.rb @@ -7,28 +7,28 @@ class Archon < Formula desc "Remote agentic coding platform - control AI assistants from anywhere" homepage "https://github.com/coleam00/Archon" - version "0.3.6" + version "0.3.9" license "MIT" on_macos do on_arm do url "https://github.com/coleam00/Archon/releases/download/v#{version}/archon-darwin-arm64" - sha256 "96b6dac50b046eece9eddbb988a0c39b4f9a0e2faac66e49b977ba6360069e86" + sha256 "b617f85a2181938b793b25ad816a9f6b3149d184f64b2e9e2ea2430f27778d64" end on_intel do url "https://github.com/coleam00/Archon/releases/download/v#{version}/archon-darwin-x64" - sha256 "09f1dbe12417b4300b7b07b531eb7391a286305f8d4eafc11e7f61f5d26eb8eb" + sha256 "5a928af5e0e67ffe084159161a9ea3994a9304cc39bd06132719cd89cc715e86" end end on_linux do on_arm do url "https://github.com/coleam00/Archon/releases/download/v#{version}/archon-linux-arm64" - sha256 "80b06a6ff699ec57cd4a3e49cfe7b899a3e8212688d70285f5a887bf10086731" + sha256 "567bfca9175e10d9b4fd748e3862bbd34141a234766a7ecf0a714d9c27b8c92e" end on_intel do url "https://github.com/coleam00/Archon/releases/download/v#{version}/archon-linux-x64" - sha256 "09f5dac6db8037ed6f3e5b7e9c5eb8e37f19822a4ed2bf4cd7e654780f9d00de" + sha256 "c918218df2f0f853d107e6b1727dcd9accc034b183ffbccea93a331d8d376ed8" end end diff --git a/packages/docs-web/src/content/docs/adapters/community/discord.md b/packages/docs-web/src/content/docs/adapters/community/discord.md index 0f3e59082c..b719d719ce 100644 --- a/packages/docs-web/src/content/docs/adapters/community/discord.md +++ b/packages/docs-web/src/content/docs/adapters/community/discord.md @@ -40,6 +40,14 @@ Connect Archon to Discord so you can interact with your AI coding assistant from 2. Enable **"Message Content Intent"** (required for the bot to read messages) 3. 
Save changes +:::caution +Skipping this step causes Discord to reject the bot's connection with +`Used disallowed intents`. Archon will log +`discord.start_failed_continuing_without_adapter` and keep the rest of +the server running, but the Discord adapter will be unavailable until +the intent is enabled and the server is restarted. +::: + ## Invite Bot to Your Server 1. Go to "OAuth2" > "URL Generator" in the left sidebar diff --git a/packages/docs-web/src/content/docs/adapters/web.md b/packages/docs-web/src/content/docs/adapters/web.md index 0025ca0219..bb5e43ba91 100644 --- a/packages/docs-web/src/content/docs/adapters/web.md +++ b/packages/docs-web/src/content/docs/adapters/web.md @@ -166,7 +166,7 @@ Click on a workflow run (from the dashboard or progress card) to open the execut The Workflow Builder at `/workflows/builder` provides a visual editor for creating and modifying workflow YAML files. Features include: - **DAG canvas** -- Drag-and-drop nodes to build your workflow graph visually -- **Node palette** -- Add command, prompt, bash, and loop nodes from a sidebar library +- **Node palette** -- Drag command, prompt, and bash nodes from a sidebar library. Additional node types (`script`, `loop`, `approval`, `cancel`) are editable via the Code / Split view - **Node inspector** -- Click a node to configure its properties (command, prompt text, dependencies, model overrides, hooks, MCP servers, etc.) in a tabbed panel - **View modes** -- Toggle between Visual, Split, and Code views. Split mode shows the canvas and YAML side by side. 
- **Command picker** -- Browse available commands when configuring command nodes diff --git a/packages/docs-web/src/content/docs/book/dag-workflows.md b/packages/docs-web/src/content/docs/book/dag-workflows.md index 2a66702584..558df2590f 100644 --- a/packages/docs-web/src/content/docs/book/dag-workflows.md +++ b/packages/docs-web/src/content/docs/book/dag-workflows.md @@ -230,20 +230,23 @@ The classify-and-route example uses `none_failed_min_one_success` on `implement` ## Node Types -Archon supports four node types: +Archon supports seven node types. Exactly one mode field is required per node: | Type | Syntax | When to use | |------|--------|-------------| | **Command** | `command: my-command` | Load a command from `.archon/commands/my-command.md`. The standard choice. | | **Prompt** | `prompt: "inline instructions..."` | Quick, one-off instructions that don't need a reusable command file. | | **Bash** | `bash: "shell command"` | Run a shell script without AI. Stdout is captured as `$nodeId.output`. Deterministic operations only. | +| **Script** | `script: "..." ` + `runtime: bun \| uv` | Run TypeScript/JavaScript (bun) or Python (uv) without AI. Inline code or named reference to `.archon/scripts/`. Stdout captured as `$nodeId.output`. See [Script Nodes](/guides/script-nodes/). | | **Loop** | `loop: { prompt: "...", until: SIGNAL }` | Repeat an AI prompt until a completion signal appears in the output. See [Loop Nodes](/guides/loop-nodes/). | +| **Approval** | `approval: { message: "..." }` | Pause the workflow for a human approve/reject decision. See [Approval Nodes](/guides/approval-nodes/). | +| **Cancel** | `cancel: "reason string"` | Terminate the workflow run (status: cancelled, not failed). Usually gated with `when:`. | **Command** is the most common. Use it for anything you'll reuse across workflows. **Prompt** is convenient for glue nodes — summarizing outputs, formatting data — where the logic is simple and workflow-specific. 
-**Bash** is powerful for deterministic operations: running tests, checking git status, reading a file, fetching an API. The AI doesn't run the bash command; your shell does. The output becomes a variable for downstream nodes: +**Bash** is powerful for deterministic shell operations: running tests, checking git status, reading a file, fetching an API. The AI doesn't run the bash command; your shell does. The output becomes a variable for downstream nodes: ```yaml - id: check-tests @@ -255,6 +258,22 @@ Archon supports four node types: prompt: "Test output: $check-tests.output\n\nFix any failures." ``` +**Script** is for deterministic work that needs a real programming language — parsing JSON, transforming data between AI nodes, calling typed HTTP clients. Use `runtime: bun` for TypeScript/JavaScript and `runtime: uv` for Python: + +```yaml +- id: transform + script: | + const raw = process.env.UPSTREAM ?? '{}'; + const items = JSON.parse(raw).items ?? []; + console.log(JSON.stringify({ count: items.length })); + runtime: bun + +- id: analyze + script: analyze-metrics # Named script: .archon/scripts/analyze-metrics.py + runtime: uv + deps: ["pandas>=2.0"] # uv-only; bun auto-installs imports +``` + **Loop** is for iterative tasks where you don't know how many steps it will take. The AI runs until it emits a completion signal: ```yaml @@ -269,6 +288,32 @@ Archon supports four node types: fresh_context: true ``` +**Approval** pauses the workflow for human review. The downstream nodes don't run until the user approves in chat, CLI, or web UI: + +```yaml +interactive: true # required at workflow level for web UI delivery + +nodes: + - id: plan + command: plan-feature + - id: review-gate + approval: + message: "Review the plan above." + depends_on: [plan] + - id: implement + command: implement + depends_on: [review-gate] +``` + +**Cancel** terminates the workflow with a reason string. 
Pair with `when:` for guarded exits — the run shows as `cancelled` rather than `failed`: + +```yaml +- id: gate-branch + cancel: "Refusing to run on main — this workflow modifies files." + when: "$check-branch.output == 'main'" + depends_on: [check-branch] +``` + --- ## Best Practices diff --git a/packages/docs-web/src/content/docs/book/quick-reference.md b/packages/docs-web/src/content/docs/book/quick-reference.md index ae37659f7a..a0c34643c3 100644 --- a/packages/docs-web/src/content/docs/book/quick-reference.md +++ b/packages/docs-web/src/content/docs/book/quick-reference.md @@ -124,7 +124,10 @@ All nodes share these base fields: | `command` | One of | string | Name of a command file in `.archon/commands/` | | `prompt` | One of | string | Inline AI instructions | | `bash` | One of | string | Shell script (runs without AI; stdout captured as `$nodeId.output`) | +| `script` | One of | string | TypeScript/JavaScript (bun) or Python (uv) — inline or named ref to `.archon/scripts/`. Requires `runtime`. See [Script Nodes](/guides/script-nodes/) | | `loop` | One of | object | Loop configuration (see Loop Options below) | +| `approval` | One of | object | Pause for human review; see [Approval Nodes](/guides/approval-nodes/) | +| `cancel` | One of | string | Reason string; terminates the run with `cancelled` status (not `failed`). 
Usually gated with `when:` | | `depends_on` | No | string[] | Node IDs that must complete before this node runs | | `when` | No | string | Condition expression; node is skipped if false | | `trigger_rule` | No | string | Join semantics when multiple upstreams exist (see Trigger Rules) | @@ -135,12 +138,30 @@ All nodes share these base fields: | `allowed_tools` | No | string[] | Restrict available tools to this list (Claude only) | | `denied_tools` | No | string[] | Remove specific tools from this node's context (Claude only) | | `idle_timeout` | No | number | Per-node idle timeout in milliseconds (default: 5 minutes) | -| `retry` | No | object | Retry configuration for transient failures (see Retry Options) | +| `retry` | No | object | Retry configuration for transient failures (see Retry Options). **Hard error on loop nodes** | | `hooks` | No | object | SDK hook callbacks (Claude only; see Hook Schema) | | `mcp` | No | string | Path to MCP server config JSON file (Claude only) | | `skills` | No | string[] | Skill names to preload into this node's context (Claude only) | +| `agents` | No | object | Inline sub-agent definitions keyed by kebab-case ID. Claude only | -> **bash node timeout**: The `timeout` field on bash nodes is in **milliseconds** (default: 120000). This differs from hook `timeout`, which is in seconds. +**Script-specific fields** (required when `script:` is set): + +| Field | Required | Type | Description | +|-------|----------|------|-------------| +| `runtime` | Yes | `'bun'` \| `'uv'` | Which runtime executes the script. Must match file extension for named scripts (`.ts`/`.js` → bun, `.py` → uv) | +| `deps` | No | string[] | Python dependencies for `uv run --with`. Ignored for bun (bun auto-installs) | +| `timeout` | No | number | Hard kill in ms. Default: 120000 (2 min). 
Same semantics as `bash` timeout | + +**Approval-specific fields** (required when `approval:` is set): + +| Field | Required | Type | Description | +|-------|----------|------|-------------| +| `approval.message` | Yes | string | The message shown to the user when the workflow pauses | +| `approval.capture_response` | No | boolean | `true` = user's comment becomes `$.output`. Default: `false` | +| `approval.on_reject.prompt` | No | string | AI rework prompt when the user rejects. `$REJECTION_REASON` substituted | +| `approval.on_reject.max_attempts` | No | number | Max rework iterations before cancel. Range 1-10, default 3 | + +> **bash and script node timeout**: The `timeout` field is in **milliseconds** (default: 120000). This differs from hook `timeout`, which is in seconds. ### Trigger Rules diff --git a/packages/docs-web/src/content/docs/guides/authoring-workflows.md b/packages/docs-web/src/content/docs/guides/authoring-workflows.md index 0fbc282640..a4bc85fafd 100644 --- a/packages/docs-web/src/content/docs/guides/authoring-workflows.md +++ b/packages/docs-web/src/content/docs/guides/authoring-workflows.md @@ -174,6 +174,7 @@ nodes: | `command` | string | Command name to load from `.archon/commands/` | | `prompt` | string | Inline prompt string | | `bash` | string | Shell script (no AI). Stdout captured as `$nodeId.output`. Optional `timeout` (ms, default 120000) | +| `script` | string | TypeScript/JavaScript (via `bun`) or Python (via `uv`) — inline code or named reference to `.archon/scripts/`. Stdout captured as `$nodeId.output`. Requires `runtime: bun` or `runtime: uv`. Optional `deps` (uv only) and `timeout` (ms, default 120000). See [Script Nodes](/guides/script-nodes/) | | `loop` | object | Iterative AI prompt until completion signal. See [Loop Nodes](/guides/loop-nodes/) | | `approval` | object | Pauses workflow for human review. See [Approval Nodes](/guides/approval-nodes/) | | `cancel` | string | Terminates the workflow run with a reason string. 
Uses existing cancellation plumbing — in-flight parallel nodes are stopped | diff --git a/packages/docs-web/src/content/docs/guides/global-workflows.md b/packages/docs-web/src/content/docs/guides/global-workflows.md index 282881e312..a4651ba0ec 100644 --- a/packages/docs-web/src/content/docs/guides/global-workflows.md +++ b/packages/docs-web/src/content/docs/guides/global-workflows.md @@ -6,7 +6,7 @@ area: workflows audience: [user] status: current sidebar: - order: 8 + order: 9 --- Workflows placed in `~/.archon/workflows/`, commands in `~/.archon/commands/`, and scripts in `~/.archon/scripts/` are loaded globally -- they appear in every project and can be invoked from any repository. Workflows and commands carry the `source: 'global'` label in the Web UI node palette; scripts resolve under the same repo-wins-over-home precedence. diff --git a/packages/docs-web/src/content/docs/guides/hooks.md b/packages/docs-web/src/content/docs/guides/hooks.md index 3e6928ae21..201e60c3cb 100644 --- a/packages/docs-web/src/content/docs/guides/hooks.md +++ b/packages/docs-web/src/content/docs/guides/hooks.md @@ -6,7 +6,7 @@ area: workflows audience: [user] status: current sidebar: - order: 5 + order: 6 --- DAG workflow nodes support a `hooks` field that attaches Claude Agent SDK hooks diff --git a/packages/docs-web/src/content/docs/guides/index.md b/packages/docs-web/src/content/docs/guides/index.md index 0d53209fb6..f3cce0d69e 100644 --- a/packages/docs-web/src/content/docs/guides/index.md +++ b/packages/docs-web/src/content/docs/guides/index.md @@ -20,6 +20,7 @@ How-to guides for building and running AI coding workflows with Archon. 
- [Loop Nodes](/guides/loop-nodes/) — Iterative AI execution with completion conditions and deterministic exit checks - [Approval Nodes](/guides/approval-nodes/) — Human review gates with optional AI rework on rejection +- [Script Nodes](/guides/script-nodes/) — TypeScript/JavaScript (bun) or Python (uv) as a deterministic DAG node, without AI ## Node Features (Claude only) diff --git a/packages/docs-web/src/content/docs/guides/mcp-servers.md b/packages/docs-web/src/content/docs/guides/mcp-servers.md index 46474477e2..c777964d75 100644 --- a/packages/docs-web/src/content/docs/guides/mcp-servers.md +++ b/packages/docs-web/src/content/docs/guides/mcp-servers.md @@ -6,7 +6,7 @@ area: workflows audience: [user] status: current sidebar: - order: 6 + order: 7 --- DAG workflow nodes support a `mcp` field that attaches MCP (Model Context Protocol) diff --git a/packages/docs-web/src/content/docs/guides/remotion-workflow.md b/packages/docs-web/src/content/docs/guides/remotion-workflow.md index d68831be91..666b1ad916 100644 --- a/packages/docs-web/src/content/docs/guides/remotion-workflow.md +++ b/packages/docs-web/src/content/docs/guides/remotion-workflow.md @@ -6,7 +6,7 @@ area: workflows audience: [user] status: current sidebar: - order: 9 + order: 10 --- The `archon-remotion-generate` workflow uses AI to create Remotion video compositions. diff --git a/packages/docs-web/src/content/docs/guides/script-nodes.md b/packages/docs-web/src/content/docs/guides/script-nodes.md new file mode 100644 index 0000000000..73a0ad9fbe --- /dev/null +++ b/packages/docs-web/src/content/docs/guides/script-nodes.md @@ -0,0 +1,333 @@ +--- +title: Script Nodes +description: Run TypeScript, JavaScript, or Python code as a DAG node without invoking an AI agent. +category: guides +area: workflows +audience: [user] +status: current +sidebar: + order: 5 +--- + +DAG workflow nodes support a `script` field that runs a TypeScript, JavaScript, +or Python snippet as part of the workflow. 
No AI agent is invoked — the script +runs via the `bun` or `uv` runtime, `stdout` is captured as the node's output, +and the result is available downstream as `$nodeId.output`. + +Use script nodes for deterministic work that needs a real programming language: +parsing JSON, transforming data between upstream AI nodes, calling HTTP APIs +with typed clients, or computing values that a shell one-liner would mangle. +If a plain shell command is enough, use a [`bash:` node](/guides/authoring-workflows/#node-fields) +instead. + +## Quick Start + +### Inline TypeScript (bun) + +```yaml +nodes: + - id: parse + script: | + const data = { count: 42, label: "ok" }; + console.log(JSON.stringify(data)); + runtime: bun +``` + +### Inline Python (uv) + +```yaml +nodes: + - id: compute + script: | + import json, statistics + values = [1, 2, 3, 4, 5] + print(json.dumps({ "mean": statistics.mean(values) })) + runtime: uv +``` + +### Named script from `.archon/scripts/` + +```yaml +nodes: + - id: fetch-pages + script: fetch-github-pages # resolves .archon/scripts/fetch-github-pages.ts + runtime: bun + timeout: 60000 +``` + +The file `.archon/scripts/fetch-github-pages.ts` is loaded and executed with +`bun --no-env-file run `. + +## How It Works + +1. **Substitute variables.** `$ARGUMENTS`, `$WORKFLOW_ID`, `$ARTIFACTS_DIR`, + `$BASE_BRANCH`, `$DOCS_DIR`, and upstream `$nodeId.output` references are + substituted into the `script` text before execution. +2. **Detect inline vs named.** If the `script` value contains a newline or any + shell metacharacter (see [Inline vs Named Scripts](#inline-vs-named-scripts) + below), it's treated as inline code. Otherwise it's treated as a named-script + reference. +3. **Dispatch.** + - `runtime: bun` + inline → `bun --no-env-file -e ''` + - `runtime: bun` + named → `bun --no-env-file run ` + - `runtime: uv` + inline → `uv run [--with dep ...] python -c ''` + - `runtime: uv` + named → `uv run [--with dep ...] ` +4. 
**Capture.** `stdout` (with the trailing newline stripped) becomes + `$nodeId.output`. `stderr` is logged as a warning and posted to the + conversation but does **not** fail the node. A non-zero exit code fails it. + +## YAML Schema + +```yaml +- id: node-name + script: # required, non-empty + runtime: bun | uv # required + deps: ["httpx", "pydantic>=2"] # optional, uv-only (see below) + timeout: 60000 # optional ms, default 120000 + depends_on: [upstream] # optional + when: "$upstream.output != ''" # optional + trigger_rule: all_success # optional (default) + retry: # optional; same shape as bash/AI nodes + max_attempts: 3 + on_error: transient +``` + +### Fields + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `script` | string | Yes | Inline code, or the basename (no extension) of a file in `.archon/scripts/` or `~/.archon/scripts/` | +| `runtime` | `'bun'` \| `'uv'` | Yes | Which runtime executes the script. Must match the file extension for named scripts | +| `deps` | string[] | No | Python dependencies to install for this run. **uv only** — ignored with a warning for `bun` | +| `timeout` | number (ms) | No | Hard kill after this many milliseconds. Default: `120000` (2 min) | + +Standard DAG fields (`id`, `depends_on`, `when`, `trigger_rule`, `retry`) all +work. AI-specific fields (`model`, `provider`, `context`, `output_format`, +`allowed_tools`, `denied_tools`, `hooks`, `mcp`, `skills`, `agents`, `effort`, +`thinking`, `maxBudgetUsd`, `systemPrompt`, `fallbackModel`, `betas`, `sandbox`) +are accepted by the parser but emit a loader warning and are ignored at runtime +— no AI is invoked. `idle_timeout` is also accepted but ignored: script nodes +run as one-shot subprocesses, so use `timeout` (hard kill after N ms) instead. + +## Inline vs Named Scripts + +The executor decides mode from the `script` string itself. 
A value is treated +as **inline code** if it contains a newline or any shell metacharacter; otherwise +it's a **named script** lookup. + +- **Metacharacters that trigger inline mode:** space, `;` `(` `)` `{` `}` `&` + `|` `<` `>` `$` `` ` `` `"` `'` +- **Inline examples:** `"const x = 1; console.log(x)"`, multi-line blocks, any + snippet with a space +- **Named examples:** `fetch-pages`, `analyze_metrics`, `triage-fmt` — bare + identifiers with no whitespace or shell syntax + +If you want an inline snippet that happens to be syntactically a single +identifier, add a trailing comment or newline to force inline mode. + +### Named Script Resolution + +Named scripts are discovered from, in precedence order: + +1. `/.archon/scripts/` — repo-local +2. `~/.archon/scripts/` — home-scoped (shared across every repo) + +Each directory is walked one subfolder deep (e.g. `.archon/scripts/triage/foo.ts` +resolves as `foo`). Deeper nesting is ignored. On a same-name collision the +repo-local entry wins silently — see [Global Workflows](/guides/global-workflows/) +for the shared precedence rules. + +### Extension ↔ Runtime Mapping + +Named scripts derive their runtime from the file extension: + +| Extension | Runtime | +|-----------|---------| +| `.ts`, `.js` | `bun` | +| `.py` | `uv` | + +The `runtime:` declared on the node **must match the file's extension** — the +validator rejects `runtime: uv` pointing at a `.ts` file, and vice versa. For +inline scripts, you can use any language that the chosen runtime supports. + +## Dependencies (uv only) + +`deps` is a pass-through to `uv run --with `, which installs packages into +a per-run ephemeral environment: + +```yaml +- id: scrape + script: | + import httpx + r = httpx.get("https://api.github.com/repos/anthropics/anthropic-cookbook") + print(r.text) + runtime: uv + deps: ["httpx>=0.27"] +``` + +- **Version pinning** — any PEP 508 specifier works (`pkg==1.2.3`, `pkg>=2,<3`). 
+- **Bun ignores `deps`** — Bun auto-installs imported packages on first run, so + the validator emits a warning if you set `deps` with `runtime: bun`. Remove + the field, or switch to `uv` if you need explicit dependency management. +- **No persistent environment** — each run is isolated; there is no `requirements.txt` + or lockfile to maintain. + +## Output and Data Flow + +`stdout` (trimmed of its trailing newline) becomes `$nodeId.output`. Print JSON +if you want downstream nodes to access structured fields with +`$nodeId.output.field` — the workflow engine tries to parse the output as JSON +for field access in `when:` conditions and prompt substitution. + +```yaml +- id: classify + script: | + const input = process.argv.slice(2).join(' '); + const severity = input.includes('crash') ? 'high' : 'low'; + console.log(JSON.stringify({ severity, length: input.length })); + runtime: bun + +- id: investigate + command: investigate-bug + depends_on: [classify] + when: "$classify.output.severity == 'high'" +``` + +### Variable Substitution in Scripts + +Variables are substituted into the `script` text **as raw strings, without +shell quoting** — unlike `bash:` nodes, where `$nodeId.output` values are +auto-quoted. Treat substituted values as untrusted input and parse them with +language features, not by interpolating into shell syntax. + +For **named scripts**, variables are not passed automatically. Read them from +the environment (`process.env.USER_MESSAGE`, `os.environ['USER_MESSAGE']`) +or accept them via stdin. For **inline scripts**, substituted variables are +literally embedded into the code string at execution time. + +## Environment and Isolation + +Script subprocesses receive `process.env` merged with any codebase-scoped env +vars you've configured via the Web UI (Settings → Projects → Env Vars) or the +`env:` block in `.archon/config.yaml`. This is the same injection surface used +by Claude, Codex, and bash nodes. 
+ +**Target repo `.env` isolation:** the Bun subprocess is invoked with +`--no-env-file`, so variables in the target repo's `.env` do **not** leak into +the script. Archon-managed env (from `~/.archon/.env` and `/.archon/.env`) +passes through normally. `uv`-launched Python subprocesses do not auto-load +`.env` at all. See [Security Model](/reference/security/#target-repo-env-isolation) +for the full story. + +## Validation + +`archon validate workflows ` checks script nodes for: + +- **Script file exists** — for named scripts, the basename must exist in + `.archon/scripts/` or `~/.archon/scripts/` with a matching extension for + the declared runtime. Missing files fail validation with a hint showing + the expected path. +- **Runtime available on PATH** — `bun` or `uv` must be installed. Missing + runtimes emit a warning with the official install command: + - `curl -fsSL https://bun.sh/install | bash` + - `curl -LsSf https://astral.sh/uv/install.sh | sh` +- **`deps` with `runtime: bun`** — warns that `deps` is a no-op under Bun. + +Runtime availability is cached per-process — the check spawns `which bun` / +`which uv` once and memoizes the result. + +## Patterns + +### Transform AI output before the next node + +Use a script node as a deterministic adapter between two AI nodes. The script +parses the upstream classifier's JSON, filters, and forwards a clean payload: + +```yaml +- id: classify + prompt: "Classify: $ARGUMENTS" + allowed_tools: [] + output_format: + type: object + properties: + items: + type: array + items: { type: object } + +- id: filter + script: | + const upstream = JSON.parse(process.env.UPSTREAM ?? '{}'); + const high = (upstream.items ?? 
[]).filter(i => i.severity === 'high'); + console.log(JSON.stringify(high)); + runtime: bun + depends_on: [classify] + +- id: triage + command: triage-high-severity + depends_on: [filter] + when: "$filter.output != '[]'" +``` + +*(Note: to actually populate `UPSTREAM` you'd inline-substitute +`$classify.output` into the script body. The example above illustrates the +shape.)* + +### Reusable helper in `~/.archon/scripts/` + +A helper you want available in every repo — say, a triage summary formatter — +lives at `~/.archon/scripts/triage-fmt.ts`: + +```typescript +// ~/.archon/scripts/triage-fmt.ts +const raw = process.argv.slice(2).join(' ') || '{}'; +const data = JSON.parse(raw); +const lines = data.issues?.map((i: { id: string; title: string }) => + `- [${i.id}] ${i.title}` +).join('\n') ?? ''; +console.log(lines || 'no issues'); +``` + +Then reference it by name from any repo's workflow: + +```yaml +- id: format + script: triage-fmt + runtime: bun + depends_on: [gather] +``` + +### Python with scientific dependencies + +```yaml +- id: analyze + script: | + import json, sys + import pandas as pd + data = json.loads(sys.argv[1]) if len(sys.argv) > 1 else [] + df = pd.DataFrame(data) + print(df.describe().to_json()) + runtime: uv + deps: ["pandas>=2.0"] + depends_on: [collect] +``` + +## What Does NOT Work + +- **AI-only features** — `hooks`, `mcp`, `skills`, `allowed_tools`, + `denied_tools`, `agents`, `model`, `provider`, `output_format`, `effort`, + `thinking`, `maxBudgetUsd`, `systemPrompt`, `fallbackModel`, `betas`, and + `sandbox` are all ignored at runtime. The loader emits a warning listing + the ignored fields. +- **Interactive prompts** — the script runs headlessly; any `stdin` read will + see EOF immediately. +- **Runtimes other than `bun` and `uv`** — rejected at parse time. +- **Cancelling mid-execution** — script subprocesses are killed on workflow + cancel, but there's no cooperative cancellation signal. 
Design scripts to + complete quickly or fail fast. + +## See Also + +- [Authoring Workflows](/guides/authoring-workflows/) — full workflow reference +- [Global Workflows, Commands, and Scripts](/guides/global-workflows/) — home-scoped `~/.archon/scripts/` +- [Security Model](/reference/security/#target-repo-env-isolation) — env isolation details +- [Variables Reference](/reference/variables/) — substitution rules diff --git a/packages/docs-web/src/content/docs/guides/skills.md b/packages/docs-web/src/content/docs/guides/skills.md index d27262ffac..f64b6def3d 100644 --- a/packages/docs-web/src/content/docs/guides/skills.md +++ b/packages/docs-web/src/content/docs/guides/skills.md @@ -6,7 +6,7 @@ area: workflows audience: [user] status: current sidebar: - order: 7 + order: 8 --- DAG workflow nodes support a `skills` field that preloads named skills into the diff --git a/packages/docs-web/src/content/docs/reference/variables.md b/packages/docs-web/src/content/docs/reference/variables.md index f32779cb6c..127ab8d653 100644 --- a/packages/docs-web/src/content/docs/reference/variables.md +++ b/packages/docs-web/src/content/docs/reference/variables.md @@ -8,11 +8,11 @@ sidebar: order: 5 --- -Archon substitutes variables in command files, inline prompts, and bash scripts before execution. There are three categories of variables: workflow variables (substituted by the workflow engine), positional arguments (substituted by the command handler), and node output references (DAG workflows only). +Archon substitutes variables in command files, inline prompts, bash scripts, and `script:` node bodies before execution. There are three categories of variables: workflow variables (substituted by the workflow engine), positional arguments (substituted by the command handler), and node output references (DAG workflows only). ## Workflow Variables -These variables are substituted by the workflow executor in all node types (`command:`, `prompt:`, `bash:`, `loop:`). 
+These variables are substituted by the workflow executor in all node types (`command:`, `prompt:`, `bash:`, `script:`, `loop:`). | Variable | Resolves to | Notes | |----------|-------------|-------| @@ -64,6 +64,10 @@ In DAG workflows, nodes can reference the output of any completed upstream node. | `$nodeId.output` | Full output string of the referenced node | The node must be a declared dependency (in `depends_on`) | | `$nodeId.output.field` | A specific JSON field from the node's output | Requires the upstream node to use `output_format` for structured JSON | +### Shell Quoting in `bash:` vs `script:` + +`$nodeId.output` values are **auto shell-quoted** (single-quoted, with embedded `'` escaped) when substituted into `bash:` scripts, so the value is always safe to embed in a shell command. They are **not** shell-quoted when substituted into `script:` bodies — the raw value is embedded as-is. For script nodes, treat substituted values as untrusted input and parse them with language features (e.g. `JSON.parse`), not by interpolating into shell syntax. + ### Example ```yaml diff --git a/packages/providers/src/claude/binary-resolver.test.ts b/packages/providers/src/claude/binary-resolver.test.ts index f87e78f36d..c5c407a531 100644 --- a/packages/providers/src/claude/binary-resolver.test.ts +++ b/packages/providers/src/claude/binary-resolver.test.ts @@ -5,6 +5,8 @@ * with BUNDLED_IS_BINARY=true, which conflicts with other test files. 
*/ import { describe, test, expect, mock, beforeEach, afterAll, spyOn } from 'bun:test'; +import { homedir } from 'node:os'; +import { join } from 'node:path'; import { createMockLogger } from '../test/mocks/logger'; const mockLogger = createMockLogger(); @@ -76,7 +78,55 @@ describe('resolveClaudeBinaryPath (binary mode)', () => { expect(result).toBe('/env/cli.js'); }); - test('throws with install instructions when nothing configured', async () => { + test('autodetects native installer path when env and config are unset', async () => { + // Mirror the implementation: use os.homedir() + node:path.join so the + // expected path matches the platform's actual home dir and separator. + const expected = join( + homedir(), + '.local', + 'bin', + process.platform === 'win32' ? 'claude.exe' : 'claude' + ); + // File exists only at the native-installer path. + fileExistsSpy = spyOn(resolver, 'fileExists').mockImplementation( + (path: string) => path === expected + ); + + const result = await resolver.resolveClaudeBinaryPath(); + expect(result).toBe(expected); + // Log must mark this as autodetect, not 'env' or 'config' — the source + // string is load-bearing for debug triage. 
+ expect(mockLogger.info).toHaveBeenCalledWith( + { binaryPath: expected, source: 'autodetect' }, + 'claude.binary_resolved' + ); + }); + + test('env var takes precedence over autodetect when both would match', async () => { + process.env.CLAUDE_BIN_PATH = '/custom/env/claude'; + fileExistsSpy = spyOn(resolver, 'fileExists').mockReturnValue(true); + + const result = await resolver.resolveClaudeBinaryPath(); + expect(result).toBe('/custom/env/claude'); + expect(mockLogger.info).toHaveBeenCalledWith( + { binaryPath: '/custom/env/claude', source: 'env' }, + 'claude.binary_resolved' + ); + }); + + test('config takes precedence over autodetect when both would match', async () => { + fileExistsSpy = spyOn(resolver, 'fileExists').mockReturnValue(true); + + const result = await resolver.resolveClaudeBinaryPath('/custom/config/claude'); + expect(result).toBe('/custom/config/claude'); + expect(mockLogger.info).toHaveBeenCalledWith( + { binaryPath: '/custom/config/claude', source: 'config' }, + 'claude.binary_resolved' + ); + }); + + test('throws with install instructions when nothing is configured and autodetect misses', async () => { + // Every probe returns false — env unset, config unset, native path absent. fileExistsSpy = spyOn(resolver, 'fileExists').mockReturnValue(false); const promise = resolver.resolveClaudeBinaryPath(); diff --git a/packages/providers/src/claude/binary-resolver.ts b/packages/providers/src/claude/binary-resolver.ts index f236acb277..c2273d85d2 100644 --- a/packages/providers/src/claude/binary-resolver.ts +++ b/packages/providers/src/claude/binary-resolver.ts @@ -9,13 +9,16 @@ * Resolution order (binary mode only): * 1. `CLAUDE_BIN_PATH` environment variable * 2. `assistants.claude.claudeBinaryPath` in config - * 3. Throw with install instructions + * 3. Autodetect canonical install path (native installer default) + * 4. 
Throw with install instructions * * In dev mode (BUNDLED_IS_BINARY=false), returns undefined so the caller * omits `pathToClaudeCodeExecutable` entirely and the SDK resolves via its * normal node_modules lookup. */ import { existsSync as _existsSync } from 'node:fs'; +import { homedir } from 'node:os'; +import { join } from 'node:path'; import { BUNDLED_IS_BINARY, createLogger } from '@archon/paths'; /** Wrapper for existsSync — enables spyOn in tests (direct imports can't be spied on). */ @@ -89,6 +92,25 @@ export async function resolveClaudeBinaryPath( return configClaudeBinaryPath; } - // 3. Not found — throw with install instructions + // 3. Autodetect — the Anthropic native installer + // (`curl -fsSL https://claude.ai/install.sh | bash` on macOS/Linux, + // `irm https://claude.ai/install.ps1 | iex` on Windows) writes the + // executable to a fixed location relative to $HOME. Users who follow + // the recommended install path don't need any env var or config entry; + // users who deviate (npm global, custom path, etc.) still set one of + // the higher-priority sources above. + const nativeInstallerPath = + process.platform === 'win32' + ? join(homedir(), '.local', 'bin', 'claude.exe') + : join(homedir(), '.local', 'bin', 'claude'); + if (fileExists(nativeInstallerPath)) { + getLog().info( + { binaryPath: nativeInstallerPath, source: 'autodetect' }, + 'claude.binary_resolved' + ); + return nativeInstallerPath; + } + + // 4. 
Not found — throw with install instructions throw new Error(INSTALL_INSTRUCTIONS); } diff --git a/packages/providers/src/codex/binary-resolver.test.ts b/packages/providers/src/codex/binary-resolver.test.ts index 1df4e7c6f6..a121e4c204 100644 --- a/packages/providers/src/codex/binary-resolver.test.ts +++ b/packages/providers/src/codex/binary-resolver.test.ts @@ -87,7 +87,70 @@ describe('resolveCodexBinaryPath (binary mode)', () => { expect(normalized).toContain('/tmp/test-archon-home/vendor/codex/'); }); + test('autodetects npm global install at ~/.npm-global/bin/codex (POSIX)', async () => { + if (process.platform === 'win32') return; // POSIX-only probe + const home = process.env.HOME ?? '/Users/test'; + const expected = `${home}/.npm-global/bin/codex`; + fileExistsSpy = spyOn(resolver, 'fileExists').mockImplementation( + (path: string) => path === expected + ); + + const result = await resolver.resolveCodexBinaryPath(); + expect(result).toBe(expected); + expect(mockLogger.info).toHaveBeenCalledWith( + { binaryPath: expected, source: 'autodetect' }, + 'codex.binary_resolved' + ); + }); + + test('autodetects homebrew install on Apple Silicon', async () => { + if (process.platform !== 'darwin' || process.arch !== 'arm64') { + // `/opt/homebrew/bin/codex` is only probed on darwin-arm64; on other + // hosts this test has nothing to assert (the probe list excludes it). + return; + } + fileExistsSpy = spyOn(resolver, 'fileExists').mockImplementation( + (path: string) => path === '/opt/homebrew/bin/codex' + ); + + const result = await resolver.resolveCodexBinaryPath(); + expect(result).toBe('/opt/homebrew/bin/codex'); + expect(mockLogger.info).toHaveBeenCalledWith( + { binaryPath: '/opt/homebrew/bin/codex', source: 'autodetect' }, + 'codex.binary_resolved' + ); + }); + + test('autodetects system install at /usr/local/bin/codex', async () => { + if (process.platform === 'win32') { + // /usr/local/bin is not probed on Windows. 
+ return; + } + fileExistsSpy = spyOn(resolver, 'fileExists').mockImplementation( + (path: string) => path === '/usr/local/bin/codex' + ); + + const result = await resolver.resolveCodexBinaryPath(); + expect(result).toBe('/usr/local/bin/codex'); + }); + + test('vendor directory takes precedence over autodetect', async () => { + // Both vendor and npm-global would match; vendor must win (lower tier #). + fileExistsSpy = spyOn(resolver, 'fileExists').mockImplementation((path: string) => { + const normalized = path.replace(/\\/g, '/'); + return normalized.includes('vendor/codex') || normalized.includes('.npm-global'); + }); + + const result = await resolver.resolveCodexBinaryPath(); + expect(result!.replace(/\\/g, '/')).toContain('/vendor/codex/'); + expect(mockLogger.info).toHaveBeenCalledWith( + expect.objectContaining({ source: 'vendor' }), + 'codex.binary_resolved' + ); + }); + test('throws with install instructions when binary not found anywhere', async () => { + // Env unset, config unset, vendor dir empty, every autodetect path missing. fileExistsSpy = spyOn(resolver, 'fileExists').mockReturnValue(false); await expect(resolver.resolveCodexBinaryPath()).rejects.toThrow('Codex CLI binary not found'); diff --git a/packages/providers/src/codex/binary-resolver.ts b/packages/providers/src/codex/binary-resolver.ts index a1e0f01a5b..1ac8e57cfb 100644 --- a/packages/providers/src/codex/binary-resolver.ts +++ b/packages/providers/src/codex/binary-resolver.ts @@ -9,12 +9,14 @@ * 1. `CODEX_BIN_PATH` environment variable * 2. `assistants.codex.codexBinaryPath` in config * 3. `~/.archon/vendor/codex/` (user-placed) - * 4. Throw with install instructions + * 4. Autodetect canonical install paths (npm prefix defaults per platform) + * 5. Throw with install instructions * * In dev mode (BUNDLED_IS_BINARY=false), returns undefined so the SDK * uses its normal node_modules-based resolution. 
*/ import { existsSync as _existsSync } from 'node:fs'; +import { homedir } from 'node:os'; import { join } from 'node:path'; import { BUNDLED_IS_BINARY, getArchonHome, createLogger } from '@archon/paths'; @@ -89,7 +91,19 @@ export async function resolveCodexBinaryPath( } } - // 4. Not found — throw with install instructions + // 4. Autodetect — probe the handful of paths Codex typically lands at + // when installed via the documented package managers. Users who install + // somewhere else (custom npm prefix, etc.) still set one of the higher- + // priority sources above. Order: most specific → least specific. + const autodetectPaths = getAutodetectPaths(); + for (const probePath of autodetectPaths) { + if (fileExists(probePath)) { + getLog().info({ binaryPath: probePath, source: 'autodetect' }, 'codex.binary_resolved'); + return probePath; + } + } + + // 5. Not found — throw with install instructions const vendorPath = `~/.archon/${CODEX_VENDOR_DIR}/`; throw new Error( 'Codex CLI binary not found. The Codex provider requires a native binary\n' + @@ -105,3 +119,47 @@ export async function resolveCodexBinaryPath( ' codexBinaryPath: /path/to/codex\n' ); } + +/** + * Canonical install locations probed by tier 4 autodetect. Grounded in + * the official @openai/codex README and the npm global-install contract + * (npm writes the binary to `{npm_prefix}/bin/` on POSIX and + * `{npm_prefix}\.cmd` on Windows). 
The probes cover the npm prefix + * a default install lands at on each platform: + * + * - `$HOME/.npm-global/bin/codex` — common when the user ran + * `npm config set prefix ~/.npm-global` to avoid root writes + * - `/opt/homebrew/bin/codex` — mac Apple Silicon with homebrew-node + * (homebrew sets npm prefix to /opt/homebrew) + * - `/usr/local/bin/codex` — mac Intel with homebrew-node, or linux + * with system-installed node (npm prefix defaults to /usr/local) + * - `%AppData%\npm\codex.cmd` — Windows npm global default + * + * Not covered (explicit override required via CODEX_BIN_PATH or config): + * - users with other custom npm prefixes — `npm root -g` would spawn + * a subprocess per resolve, too heavy for a probe helper + * - Homebrew cask install (`brew install --cask codex`) — cask layout + * isn't a PATH binary; users should symlink or set the path + * - manual GitHub Releases extract — placement is user-determined + */ +function getAutodetectPaths(): string[] { + const paths: string[] = []; + + if (process.platform === 'win32') { + const appData = process.env.APPDATA; + if (appData) paths.push(join(appData, 'npm', 'codex.cmd')); + paths.push(join(homedir(), '.npm-global', 'codex.cmd')); + return paths; + } + + // POSIX (macOS + Linux) + paths.push(join(homedir(), '.npm-global', 'bin', 'codex')); + + if (process.platform === 'darwin' && process.arch === 'arm64') { + paths.push('/opt/homebrew/bin/codex'); + } + + paths.push('/usr/local/bin/codex'); + + return paths; +} diff --git a/packages/providers/src/codex/provider.test.ts b/packages/providers/src/codex/provider.test.ts index 669826ebc3..8aaef21295 100644 --- a/packages/providers/src/codex/provider.test.ts +++ b/packages/providers/src/codex/provider.test.ts @@ -114,6 +114,29 @@ describe('CodexProvider', () => { }); }); + test('preserves cached input tokens from Codex usage', async () => { + mockRunStreamed.mockResolvedValue({ + events: (async function* () { + yield { + type: 'turn.completed', + usage: 
{ input_tokens: 100, cached_input_tokens: 80, output_tokens: 25 }, + }; + })(), + }); + + const chunks = []; + for await (const chunk of client.sendQuery('test prompt', '/workspace')) { + chunks.push(chunk); + } + + expect(chunks).toHaveLength(1); + expect(chunks[0]).toEqual({ + type: 'result', + sessionId: 'new-thread-id', + tokens: { input: 100, output: 25, cacheRead: 80 }, + }); + }); + test('yields tool events from command_execution items', async () => { mockRunStreamed.mockResolvedValue({ events: (async function* () { diff --git a/packages/providers/src/codex/provider.ts b/packages/providers/src/codex/provider.ts index b9e1d493e9..3237220e3f 100644 --- a/packages/providers/src/codex/provider.ts +++ b/packages/providers/src/codex/provider.ts @@ -144,9 +144,11 @@ function extractUsageFromCodexEvent(event: TurnCompletedEvent): TokenUsage { getLog().warn({ eventType: event.type }, 'codex.usage_null_on_turn_completed'); return { input: 0, output: 0 }; } + const cacheRead = event.usage.cached_input_tokens; return { input: event.usage.input_tokens, output: event.usage.output_tokens, + ...(typeof cacheRead === 'number' && cacheRead > 0 ? { cacheRead } : {}), }; } @@ -310,7 +312,7 @@ async function* streamCodexEvents( typeof rawError === 'string' ? rawError : typeof rawError === 'object' && rawError !== null && 'message' in rawError - ? String((rawError as { message: unknown }).message) + ? 
String(rawError.message) : undefined; const changes = item.changes as { kind: string; path?: string }[] | undefined; diff --git a/packages/providers/src/community/pi/provider.test.ts b/packages/providers/src/community/pi/provider.test.ts index 17e6de417d..40ffcec80f 100644 --- a/packages/providers/src/community/pi/provider.test.ts +++ b/packages/providers/src/community/pi/provider.test.ts @@ -209,6 +209,21 @@ describe('PiProvider', () => { expect(new PiProvider().getCapabilities()).toEqual(PI_CAPABILITIES); }); + test('sendQuery installs PI_PACKAGE_DIR shim before Pi SDK loads', async () => { + // Runtime-safety regression: Pi's config.js reads `getPackageJsonPath()` at + // its module init, which resolves to a non-existent path inside compiled + // archon binaries. The shim writes a stub package.json to tmpdir and sets + // PI_PACKAGE_DIR so Pi's short-circuit kicks in. Must run BEFORE the + // dynamic imports in sendQuery — we verify by calling the fast-fail "no + // model" path (which returns before any Pi SDK logic executes) and + // asserting the env var was set regardless. 
+ delete process.env.PI_PACKAGE_DIR; + expect(process.env.PI_PACKAGE_DIR).toBeUndefined(); + await consume(new PiProvider().sendQuery('hi', '/tmp')); + expect(process.env.PI_PACKAGE_DIR).toBeDefined(); + expect(process.env.PI_PACKAGE_DIR).toContain('archon-pi-shim'); + }); + test('throws when no model is configured', async () => { const { error } = await consume(new PiProvider().sendQuery('hi', '/tmp')); expect(error?.message).toContain('Pi provider requires a model'); diff --git a/packages/providers/src/community/pi/provider.ts b/packages/providers/src/community/pi/provider.ts index e4b6804762..610bcd56ab 100644 --- a/packages/providers/src/community/pi/provider.ts +++ b/packages/providers/src/community/pi/provider.ts @@ -1,3 +1,7 @@ +import { existsSync, mkdirSync, writeFileSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; + import { createLogger } from '@archon/paths'; import type { Api, Model } from '@mariozechner/pi-ai'; @@ -24,6 +28,44 @@ import { parsePiModelRef } from './model-ref'; // All Pi SDK value bindings and Pi-dependent helper modules are dynamically // imported inside `sendQuery()` below, which runs only when a Pi workflow is // actually invoked. Type-only imports above are fine — TS erases them. +// +// Lazy-loading defers the crash from boot-time to sendQuery-time — but the +// crash still happens when Pi is actually used. `ensurePiPackageDirShim()` +// (see below) fixes the *runtime* half: before any dynamic Pi import in +// sendQuery, write a stub package.json to tmpdir and point Pi at it via +// its own documented `PI_PACKAGE_DIR` escape hatch. + +/** + * Write a minimal package.json to a stable tmpdir and set `PI_PACKAGE_DIR` + * so Pi's `config.js` short-circuits its `dirname(process.execPath)` walk + * (which fails inside a compiled archon binary). 
Pi only reads three + * optional fields from that package.json — `piConfig.name`, `piConfig.configDir`, + * and `version` — so the stub is genuinely minimal. Idempotent: the file is + * only written once per host (existsSync check), and the env var is set on + * every call so multiple PiProvider instances stay consistent. + * + * Done on each sendQuery rather than at module load so (a) the file write + * is paid only when Pi is actually used, and (b) the env var can't get + * clobbered between registration and invocation. + */ +function ensurePiPackageDirShim(): void { + const shimDir = join(tmpdir(), 'archon-pi-shim'); + const shimPkgJson = join(shimDir, 'package.json'); + if (!existsSync(shimPkgJson)) { + mkdirSync(shimDir, { recursive: true }); + // `piConfig: {}` is explicit so Pi's defaults (`name: 'pi'`, + // `configDir: '.pi'`) kick in — matches Pi's standalone behavior. + writeFileSync( + shimPkgJson, + JSON.stringify({ + name: 'archon-pi-shim', + version: '0.0.0', + piConfig: {}, + }) + ); + } + process.env.PI_PACKAGE_DIR = shimDir; +} /** * Map Pi provider id → env var name used by pi-ai's getEnvApiKey(). @@ -115,6 +157,13 @@ export class PiProvider implements IAgentProvider { resumeSessionId?: string, requestOptions?: SendQueryOptions ): AsyncGenerator { + // Install the PI_PACKAGE_DIR shim BEFORE the dynamic imports below: Pi's + // config.js runs `readFileSync(getPackageJsonPath())` at its own module + // init, and getPackageJsonPath() checks process.env.PI_PACKAGE_DIR first. + // Without this, the dynamic import below would crash with ENOENT on + // `dirname(process.execPath)/package.json` inside a compiled binary. + ensurePiPackageDirShim(); + // Lazy-load Pi SDK and all Pi-dependent helper modules here. 
Must not move // these imports to module scope — see the header comment for the failure // mode (archon compiled binary crashes at startup when Pi's config.js diff --git a/packages/providers/src/types.ts b/packages/providers/src/types.ts index d6cb8b4a87..538330ddbd 100644 --- a/packages/providers/src/types.ts +++ b/packages/providers/src/types.ts @@ -95,6 +95,10 @@ export interface TokenUsage { input: number; output: number; total?: number; + /** Input tokens served from provider prompt/cache storage, if reported separately. */ + cacheRead?: number; + /** Input tokens written to provider prompt/cache storage, if reported separately. */ + cacheWrite?: number; cost?: number; } diff --git a/packages/server/src/index.ts b/packages/server/src/index.ts index c1c76cf549..ee14cfef5b 100644 --- a/packages/server/src/index.ts +++ b/packages/server/src/index.ts @@ -385,8 +385,24 @@ export async function startServer(opts: ServerOptions = {}): Promise { .catch(createMessageErrorHandler('Discord', discordAdapter, conversationId)); }); - await discord.start(); - activePlatforms.push('Discord'); + // Don't let a Discord login failure (bad token, missing privileged + // intents, etc.) bring down the whole server — users running + // `archon serve` for the web UI shouldn't lose it because of an + // unrelated bot misconfiguration. See #1365. + try { + await discord.start(); + activePlatforms.push('Discord'); + } catch (error) { + const err = error as Error; + const isPrivilegedIntentError = err.message?.includes('disallowed intents'); + const hint = isPrivilegedIntentError + ? 'Enable "Message Content Intent" in the Discord Developer Portal ' + + '(your application > Bot > Privileged Gateway Intents) and restart, ' + + 'or unset DISCORD_BOT_TOKEN if you do not want the Discord adapter.' 
+ : 'Verify DISCORD_BOT_TOKEN is valid, or unset it to disable the Discord adapter.'; + getLog().error({ err, hint }, 'discord.start_failed_continuing_without_adapter'); + discord = null; + } } else { getLog().info('discord_adapter_skipped'); } diff --git a/packages/workflows/src/defaults/bundled-defaults.generated.ts b/packages/workflows/src/defaults/bundled-defaults.generated.ts index cd430f3d5a..81b0c8b2ee 100644 --- a/packages/workflows/src/defaults/bundled-defaults.generated.ts +++ b/packages/workflows/src/defaults/bundled-defaults.generated.ts @@ -55,24 +55,24 @@ export const BUNDLED_COMMANDS: Record = { // Bundled default workflows (20 total) export const BUNDLED_WORKFLOWS: Record = { - "archon-adversarial-dev": "name: archon-adversarial-dev\ndescription: |\n Use when: User wants to build a complete application from scratch using adversarial development.\n Triggers: \"adversarial dev\", \"adversarial development\", \"build with adversarial\", \"gan dev\",\n \"adversarial build\", \"build app adversarially\", \"adversarial coding\".\n Does: Three-role GAN-inspired development — Planner creates spec with sprints, then a state-machine\n loop alternates between Generator (builds code) and Evaluator (attacks it) with hard pass/fail\n thresholds. The evaluator's job is to BREAK what the generator builds. If any criterion scores\n below 7/10, the sprint goes back to the generator with adversarial feedback. Stops on sprint\n failure after max retries.\n NOT for: Bug fixes, PR reviews, refactoring existing code, simple one-off tasks.\n\n Based on Anthropic's harness design article for long-running application development.\n Separates planning, building, and evaluation into distinct roles with adversarial tension.\nprovider: claude\nmodel: sonnet\n\nnodes:\n # ─── Phase 1: Planning ───────────────────────────────────────────────\n - id: plan\n prompt: |\n You are a product planning expert. 
Your job is to take a short user prompt and expand it\n into a comprehensive product specification.\n\n ## User Request\n\n $ARGUMENTS\n\n ## Your Task\n\n Write a comprehensive product specification to the file `$ARTIFACTS_DIR/spec.md` using the Write tool.\n\n The spec MUST include ALL of the following sections:\n\n ### 1. Product Overview\n What the product does, who it's for, core value proposition.\n\n ### 2. Tech Stack\n Specific technologies, frameworks, and libraries. Be opinionated — pick concrete choices,\n not \"a modern framework.\" Include exact package names and versions where relevant.\n\n ### 3. Design Language\n Visual style, specific color hex codes, typography choices, component patterns, spacing system.\n\n ### 4. Feature List\n Every feature organized by priority. Be exhaustive.\n\n ### 5. Sprint Plan\n Features broken into 3-6 sprints, ordered by dependency and importance:\n - **Sprint 1** should establish the foundation (project setup, core data models, basic UI shell)\n - Each subsequent sprint builds on the previous\n - Label each sprint clearly: \"Sprint 1: Foundation\", \"Sprint 2: Core Features\", etc.\n - List the specific features/deliverables for each sprint\n\n Be specific and opinionated. The more concrete the spec (exact API paths, specific color codes,\n named libraries), the better the generator can build and the evaluator can test.\n\n IMPORTANT: Write the spec to `$ARTIFACTS_DIR/spec.md` using the Write tool. 
Do NOT just output\n it as conversation text.\n allowed_tools: [Read, Write, Glob, Grep]\n\n # ─── Phase 2: Workspace Initialization ───────────────────────────────\n - id: init-workspace\n depends_on: [plan]\n bash: |\n ARTIFACTS=\"$ARTIFACTS_DIR\"\n\n # Create directory structure for harness communication\n mkdir -p \"$ARTIFACTS/contracts\"\n mkdir -p \"$ARTIFACTS/feedback\"\n mkdir -p \"$ARTIFACTS/app\"\n\n # Initialize isolated git repo in app directory\n cd \"$ARTIFACTS/app\"\n git init -q\n git commit --allow-empty -m \"Initial commit: adversarial-dev workspace\" -q\n\n # Extract sprint count from spec (find highest \"Sprint N\" reference)\n SPEC=\"$ARTIFACTS/spec.md\"\n SPRINT_COUNT=3\n if [ -f \"$SPEC\" ]; then\n FOUND=$(grep -ioE 'sprint\\s+[0-9]+' \"$SPEC\" | grep -oE '[0-9]+' | sort -n | tail -1)\n if [ -n \"$FOUND\" ] && [ \"$FOUND\" -ge 1 ] 2>/dev/null; then\n SPRINT_COUNT=$FOUND\n fi\n if [ \"$SPRINT_COUNT\" -gt 10 ]; then\n SPRINT_COUNT=10\n fi\n fi\n\n # Write initial state machine file\n cat > \"$ARTIFACTS/state.json\" << 'STATEEOF'\n {\n \"phase\": \"negotiating\",\n \"sprint\": 1,\n \"totalSprints\": SPRINT_COUNT_PLACEHOLDER,\n \"retry\": 0,\n \"maxRetries\": 3,\n \"passThreshold\": 7,\n \"completedSprints\": [],\n \"status\": \"running\"\n }\n STATEEOF\n STATE_TMP=\"$ARTIFACTS/state.json.tmp\"\n sed \"s/SPRINT_COUNT_PLACEHOLDER/$SPRINT_COUNT/\" \"$ARTIFACTS/state.json\" > \"$STATE_TMP\"\n mv \"$STATE_TMP\" \"$ARTIFACTS/state.json\"\n\n echo \"{\\\"totalSprints\\\": $SPRINT_COUNT, \\\"appDir\\\": \\\"$ARTIFACTS/app\\\", \\\"artifactsDir\\\": \\\"$ARTIFACTS\\\"}\"\n timeout: 30000\n\n # ─── Phase 3: Adversarial Sprint Loop ────────────────────────────────\n #\n # State machine driven by $ARTIFACTS_DIR/state.json\n # Each iteration plays ONE role: negotiator, generator, or evaluator\n # fresh_context ensures genuine separation between roles\n #\n - id: adversarial-sprint\n depends_on: [init-workspace]\n idle_timeout: 600000\n model: 
claude-opus-4-6[1m]\n loop:\n prompt: |\n # Adversarial Development — Sprint Loop\n\n You are part of a GAN-inspired adversarial development system with three distinct roles.\n Each iteration you play ONE role, determined by the current phase in the state file.\n\n ## FIRST: Read State\n\n Read `$ARTIFACTS_DIR/state.json` to determine:\n - `phase` — which role you play this iteration\n - `sprint` — current sprint number\n - `totalSprints` — how many sprints total\n - `retry` — current retry attempt (0 = first try)\n - `maxRetries` — max retries before hard failure (default 3)\n - `passThreshold` — minimum score to pass (default 7)\n\n Then read `$ARTIFACTS_DIR/spec.md` for product context.\n\n ## Directory Layout\n\n - App source code: `$ARTIFACTS_DIR/app/`\n - Sprint contracts: `$ARTIFACTS_DIR/contracts/sprint-{N}.json`\n - Evaluation feedback: `$ARTIFACTS_DIR/feedback/sprint-{N}-round-{R}.json`\n - State machine: `$ARTIFACTS_DIR/state.json`\n\n ---\n\n ## ROLE: CONTRACT NEGOTIATOR (phase = \"negotiating\")\n\n You negotiate the success criteria for the current sprint. Play BOTH sides sequentially:\n\n **Step 1 — Generator's Proposal:**\n Read the spec carefully. Identify what Sprint {N} should deliver based on the sprint plan.\n Propose a sprint contract with 5-15 specific, testable criteria.\n\n Each criterion MUST be concrete and verifiable. Examples:\n - GOOD: \"GET /api/tasks returns 200 with JSON array; each item has id (number), title (string), status (string), createdAt (ISO date)\"\n - GOOD: \"Clicking the Add Task button opens a modal with title input, priority dropdown (low/medium/high), and due date picker\"\n - BAD: \"The API works well\"\n - BAD: \"Tasks can be managed\"\n\n **Step 2 — Evaluator's Tightening:**\n Now review your proposal as an adversary. For EACH criterion ask:\n - Is it specific enough to test programmatically?\n - What edge cases are missing? 
(empty inputs, special characters, concurrent requests)\n - Is the bar high enough, or would sloppy code pass?\n\n Tighten vague criteria. Add edge cases. Raise the bar.\n\n **Write the final contract** to `$ARTIFACTS_DIR/contracts/sprint-{N}.json`:\n ```json\n {\n \"sprintNumber\": ,\n \"features\": [\"feature1\", \"feature2\", ...],\n \"criteria\": [\n {\n \"name\": \"short-kebab-name\",\n \"description\": \"Specific, testable description of what must be true\",\n \"threshold\": 7\n }\n ]\n }\n ```\n\n **Update state.json**: Set `\"phase\": \"building\"`. Keep all other fields unchanged.\n\n ---\n\n ## ROLE: GENERATOR (phase = \"building\")\n\n You are a software engineer. Build features that MUST survive an adversarial evaluator\n who will actively try to break your code.\n\n **Read these files:**\n 1. `$ARTIFACTS_DIR/spec.md` — full product spec (design language, tech stack, all features)\n 2. `$ARTIFACTS_DIR/contracts/sprint-{N}.json` — the contract you must satisfy\n 3. If `retry` > 0: read `$ARTIFACTS_DIR/feedback/sprint-{N}-round-{R-1}.json` for the\n evaluator's previous feedback\n\n **If this is a RETRY (retry > 0):**\n Read the feedback CAREFULLY. 
Every failed criterion must be addressed.\n - If scores were close (5-6) and trending up: REFINE your approach\n - If scores were low (1-4) or the approach is fundamentally broken: PIVOT to a new strategy\n - Address EVERY feedback item — the evaluator WILL check\n - Re-verify each fix by running the code before committing\n\n **Build rules:**\n - All code goes in `$ARTIFACTS_DIR/app/`\n - Build ONE feature at a time, verify it works, then commit:\n ```bash\n cd $ARTIFACTS_DIR/app && git add -A && git commit -m \"feat: description of what was built\"\n ```\n - Install dependencies as needed (npm/bun/pip/etc)\n - Test your code — start the server, hit the endpoints, verify the UI renders\n - Think about what the evaluator will attack: edge cases, error handling, input validation\n - Build defensively — the evaluator's job is to break you\n\n **Update state.json**: Set `\"phase\": \"evaluating\"`. Keep all other fields unchanged.\n\n ---\n\n ## ROLE: EVALUATOR (phase = \"evaluating\")\n\n You are an ADVERSARIAL QA agent. Your mandate is to BREAK what the generator built.\n You are not helpful. You are not generous. You are an attacker.\n\n **CRITICAL CONSTRAINTS:**\n - You are READ-ONLY for source code. NEVER use Write or Edit on files in `$ARTIFACTS_DIR/app/`.\n - You MAY use Bash to run the app, curl endpoints, run test scripts, check behavior.\n - You MUST kill any background processes (servers, watchers) you start BEFORE finishing.\n Use: `pkill -f \"node\\|bun\\|python\\|npm\" 2>/dev/null || true`\n - You MUST score EVERY criterion in the contract. No skipping.\n\n **Scoring guidelines:**\n - **9-10**: Exceptional. Works perfectly including edge cases the contract didn't mention.\n - **7-8**: Solid. Meets the criterion as stated. Minor polish issues at most.\n - **5-6**: Partial. Core functionality exists but fails important edge cases or has bugs.\n - **3-4**: Weak. Barely functional. Major gaps.\n - **1-2**: Broken. 
Does not work or is not implemented.\n\n Do NOT grade on a curve. Do NOT give benefit of the doubt. A 7 means \"genuinely meets the bar.\"\n If something is broken, say it's broken.\n\n **Read**: `$ARTIFACTS_DIR/contracts/sprint-{N}.json` for the criteria.\n\n **For each criterion:**\n 1. Read the relevant source code\n 2. Run the application (start server, test endpoints, check rendered UI)\n 3. Try to BREAK it — invalid inputs, missing fields, edge cases, error handling gaps\n 4. Score it honestly\n\n **Write evaluation** to `$ARTIFACTS_DIR/feedback/sprint-{N}-round-{R}.json`:\n ```json\n {\n \"passed\": = passThreshold, false otherwise>,\n \"scores\": {\n \"criterion-name\": ,\n ...\n },\n \"feedback\": [\n {\n \"criterion\": \"criterion-name\",\n \"score\": <1-10>,\n \"details\": \"Specific findings. Include file paths, line numbers, exact error messages, curl commands that failed.\"\n }\n ],\n \"overallSummary\": \"What worked, what didn't, what the generator must fix.\"\n }\n ```\n\n **Determine pass/fail** — `passed` is `true` ONLY if every single score >= `passThreshold`.\n\n **Update state.json based on result:**\n\n **If PASSED (all criteria >= threshold):**\n - Add current sprint number to `completedSprints` array\n - If `sprint` < `totalSprints`: set `\"phase\": \"negotiating\"`, increment `\"sprint\"` by 1, set `\"retry\": 0`\n - If `sprint` == `totalSprints`: set `\"phase\": \"complete\"`, set `\"status\": \"complete\"`\n\n **If FAILED:**\n - If `retry` < `maxRetries`: set `\"phase\": \"building\"`, increment `\"retry\"` by 1\n - If `retry` >= `maxRetries`: set `\"phase\": \"failed\"`, set `\"status\": \"failed\"`\n\n **IMPORTANT**: Kill all background processes before finishing:\n ```bash\n pkill -f \"node|bun|python|npm|next|vite|webpack\" 2>/dev/null || true\n ```\n\n ---\n\n ## COMPLETION\n\n After updating state.json, check the `status` field:\n - If `\"status\": \"complete\"` → all sprints passed! 
Output: `ALL_SPRINTS_COMPLETE`\n - If `\"status\": \"failed\"` → sprint failed after max retries. Output: `ALL_SPRINTS_COMPLETE`\n - If `\"status\": \"running\"` → more work to do. Do NOT output any completion signal.\n\n until: ALL_SPRINTS_COMPLETE\n max_iterations: 60\n fresh_context: true\n until_bash: |\n grep -qE '\"status\"\\s*:\\s*\"(complete|failed)\"' \"$ARTIFACTS_DIR/state.json\"\n\n # ─── Phase 4: Report ─────────────────────────────────────────────────\n - id: report\n depends_on: [adversarial-sprint]\n trigger_rule: all_done\n context: fresh\n model: haiku\n prompt: |\n You are a project reporter. Generate a comprehensive summary of the adversarial development run.\n\n ## Read ALL of these files:\n 1. `$ARTIFACTS_DIR/state.json` — final state (tells you success/failure, sprint count)\n 2. `$ARTIFACTS_DIR/spec.md` — the original product spec\n 3. All files in `$ARTIFACTS_DIR/contracts/` — sprint contracts (use Glob to find them)\n 4. All files in `$ARTIFACTS_DIR/feedback/` — evaluation results (use Glob to find them)\n\n ## Generate a report covering:\n\n ### Build Summary\n - What application was built (from the spec)\n - Final status: did all sprints pass or did it fail? 
On which sprint?\n - Total sprints completed vs planned\n\n ### Per-Sprint Breakdown\n For each sprint that was attempted:\n - What the contract required (features + key criteria)\n - How many attempts were needed (retry count)\n - Final scores for each criterion\n - Key feedback that drove retries and improvements\n\n ### Quality Metrics\n - Average score across all final-round criteria\n - Which criteria required the most retries\n - Where the adversarial evaluator pushed quality the highest\n\n ### How to Run\n - The application code lives in: `$ARTIFACTS_DIR/app/`\n - Include the tech stack and how to start the app (from the spec)\n - Include any setup steps (install deps, env vars, etc.)\n\n Write this report to `$ARTIFACTS_DIR/report.md` AND output it as your response so the user\n sees it directly.\n allowed_tools: [Read, Write, Glob, Grep]\n", + "archon-adversarial-dev": "name: archon-adversarial-dev\ndescription: |\n Use when: User wants to build a complete application from scratch using adversarial development.\n Triggers: \"adversarial dev\", \"adversarial development\", \"build with adversarial\", \"gan dev\",\n \"adversarial build\", \"build app adversarially\", \"adversarial coding\".\n Does: Three-role GAN-inspired development — Planner creates spec with sprints, then a state-machine\n loop alternates between Generator (builds code) and Evaluator (attacks it) with hard pass/fail\n thresholds. The evaluator's job is to BREAK what the generator builds. If any criterion scores\n below 7/10, the sprint goes back to the generator with adversarial feedback. 
Stops on sprint\n failure after max retries.\n NOT for: Bug fixes, PR reviews, refactoring existing code, simple one-off tasks.\n\n Based on Anthropic's harness design article for long-running application development.\n Separates planning, building, and evaluation into distinct roles with adversarial tension.\nprovider: claude\nmodel: sonnet\n\nnodes:\n # ─── Phase 1: Planning ───────────────────────────────────────────────\n - id: plan\n prompt: |\n You are a product planning expert. Your job is to take a short user prompt and expand it\n into a comprehensive product specification.\n\n ## User Request\n\n $ARGUMENTS\n\n ## Your Task\n\n Write a comprehensive product specification to the file `$ARTIFACTS_DIR/spec.md` using the Write tool.\n\n The spec MUST include ALL of the following sections:\n\n ### 1. Product Overview\n What the product does, who it's for, core value proposition.\n\n ### 2. Tech Stack\n Specific technologies, frameworks, and libraries. Be opinionated — pick concrete choices,\n not \"a modern framework.\" Include exact package names and versions where relevant.\n\n ### 3. Design Language\n Visual style, specific color hex codes, typography choices, component patterns, spacing system.\n\n ### 4. Feature List\n Every feature organized by priority. Be exhaustive.\n\n ### 5. Sprint Plan\n Features broken into 3-6 sprints, ordered by dependency and importance:\n - **Sprint 1** should establish the foundation (project setup, core data models, basic UI shell)\n - Each subsequent sprint builds on the previous\n - Label each sprint clearly: \"Sprint 1: Foundation\", \"Sprint 2: Core Features\", etc.\n - List the specific features/deliverables for each sprint\n\n Be specific and opinionated. The more concrete the spec (exact API paths, specific color codes,\n named libraries), the better the generator can build and the evaluator can test.\n\n IMPORTANT: Write the spec to `$ARTIFACTS_DIR/spec.md` using the Write tool. 
Do NOT just output\n it as conversation text.\n allowed_tools: [Read, Write, Glob, Grep]\n\n # ─── Phase 2: Workspace Initialization ───────────────────────────────\n - id: init-workspace\n depends_on: [plan]\n bash: |\n ARTIFACTS=\"$ARTIFACTS_DIR\"\n\n # Create directory structure for harness communication\n mkdir -p \"$ARTIFACTS/contracts\"\n mkdir -p \"$ARTIFACTS/feedback\"\n mkdir -p \"$ARTIFACTS/app\"\n\n # Initialize isolated git repo in app directory\n cd \"$ARTIFACTS/app\"\n git init -q\n git commit --allow-empty -m \"Initial commit: adversarial-dev workspace\" -q\n\n # Extract sprint count from spec (find highest \"Sprint N\" reference)\n SPEC=\"$ARTIFACTS/spec.md\"\n SPRINT_COUNT=3\n if [ -f \"$SPEC\" ]; then\n FOUND=$(grep -ioE 'sprint\\s+[0-9]+' \"$SPEC\" | grep -oE '[0-9]+' | sort -n | tail -1)\n if [ -n \"$FOUND\" ] && [ \"$FOUND\" -ge 1 ] 2>/dev/null; then\n SPRINT_COUNT=$FOUND\n fi\n if [ \"$SPRINT_COUNT\" -gt 10 ]; then\n SPRINT_COUNT=10\n fi\n fi\n\n # Write initial state machine file\n cat > \"$ARTIFACTS/state.json\" << 'STATEEOF'\n {\n \"phase\": \"negotiating\",\n \"sprint\": 1,\n \"totalSprints\": SPRINT_COUNT_PLACEHOLDER,\n \"retry\": 0,\n \"maxRetries\": 3,\n \"passThreshold\": 7,\n \"completedSprints\": [],\n \"status\": \"running\"\n }\n STATEEOF\n STATE_TMP=\"$ARTIFACTS/state.json.tmp\"\n sed \"s/SPRINT_COUNT_PLACEHOLDER/$SPRINT_COUNT/\" \"$ARTIFACTS/state.json\" > \"$STATE_TMP\"\n mv \"$STATE_TMP\" \"$ARTIFACTS/state.json\"\n\n echo \"{\\\"totalSprints\\\": $SPRINT_COUNT, \\\"appDir\\\": \\\"$ARTIFACTS/app\\\", \\\"artifactsDir\\\": \\\"$ARTIFACTS\\\"}\"\n timeout: 30000\n\n # ─── Phase 3: Adversarial Sprint Loop ────────────────────────────────\n #\n # State machine driven by $ARTIFACTS_DIR/state.json\n # Each iteration plays ONE role: negotiator, generator, or evaluator\n # fresh_context ensures genuine separation between roles\n #\n - id: adversarial-sprint\n depends_on: [init-workspace]\n idle_timeout: 600000\n model: opus[1m]\n 
loop:\n prompt: |\n # Adversarial Development — Sprint Loop\n\n You are part of a GAN-inspired adversarial development system with three distinct roles.\n Each iteration you play ONE role, determined by the current phase in the state file.\n\n ## FIRST: Read State\n\n Read `$ARTIFACTS_DIR/state.json` to determine:\n - `phase` — which role you play this iteration\n - `sprint` — current sprint number\n - `totalSprints` — how many sprints total\n - `retry` — current retry attempt (0 = first try)\n - `maxRetries` — max retries before hard failure (default 3)\n - `passThreshold` — minimum score to pass (default 7)\n\n Then read `$ARTIFACTS_DIR/spec.md` for product context.\n\n ## Directory Layout\n\n - App source code: `$ARTIFACTS_DIR/app/`\n - Sprint contracts: `$ARTIFACTS_DIR/contracts/sprint-{N}.json`\n - Evaluation feedback: `$ARTIFACTS_DIR/feedback/sprint-{N}-round-{R}.json`\n - State machine: `$ARTIFACTS_DIR/state.json`\n\n ---\n\n ## ROLE: CONTRACT NEGOTIATOR (phase = \"negotiating\")\n\n You negotiate the success criteria for the current sprint. Play BOTH sides sequentially:\n\n **Step 1 — Generator's Proposal:**\n Read the spec carefully. Identify what Sprint {N} should deliver based on the sprint plan.\n Propose a sprint contract with 5-15 specific, testable criteria.\n\n Each criterion MUST be concrete and verifiable. Examples:\n - GOOD: \"GET /api/tasks returns 200 with JSON array; each item has id (number), title (string), status (string), createdAt (ISO date)\"\n - GOOD: \"Clicking the Add Task button opens a modal with title input, priority dropdown (low/medium/high), and due date picker\"\n - BAD: \"The API works well\"\n - BAD: \"Tasks can be managed\"\n\n **Step 2 — Evaluator's Tightening:**\n Now review your proposal as an adversary. For EACH criterion ask:\n - Is it specific enough to test programmatically?\n - What edge cases are missing? 
(empty inputs, special characters, concurrent requests)\n - Is the bar high enough, or would sloppy code pass?\n\n Tighten vague criteria. Add edge cases. Raise the bar.\n\n **Write the final contract** to `$ARTIFACTS_DIR/contracts/sprint-{N}.json`:\n ```json\n {\n \"sprintNumber\": ,\n \"features\": [\"feature1\", \"feature2\", ...],\n \"criteria\": [\n {\n \"name\": \"short-kebab-name\",\n \"description\": \"Specific, testable description of what must be true\",\n \"threshold\": 7\n }\n ]\n }\n ```\n\n **Update state.json**: Set `\"phase\": \"building\"`. Keep all other fields unchanged.\n\n ---\n\n ## ROLE: GENERATOR (phase = \"building\")\n\n You are a software engineer. Build features that MUST survive an adversarial evaluator\n who will actively try to break your code.\n\n **Read these files:**\n 1. `$ARTIFACTS_DIR/spec.md` — full product spec (design language, tech stack, all features)\n 2. `$ARTIFACTS_DIR/contracts/sprint-{N}.json` — the contract you must satisfy\n 3. If `retry` > 0: read `$ARTIFACTS_DIR/feedback/sprint-{N}-round-{R-1}.json` for the\n evaluator's previous feedback\n\n **If this is a RETRY (retry > 0):**\n Read the feedback CAREFULLY. 
Every failed criterion must be addressed.\n - If scores were close (5-6) and trending up: REFINE your approach\n - If scores were low (1-4) or the approach is fundamentally broken: PIVOT to a new strategy\n - Address EVERY feedback item — the evaluator WILL check\n - Re-verify each fix by running the code before committing\n\n **Build rules:**\n - All code goes in `$ARTIFACTS_DIR/app/`\n - Build ONE feature at a time, verify it works, then commit:\n ```bash\n cd $ARTIFACTS_DIR/app && git add -A && git commit -m \"feat: description of what was built\"\n ```\n - Install dependencies as needed (npm/bun/pip/etc)\n - Test your code — start the server, hit the endpoints, verify the UI renders\n - Think about what the evaluator will attack: edge cases, error handling, input validation\n - Build defensively — the evaluator's job is to break you\n\n **Update state.json**: Set `\"phase\": \"evaluating\"`. Keep all other fields unchanged.\n\n ---\n\n ## ROLE: EVALUATOR (phase = \"evaluating\")\n\n You are an ADVERSARIAL QA agent. Your mandate is to BREAK what the generator built.\n You are not helpful. You are not generous. You are an attacker.\n\n **CRITICAL CONSTRAINTS:**\n - You are READ-ONLY for source code. NEVER use Write or Edit on files in `$ARTIFACTS_DIR/app/`.\n - You MAY use Bash to run the app, curl endpoints, run test scripts, check behavior.\n - You MUST kill any background processes (servers, watchers) you start BEFORE finishing.\n Use: `pkill -f \"node\\|bun\\|python\\|npm\" 2>/dev/null || true`\n - You MUST score EVERY criterion in the contract. No skipping.\n\n **Scoring guidelines:**\n - **9-10**: Exceptional. Works perfectly including edge cases the contract didn't mention.\n - **7-8**: Solid. Meets the criterion as stated. Minor polish issues at most.\n - **5-6**: Partial. Core functionality exists but fails important edge cases or has bugs.\n - **3-4**: Weak. Barely functional. Major gaps.\n - **1-2**: Broken. 
Does not work or is not implemented.\n\n Do NOT grade on a curve. Do NOT give benefit of the doubt. A 7 means \"genuinely meets the bar.\"\n If something is broken, say it's broken.\n\n **Read**: `$ARTIFACTS_DIR/contracts/sprint-{N}.json` for the criteria.\n\n **For each criterion:**\n 1. Read the relevant source code\n 2. Run the application (start server, test endpoints, check rendered UI)\n 3. Try to BREAK it — invalid inputs, missing fields, edge cases, error handling gaps\n 4. Score it honestly\n\n **Write evaluation** to `$ARTIFACTS_DIR/feedback/sprint-{N}-round-{R}.json`:\n ```json\n {\n \"passed\": = passThreshold, false otherwise>,\n \"scores\": {\n \"criterion-name\": ,\n ...\n },\n \"feedback\": [\n {\n \"criterion\": \"criterion-name\",\n \"score\": <1-10>,\n \"details\": \"Specific findings. Include file paths, line numbers, exact error messages, curl commands that failed.\"\n }\n ],\n \"overallSummary\": \"What worked, what didn't, what the generator must fix.\"\n }\n ```\n\n **Determine pass/fail** — `passed` is `true` ONLY if every single score >= `passThreshold`.\n\n **Update state.json based on result:**\n\n **If PASSED (all criteria >= threshold):**\n - Add current sprint number to `completedSprints` array\n - If `sprint` < `totalSprints`: set `\"phase\": \"negotiating\"`, increment `\"sprint\"` by 1, set `\"retry\": 0`\n - If `sprint` == `totalSprints`: set `\"phase\": \"complete\"`, set `\"status\": \"complete\"`\n\n **If FAILED:**\n - If `retry` < `maxRetries`: set `\"phase\": \"building\"`, increment `\"retry\"` by 1\n - If `retry` >= `maxRetries`: set `\"phase\": \"failed\"`, set `\"status\": \"failed\"`\n\n **IMPORTANT**: Kill all background processes before finishing:\n ```bash\n pkill -f \"node|bun|python|npm|next|vite|webpack\" 2>/dev/null || true\n ```\n\n ---\n\n ## COMPLETION\n\n After updating state.json, check the `status` field:\n - If `\"status\": \"complete\"` → all sprints passed! 
Output: `ALL_SPRINTS_COMPLETE`\n - If `\"status\": \"failed\"` → sprint failed after max retries. Output: `ALL_SPRINTS_COMPLETE`\n - If `\"status\": \"running\"` → more work to do. Do NOT output any completion signal.\n\n until: ALL_SPRINTS_COMPLETE\n max_iterations: 60\n fresh_context: true\n until_bash: |\n grep -qE '\"status\"\\s*:\\s*\"(complete|failed)\"' \"$ARTIFACTS_DIR/state.json\"\n\n # ─── Phase 4: Report ─────────────────────────────────────────────────\n - id: report\n depends_on: [adversarial-sprint]\n trigger_rule: all_done\n context: fresh\n model: haiku\n prompt: |\n You are a project reporter. Generate a comprehensive summary of the adversarial development run.\n\n ## Read ALL of these files:\n 1. `$ARTIFACTS_DIR/state.json` — final state (tells you success/failure, sprint count)\n 2. `$ARTIFACTS_DIR/spec.md` — the original product spec\n 3. All files in `$ARTIFACTS_DIR/contracts/` — sprint contracts (use Glob to find them)\n 4. All files in `$ARTIFACTS_DIR/feedback/` — evaluation results (use Glob to find them)\n\n ## Generate a report covering:\n\n ### Build Summary\n - What application was built (from the spec)\n - Final status: did all sprints pass or did it fail? 
On which sprint?\n - Total sprints completed vs planned\n\n ### Per-Sprint Breakdown\n For each sprint that was attempted:\n - What the contract required (features + key criteria)\n - How many attempts were needed (retry count)\n - Final scores for each criterion\n - Key feedback that drove retries and improvements\n\n ### Quality Metrics\n - Average score across all final-round criteria\n - Which criteria required the most retries\n - Where the adversarial evaluator pushed quality the highest\n\n ### How to Run\n - The application code lives in: `$ARTIFACTS_DIR/app/`\n - Include the tech stack and how to start the app (from the spec)\n - Include any setup steps (install deps, env vars, etc.)\n\n Write this report to `$ARTIFACTS_DIR/report.md` AND output it as your response so the user\n sees it directly.\n allowed_tools: [Read, Write, Glob, Grep]\n", "archon-architect": "name: archon-architect\ndescription: |\n Use when: User wants an architectural sweep, complexity reduction, or codebase health improvement.\n Triggers: \"architect\", \"simplify codebase\", \"reduce complexity\", \"architectural sweep\",\n \"clean up architecture\", \"codebase health\", \"fix architecture\".\n Does: Scans codebase metrics -> analyzes architecture with principled lens -> plans targeted\n simplifications -> executes fixes with self-review loops (hooks) -> validates -> creates PR.\n NOT for: Single-file fixes, feature development, bug fixes, PR reviews.\n\n DAG workflow showcasing per-node hooks:\n - PostToolUse hooks create organic quality loops (lint after write, self-review)\n - PreToolUse hooks inject architectural principles before changes\n - Different nodes have different trust levels and steering\n\nprovider: claude\n\nnodes:\n # ═══════════════════════════════════════════════════════════════\n # PHASE 1: MEASURE\n # Gather raw metrics — file sizes, complexity hotspots, dependency fan-out\n # ═══════════════════════════════════════════════════════════════\n\n - id: 
scan-metrics\n bash: |\n echo \"=== FILE SIZE HOTSPOTS (top 30 largest source files) ===\"\n find . -name '*.ts' -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*' \\\n -exec wc -l {} + 2>/dev/null | sort -rn | head -30\n\n echo \"\"\n echo \"=== IMPORT FAN-OUT (files with most imports) ===\"\n for f in $(find . -name '*.ts' -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*'); do\n count=$(grep -c \"^import \" \"$f\" 2>/dev/null) || count=0\n if [ \"$count\" -gt 8 ]; then\n echo \"$count imports: $f\"\n fi\n done | sort -rn | head -20\n\n echo \"\"\n echo \"=== EXPORT FAN-OUT (files with most exports) ===\"\n for f in $(find . -name '*.ts' -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*'); do\n count=$(grep -c \"^export \" \"$f\" 2>/dev/null) || count=0\n if [ \"$count\" -gt 5 ]; then\n echo \"$count exports: $f\"\n fi\n done | sort -rn | head -20\n\n echo \"\"\n echo \"=== FUNCTION LENGTH HOTSPOTS (functions over 50 lines) ===\"\n grep -rn \"^\\(export \\)\\?\\(async \\)\\?function \\|=> {$\" \\\n --include='*.ts' --exclude-dir=node_modules --exclude-dir=.git --exclude-dir=dist . 2>/dev/null \\\n | head -30\n\n echo \"\"\n echo \"=== TYPE SAFETY GAPS ===\"\n echo \"any usage:\"\n grep -rn \": any\\b\\|as any\\b\" --include='*.ts' --exclude-dir=node_modules --exclude-dir=.git --exclude-dir=dist . 2>/dev/null | wc -l\n echo \"eslint-disable comments:\"\n grep -rn \"eslint-disable\" --include='*.ts' --exclude-dir=node_modules --exclude-dir=.git --exclude-dir=dist . 
2>/dev/null | wc -l\n timeout: 60000\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 2: ANALYZE\n # Read through hotspots with an architectural lens\n # Hooks inject assessment criteria after every file read\n # ═══════════════════════════════════════════════════════════════\n\n - id: analyze\n prompt: |\n You are a senior software architect performing a codebase health assessment.\n\n ## Codebase Metrics\n\n $scan-metrics.output\n\n ## User Focus\n\n $ARGUMENTS\n\n ## Instructions\n\n 1. Read the top 10-15 files flagged by the metrics above (largest, most imports, most exports)\n 2. For each file, assess the criteria injected after you read it (you'll see them)\n 3. Build a running list of architectural concerns\n 4. Focus on:\n - Modules doing too many things (SRP violations)\n - Abstractions that don't earn their complexity\n - Duplicated patterns that should be consolidated (Rule of Three)\n - God files or god functions\n - Leaky abstractions or tight coupling between layers\n - Dead code or unused exports\n 5. Do NOT suggest changes yet — only diagnose\n\n ## Output\n\n Write a structured assessment to $ARTIFACTS_DIR/architecture-assessment.md with:\n - Executive summary (3-5 sentences)\n - Top findings ranked by impact\n - For each finding: file, what's wrong, why it matters, estimated effort\n depends_on: [scan-metrics]\n context: fresh\n denied_tools: [Write, Edit, Bash]\n hooks:\n PostToolUse:\n - matcher: \"Read\"\n response:\n hookSpecificOutput:\n hookEventName: PostToolUse\n additionalContext: >\n For the file you just read, assess:\n (1) Single responsibility — does this module do exactly one thing?\n (2) Cognitive load — could a new team member understand this in 5 minutes?\n (3) Abstraction value — does every abstraction earn its complexity, or is it premature?\n (4) Dependency direction — does this file depend on things at its own level or below, not above?\n Add any concerns to your running list. 
Be specific — cite line ranges and function names.\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 3: PLAN\n # Prioritize and scope the changes — pure reasoning, no tools\n # ═══════════════════════════════════════════════════════════════\n\n - id: plan\n prompt: |\n You are planning targeted architectural improvements.\n\n ## Assessment\n\n $analyze.output\n\n ## Principles\n\n - KISS: prefer straightforward over clever\n - YAGNI: remove speculative abstractions\n - Rule of Three: only extract when a pattern appears 3+ times\n - Each change must be independently revertable\n - Do NOT mix refactoring with behavior changes\n - Scope to what can be done safely in one pass (max 5-7 files)\n\n ## Instructions\n\n 1. From the assessment, select the top 3-5 highest-impact, lowest-risk improvements\n 2. For each, write a precise plan: which file, what to change, why\n 3. Order them so each change is independent (no cascading dependencies between changes)\n 4. Estimate blast radius — how many other files are affected\n\n ## Output\n\n Write the plan as a numbered list. 
Be specific about exactly what code to change.\n Keep it concise — the implement node will follow this literally.\n depends_on: [analyze]\n allowed_tools: [Read]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 4: EXECUTE\n # Make the changes with hooks creating quality feedback loops\n # ═══════════════════════════════════════════════════════════════\n\n - id: simplify\n prompt: |\n You are implementing targeted architectural simplifications.\n\n ## Plan\n\n $plan.output\n\n ## Rules\n\n - Follow the plan exactly — do not add extra improvements you notice along the way\n - Each change must preserve existing behavior (refactor only, no feature changes)\n - After each file edit, you'll be prompted to validate — follow those instructions\n - If a change turns out to be harder than expected, skip it and move on\n - Commit each logical change separately with a clear commit message\n\n ## Instructions\n\n 1. Work through the plan items in order\n 2. For each item: read the file, make the change, follow the post-edit checklist\n 3. After all changes, do a final `git diff --stat` to verify scope\n depends_on: [plan]\n context: fresh\n hooks:\n PreToolUse:\n - matcher: \"Write|Edit\"\n response:\n hookSpecificOutput:\n hookEventName: PreToolUse\n additionalContext: >\n Before writing: Is this file in your plan? If not, explain why you're\n touching it. Check how many files import from this module — changes to\n widely-imported modules need extra scrutiny.\n PostToolUse:\n - matcher: \"Write|Edit\"\n response:\n systemMessage: >\n You just modified a file. Do these things NOW before moving on:\n 1. Run the type checker to verify your change compiles\n 2. Re-read the file you changed — is it ACTUALLY simpler, or did you just move complexity around?\n 3. State in ONE sentence why this change reduces complexity. 
If you cannot justify it, revert it.\n - matcher: \"Read\"\n response:\n hookSpecificOutput:\n hookEventName: PostToolUse\n additionalContext: >\n Before modifying this file, consider: will your change reduce or increase\n the number of concepts a reader needs to hold in their head?\n - matcher: \"Bash\"\n response:\n hookSpecificOutput:\n hookEventName: PostToolUse\n additionalContext: >\n Check the exit code. If the command failed, diagnose the root cause\n before attempting a fix. Do not blindly retry.\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 5: VALIDATE\n # Run full validation suite — bash only, cannot edit to \"fix\" failures\n # ═══════════════════════════════════════════════════════════════\n\n - id: validate\n bash: |\n echo \"=== TYPE CHECK ===\"\n bun run type-check 2>&1\n TC_EXIT=$?\n\n echo \"\"\n echo \"=== LINT ===\"\n bun run lint 2>&1\n LINT_EXIT=$?\n\n echo \"\"\n echo \"=== TESTS ===\"\n bun run test 2>&1\n TEST_EXIT=$?\n\n echo \"\"\n echo \"=== RESULTS ===\"\n echo \"Type check: $([ $TC_EXIT -eq 0 ] && echo 'PASS' || echo 'FAIL')\"\n echo \"Lint: $([ $LINT_EXIT -eq 0 ] && echo 'PASS' || echo 'FAIL')\"\n echo \"Tests: $([ $TEST_EXIT -eq 0 ] && echo 'PASS' || echo 'FAIL')\"\n\n # Always exit 0 so downstream nodes can read output and decide\n if [ $TC_EXIT -eq 0 ] && [ $LINT_EXIT -eq 0 ] && [ $TEST_EXIT -eq 0 ]; then\n echo \"VALIDATION_STATUS: PASS\"\n else\n echo \"VALIDATION_STATUS: FAIL\"\n fi\n depends_on: [simplify]\n timeout: 300000\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 6: FIX VALIDATION FAILURES (if any)\n # Only runs if validate failed — focused fix with same quality hooks\n # ═══════════════════════════════════════════════════════════════\n\n - id: fix-failures\n prompt: |\n Review the validation output below.\n\n ## Validation Output\n\n $validate.output\n\n ## Instructions\n\n If the output ends with \"VALIDATION_STATUS: PASS\", respond with\n \"All checks 
passed — no fixes needed.\" and stop.\n\n If there are failures:\n\n 1. Read the validation failures carefully\n 2. Fix ONLY what's broken — do not make additional improvements\n 3. If a fix requires changing behavior (not just fixing a type/lint error),\n revert the original change instead\n 4. Run the specific failing check after each fix to confirm it passes\n 5. After all fixes, run the full validation suite: `bun run validate`\n depends_on: [validate]\n context: fresh\n hooks:\n PostToolUse:\n - matcher: \"Write|Edit\"\n response:\n systemMessage: >\n You just made a fix. Run the specific failing validation check NOW\n to verify your fix works. Do not batch fixes — verify each one.\n PreToolUse:\n - matcher: \"Write|Edit\"\n response:\n hookSpecificOutput:\n hookEventName: PreToolUse\n additionalContext: >\n You are fixing validation failures only. Do not make any changes\n beyond what's needed to pass the failing checks. If in doubt, revert\n the original change that caused the failure.\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 7: CREATE PR\n # Hooks ensure this node only does git operations\n # ═══════════════════════════════════════════════════════════════\n\n - id: create-pr\n prompt: |\n Create a pull request for the architectural improvements.\n\n ## Context\n\n - Architecture assessment: $analyze.output\n - Plan: $plan.output\n - Validation: $validate.output\n\n ## Instructions\n\n 1. Stage all changes and create a single commit (or verify existing commits)\n 2. Push the branch: `git push -u origin HEAD`\n 3. Check if a PR already exists: `gh pr list --head $(git branch --show-current)`\n 4. Create the PR with:\n - Title: concise description of what was simplified (under 70 chars)\n - Body: use the format below\n 5. 
Save the PR URL to `$ARTIFACTS_DIR/.pr-url`\n\n ## PR Body Format\n\n ```markdown\n ## Architectural Sweep\n\n **Focus**: $ARGUMENTS\n\n ### Assessment\n\n [3-5 sentence summary from the architecture assessment]\n\n ### Changes\n\n [For each change: what file, what was simplified, why]\n\n ### Validation\n\n - [x] Type check passes\n - [x] Lint passes\n - [x] Tests pass\n - [x] Each change preserves existing behavior\n ```\n depends_on: [fix-failures]\n context: fresh\n hooks:\n PreToolUse:\n - matcher: \"Write|Edit\"\n response:\n hookSpecificOutput:\n hookEventName: PreToolUse\n permissionDecision: deny\n permissionDecisionReason: \"PR creation node — do not modify source files. Use only git and gh commands.\"\n PostToolUse:\n - matcher: \"Bash\"\n response:\n hookSpecificOutput:\n hookEventName: PostToolUse\n additionalContext: >\n Verify this command succeeded. If git push or gh pr create failed,\n read the error message carefully before retrying.\n", "archon-assist": "name: archon-assist\ndescription: |\n Use when: No other workflow matches the request.\n Handles: Questions, debugging, exploration, one-off tasks, explanations, CI failures, general help.\n Capability: Full Claude Code agent with all tools available.\n Note: Will inform user when assist mode is used for tracking.\n\nnodes:\n - id: assist\n command: archon-assist\n", "archon-comprehensive-pr-review": "name: archon-comprehensive-pr-review\ndescription: |\n Use when: User wants a comprehensive code review of a pull request with automatic fixes.\n Triggers: \"review this PR\", \"review PR #123\", \"comprehensive review\", \"full PR review\",\n \"review and fix\", \"check this PR\", \"code review\".\n Does: Syncs PR with main (rebase if needed) -> runs 5 specialized review agents in parallel ->\n synthesizes findings -> auto-fixes CRITICAL/HIGH issues -> reports remaining issues.\n NOT for: Quick questions about a PR, checking CI status, simple \"what changed\" queries.\n\n This workflow produces 
artifacts in $ARTIFACTS_DIR/../reviews/pr-{number}/ and posts\n a comprehensive review comment to the GitHub PR.\n\nnodes:\n - id: scope\n command: archon-pr-review-scope\n\n - id: sync\n command: archon-sync-pr-with-main\n depends_on: [scope]\n\n - id: code-review\n command: archon-code-review-agent\n depends_on: [sync]\n\n - id: error-handling\n command: archon-error-handling-agent\n depends_on: [sync]\n\n - id: test-coverage\n command: archon-test-coverage-agent\n depends_on: [sync]\n\n - id: comment-quality\n command: archon-comment-quality-agent\n depends_on: [sync]\n\n - id: docs-impact\n command: archon-docs-impact-agent\n depends_on: [sync]\n\n - id: synthesize\n command: archon-synthesize-review\n depends_on: [code-review, error-handling, test-coverage, comment-quality, docs-impact]\n trigger_rule: one_success\n\n - id: implement-fixes\n command: archon-implement-review-fixes\n depends_on: [synthesize]\n", "archon-create-issue": "name: archon-create-issue\ndescription: |\n Use when: User wants to report a bug or problem as a GitHub issue with automated reproduction.\n Triggers: \"create issue\", \"file a bug\", \"report this bug\", \"open an issue for\",\n \"create github issue\", \"report issue\", \"log this bug\".\n Does: Classifies problem area (haiku) -> gathers context in parallel (templates, git state, duplicates) ->\n investigates relevant code -> reproduces the issue using area-specific tools (agent-browser, CLI, DB queries) ->\n gates on reproduction success -> creates issue with full evidence OR reports back if cannot reproduce.\n NOT for: Feature requests, enhancements, or non-bug work. 
Only for bugs/problems.\n\n Reproduction gating: If the issue cannot be reproduced, the workflow does NOT create an issue.\n Instead, it reports what was tried and suggests next steps to the user.\n\nnodes:\n # ═══════════════════════════════════════════════════════════════\n # PHASE 1: CLASSIFY — Haiku classification of user's problem\n # ═══════════════════════════════════════════════════════════════\n\n - id: classify\n prompt: |\n You are a problem classifier for the Archon codebase. Analyze the user's\n description and determine the issue type and which area of the system is affected.\n\n ## User's Description\n $ARGUMENTS\n\n ## Area Definitions\n | Area | Packages | Indicators |\n |------|----------|------------|\n | web-ui | @archon/web, @archon/server (routes, web adapter) | UI rendering, SSE streaming, React components, browser behavior |\n | api-server | @archon/server (routes, middleware) | HTTP endpoints, response codes, request handling |\n | cli | @archon/cli | CLI commands, workflow invocation from terminal, output formatting |\n | isolation | @archon/isolation, @archon/git | Worktrees, branch operations, cleanup, environment lifecycle |\n | workflows | @archon/workflows | YAML parsing, DAG execution, variable substitution, node types |\n | database | @archon/core (db/) | SQLite/PostgreSQL queries, schema, data integrity, migrations |\n | adapters | @archon/adapters | Slack/Telegram/GitHub/Discord message handling, auth, polling |\n | core | @archon/core (orchestrator, handlers, clients) | Message routing, session management, AI client streaming |\n | other | Any package not covered above | Cross-cutting concerns, build tooling, config, unknown area |\n\n ## Classification Rules\n - Choose the MOST SPECIFIC area. 
\"SSE disconnects\" = web-ui (not api-server).\n - If ambiguous between two areas, pick the one closer to the user-facing symptom.\n - Use \"other\" only when the problem genuinely doesn't fit any specific area.\n - needs_server: Set to \"true\" if reproducing requires a running Archon server.\n Typically true for: web-ui, api-server, core, adapters.\n Typically false for: cli, isolation, workflows, database.\n For \"other\": use your judgment based on the description.\n - repro_hint: Extract the user's reproduction steps into a concise instruction.\n If no explicit steps given, infer the most likely way to trigger the issue.\n\n Provide reasoning for your classification.\n model: haiku\n allowed_tools: []\n output_format:\n type: object\n properties:\n type:\n type: string\n enum: [\"bug\", \"regression\", \"crash\", \"performance\", \"configuration\"]\n area:\n type: string\n enum: [\"web-ui\", \"api-server\", \"cli\", \"isolation\", \"workflows\", \"database\", \"adapters\", \"core\", \"other\"]\n title:\n type: string\n keywords:\n type: string\n repro_hint:\n type: string\n needs_server:\n type: string\n enum: [\"true\", \"false\"]\n required: [type, area, title, keywords, repro_hint, needs_server]\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 2: PARALLEL CONTEXT GATHERING\n # ═══════════════════════════════════════════════════════════════\n\n - id: fetch-template\n bash: |\n # Search for GitHub issue templates in standard locations\n TEMPLATES_FOUND=0\n\n # Check for issue template directory (YAML-based templates)\n if [ -d \".github/ISSUE_TEMPLATE\" ]; then\n echo \"=== Issue Templates Found ===\"\n for f in .github/ISSUE_TEMPLATE/*.md .github/ISSUE_TEMPLATE/*.yaml .github/ISSUE_TEMPLATE/*.yml; do\n if [ -f \"$f\" ]; then\n TEMPLATES_FOUND=$((TEMPLATES_FOUND + 1))\n echo \"--- Template: $f ---\"\n cat \"$f\"\n echo \"\"\n fi\n done\n fi\n\n # Check for single issue template\n for f in .github/ISSUE_TEMPLATE.md 
docs/ISSUE_TEMPLATE.md; do\n if [ -f \"$f\" ]; then\n TEMPLATES_FOUND=$((TEMPLATES_FOUND + 1))\n echo \"--- Template: $f ---\"\n cat \"$f\"\n fi\n done\n\n if [ \"$TEMPLATES_FOUND\" -eq 0 ]; then\n echo \"No issue templates found — will use standard format\"\n fi\n depends_on: [classify]\n\n - id: git-context\n bash: |\n echo \"=== Branch ===\"\n git branch --show-current\n\n echo \"=== Recent Commits (last 15) ===\"\n git log --oneline -15\n\n echo \"=== Working Tree Status ===\"\n git status --short\n\n echo \"=== Modified Files (last 3 commits) ===\"\n git diff --name-only HEAD~3..HEAD 2>/dev/null || echo \"(fewer than 3 commits)\"\n\n echo \"=== Environment ===\"\n echo \"Node: $(node --version 2>/dev/null || echo 'N/A')\"\n echo \"Bun: $(bun --version 2>/dev/null || echo 'N/A')\"\n echo \"OS: $(uname -s 2>/dev/null || echo 'Windows') $(uname -r 2>/dev/null || ver 2>/dev/null || echo '')\"\n echo \"Platform: $(uname -m 2>/dev/null || echo 'unknown')\"\n depends_on: [classify]\n\n - id: dedup-check\n bash: |\n KEYWORDS=$classify.output.keywords\n echo \"=== Searching for duplicates: $KEYWORDS ===\"\n\n echo \"--- Open Issues ---\"\n gh issue list --search \"$KEYWORDS\" --state open --limit 5 --json number,title,url,labels 2>/dev/null || echo \"No open matches\"\n\n echo \"--- Recently Closed ---\"\n gh issue list --search \"$KEYWORDS\" --state closed --limit 3 --json number,title,url,labels 2>/dev/null || echo \"No closed matches\"\n depends_on: [classify]\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 3: INVESTIGATE — Search codebase for related code\n # ═══════════════════════════════════════════════════════════════\n\n - id: investigate\n prompt: |\n You are a codebase investigator. 
Search for code related to the reported problem.\n\n ## Problem\n - **Area**: $classify.output.area\n - **Type**: $classify.output.type\n - **Title**: $classify.output.title\n - **Reproduction hint**: $classify.output.repro_hint\n\n ## Git Context\n $git-context.output\n\n ## Instructions\n\n 1. Based on the area, search the relevant packages:\n - web-ui: `packages/web/src/`, `packages/server/src/adapters/web/`, `packages/server/src/routes/`\n - api-server: `packages/server/src/routes/`, `packages/server/src/`\n - cli: `packages/cli/src/`\n - isolation: `packages/isolation/src/`, `packages/git/src/`\n - workflows: `packages/workflows/src/`\n - database: `packages/core/src/db/`\n - adapters: `packages/adapters/src/`\n - core: `packages/core/src/orchestrator/`, `packages/core/src/handlers/`\n - other: search broadly based on keywords — check `packages/*/src/`, config files, build scripts\n\n 2. Find: entry points, error handling paths, related type definitions, recent changes\n to the affected area (check git log for the specific files).\n\n 3. Write your findings to `$ARTIFACTS_DIR/issue-context.md` with this structure:\n ```\n # Codebase Investigation\n ## Relevant Files\n - `file:line` — description of what's there\n ## Error Handling\n - How errors are currently handled in this area\n ## Recent Changes\n - Any recent commits touching this code\n ## Suspected Root Cause\n - Based on code analysis, where the bug likely is\n ```\n\n Be thorough but focused. 
Only include files directly relevant to the reported problem.\n depends_on: [classify, git-context]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 4: REPRODUCE — Area-specific issue reproduction\n # ═══════════════════════════════════════════════════════════════\n\n - id: start-server\n bash: |\n # Allocate a free port using Bun's OS assignment\n PORT=$(bun -e \"const s = Bun.serve({port: 0, fetch: () => new Response('')}); console.log(s.port); s.stop()\")\n echo \"$PORT\" > \"$ARTIFACTS_DIR/.server-port\"\n\n # Start dev server in background\n PORT=$PORT bun run dev:server > \"$ARTIFACTS_DIR/.server-log\" 2>&1 &\n SERVER_PID=$!\n echo \"$SERVER_PID\" > \"$ARTIFACTS_DIR/.server-pid\"\n\n # Wait for server to be ready (up to 30s)\n for i in $(seq 1 30); do\n if curl -s \"http://localhost:$PORT/api/health\" > /dev/null 2>&1; then\n echo \"Server ready on port $PORT (PID: $SERVER_PID)\"\n exit 0\n fi\n sleep 1\n done\n\n echo \"WARNING: Server may not be fully ready after 30s (port $PORT, PID $SERVER_PID)\"\n echo \"Continuing anyway — reproduce node will handle connection errors\"\n depends_on: [classify]\n when: \"$classify.output.needs_server == 'true'\"\n timeout: 45000\n\n - id: reproduce\n prompt: |\n You are an issue reproduction specialist. Your job is to reproduce the reported\n problem and capture evidence (screenshots, command output, error messages).\n\n ## Problem Context\n - **Area**: $classify.output.area\n - **Type**: $classify.output.type\n - **Title**: $classify.output.title\n - **Reproduction hint**: $classify.output.repro_hint\n\n ## Investigation Findings\n $investigate.output\n\n ## Server Info\n If a server was started, read the port from: `cat \"$ARTIFACTS_DIR/.server-port\"`\n If the file doesn't exist, no server is running (area doesn't need one).\n\n ---\n\n ## Reproduction Playbooks\n\n Follow the playbook matching the area. Capture ALL evidence to `$ARTIFACTS_DIR/`.\n\n ### web-ui\n 1. 
Read the server port: `PORT=$(cat \"$ARTIFACTS_DIR/.server-port\" | tr -d '\\n')`\n 2. Open the app: `agent-browser open http://localhost:$PORT`\n 3. Take a baseline screenshot: `agent-browser screenshot \"$ARTIFACTS_DIR/repro-01-baseline.png\"`\n 4. Get interactive elements: `agent-browser snapshot -i`\n 5. Navigate to the area related to the issue (use @refs from snapshot)\n 6. Perform the actions described in the repro_hint\n 7. Screenshot each significant state: `agent-browser screenshot \"$ARTIFACTS_DIR/repro-02-action.png\"`\n 8. If an error appears, capture it: `agent-browser get text @errorElement`\n 9. Check browser console: `agent-browser console`\n 10. Check for JS errors: `agent-browser errors`\n 11. Final screenshot: `agent-browser screenshot \"$ARTIFACTS_DIR/repro-03-result.png\"`\n 12. Close browser: `agent-browser close`\n\n ### api-server\n 1. Read the server port: `PORT=$(cat \"$ARTIFACTS_DIR/.server-port\" | tr -d '\\n')`\n 2. Create a test conversation: `curl -s -X POST http://localhost:$PORT/api/conversations -H \"Content-Type: application/json\" -d '{}'`\n 3. Hit the problematic endpoint based on the repro_hint\n 4. Capture response codes and bodies: `curl -s -w \"\\nHTTP_CODE: %{http_code}\\n\" ...`\n 5. For SSE issues: `curl -s -N http://localhost:$PORT/api/stream/` (timeout after 10s)\n 6. Check server logs: `cat \"$ARTIFACTS_DIR/.server-log\" | tail -50`\n 7. Save all curl output to `$ARTIFACTS_DIR/repro-api-responses.txt`\n\n ### cli\n 1. Run the CLI command that should trigger the issue\n 2. Capture stdout and stderr separately:\n `bun run cli > \"$ARTIFACTS_DIR/repro-cli-stdout.txt\" 2> \"$ARTIFACTS_DIR/repro-cli-stderr.txt\"; echo \"EXIT_CODE: $?\" >> \"$ARTIFACTS_DIR/repro-cli-stdout.txt\"`\n 3. If workflow-related: `bun run cli workflow list --json > \"$ARTIFACTS_DIR/repro-workflow-list.json\" 2>&1`\n 4. If the command hangs, use timeout: `timeout 30 bun run cli `\n 5. Check for error messages in output\n\n ### isolation\n 1. 
Check current state: `bun run cli isolation list > \"$ARTIFACTS_DIR/repro-isolation-list.txt\" 2>&1`\n 2. Check git worktrees: `git worktree list > \"$ARTIFACTS_DIR/repro-worktree-list.txt\"`\n 3. Check branches: `git branch -a > \"$ARTIFACTS_DIR/repro-branches.txt\"`\n 4. Try the operation that should fail (based on repro_hint)\n 5. Capture the error output\n 6. Query isolation DB: `sqlite3 ~/.archon/archon.db \"SELECT * FROM remote_agent_isolation_environments ORDER BY created_at DESC LIMIT 10\" > \"$ARTIFACTS_DIR/repro-isolation-db.txt\" 2>&1`\n\n ### workflows\n 1. List workflows: `bun run cli workflow list --json > \"$ARTIFACTS_DIR/repro-workflow-list.json\" 2>&1`\n 2. If a specific workflow is mentioned, try running it:\n `bun run cli workflow run --no-worktree \"test input\" > \"$ARTIFACTS_DIR/repro-workflow-run.txt\" 2>&1`\n 3. If YAML parsing is the issue, try loading the definition directly\n 4. Check for error messages in execution output\n\n ### database\n 1. Check DB exists: `ls -la ~/.archon/archon.db 2>/dev/null`\n 2. Run targeted queries against affected tables:\n - `sqlite3 ~/.archon/archon.db \".schema \" > \"$ARTIFACTS_DIR/repro-db-schema.txt\"`\n - `sqlite3 ~/.archon/archon.db \"SELECT COUNT(*) FROM
\" > \"$ARTIFACTS_DIR/repro-db-counts.txt\"`\n 3. Check for the specific data condition described in the repro_hint\n 4. If PostgreSQL: use `psql $DATABASE_URL -c \"...\"` instead\n\n ### adapters\n 1. Read the server port: `PORT=$(cat \"$ARTIFACTS_DIR/.server-port\" | tr -d '\\n')`\n 2. Check adapter configuration: look for relevant env vars in `.env`\n 3. Check server startup logs: `cat \"$ARTIFACTS_DIR/.server-log\" | grep -i \"adapter\\|slack\\|telegram\\|github\\|discord\" | head -20`\n 4. If the adapter fails to initialize, capture the error\n 5. Test message routing via web API as a proxy:\n `curl -s -X POST http://localhost:$PORT/api/conversations//message -H \"Content-Type: application/json\" -d '{\"message\":\"/status\"}'`\n\n ### core\n 1. Read the server port: `PORT=$(cat \"$ARTIFACTS_DIR/.server-port\" | tr -d '\\n')`\n 2. Create a conversation: `curl -s -X POST http://localhost:$PORT/api/conversations -H \"Content-Type: application/json\" -d '{}'`\n 3. Send a message that triggers the issue:\n `curl -s -X POST http://localhost:$PORT/api/conversations//message -H \"Content-Type: application/json\" -d '{\"message\":\"\"}'`\n 4. Poll for responses: `curl -s http://localhost:$PORT/api/conversations//messages`\n 5. Check session state in DB: `sqlite3 ~/.archon/archon.db \"SELECT * FROM remote_agent_sessions WHERE conversation_id=''\" 2>/dev/null`\n 6. Check server logs: `cat \"$ARTIFACTS_DIR/.server-log\" | tail -50`\n\n ### other\n 1. Run `bun run validate` to check for any obvious failures — capture output:\n `bun run validate > \"$ARTIFACTS_DIR/repro-validate.txt\" 2>&1; echo \"EXIT_CODE: $?\" >> \"$ARTIFACTS_DIR/repro-validate.txt\"`\n 2. Search the codebase for keywords from the repro_hint:\n - Use Grep/Glob to find related files\n - Check recent git log for relevant changes\n 3. If the description implies a build or config issue:\n - Check `package.json` scripts, `tsconfig.json`, `.env.example`\n - Try running the relevant build/dev command\n 4. 
If the description implies a runtime issue:\n - Start the server (if `.server-port` file exists) and try to trigger the behavior\n - Check logs for errors\n 5. Document everything you tried, even if nothing reproduces clearly\n\n ---\n\n ## Output\n\n After following the playbook, write your findings to `$ARTIFACTS_DIR/reproduction-results.md`:\n\n ```markdown\n # Reproduction Results\n\n ## Status: [REPRODUCED | NOT_REPRODUCED | PARTIAL]\n\n ## Steps Taken\n 1. [step]\n 2. [step]\n\n ## Expected Behavior\n [what should happen]\n\n ## Actual Behavior\n [what actually happened — or \"could not trigger the reported behavior\"]\n\n ## Evidence Files\n - `$ARTIFACTS_DIR/repro-*.png` — screenshots (if web-ui)\n - `$ARTIFACTS_DIR/repro-*.txt` — command output\n - `$ARTIFACTS_DIR/repro-*.json` — structured data\n\n ## Environment\n [OS, versions, relevant config]\n\n ## Notes\n [any additional observations, suspected root cause refinements]\n ```\n\n CRITICAL: The Status line MUST be exactly one of: REPRODUCED, NOT_REPRODUCED, PARTIAL.\n This value is read by a downstream bash node to decide whether to create the issue.\n\n Even if you cannot fully reproduce the issue, document what you tried\n and what you observed. 
Partial reproduction is still valuable evidence.\n depends_on: [classify, git-context, investigate, start-server]\n context: fresh\n skills:\n - agent-browser\n trigger_rule: one_success\n idle_timeout: 300000\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 5: CLEANUP + GATE\n # ═══════════════════════════════════════════════════════════════\n\n - id: cleanup-server\n bash: |\n SERVER_PID=$(cat \"$ARTIFACTS_DIR/.server-pid\" 2>/dev/null | tr -d '\\n')\n SERVER_PORT=$(cat \"$ARTIFACTS_DIR/.server-port\" 2>/dev/null | tr -d '\\n')\n\n if [ -z \"$SERVER_PID\" ]; then\n echo \"No server was started — skipping cleanup\"\n exit 0\n fi\n\n echo \"Cleaning up server PID $SERVER_PID on port $SERVER_PORT...\"\n\n # Kill by PID (cross-platform)\n kill \"$SERVER_PID\" 2>/dev/null || taskkill //F //T //PID \"$SERVER_PID\" 2>/dev/null || true\n\n # Kill by port (fallback)\n if [ -n \"$SERVER_PORT\" ]; then\n fuser -k \"$SERVER_PORT/tcp\" 2>/dev/null || true\n lsof -ti:\"$SERVER_PORT\" 2>/dev/null | xargs kill -9 2>/dev/null || true\n netstat -ano 2>/dev/null | grep \":$SERVER_PORT \" | grep LISTENING | awk '{print $5}' | sort -u | while read pid; do\n taskkill //F //T //PID \"$pid\" 2>/dev/null || true\n done\n fi\n\n # Close any agent-browser session\n agent-browser close 2>/dev/null || true\n\n sleep 1\n echo \"Cleanup complete\"\n depends_on: [reproduce]\n trigger_rule: all_done\n\n - id: check-reproduction\n bash: |\n # Read the reproduction status from the results file\n if [ ! 
-f \"$ARTIFACTS_DIR/reproduction-results.md\" ]; then\n echo \"NOT_REPRODUCED\"\n exit 0\n fi\n\n STATUS=$(grep -oE '(NOT_REPRODUCED|REPRODUCED|PARTIAL)' \"$ARTIFACTS_DIR/reproduction-results.md\" | head -1)\n\n if [ -z \"$STATUS\" ]; then\n echo \"NOT_REPRODUCED\"\n else\n echo \"$STATUS\"\n fi\n depends_on: [cleanup-server]\n trigger_rule: all_done\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 6: BRANCH ON REPRODUCTION RESULT\n # ═══════════════════════════════════════════════════════════════\n\n - id: report-failure\n prompt: |\n The issue could not be reproduced. Report this to the user with actionable detail.\n\n ## Problem Description\n - **Title**: $classify.output.title\n - **Area**: $classify.output.area\n - **Type**: $classify.output.type\n - **Reproduction hint**: $classify.output.repro_hint\n\n ## What Was Tried\n $reproduce.output\n\n ## Investigation Findings\n $investigate.output\n\n ## Instructions\n\n Report to the user clearly:\n\n 1. **State upfront**: \"Could not reproduce the reported issue. No GitHub issue was created.\"\n\n 2. **Summarize what was tried**: List the specific steps the reproduce node took,\n based on the area playbook. Be concrete — \"Started server on port X, navigated to Y,\n clicked Z — no error appeared.\"\n\n 3. **Share what was found**: Include relevant findings from the investigation\n (code references, recent changes, suspected areas).\n\n 4. **Suggest next steps**:\n - Ask the user to provide more specific reproduction steps\n - Mention any environment-specific factors that might matter\n (OS, browser, database state, specific data conditions)\n - If the investigation found suspicious code, mention it as a lead\n - Suggest running with debug logging: `LOG_LEVEL=debug bun run dev`\n\n 5. **Offer to retry**: \"If you can provide more specific steps, run the workflow\n again with those details.\"\n\n Do NOT create a GitHub issue. 
The purpose of this node is to communicate back to the\n user so they can provide better information or investigate manually.\n depends_on: [check-reproduction]\n when: \"$check-reproduction.output == 'NOT_REPRODUCED'\"\n context: fresh\n\n - id: draft-issue\n prompt: |\n You are a technical writer drafting a GitHub issue. Assemble all gathered\n context into a clear, well-structured issue body.\n\n ## Classification\n - **Type**: $classify.output.type\n - **Area**: $classify.output.area\n - **Title**: $classify.output.title\n\n ## Issue Template\n If templates were found, use the most appropriate one as the structure:\n $fetch-template.output\n\n ## Duplicate Check Results\n $dedup-check.output\n\n ## Codebase Investigation\n $investigate.output\n\n ## Reproduction Results\n $reproduce.output\n\n ## Instructions\n\n 1. **Check duplicates first**: If the dedup-check found a clearly matching open issue,\n note this prominently at the top. Still draft the issue but add a note suggesting\n it may be a duplicate of #XYZ.\n\n 2. **Use the template** if one was found for bug reports. Fill every section with real data.\n\n 3. **Structure** (if no template):\n ```markdown\n ## Description\n [Clear 1-2 sentence description]\n\n ## Steps to Reproduce\n [Numbered steps from reproduction results]\n\n ## Expected Behavior\n [What should happen]\n\n ## Actual Behavior\n [What actually happened, with evidence]\n\n ## Environment\n - OS: [from git-context]\n - Bun: [version]\n - Node: [version]\n - Branch: [current branch]\n\n ## Relevant Code\n [Key file:line references from investigation]\n\n ## Additional Context\n [Screenshots, logs, database state — reference artifact files]\n ```\n\n 4. **Include reproduction evidence**:\n - If REPRODUCED: include full steps and all evidence\n - If PARTIAL: include what was observed, note incomplete reproduction\n\n 5. 
**Suggest labels** based on classification:\n - Area label: `area: web`, `area: cli`, `area: workflows`, etc.\n - Type label: `bug`, `regression`, `performance`, etc.\n\n 6. Write the complete issue body to `$ARTIFACTS_DIR/issue-draft.md`\n\n 7. Write a one-line suggested title to `$ARTIFACTS_DIR/.issue-title`\n\n 8. Write suggested labels (comma-separated) to `$ARTIFACTS_DIR/.issue-labels`\n depends_on: [check-reproduction, fetch-template, dedup-check, investigate]\n when: \"$check-reproduction.output != 'NOT_REPRODUCED'\"\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 7: CREATE ISSUE\n # ═══════════════════════════════════════════════════════════════\n\n - id: create-issue\n prompt: |\n Create the GitHub issue using the drafted content.\n\n ## Instructions\n\n 1. Read the draft: `cat \"$ARTIFACTS_DIR/issue-draft.md\"`\n 2. Read the title: `cat \"$ARTIFACTS_DIR/.issue-title\"`\n 3. Read suggested labels: `cat \"$ARTIFACTS_DIR/.issue-labels\"`\n\n 4. Check which labels actually exist in the repo:\n ```bash\n gh label list --json name -q '.[].name' | head -50\n ```\n Only use labels that exist. Skip any suggested label that doesn't match.\n\n 5. Create the issue:\n ```bash\n gh issue create \\\n --title \"$(cat \"$ARTIFACTS_DIR/.issue-title\")\" \\\n --body-file \"$ARTIFACTS_DIR/issue-draft.md\" \\\n --label \"label1,label2\"\n ```\n\n 6. Capture the result:\n ```bash\n ISSUE_URL=$(gh issue list --limit 1 --json url -q '.[0].url')\n echo \"$ISSUE_URL\" > \"$ARTIFACTS_DIR/.issue-url\"\n ```\n\n 7. 
Report to the user:\n - Issue URL\n - Title\n - Labels applied\n - Whether duplicates were found\n - Summary of reproduction results (reproduced/partial)\n depends_on: [draft-issue]\n context: fresh\n", - "archon-feature-development": "name: archon-feature-development\ndescription: |\n Use when: Implementing a feature from an existing plan.\n Input: Path to a plan file ($ARTIFACTS_DIR/plan.md) or GitHub issue containing a plan.\n Does: Implements the plan with validation loops -> creates pull request.\n NOT for: Creating plans (plans should be created separately), bug fixes, code reviews.\n\nnodes:\n - id: implement\n command: archon-implement\n model: claude-opus-4-6[1m]\n\n - id: create-pr\n command: archon-create-pr\n depends_on: [implement]\n context: fresh\n", - "archon-fix-github-issue": "name: archon-fix-github-issue\ndescription: |\n Use when: User wants to FIX, RESOLVE, or IMPLEMENT a solution for a GitHub issue.\n Triggers: \"fix this issue\", \"implement issue #123\", \"resolve this bug\", \"fix it\",\n \"fix issue\", \"resolve issue\", \"fix #123\".\n NOT for: Comprehensive multi-agent reviews (use archon-issue-review-full),\n questions about issues, CI failures, PR reviews, general exploration.\n\n DAG workflow that:\n 1. Classifies the issue (bug/feature/enhancement/etc)\n 2. Researches context (web research + codebase exploration via investigate/plan)\n 3. Routes to investigate (bugs) or plan (features) based on classification\n 4. Implements the fix/feature with validation\n 5. Creates a draft PR using the repo's PR template\n 6. Runs smart review (always code review + CLAUDE.md check, conditional additional agents)\n 7. Aggressively self-fixes all findings (tests, docs, error handling)\n 8. Simplifies changed code (implements fixes directly, not just reports)\n 9. 
Reports results back to the GitHub issue with follow-up suggestions\n\nprovider: claude\nmodel: sonnet\n\nnodes:\n # ═══════════════════════════════════════════════════════════════\n # PHASE 1: FETCH & CLASSIFY\n # ═══════════════════════════════════════════════════════════════\n\n - id: extract-issue-number\n prompt: |\n Find the GitHub issue number for this request.\n\n Request: $ARGUMENTS\n\n Rules:\n - If the message contains an explicit issue number (e.g., \"#709\", \"issue 709\", \"709\"), extract that number.\n - If the message is ambiguous (e.g., \"fix the SQLite timestamp bug\"), use `gh issue list` to search for matching issues and pick the best match.\n\n CRITICAL: Your final output must be ONLY the bare number with no quotes, no markdown, no explanation. Example correct output: 709\n\n - id: fetch-issue\n bash: |\n # Strip quotes, whitespace, markdown backticks from AI output\n ISSUE_NUM=$(echo \"$extract-issue-number.output\" | tr -d \"'\\\"\\`\\n \" | grep -oE '[0-9]+' | head -1)\n if [ -z \"$ISSUE_NUM\" ]; then\n echo \"Failed to extract issue number from: $extract-issue-number.output\" >&2\n exit 1\n fi\n gh issue view \"$ISSUE_NUM\" --json title,body,labels,comments,state,url,author\n depends_on: [extract-issue-number]\n\n - id: classify\n prompt: |\n You are an issue classifier. 
Analyze the GitHub issue below and determine its type.\n\n ## Issue Content\n\n $fetch-issue.output\n\n ## Classification Rules\n\n | Type | Indicators |\n |------|------------|\n | bug | \"broken\", \"error\", \"crash\", \"doesn't work\", stack traces, regression |\n | feature | \"add\", \"new\", \"support\", \"would be nice\", net-new capability |\n | enhancement | \"improve\", \"better\", \"update existing\", \"extend\", incremental improvement |\n | refactor | \"clean up\", \"simplify\", \"reorganize\", \"restructure\" |\n | chore | \"update deps\", \"upgrade\", \"maintenance\", \"CI/CD\" |\n | documentation | \"docs\", \"readme\", \"clarify\", \"examples\" |\n\n Provide reasoning for your classification.\n depends_on: [fetch-issue]\n model: haiku\n allowed_tools: []\n output_format:\n type: object\n properties:\n issue_type:\n type: string\n enum: [\"bug\", \"feature\", \"enhancement\", \"refactor\", \"chore\", \"documentation\"]\n title:\n type: string\n reasoning:\n type: string\n required: [issue_type, title, reasoning]\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 2: RESEARCH (parallel with PR template fetch)\n # ═══════════════════════════════════════════════════════════════\n\n - id: web-research\n command: archon-web-research\n depends_on: [classify]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 3: INVESTIGATE (bugs) / PLAN (features)\n # ═══════════════════════════════════════════════════════════════\n\n - id: investigate\n command: archon-investigate-issue\n depends_on: [classify, web-research]\n when: \"$classify.output.issue_type == 'bug'\"\n context: fresh\n\n - id: plan\n command: archon-create-plan\n depends_on: [classify, web-research]\n when: \"$classify.output.issue_type != 'bug'\"\n context: fresh\n\n # Bridge: ensure investigation.md exists for the implement step\n # archon-fix-issue reads from $ARTIFACTS_DIR/investigation.md\n # archon-create-plan writes to 
$ARTIFACTS_DIR/plan.md\n # This node copies plan.md → investigation.md when the plan path was taken\n - id: bridge-artifacts\n bash: |\n if [ -f \"$ARTIFACTS_DIR/plan.md\" ] && [ ! -f \"$ARTIFACTS_DIR/investigation.md\" ]; then\n cp \"$ARTIFACTS_DIR/plan.md\" \"$ARTIFACTS_DIR/investigation.md\"\n echo \"Bridged plan.md to investigation.md for implement step\"\n elif [ -f \"$ARTIFACTS_DIR/investigation.md\" ]; then\n echo \"investigation.md exists from investigate step\"\n else\n echo \"WARNING: No investigation.md or plan.md found — implement may fail\"\n fi\n depends_on: [investigate, plan]\n trigger_rule: one_success\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 4: IMPLEMENT\n # ═══════════════════════════════════════════════════════════════\n\n - id: implement\n command: archon-fix-issue\n depends_on: [bridge-artifacts]\n context: fresh\n model: claude-opus-4-6[1m]\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 5: VALIDATE\n # ═══════════════════════════════════════════════════════════════\n\n - id: validate\n command: archon-validate\n depends_on: [implement]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 6: CREATE DRAFT PR\n # ═══════════════════════════════════════════════════════════════\n\n - id: create-pr\n prompt: |\n Create a draft pull request for the current branch.\n\n ## Context\n\n - **Issue**: $ARGUMENTS\n - **Classification**: $classify.output\n - **Issue title**: $classify.output.title\n\n ## Instructions\n\n 1. Check git status — ensure all changes are committed. If uncommitted changes exist, stage and commit them.\n 2. Push the branch: `git push -u origin HEAD`\n 3. Read implementation artifacts from `$ARTIFACTS_DIR/` for context:\n - `$ARTIFACTS_DIR/investigation.md` or `$ARTIFACTS_DIR/plan.md`\n - `$ARTIFACTS_DIR/implementation.md`\n - `$ARTIFACTS_DIR/validation.md`\n 4. 
Check if a PR already exists for this branch: `gh pr list --head $(git branch --show-current)`\n - If PR exists, skip creation and capture its number\n 5. Look for the project's PR template at `.github/pull_request_template.md`, `.github/PULL_REQUEST_TEMPLATE.md`, or `docs/PULL_REQUEST_TEMPLATE.md`. Read whichever one exists.\n 6. Create a DRAFT PR: `gh pr create --draft --base $BASE_BRANCH`\n - Title: concise, imperative mood, under 70 chars\n - Body: if a PR template was found, fill in **every section** with details from the artifacts. Don't skip sections or leave placeholders. If no template, write a body with summary, changes, validation evidence, and `Fixes #...`.\n - Link to issue: include `Fixes #...` or `Closes #...`\n 7. Capture PR identifiers:\n ```bash\n PR_NUMBER=$(gh pr view --json number -q '.number')\n echo \"$PR_NUMBER\" > \"$ARTIFACTS_DIR/.pr-number\"\n PR_URL=$(gh pr view --json url -q '.url')\n echo \"$PR_URL\" > \"$ARTIFACTS_DIR/.pr-url\"\n ```\n depends_on: [validate]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 7: REVIEW\n # ═══════════════════════════════════════════════════════════════\n\n - id: review-scope\n command: archon-pr-review-scope\n depends_on: [create-pr]\n context: fresh\n\n - id: review-classify\n prompt: |\n You are a PR review classifier. Analyze the PR scope and determine\n which review agents should run.\n\n ## PR Scope\n\n $review-scope.output\n\n ## Rules\n\n - **Code review**: ALWAYS run. This is mandatory for every PR. 
It also checks\n the PR against CLAUDE.md rules and project conventions.\n - **Error handling**: Run if the diff touches code with try/catch, error handling,\n async/await, or adds new failure paths.\n - **Test coverage**: Run if the diff touches source code (not just tests, docs, or config).\n - **Comment quality**: Run if the diff adds or modifies comments, docstrings, JSDoc,\n or significant documentation within code files.\n - **Docs impact**: Run if the diff adds/removes/renames public APIs, commands, CLI flags,\n environment variables, or user-facing features.\n\n Provide your reasoning for each decision.\n depends_on: [review-scope]\n model: haiku\n allowed_tools: []\n context: fresh\n output_format:\n type: object\n properties:\n run_code_review:\n type: string\n enum: [\"true\", \"false\"]\n run_error_handling:\n type: string\n enum: [\"true\", \"false\"]\n run_test_coverage:\n type: string\n enum: [\"true\", \"false\"]\n run_comment_quality:\n type: string\n enum: [\"true\", \"false\"]\n run_docs_impact:\n type: string\n enum: [\"true\", \"false\"]\n reasoning:\n type: string\n required:\n - run_code_review\n - run_error_handling\n - run_test_coverage\n - run_comment_quality\n - run_docs_impact\n - reasoning\n\n # Code review always runs — mandatory\n - id: code-review\n command: archon-code-review-agent\n depends_on: [review-classify]\n context: fresh\n\n - id: error-handling\n command: archon-error-handling-agent\n depends_on: [review-classify]\n when: \"$review-classify.output.run_error_handling == 'true'\"\n context: fresh\n\n - id: test-coverage\n command: archon-test-coverage-agent\n depends_on: [review-classify]\n when: \"$review-classify.output.run_test_coverage == 'true'\"\n context: fresh\n\n - id: comment-quality\n command: archon-comment-quality-agent\n depends_on: [review-classify]\n when: \"$review-classify.output.run_comment_quality == 'true'\"\n context: fresh\n\n - id: docs-impact\n command: archon-docs-impact-agent\n depends_on: 
[review-classify]\n when: \"$review-classify.output.run_docs_impact == 'true'\"\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 8: SYNTHESIZE + SELF-FIX\n # ═══════════════════════════════════════════════════════════════\n\n - id: synthesize\n command: archon-synthesize-review\n depends_on: [code-review, error-handling, test-coverage, comment-quality, docs-impact]\n trigger_rule: one_success\n context: fresh\n\n - id: self-fix\n command: archon-self-fix-all\n depends_on: [synthesize]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 9: SIMPLIFY\n # ═══════════════════════════════════════════════════════════════\n\n - id: simplify\n command: archon-simplify-changes\n depends_on: [self-fix]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 10: REPORT\n # ═══════════════════════════════════════════════════════════════\n\n - id: report\n command: archon-issue-completion-report\n depends_on: [simplify]\n context: fresh\n", - "archon-idea-to-pr": "name: archon-idea-to-pr\ndescription: |\n Use when: You have a feature idea or description and want end-to-end development.\n Input: Feature description in natural language, or path to a PRD file\n Output: PR ready for merge with comprehensive review completed\n\n Full workflow:\n 1. Create comprehensive implementation plan with codebase analysis\n 2. Setup branch and extract scope limits\n 3. Verify plan research is still valid\n 4. Implement all tasks with type-checking\n 5. Run full validation suite\n 6. Create PR with template, mark ready\n 7. Comprehensive code review (5 parallel agents with scope limit awareness)\n 8. Synthesize and fix review findings\n 9. 
Final summary with decision matrix -> GitHub comment + follow-up recommendations\n\n NOT for: Executing existing plans (use archon-plan-to-pr), quick fixes, standalone reviews.\n\nnodes:\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 0: CREATE PLAN\n # ═══════════════════════════════════════════════════════════════════\n\n - id: create-plan\n command: archon-create-plan\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 1: SETUP\n # ═══════════════════════════════════════════════════════════════════\n\n - id: plan-setup\n command: archon-plan-setup\n depends_on: [create-plan]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 2: CONFIRM PLAN\n # ═══════════════════════════════════════════════════════════════════\n\n - id: confirm-plan\n command: archon-confirm-plan\n depends_on: [plan-setup]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 3: IMPLEMENT\n # ═══════════════════════════════════════════════════════════════════\n\n - id: implement-tasks\n command: archon-implement-tasks\n depends_on: [confirm-plan]\n context: fresh\n model: claude-opus-4-6[1m]\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 4: VALIDATE\n # ═══════════════════════════════════════════════════════════════════\n\n - id: validate\n command: archon-validate\n depends_on: [implement-tasks]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 5: FINALIZE PR\n # ═══════════════════════════════════════════════════════════════════\n\n - id: finalize-pr\n command: archon-finalize-pr\n depends_on: [validate]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 6: CODE REVIEW\n # ═══════════════════════════════════════════════════════════════════\n\n - id: review-scope\n command: 
archon-pr-review-scope\n depends_on: [finalize-pr]\n context: fresh\n\n - id: sync\n command: archon-sync-pr-with-main\n depends_on: [review-scope]\n context: fresh\n\n - id: code-review\n command: archon-code-review-agent\n depends_on: [sync]\n context: fresh\n\n - id: error-handling\n command: archon-error-handling-agent\n depends_on: [sync]\n context: fresh\n\n - id: test-coverage\n command: archon-test-coverage-agent\n depends_on: [sync]\n context: fresh\n\n - id: comment-quality\n command: archon-comment-quality-agent\n depends_on: [sync]\n context: fresh\n\n - id: docs-impact\n command: archon-docs-impact-agent\n depends_on: [sync]\n context: fresh\n\n - id: synthesize\n command: archon-synthesize-review\n depends_on: [code-review, error-handling, test-coverage, comment-quality, docs-impact]\n trigger_rule: one_success\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 7: FIX REVIEW ISSUES\n # ═══════════════════════════════════════════════════════════════════\n\n - id: implement-fixes\n command: archon-implement-review-fixes\n depends_on: [synthesize]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 8: FINAL SUMMARY & FOLLOW-UP\n # ═══════════════════════════════════════════════════════════════════\n\n - id: workflow-summary\n command: archon-workflow-summary\n depends_on: [implement-fixes]\n context: fresh\n", + "archon-feature-development": "name: archon-feature-development\ndescription: |\n Use when: Implementing a feature from an existing plan.\n Input: Path to a plan file ($ARTIFACTS_DIR/plan.md) or GitHub issue containing a plan.\n Does: Implements the plan with validation loops -> creates pull request.\n NOT for: Creating plans (plans should be created separately), bug fixes, code reviews.\n\nnodes:\n - id: implement\n command: archon-implement\n model: opus[1m]\n\n - id: create-pr\n command: archon-create-pr\n depends_on: [implement]\n context: 
fresh\n", + "archon-fix-github-issue": "name: archon-fix-github-issue\ndescription: |\n Use when: User wants to FIX, RESOLVE, or IMPLEMENT a solution for a GitHub issue.\n Triggers: \"fix this issue\", \"implement issue #123\", \"resolve this bug\", \"fix it\",\n \"fix issue\", \"resolve issue\", \"fix #123\".\n NOT for: Comprehensive multi-agent reviews (use archon-issue-review-full),\n questions about issues, CI failures, PR reviews, general exploration.\n\n DAG workflow that:\n 1. Classifies the issue (bug/feature/enhancement/etc)\n 2. Researches context (web research + codebase exploration via investigate/plan)\n 3. Routes to investigate (bugs) or plan (features) based on classification\n 4. Implements the fix/feature with validation\n 5. Creates a draft PR using the repo's PR template\n 6. Runs smart review (always code review + CLAUDE.md check, conditional additional agents)\n 7. Aggressively self-fixes all findings (tests, docs, error handling)\n 8. Simplifies changed code (implements fixes directly, not just reports)\n 9. Reports results back to the GitHub issue with follow-up suggestions\n\nprovider: claude\nmodel: sonnet\n\nnodes:\n # ═══════════════════════════════════════════════════════════════\n # PHASE 1: FETCH & CLASSIFY\n # ═══════════════════════════════════════════════════════════════\n\n - id: extract-issue-number\n prompt: |\n Find the GitHub issue number for this request.\n\n Request: $ARGUMENTS\n\n Rules:\n - If the message contains an explicit issue number (e.g., \"#709\", \"issue 709\", \"709\"), extract that number.\n - If the message is ambiguous (e.g., \"fix the SQLite timestamp bug\"), use `gh issue list` to search for matching issues and pick the best match.\n\n CRITICAL: Your final output must be ONLY the bare number with no quotes, no markdown, no explanation. 
Example correct output: 709\n\n - id: fetch-issue\n bash: |\n # Strip quotes, whitespace, markdown backticks from AI output\n ISSUE_NUM=$(echo \"$extract-issue-number.output\" | tr -d \"'\\\"\\`\\n \" | grep -oE '[0-9]+' | head -1)\n if [ -z \"$ISSUE_NUM\" ]; then\n echo \"Failed to extract issue number from: $extract-issue-number.output\" >&2\n exit 1\n fi\n gh issue view \"$ISSUE_NUM\" --json title,body,labels,comments,state,url,author\n depends_on: [extract-issue-number]\n\n - id: classify\n prompt: |\n You are an issue classifier. Analyze the GitHub issue below and determine its type.\n\n ## Issue Content\n\n $fetch-issue.output\n\n ## Classification Rules\n\n | Type | Indicators |\n |------|------------|\n | bug | \"broken\", \"error\", \"crash\", \"doesn't work\", stack traces, regression |\n | feature | \"add\", \"new\", \"support\", \"would be nice\", net-new capability |\n | enhancement | \"improve\", \"better\", \"update existing\", \"extend\", incremental improvement |\n | refactor | \"clean up\", \"simplify\", \"reorganize\", \"restructure\" |\n | chore | \"update deps\", \"upgrade\", \"maintenance\", \"CI/CD\" |\n | documentation | \"docs\", \"readme\", \"clarify\", \"examples\" |\n\n Provide reasoning for your classification.\n depends_on: [fetch-issue]\n model: haiku\n allowed_tools: []\n output_format:\n type: object\n properties:\n issue_type:\n type: string\n enum: [\"bug\", \"feature\", \"enhancement\", \"refactor\", \"chore\", \"documentation\"]\n title:\n type: string\n reasoning:\n type: string\n required: [issue_type, title, reasoning]\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 2: RESEARCH (parallel with PR template fetch)\n # ═══════════════════════════════════════════════════════════════\n\n - id: web-research\n command: archon-web-research\n depends_on: [classify]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 3: INVESTIGATE (bugs) / PLAN (features)\n # 
═══════════════════════════════════════════════════════════════\n\n - id: investigate\n command: archon-investigate-issue\n depends_on: [classify, web-research]\n when: \"$classify.output.issue_type == 'bug'\"\n context: fresh\n\n - id: plan\n command: archon-create-plan\n depends_on: [classify, web-research]\n when: \"$classify.output.issue_type != 'bug'\"\n context: fresh\n\n # Bridge: ensure investigation.md exists for the implement step\n # archon-fix-issue reads from $ARTIFACTS_DIR/investigation.md\n # archon-create-plan writes to $ARTIFACTS_DIR/plan.md\n # This node copies plan.md → investigation.md when the plan path was taken\n - id: bridge-artifacts\n bash: |\n if [ -f \"$ARTIFACTS_DIR/plan.md\" ] && [ ! -f \"$ARTIFACTS_DIR/investigation.md\" ]; then\n cp \"$ARTIFACTS_DIR/plan.md\" \"$ARTIFACTS_DIR/investigation.md\"\n echo \"Bridged plan.md to investigation.md for implement step\"\n elif [ -f \"$ARTIFACTS_DIR/investigation.md\" ]; then\n echo \"investigation.md exists from investigate step\"\n else\n echo \"WARNING: No investigation.md or plan.md found — implement may fail\"\n fi\n depends_on: [investigate, plan]\n trigger_rule: one_success\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 4: IMPLEMENT\n # ═══════════════════════════════════════════════════════════════\n\n - id: implement\n command: archon-fix-issue\n depends_on: [bridge-artifacts]\n context: fresh\n model: opus[1m]\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 5: VALIDATE\n # ═══════════════════════════════════════════════════════════════\n\n - id: validate\n command: archon-validate\n depends_on: [implement]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 6: CREATE DRAFT PR\n # ═══════════════════════════════════════════════════════════════\n\n - id: create-pr\n prompt: |\n Create a draft pull request for the current branch.\n\n ## Context\n\n - **Issue**: $ARGUMENTS\n - 
**Classification**: $classify.output\n - **Issue title**: $classify.output.title\n\n ## Instructions\n\n 1. Check git status — ensure all changes are committed. If uncommitted changes exist, stage and commit them.\n 2. Push the branch: `git push -u origin HEAD`\n 3. Read implementation artifacts from `$ARTIFACTS_DIR/` for context:\n - `$ARTIFACTS_DIR/investigation.md` or `$ARTIFACTS_DIR/plan.md`\n - `$ARTIFACTS_DIR/implementation.md`\n - `$ARTIFACTS_DIR/validation.md`\n 4. Check if a PR already exists for this branch: `gh pr list --head $(git branch --show-current)`\n - If PR exists, skip creation and capture its number\n 5. Look for the project's PR template at `.github/pull_request_template.md`, `.github/PULL_REQUEST_TEMPLATE.md`, or `docs/PULL_REQUEST_TEMPLATE.md`. Read whichever one exists.\n 6. Create a DRAFT PR: `gh pr create --draft --base $BASE_BRANCH`\n - Title: concise, imperative mood, under 70 chars\n - Body: if a PR template was found, fill in **every section** with details from the artifacts. Don't skip sections or leave placeholders. If no template, write a body with summary, changes, validation evidence, and `Fixes #...`.\n - Link to issue: include `Fixes #...` or `Closes #...`\n 7. Capture PR identifiers:\n ```bash\n PR_NUMBER=$(gh pr view --json number -q '.number')\n echo \"$PR_NUMBER\" > \"$ARTIFACTS_DIR/.pr-number\"\n PR_URL=$(gh pr view --json url -q '.url')\n echo \"$PR_URL\" > \"$ARTIFACTS_DIR/.pr-url\"\n ```\n depends_on: [validate]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 7: REVIEW\n # ═══════════════════════════════════════════════════════════════\n\n - id: review-scope\n command: archon-pr-review-scope\n depends_on: [create-pr]\n context: fresh\n\n - id: review-classify\n prompt: |\n You are a PR review classifier. Analyze the PR scope and determine\n which review agents should run.\n\n ## PR Scope\n\n $review-scope.output\n\n ## Rules\n\n - **Code review**: ALWAYS run. 
This is mandatory for every PR. It also checks\n the PR against CLAUDE.md rules and project conventions.\n - **Error handling**: Run if the diff touches code with try/catch, error handling,\n async/await, or adds new failure paths.\n - **Test coverage**: Run if the diff touches source code (not just tests, docs, or config).\n - **Comment quality**: Run if the diff adds or modifies comments, docstrings, JSDoc,\n or significant documentation within code files.\n - **Docs impact**: Run if the diff adds/removes/renames public APIs, commands, CLI flags,\n environment variables, or user-facing features.\n\n Provide your reasoning for each decision.\n depends_on: [review-scope]\n model: haiku\n allowed_tools: []\n context: fresh\n output_format:\n type: object\n properties:\n run_code_review:\n type: string\n enum: [\"true\", \"false\"]\n run_error_handling:\n type: string\n enum: [\"true\", \"false\"]\n run_test_coverage:\n type: string\n enum: [\"true\", \"false\"]\n run_comment_quality:\n type: string\n enum: [\"true\", \"false\"]\n run_docs_impact:\n type: string\n enum: [\"true\", \"false\"]\n reasoning:\n type: string\n required:\n - run_code_review\n - run_error_handling\n - run_test_coverage\n - run_comment_quality\n - run_docs_impact\n - reasoning\n\n # Code review always runs — mandatory\n - id: code-review\n command: archon-code-review-agent\n depends_on: [review-classify]\n context: fresh\n\n - id: error-handling\n command: archon-error-handling-agent\n depends_on: [review-classify]\n when: \"$review-classify.output.run_error_handling == 'true'\"\n context: fresh\n\n - id: test-coverage\n command: archon-test-coverage-agent\n depends_on: [review-classify]\n when: \"$review-classify.output.run_test_coverage == 'true'\"\n context: fresh\n\n - id: comment-quality\n command: archon-comment-quality-agent\n depends_on: [review-classify]\n when: \"$review-classify.output.run_comment_quality == 'true'\"\n context: fresh\n\n - id: docs-impact\n command: 
archon-docs-impact-agent\n depends_on: [review-classify]\n when: \"$review-classify.output.run_docs_impact == 'true'\"\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 8: SYNTHESIZE + SELF-FIX\n # ═══════════════════════════════════════════════════════════════\n\n - id: synthesize\n command: archon-synthesize-review\n depends_on: [code-review, error-handling, test-coverage, comment-quality, docs-impact]\n trigger_rule: one_success\n context: fresh\n\n - id: self-fix\n command: archon-self-fix-all\n depends_on: [synthesize]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 9: SIMPLIFY\n # ═══════════════════════════════════════════════════════════════\n\n - id: simplify\n command: archon-simplify-changes\n depends_on: [self-fix]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 10: REPORT\n # ═══════════════════════════════════════════════════════════════\n\n - id: report\n command: archon-issue-completion-report\n depends_on: [simplify]\n context: fresh\n", + "archon-idea-to-pr": "name: archon-idea-to-pr\ndescription: |\n Use when: You have a feature idea or description and want end-to-end development.\n Input: Feature description in natural language, or path to a PRD file\n Output: PR ready for merge with comprehensive review completed\n\n Full workflow:\n 1. Create comprehensive implementation plan with codebase analysis\n 2. Setup branch and extract scope limits\n 3. Verify plan research is still valid\n 4. Implement all tasks with type-checking\n 5. Run full validation suite\n 6. Create PR with template, mark ready\n 7. Comprehensive code review (5 parallel agents with scope limit awareness)\n 8. Synthesize and fix review findings\n 9. 
Final summary with decision matrix -> GitHub comment + follow-up recommendations\n\n NOT for: Executing existing plans (use archon-plan-to-pr), quick fixes, standalone reviews.\n\nnodes:\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 0: CREATE PLAN\n # ═══════════════════════════════════════════════════════════════════\n\n - id: create-plan\n command: archon-create-plan\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 1: SETUP\n # ═══════════════════════════════════════════════════════════════════\n\n - id: plan-setup\n command: archon-plan-setup\n depends_on: [create-plan]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 2: CONFIRM PLAN\n # ═══════════════════════════════════════════════════════════════════\n\n - id: confirm-plan\n command: archon-confirm-plan\n depends_on: [plan-setup]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 3: IMPLEMENT\n # ═══════════════════════════════════════════════════════════════════\n\n - id: implement-tasks\n command: archon-implement-tasks\n depends_on: [confirm-plan]\n context: fresh\n model: opus[1m]\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 4: VALIDATE\n # ═══════════════════════════════════════════════════════════════════\n\n - id: validate\n command: archon-validate\n depends_on: [implement-tasks]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 5: FINALIZE PR\n # ═══════════════════════════════════════════════════════════════════\n\n - id: finalize-pr\n command: archon-finalize-pr\n depends_on: [validate]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 6: CODE REVIEW\n # ═══════════════════════════════════════════════════════════════════\n\n - id: review-scope\n command: 
archon-pr-review-scope\n depends_on: [finalize-pr]\n context: fresh\n\n - id: sync\n command: archon-sync-pr-with-main\n depends_on: [review-scope]\n context: fresh\n\n - id: code-review\n command: archon-code-review-agent\n depends_on: [sync]\n context: fresh\n\n - id: error-handling\n command: archon-error-handling-agent\n depends_on: [sync]\n context: fresh\n\n - id: test-coverage\n command: archon-test-coverage-agent\n depends_on: [sync]\n context: fresh\n\n - id: comment-quality\n command: archon-comment-quality-agent\n depends_on: [sync]\n context: fresh\n\n - id: docs-impact\n command: archon-docs-impact-agent\n depends_on: [sync]\n context: fresh\n\n - id: synthesize\n command: archon-synthesize-review\n depends_on: [code-review, error-handling, test-coverage, comment-quality, docs-impact]\n trigger_rule: one_success\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 7: FIX REVIEW ISSUES\n # ═══════════════════════════════════════════════════════════════════\n\n - id: implement-fixes\n command: archon-implement-review-fixes\n depends_on: [synthesize]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 8: FINAL SUMMARY & FOLLOW-UP\n # ═══════════════════════════════════════════════════════════════════\n\n - id: workflow-summary\n command: archon-workflow-summary\n depends_on: [implement-fixes]\n context: fresh\n", "archon-interactive-prd": "name: archon-interactive-prd\ndescription: |\n Use when: User wants to create a PRD through guided conversation.\n Triggers: \"create a prd\", \"new prd\", \"interactive prd\", \"plan a feature\",\n \"product requirements\", \"write a prd\".\n NOT for: Autonomous PRD generation without human input (use archon-ralph-generate).\n\n Interactive workflow that guides the user through problem-first PRD creation:\n 1. Understand the idea → ask foundation questions → wait for answers\n 2. 
Research market & codebase → ask deep dive questions → wait for answers\n 3. Assess technical feasibility → ask scope questions → wait for answers\n 4. Generate PRD → validate technical claims against codebase → output\n\nprovider: claude\ninteractive: true\n\nnodes:\n # ═══════════════════════════════════════════════════════════════\n # PHASE 1: INITIATE — Understand the idea\n # ═══════════════════════════════════════════════════════════════\n\n - id: initiate\n model: sonnet\n prompt: |\n You are a sharp product manager starting a PRD creation process.\n You think from first principles — start with primitives, not features.\n\n The user wants to build: $ARGUMENTS\n\n If the input is clear, restate your understanding in 2-3 sentences and confirm:\n \"I understand you want to build: {restated understanding}. Is this correct?\"\n\n If the input is vague or empty, ask:\n \"What do you want to build? Describe the product, feature, or capability.\"\n\n Then present the Foundation Questions (all at once — the user will answer in the next step):\n\n **Foundation Questions:**\n\n 1. **Who** has this problem? Be specific — not just \"users\" but what type of person/role?\n 2. **What** problem are they facing? Describe the observable pain, not the assumed need.\n 3. **Why** can't they solve it today? What alternatives exist and why do they fail?\n 4. **Why now?** What changed that makes this worth building?\n 5. **How** will you know if you solved it? What would success look like?\n\n Keep it conversational. Don't generate any PRD content yet.\n\n # ═══════════════════════════════════════════════════════════════\n # GATE 1: User answers foundation questions\n # ═══════════════════════════════════════════════════════════════\n\n - id: foundation-gate\n approval:\n message: \"Answer the foundation questions above. 
Your answers will guide the research phase.\"\n capture_response: true\n depends_on: [initiate]\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 2: GROUNDING — Research market & codebase\n # ═══════════════════════════════════════════════════════════════\n\n - id: research\n model: sonnet\n prompt: |\n You are researching context for a PRD. Think from first principles —\n what already exists before proposing anything new.\n\n **The idea**: $ARGUMENTS\n\n **User's foundation answers**:\n $foundation-gate.output\n\n Research the landscape:\n\n 1. Search the web for similar products, competitors, and how others solve this problem\n 2. **Explore the codebase deeply** — find related existing functionality, APIs, UI components,\n database tables, and patterns. Read actual files, don't assume. Note exact file paths and\n what each file does.\n 3. Look for common patterns, anti-patterns, and recent trends\n\n **First principles rule**: Before suggesting anything new, verify what already exists.\n If there's an existing API endpoint, UI page, or component that partially solves the\n problem, note it explicitly. The best solution extends what exists, not replaces it.\n\n Present a summary to the user:\n\n **What I found:**\n - {Market insights — similar products, competitor approaches}\n - {What already exists in the codebase — specific files, endpoints, components}\n - {Key insight that might change the approach}\n\n Then ask the **Deep Dive Questions**:\n\n 1. **Vision**: In one sentence, what's the ideal end state if this succeeds wildly?\n 2. **Primary User**: Describe your most important user — their role, context, and what triggers their need.\n 3. **Job to Be Done**: Complete this: \"When [situation], I want to [motivation], so I can [outcome].\"\n 4. **Non-Users**: Who is explicitly NOT the target?\n 5. **Constraints**: What limitations exist? (time, budget, technical, regulatory)\n\n Does the research change or refine your thinking? 
Answer the deep dive questions.\n depends_on: [foundation-gate]\n\n # ═══════════════════════════════════════════════════════════════\n # GATE 2: User answers deep dive questions\n # ═══════════════════════════════════════════════════════════════\n\n - id: deepdive-gate\n approval:\n message: \"Answer the deep dive questions above (vision, primary user, JTBD, constraints). Add any adjustments from the research.\"\n capture_response: true\n depends_on: [research]\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 3: TECHNICAL GROUNDING — Feasibility from what exists\n # ═══════════════════════════════════════════════════════════════\n\n - id: technical\n model: sonnet\n prompt: |\n You are assessing technical feasibility for a PRD.\n Think from first principles — start with what exists, not what you'd build from scratch.\n\n **The idea**: $ARGUMENTS\n **Foundation answers**: $foundation-gate.output\n **Deep dive answers**: $deepdive-gate.output\n\n **CRITICAL**: Explore the codebase by READING actual files. Do not guess or assume.\n For every claim you make about the codebase, cite the exact file and line.\n\n 1. **What already exists** that partially solves this problem?\n - Read existing API endpoints, DB queries, UI components\n - Note exact function names, table schemas, component names\n - What data is already being collected/stored?\n 2. **What's the smallest change** to the existing system that solves the core problem?\n - Prefer extending existing files over creating new ones\n - Prefer using existing endpoints over creating new ones\n - Prefer adding to existing UI pages over new pages\n 3. **What are the actual primitives** we need?\n - A new DB query? An existing one that needs a parameter?\n - A new component? Or an existing component that needs a prop?\n - A new endpoint? Or an existing endpoint that already returns the data?\n 4. 
**What's the risk?**\n - Where could this go wrong?\n - What assumptions need validation?\n\n Present a summary:\n\n **What Already Exists (verified by reading code):**\n - {endpoint/component/query} at `{file:line}` — {what it does}\n - {endpoint/component/query} at `{file:line}` — {what it does}\n\n **Smallest Change to Solve the Problem:**\n - {change 1}: {extend/modify} `{file}` — {what to do}\n - {change 2}: {extend/modify} `{file}` — {what to do}\n\n **Technical Context:**\n - Feasibility: {HIGH/MEDIUM/LOW} because {reason}\n - Key risk: {main concern}\n - Estimated phases: {rough breakdown}\n\n Then ask the **Scope Questions**:\n\n 1. **MVP Definition**: What's the absolute minimum to test if this works?\n 2. **Must Have vs Nice to Have**: What 2-3 things MUST be in v1? What can wait?\n 3. **Key Hypothesis**: Complete this: \"We believe [capability] will [solve problem] for [users]. We'll know we're right when [measurable outcome].\"\n 4. **Out of Scope**: What are you explicitly NOT building?\n 5. **Open Questions**: What uncertainties could change the approach?\n depends_on: [deepdive-gate]\n\n # ═══════════════════════════════════════════════════════════════\n # GATE 3: User answers scope questions\n # ═══════════════════════════════════════════════════════════════\n\n - id: scope-gate\n approval:\n message: \"Answer the scope questions above (MVP, must-haves, hypothesis, exclusions). 
This is the final input before PRD generation.\"\n capture_response: true\n depends_on: [technical]\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 4: GENERATE — Write the PRD\n # ═══════════════════════════════════════════════════════════════\n\n - id: generate\n model: sonnet\n prompt: |\n You are generating a PRD from the user's guided inputs.\n\n **The idea**: $ARGUMENTS\n **Foundation answers**: $foundation-gate.output\n **Deep dive answers**: $deepdive-gate.output\n **Scope answers**: $scope-gate.output\n\n Generate a complete PRD file at `$ARTIFACTS_DIR/prds/{kebab-case-name}.prd.md`.\n\n First create the directory:\n ```bash\n mkdir -p $ARTIFACTS_DIR/prds\n ```\n\n **First principles rule**: Before writing the Technical Approach section, READ the\n actual codebase files you're referencing. Verify:\n - File paths exist\n - Function/component names are correct\n - API endpoints you reference actually exist (or note they need to be created)\n - DB table and column names match the schema\n - Event type names match the constants in the code\n\n The PRD must include ALL of these sections, filled from the user's answers:\n\n 1. **Problem Statement** — from foundation answers (who/what/why)\n 2. **Evidence** — from research findings and user's evidence\n 3. **Proposed Solution** — synthesized from all inputs. Prefer extending existing\n primitives over creating new ones.\n 4. **Key Hypothesis** — from scope answers\n 5. **What We're NOT Building** — from scope answers\n 6. **Success Metrics** — from foundation \"how will you know\" + scope\n 7. **Open Questions** — from scope answers\n 8. **Users & Context** — from deep dive (primary user, JTBD, non-users)\n 9. **Solution Detail** — MoSCoW table from scope must-haves, MVP definition\n 10. **Technical Approach** — from technical feasibility. MUST reference actual\n verified file paths, function names, and schemas. Mark anything unverified\n as \"needs verification\".\n 11. 
**Implementation Phases** — from technical breakdown, with status table\n and parallel opportunities\n 12. **Decisions Log** — key decisions made during the conversation\n\n **Rules:**\n - If info is missing, write \"TBD — needs research\" not filler\n - Be specific and concrete, not generic\n - Every file path in Technical Approach must be verified by reading the file\n - Prefer \"extend X\" over \"create new Y\" in implementation phases\n\n After writing the file, output the file path only — the validator will check it.\n depends_on: [scope-gate]\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 5: VALIDATE — Check technical claims against codebase\n # ═══════════════════════════════════════════════════════════════\n\n - id: validate\n model: sonnet\n prompt: |\n You are a technical validator checking a PRD for accuracy.\n\n Read the PRD file that was just generated. The generate node output the file path:\n $generate.output\n\n Find the PRD file — check `$ARTIFACTS_DIR/prds/` for the most recently created `.prd.md` file:\n ```bash\n ls -t $ARTIFACTS_DIR/prds/*.prd.md | head -1\n ```\n\n Read the entire PRD, then verify EVERY technical claim against the actual codebase:\n\n **Check 1: File paths** — For every file referenced in \"Technical Approach\" and\n \"Implementation Phases\", verify it exists. If it doesn't, note the correction.\n\n **Check 2: API endpoints** — For every endpoint mentioned, check if it already exists\n in `packages/server/src/routes/api.ts`. If it does, the PRD should say \"extend\" not \"create\".\n If the PRD proposes a new endpoint for data that an existing endpoint already returns,\n flag it.\n\n **Check 3: DB schemas** — For every table/column referenced, verify the actual names\n in the migration files or schema code. 
Check event type names against the\n `WORKFLOW_EVENT_TYPES` constant.\n\n **Check 4: UI components** — For every component referenced, verify it exists.\n If the PRD proposes a new page but an existing page already serves a similar purpose,\n flag it.\n\n **Check 5: Function/type names** — Verify function names, type names, and interface\n names are correct.\n\n After checking, if there are ANY corrections needed:\n 1. Edit the PRD file directly — fix incorrect names, paths, and references\n 2. Add a `## Validation Notes` section at the bottom documenting what was corrected\n\n If everything checks out, add:\n ```\n ## Validation Notes\n\n All technical references verified against codebase. No corrections needed.\n ```\n\n Output a summary of what was checked and corrected:\n\n ```\n ## PRD Validated\n\n **File**: `{prd-path}`\n **Checks**: {N} file paths, {N} endpoints, {N} DB references, {N} components\n **Corrections**: {count}\n {list corrections if any}\n\n To start implementation: `/prp-plan {prd-path}`\n ```\n depends_on: [generate]\n", "archon-issue-review-full": "name: archon-issue-review-full\ndescription: |\n Use when: User wants a FULL, COMPREHENSIVE fix + review pipeline for a GitHub issue.\n Triggers: \"full review\", \"comprehensive fix\", \"fix with full review\", \"deep review\", \"issue review full\".\n NOT for: Simple issue fixes (use archon-fix-github-issue instead),\n questions about issues, CI failures, PR reviews, general exploration.\n\n Full workflow:\n 1. Investigate issue -> root cause analysis, implementation plan\n 2. Implement fix -> code changes, tests, PR creation\n 3. Comprehensive review -> 5 parallel agents with scope awareness\n 4. Fix review issues -> address CRITICAL/HIGH findings\n 5. 
Final summary -> decision matrix, follow-up recommendations\n\nnodes:\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 1: INVESTIGATE\n # ═══════════════════════════════════════════════════════════════════\n\n - id: investigate\n command: archon-investigate-issue\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 2: IMPLEMENT\n # ═══════════════════════════════════════════════════════════════════\n\n - id: implement\n command: archon-implement-issue\n depends_on: [investigate]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 3: CODE REVIEW\n # ═══════════════════════════════════════════════════════════════════\n\n - id: review-scope\n command: archon-pr-review-scope\n depends_on: [implement]\n context: fresh\n\n - id: sync\n command: archon-sync-pr-with-main\n depends_on: [review-scope]\n context: fresh\n\n - id: code-review\n command: archon-code-review-agent\n depends_on: [sync]\n context: fresh\n\n - id: error-handling\n command: archon-error-handling-agent\n depends_on: [sync]\n context: fresh\n\n - id: test-coverage\n command: archon-test-coverage-agent\n depends_on: [sync]\n context: fresh\n\n - id: comment-quality\n command: archon-comment-quality-agent\n depends_on: [sync]\n context: fresh\n\n - id: docs-impact\n command: archon-docs-impact-agent\n depends_on: [sync]\n context: fresh\n\n - id: synthesize\n command: archon-synthesize-review\n depends_on: [code-review, error-handling, test-coverage, comment-quality, docs-impact]\n trigger_rule: one_success\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 4: FIX REVIEW ISSUES\n # ═══════════════════════════════════════════════════════════════════\n\n - id: implement-fixes\n command: archon-implement-review-fixes\n depends_on: [synthesize]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 5: 
FINAL SUMMARY\n # ═══════════════════════════════════════════════════════════════════\n\n - id: summary\n command: archon-workflow-summary\n depends_on: [implement-fixes]\n context: fresh\n", - "archon-piv-loop": "name: archon-piv-loop\ndescription: |\n Use when: User wants guided Plan-Implement-Validate development with human-in-the-loop.\n Triggers: \"piv\", \"piv loop\", \"plan implement validate\", \"guided development\",\n \"structured development\", \"build a feature\", \"develop with review\".\n NOT for: Autonomous implementation without planning (use archon-feature-development).\n NOT for: PRD creation (use archon-interactive-prd).\n NOT for: Ralph story-based implementation (use archon-ralph-dag).\n\n Interactive PIV loop workflow — the foundational AI coding methodology:\n 1. EXPLORE: Iterative conversation with human to understand the problem (arbitrary rounds)\n 2. PLAN: Create structured plan -> iterative review & revision (arbitrary rounds)\n 3. IMPLEMENT: Autonomous task-by-task implementation from plan (Ralph loop)\n 4. VALIDATE: Automated code review -> iterative human feedback & fixes (arbitrary rounds)\n\n The PIV loop comes AFTER a PRD exists. 
Each PIV loop focuses on ONE granular feature or bug fix.\n Input: A description of what to build, a path to an existing plan, or a GitHub issue number.\n\nprovider: claude\ninteractive: true\n\nnodes:\n # ═══════════════════════════════════════════════════════════════\n # PHASE 1: EXPLORE — Iterative exploration with human\n # Understand the idea, explore the codebase, converge on approach\n # Loops until the user says they're ready to create the plan.\n # ═══════════════════════════════════════════════════════════════\n\n - id: explore\n loop:\n prompt: |\n # PIV Loop — Exploration\n\n You are a senior engineering partner in an iterative exploration session.\n Your goal: DEEPLY UNDERSTAND what to build before any code is written.\n\n **User's request**: $ARGUMENTS\n **User's latest input**: $LOOP_USER_INPUT\n\n ---\n\n ## If this is the FIRST iteration (no user input yet):\n\n ### Step 1: Parse the Input\n\n Determine what the user provided:\n\n **If it's a file path** (ends in `.md`, `.plan.md`, or `.prd.md`):\n - Read the file\n - If it's an existing plan → summarize it and ask if they want to refine or proceed\n - If it's a PRD → identify the specific phase/feature to focus on\n\n **If it's a GitHub issue** (`#123` format):\n - Fetch it: `gh issue view {number} --json title,body,labels,comments`\n - Summarize the issue context\n\n **If it's free text**:\n - This is a feature idea or bug description. Use it directly.\n\n ### Step 2: Explore the Codebase\n\n Before asking questions, DO YOUR HOMEWORK:\n\n 1. **Read CLAUDE.md** — understand project conventions, architecture, and constraints\n 2. **Search for related code** — find existing implementations similar to what the user wants\n 3. **Read key files** — understand the current state of code the user wants to change\n 4. 
**Check recent git history** — `git log --oneline -20` for recent changes in the area\n\n ### Step 3: Present Your Understanding\n\n ```\n ## What I Understand\n\n You want to: {restated understanding in 2-3 sentences}\n\n ## What Already Exists\n\n - {file:line} — {what it does and how it relates}\n - {file:line} — {what it does and how it relates}\n - {pattern/component} — {how it could be extended or reused}\n\n ## Initial Architecture Thoughts\n\n Based on what exists, I'm thinking:\n - {approach 1 — extend existing X}\n - {approach 2 — if approach 1 doesn't work}\n - {key architectural decision that needs your input}\n ```\n\n ### Step 4: Ask Targeted Questions\n\n Ask 4-6 questions focused on DECISIONS, not information gathering:\n - Scope boundaries, architecture preferences, tech decisions\n - Constraints, existing code extension vs fresh build, testing expectations\n - Reference actual code you found — don't ask generic questions\n\n ---\n\n ## If the user has provided input (subsequent iterations):\n\n ### Step 1: Process Their Response\n\n Read their answers carefully. 
Identify:\n - Decisions they've made\n - Areas they want you to explore further\n - Questions they asked YOU back (answer these with evidence!)\n\n ### Step 2: Do Targeted Research\n\n Based on their response:\n - If they mentioned specific technologies → research best practices\n - If they pointed you to specific code → read it thoroughly\n - If they asked you to explore an area → do a thorough investigation\n - If they made architecture decisions → validate against the codebase\n\n ### Step 3: Present Updated Understanding\n\n Show what you learned, answer their questions with file:line references,\n and present your refined architecture recommendation.\n\n ### Step 4: Converge or Continue\n\n **If there are still important open questions:**\n Ask 2-4 focused questions about remaining ambiguities.\n\n **If the picture is clear and you have enough to create a plan:**\n Present a final implementation summary:\n\n ```\n ## Implementation Summary\n\n ### What We're Building\n {Clear, specific description}\n\n ### Scope Boundary\n - IN: {what's included}\n - OUT: {what's explicitly excluded}\n\n ### Architecture\n - {key decisions}\n\n ### Files That Will Change\n - `{file}` — {what changes and why}\n\n ### Success Criteria\n - [ ] {specific, testable criterion}\n - [ ] All validation passes\n\n ### Key Risks\n - {risk — and mitigation}\n ```\n\n Then tell the user: \"I have a clear picture. Say **ready** and I'll create\n the structured implementation plan, or share any final thoughts.\"\n\n **CRITICAL — READ THIS CAREFULLY**:\n - NEVER output PLAN_READY unless the user's LATEST message contains\n an EXPLICIT phrase like \"ready\", \"create the plan\", \"let's go\", \"proceed\", or \"I'm done\".\n - If the user asked a question → do NOT emit the signal. Answer the question.\n - If the user gave feedback or requested changes → do NOT emit the signal. Address it.\n - If the user said \"also check X\" or \"one more thing\" → do NOT emit the signal. 
Explore it.\n - If you are unsure whether the user is approving → do NOT emit the signal. Ask them.\n - The ONLY correct time to emit the signal is when the user's message CLEARLY means\n \"stop exploring, I'm ready for you to create the plan.\"\n until: PLAN_READY\n max_iterations: 15\n interactive: true\n gate_message: |\n Answer the questions above, ask me to explore specific areas,\n or say \"ready\" when you're satisfied with the exploration.\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 2: PLAN — Create the structured implementation plan\n # ═══════════════════════════════════════════════════════════════\n\n - id: create-plan\n model: sonnet\n depends_on: [explore]\n context: fresh\n prompt: |\n # PIV Loop — Create Structured Plan\n\n You are creating a structured implementation plan from a completed exploration phase.\n This plan will be the SOLE GUIDE for the implementation agent — it must be complete,\n specific, and actionable.\n\n **Original request**: $ARGUMENTS\n **Final exploration summary**: $explore.output\n\n ---\n\n ## Step 1: Read the Codebase (Again)\n\n Before writing the plan, verify your understanding is current:\n\n 1. **Read CLAUDE.md** — capture all relevant conventions\n 2. **Read every file you plan to change** — note exact current state\n 3. **Read example test files** — understand testing patterns\n 4. **Check for any recent changes** — `git log --oneline -10`\n\n ## Step 2: Determine Plan Location\n\n Generate a kebab-case slug from the feature name.\n Save to `.claude/archon/plans/{slug}.plan.md`.\n\n ```bash\n mkdir -p .claude/archon/plans\n ```\n\n ## Step 3: Write the Plan\n\n Use this template. 
Fill EVERY section with specific, verified information.\n\n ```markdown\n # Feature: {Title}\n\n ## Summary\n {1-2 sentences: what changes and why}\n\n ## Mission\n {The core goal in one clear statement}\n\n ## Success Criteria\n - [ ] {Specific, testable criterion}\n - [ ] All validation passes (`bun run validate` or equivalent)\n - [ ] No regressions in existing tests\n\n ## Scope\n ### In Scope\n - {What we ARE building}\n ### Out of Scope\n - {What we are NOT building — and why}\n\n ## Codebase Context\n ### Key Files\n | File | Role | Action |\n |------|------|--------|\n | `{path}` | {what it does} | CREATE / UPDATE |\n\n ### Patterns to Follow\n {Actual code snippets from the codebase to mirror}\n\n ## Architecture\n - {Decision 1 — with rationale}\n - {Decision 2 — with rationale}\n\n ## Task List\n Execute in order. Each task is atomic and independently verifiable.\n\n ### Task 1: {ACTION} `{file path}`\n **Action**: CREATE / UPDATE\n **Details**: {Exact changes — specific enough for an agent with no context}\n **Pattern**: Follow `{source file}:{lines}`\n **Validate**: `{command to verify this task}`\n\n ## Testing Strategy\n | Test File | Test Cases | Validates |\n |-----------|-----------|-----------|\n | `{path}` | {cases} | {what it validates} |\n\n ## Validation Commands\n 1. Type check: `{command}`\n 2. Lint: `{command}`\n 3. Tests: `{command}`\n 4. Full validation: `{command}`\n\n ## Risks\n | Risk | Impact | Mitigation |\n |------|--------|------------|\n | {risk} | {HIGH/MED/LOW} | {specific mitigation} |\n ```\n\n ## Step 4: Verify the Plan\n\n 1. Check every file path referenced — verify they exist\n 2. Check every pattern cited — verify the code matches\n 3. Check task ordering — ensure dependencies are respected\n 4. 
Check completeness — could an agent with NO context implement this?\n\n ## Step 5: Report\n\n ```\n ## Plan Created\n\n **File**: `.claude/archon/plans/{slug}.plan.md`\n **Tasks**: {count}\n **Files to change**: {count}\n\n Key decisions:\n - {decision 1}\n - {decision 2}\n\n Please review the plan and provide feedback.\n ```\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 2b: PLAN — Iterative plan refinement\n # Review and revise the plan as many times as needed.\n # ═══════════════════════════════════════════════════════════════\n\n - id: refine-plan\n depends_on: [create-plan]\n loop:\n prompt: |\n # PIV Loop — Plan Refinement\n\n The user is reviewing the implementation plan and providing feedback.\n\n **User's feedback**: $LOOP_USER_INPUT\n\n ---\n\n ## Step 1: Find and Read the Plan\n\n ```bash\n ls -t .claude/archon/plans/*.plan.md 2>/dev/null | head -1\n ```\n\n Read the entire plan file. Also read CLAUDE.md for conventions.\n\n ## Step 2: Process Feedback\n\n **If there is no user feedback yet** (first iteration, $LOOP_USER_INPUT is empty):\n - Read the plan carefully\n - Present a summary of the plan's key decisions and task list\n - Ask the user to review and provide feedback\n - Do NOT emit the completion signal on the first iteration\n\n **If the user EXPLICITLY approved** (said \"approved\", \"looks good\", \"let's go\", etc.):\n - Make no changes\n - Output: \"Plan approved. 
Proceeding to implementation.\"\n - Signal completion: PLAN_APPROVED\n\n **If the user provided specific feedback:**\n - Parse each piece of feedback\n - Edit the plan file directly:\n - Add/remove/modify tasks as requested\n - Update success criteria if needed\n - Adjust testing strategy if needed\n - Re-verify file paths and patterns after changes\n\n **CRITICAL**: NEVER emit PLAN_APPROVED unless the user's latest\n message EXPLICITLY says \"approved\", \"looks good\", \"ship it\", or similar approval.\n Questions, feedback, and requests for changes are NOT approval.\n\n ## Step 3: Show Changes\n\n ```\n ## Plan Revised\n\n Changes made:\n - {change 1}\n - {change 2}\n\n Updated stats:\n - Tasks: {count}\n - Files to change: {count}\n\n Review the updated plan and provide more feedback, or say \"approved\" to proceed.\n ```\n until: PLAN_APPROVED\n max_iterations: 10\n interactive: true\n gate_message: |\n Review the plan document. Provide specific feedback on what to change,\n or say \"approved\" to begin implementation.\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 3: IMPLEMENT — Setup\n # Read the plan, prepare the environment\n # ═══════════════════════════════════════════════════════════════\n\n - id: implement-setup\n depends_on: [refine-plan]\n bash: |\n set -e\n\n PLAN_FILE=$(ls -t .claude/archon/plans/*.plan.md 2>/dev/null | head -1)\n\n if [ -z \"$PLAN_FILE\" ]; then\n echo \"ERROR: No plan file found in .claude/archon/plans/\"\n exit 1\n fi\n\n # Install dependencies if needed\n if [ -f \"bun.lock\" ] || [ -f \"bun.lockb\" ]; then\n echo \"Installing dependencies...\"\n bun install --frozen-lockfile 2>&1 | tail -3\n elif [ -f \"package-lock.json\" ]; then\n npm ci 2>&1 | tail -3\n elif [ -f \"yarn.lock\" ]; then\n yarn install --frozen-lockfile 2>&1 | tail -3\n elif [ -f \"pnpm-lock.yaml\" ]; then\n pnpm install --frozen-lockfile 2>&1 | tail -3\n fi\n\n echo \"BRANCH=$(git branch --show-current)\"\n echo 
\"GIT_ROOT=$(git rev-parse --show-toplevel)\"\n echo \"PLAN_FILE=$PLAN_FILE\"\n\n echo \"=== PLAN_START ===\"\n cat \"$PLAN_FILE\"\n echo \"\"\n echo \"=== PLAN_END ===\"\n\n TASK_COUNT=$(grep -c \"^### Task [0-9]\" \"$PLAN_FILE\" || true)\n echo \"TASK_COUNT=${TASK_COUNT:-0}\"\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 3b: IMPLEMENT — Task-by-Task Loop (Ralph pattern)\n # Fresh context each iteration. Reads plan from disk.\n # One task per iteration. Validates before committing.\n # ═══════════════════════════════════════════════════════════════\n\n - id: implement\n depends_on: [implement-setup]\n idle_timeout: 600000\n model: opus[1m]\n loop:\n prompt: |\n # PIV Loop — Implementation Agent\n\n You are an autonomous coding agent in a FRESH session — no memory of previous iterations.\n Your job: Read the plan from disk, implement ONE task, validate, commit, update tracking, exit.\n\n **Golden Rule**: If validation fails, fix it before committing. Never commit broken code.\n\n ---\n\n ## Phase 0: CONTEXT — Load State\n\n The setup node produced this context:\n\n $implement-setup.output\n\n **User's original request**: $USER_MESSAGE\n\n ---\n\n ### 0.1 Parse Plan File\n\n Extract the `PLAN_FILE=...` line from the context above.\n\n ### 0.2 Read Current State (from disk — not from context above)\n\n The context above is a snapshot from before the loop started. Previous iterations\n may have changed things. **You MUST re-read from disk:**\n\n 1. **Read the plan file** — your implementation guide\n 2. **Read progress tracking** — check if `.claude/archon/plans/progress.txt` exists\n 3. 
**Read CLAUDE.md** — project conventions and constraints\n\n ### 0.3 Check Git State\n\n ```bash\n git log --oneline -10\n git status\n ```\n\n ---\n\n ## Phase 1: SELECT — Pick Next Task\n\n From the plan file, identify tasks by `### Task N:` headers.\n Cross-reference with commits from previous iterations and progress tracking.\n\n **If ALL tasks are complete** → Skip to Phase 5 (Completion).\n\n ### Announce Selection\n\n ```\n -- Task Selected ------------------------------------------------\n Task: {N} — {task title}\n Action: {CREATE / UPDATE}\n File: {file path}\n -----------------------------------------------------------------\n ```\n\n ---\n\n ## Phase 2: IMPLEMENT — Execute the Task\n\n 1. Read the file you're about to change (if it exists)\n 2. Read the pattern file referenced in the plan\n 3. Make changes following the plan EXACTLY\n 4. Type-check after each file: `bun run type-check 2>&1 || true`\n\n ---\n\n ## Phase 3: VALIDATE — Verify the Task\n\n ```bash\n bun run type-check && bun run lint && bun run test && bun run format:check\n ```\n\n If validation fails: fix, re-run (up to 3 attempts). If unfixable, note in progress\n tracking and do NOT commit broken code.\n\n ---\n\n ## Phase 4: COMMIT — Save Changes\n\n ```bash\n git add -A\n git diff --cached --stat\n git commit -m \"$(cat <<'EOF'\n {type}: {task description}\n\n PIV Task {N}: {brief details}\n EOF\n )\"\n ```\n\n Track progress in `.claude/archon/plans/progress.txt`:\n ```\n ## Task {N}: {title} — COMPLETED\n Date: {ISO date}\n Files: {list}\n Commit: {short hash}\n ---\n ```\n\n ---\n\n ## Phase 5: COMPLETE — Check All Tasks\n\n If ALL tasks are done:\n 1. Run full validation: `bun run validate 2>&1`\n 2. Push: `git push -u origin HEAD`\n 3. Signal: `COMPLETE`\n\n If tasks remain, report status and end normally. 
The loop engine starts a fresh iteration.\n until: COMPLETE\n max_iterations: 15\n fresh_context: true\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 4: VALIDATE — Automated code review\n # Review all changes against the plan\n # ═══════════════════════════════════════════════════════════════\n\n - id: code-review\n model: sonnet\n depends_on: [implement]\n context: fresh\n prompt: |\n # PIV Loop — Automated Code Review\n\n The implementation phase is complete. Review ALL changes against the plan.\n\n **Implementation output**: $implement.output\n\n ---\n\n ## Step 1: Find and Read the Plan\n\n ```bash\n ls -t .claude/archon/plans/*.plan.md 2>/dev/null | head -1\n ```\n\n ## Step 2: Review All Changes\n\n ```bash\n git log --oneline --no-merges $(git merge-base HEAD $BASE_BRANCH)..HEAD\n git diff $BASE_BRANCH..HEAD --stat\n git diff $BASE_BRANCH..HEAD\n ```\n\n ## Step 3: Check Against Plan\n\n For EACH task: was it implemented correctly? Do success criteria hold?\n For EACH file: check quality, security, patterns, CLAUDE.md compliance.\n\n ## Step 4: Run Validation\n\n ```bash\n bun run validate 2>&1 || (bun run type-check && bun run lint && bun run test && bun run format:check)\n ```\n\n ## Step 5: Fix Obvious Issues\n\n Fix type errors, lint warnings, missing imports, formatting. 
Commit any fixes:\n ```bash\n git add -A && git commit -m \"fix: address code review findings\" 2>/dev/null || true\n ```\n\n ## Step 6: Present Review\n\n ```\n ## Code Review Complete\n\n ### Implementation Status\n | Task | Status | Notes |\n |------|--------|-------|\n | {task} | DONE / PARTIAL / MISSING | {notes} |\n\n ### Validation Results\n - Type-check: PASS / FAIL\n - Lint: PASS / FAIL\n - Tests: PASS / FAIL\n - Format: PASS / FAIL\n\n ### Code Quality Findings\n {Issues found, or \"No issues found.\"}\n\n ### Recommendation\n {READY FOR REVIEW / NEEDS FIXES}\n ```\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 4b: VALIDATE — Iterative human feedback & fixes\n # The user tests the implementation and provides feedback.\n # Loops until the user approves.\n # ═══════════════════════════════════════════════════════════════\n\n - id: fix-feedback\n depends_on: [code-review]\n loop:\n prompt: |\n # PIV Loop — Address Validation Feedback\n\n The human has reviewed the implementation and provided feedback.\n\n **Human's feedback**: $LOOP_USER_INPUT\n\n ---\n\n ## Step 1: Read Context\n\n ```bash\n ls -t .claude/archon/plans/*.plan.md 2>/dev/null | head -1\n ```\n\n Read the plan file and CLAUDE.md for conventions.\n\n ## Step 2: Process Feedback\n\n **If there is no user feedback yet** (first iteration, $LOOP_USER_INPUT is empty):\n - Present the code review results and ask the user to test the implementation\n - Do NOT emit the completion signal on the first iteration\n\n **If the user EXPLICITLY approved** (said \"approved\", \"looks good\", \"ship it\", etc.):\n - Output: \"Implementation approved!\"\n - Signal: VALIDATED\n\n **CRITICAL**: NEVER emit VALIDATED unless the user's latest\n message EXPLICITLY says \"approved\", \"looks good\", \"ship it\", or similar approval.\n\n **If the user provided specific feedback:**\n 1. Read the relevant files\n 2. Understand each issue\n 3. Make the fixes\n 4. 
Type-check after each change\n\n ## Step 3: Full Validation\n\n ```bash\n bun run validate 2>&1 || (bun run type-check && bun run lint && bun run test && bun run format:check)\n ```\n\n ## Step 4: Commit Fixes\n\n ```bash\n git add -A\n git commit -m \"$(cat <<'EOF'\n fix: address review feedback\n\n Changes:\n - {fix 1}\n - {fix 2}\n EOF\n )\"\n ```\n\n ## Step 5: Report\n\n ```\n ## Feedback Addressed\n\n Changes made:\n - {fix 1}\n - {fix 2}\n\n Validation: {PASS / FAIL with details}\n\n Review again, or say \"approved\" to finalize.\n ```\n until: VALIDATED\n max_iterations: 10\n interactive: true\n gate_message: |\n Test the implementation yourself and review the code changes.\n Provide specific feedback on what needs fixing, or say \"approved\" to finalize.\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 5: FINALIZE — Push, create PR, generate summary\n # ═══════════════════════════════════════════════════════════════\n\n - id: finalize\n model: sonnet\n depends_on: [fix-feedback]\n context: fresh\n prompt: |\n # PIV Loop — Finalize\n\n The implementation has been approved. 
Push changes and create a PR.\n\n ---\n\n ## Step 1: Push Changes\n\n ```bash\n git push -u origin HEAD 2>&1 || true\n ```\n\n ## Step 2: Generate Summary\n\n ```bash\n git log --oneline --no-merges $(git merge-base HEAD $BASE_BRANCH)..HEAD\n git diff --stat $(git merge-base HEAD $BASE_BRANCH)..HEAD\n ```\n\n Read the plan file and progress tracking for context.\n\n ## Step 3: Create PR (if not already created)\n\n ```bash\n gh pr view HEAD --json url 2>/dev/null || echo \"NO_PR\"\n ```\n\n If no PR exists:\n\n ```bash\n cat .github/pull_request_template.md 2>/dev/null || echo \"NO_TEMPLATE\"\n ```\n\n Create with `gh pr create --draft --base $BASE_BRANCH`:\n - Title from the plan's feature name\n - Body summarizing the implementation\n - Use a HEREDOC for the body\n\n ## Step 4: Output Summary\n\n ```\n ===============================================================\n PIV LOOP — COMPLETE\n ===============================================================\n\n Feature: {from plan}\n Plan: {plan file path}\n Branch: {branch name}\n PR: {url}\n\n -- Tasks Completed -----------------------------------------------\n {list from progress tracking}\n\n -- Commits -------------------------------------------------------\n {git log output}\n\n -- Files Changed -------------------------------------------------\n {git diff --stat output}\n\n -- Validation ----------------------------------------------------\n All checks passed.\n ===============================================================\n ```\n", - "archon-plan-to-pr": "name: archon-plan-to-pr\ndescription: |\n Use when: You have an existing implementation plan and want to execute it end-to-end.\n Input: Path to a plan file ($ARTIFACTS_DIR/plan.md or .agents/plans/*.md)\n Output: PR ready for merge with comprehensive review completed\n\n Full workflow:\n 1. Read plan, setup branch, extract scope limits\n 2. Verify plan research is still valid\n 3. Implement all tasks with type-checking\n 4. Run full validation suite\n 5. 
Create PR with template, mark ready\n 6. Comprehensive code review (5 parallel agents with scope limit awareness)\n 7. Synthesize and fix review findings\n 8. Final summary with decision matrix -> GitHub comment + follow-up recommendations\n\n NOT for: Creating plans from scratch (use archon-idea-to-pr), quick fixes, standalone reviews.\n\nnodes:\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 1: SETUP\n # ═══════════════════════════════════════════════════════════════════\n\n - id: plan-setup\n command: archon-plan-setup\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 2: CONFIRM PLAN\n # ═══════════════════════════════════════════════════════════════════\n\n - id: confirm-plan\n command: archon-confirm-plan\n depends_on: [plan-setup]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 3: IMPLEMENT\n # ═══════════════════════════════════════════════════════════════════\n\n - id: implement-tasks\n command: archon-implement-tasks\n depends_on: [confirm-plan]\n context: fresh\n model: opus[1m]\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 4: VALIDATE\n # ═══════════════════════════════════════════════════════════════════\n\n - id: validate\n command: archon-validate\n depends_on: [implement-tasks]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 5: FINALIZE PR\n # ═══════════════════════════════════════════════════════════════════\n\n - id: finalize-pr\n command: archon-finalize-pr\n depends_on: [validate]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 6: CODE REVIEW\n # ═══════════════════════════════════════════════════════════════════\n\n - id: review-scope\n command: archon-pr-review-scope\n depends_on: [finalize-pr]\n context: fresh\n\n - id: sync\n command: 
archon-sync-pr-with-main\n depends_on: [review-scope]\n context: fresh\n\n - id: code-review\n command: archon-code-review-agent\n depends_on: [sync]\n context: fresh\n\n - id: error-handling\n command: archon-error-handling-agent\n depends_on: [sync]\n context: fresh\n\n - id: test-coverage\n command: archon-test-coverage-agent\n depends_on: [sync]\n context: fresh\n\n - id: comment-quality\n command: archon-comment-quality-agent\n depends_on: [sync]\n context: fresh\n\n - id: docs-impact\n command: archon-docs-impact-agent\n depends_on: [sync]\n context: fresh\n\n - id: synthesize\n command: archon-synthesize-review\n depends_on: [code-review, error-handling, test-coverage, comment-quality, docs-impact]\n trigger_rule: one_success\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 7: FIX REVIEW ISSUES\n # ═══════════════════════════════════════════════════════════════════\n\n - id: implement-fixes\n command: archon-implement-review-fixes\n depends_on: [synthesize]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 8: FINAL SUMMARY & FOLLOW-UP\n # ═══════════════════════════════════════════════════════════════════\n\n - id: workflow-summary\n command: archon-workflow-summary\n depends_on: [implement-fixes]\n context: fresh\n", - "archon-ralph-dag": "name: archon-ralph-dag\ndescription: |\n Use when: User wants to run a Ralph implementation loop.\n Triggers: \"ralph\", \"run ralph\", \"ralph dag\", \"run ralph dag\".\n\n DAG workflow that:\n 1. Detects input: existing prd.json, existing prd.md (needs stories), or raw idea\n 2. Generates prd.md + prd.json if needed (explores codebase, breaks into stories)\n 3. Validates PRD files, reads project context, installs dependencies\n 4. Runs Ralph loop (fresh context per iteration) implementing one story per iteration\n 5. 
Creates PR and reports completion\n\n Accepts: An idea description, a path to an existing prd.md, or a directory with prd.md + prd.json\n\nprovider: claude\n\nnodes:\n # ═══════════════════════════════════════════════════════════════\n # NODE 1: DETECT INPUT\n # Determines what the user provided: full PRD, partial PRD, or idea\n # ═══════════════════════════════════════════════════════════════\n\n - id: detect-input\n model: haiku\n prompt: |\n # Detect Ralph Input\n\n **User input**: $ARGUMENTS\n\n Determine what the user provided and prepare the PRD directory. Follow these steps exactly:\n\n ## Step 1: Detect worktree\n\n Run `git worktree list --porcelain` to check if you're in a worktree.\n If you see multiple entries, you ARE in a worktree. The first entry (the one without \"branch\" pointing to your current branch) is the **main repo root**. Save it — you'll need it to find files.\n\n ## Step 2: Classify the input\n\n Look at the user input above. It's one of three things:\n\n **Case A — Ralph directory path** (contains `.archon/ralph/`):\n Extract the directory. Check if both `prd.json` and `prd.md` exist there (try locally first, then in the main repo root if in a worktree).\n\n **Case B — File path** (ends in `.md`):\n This is an external PRD file. Find it:\n 1. Try the path as-is (relative to cwd)\n 2. Try it as an absolute path\n 3. If in a worktree, try it relative to the **main repo root** from Step 1\n Once found, read the file to confirm it's a PRD.\n\n **Case C — Free text**:\n Not a file path — it's a feature idea.\n\n ## Step 3: Auto-discover existing ralph PRDs\n\n If the input didn't point to a specific path, check if `.archon/ralph/` contains any `prd.json` files:\n ```bash\n find .archon/ralph -name \"prd.json\" -type f 2>/dev/null\n ```\n\n ## Step 4: Take action based on classification\n\n **If Case A and both files exist** → output `ready` (no further action needed)\n\n **If Case B (external PRD found)**:\n 1. 
Derive a kebab-case slug from the PRD filename or title (e.g., `workflow-lifecycle-overhaul`)\n 2. Create the ralph directory: `mkdir -p .archon/ralph/{slug}`\n 3. Copy the PRD content to `.archon/ralph/{slug}/prd.md`\n 4. Output `external_prd` with the new prd_dir\n\n **If Case C or auto-discovered ralph dir has prd.md but no prd.json** → output `needs_generation`\n\n ## Output\n\n Your final output MUST be exactly one JSON object:\n ```json\n {\"input_type\": \"ready|external_prd|needs_generation\", \"prd_dir\": \".archon/ralph/{slug}\"}\n ```\n output_format:\n type: object\n properties:\n input_type:\n type: string\n enum: [ready, external_prd, needs_generation]\n prd_dir:\n type: string\n required: [input_type, prd_dir]\n\n # ═══════════════════════════════════════════════════════════════\n # NODE 2: GENERATE PRD\n # Scenario 1: User has an idea → generate prd.md + prd.json\n # Scenario 2: User has prd.md → generate prd.json with stories\n # Skipped if prd.json already exists\n # ═══════════════════════════════════════════════════════════════\n\n - id: generate-prd\n depends_on: [detect-input]\n when: \"$detect-input.output.input_type != 'ready'\"\n command: archon-ralph-generate\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # NODE 3: VALIDATE & SETUP\n # Finds PRD directory, reads all state files, installs deps,\n # verifies the environment is ready for implementation.\n # ═══════════════════════════════════════════════════════════════\n\n - id: validate-prd\n depends_on: [detect-input, generate-prd]\n trigger_rule: one_success\n bash: |\n set -e\n\n # ── 1. Find PRD directory (passed from detect-input) ──────\n PRD_DIR=$detect-input.output.prd_dir\n\n # If detect-input didn't know the PRD dir (generated from scratch), discover it\n if [ -z \"$PRD_DIR\" ] || [ ! 
-f \"$PRD_DIR/prd.json\" ]; then\n FOUND=$(find .archon/ralph -name \"prd.json\" -type f 2>/dev/null | head -1)\n if [ -n \"$FOUND\" ]; then\n PRD_DIR=$(dirname \"$FOUND\")\n fi\n fi\n\n if [ -z \"$PRD_DIR\" ] || [ ! -f \"$PRD_DIR/prd.json\" ]; then\n echo \"ERROR: No prd.json found after generation step.\"\n echo \"Check the generate-prd node output for errors.\"\n exit 1\n fi\n\n if [ ! -f \"$PRD_DIR/prd.md\" ]; then\n echo \"ERROR: prd.md not found in $PRD_DIR\"\n exit 1\n fi\n\n # ── 2. Install dependencies (worktrees lack node_modules) ──\n if [ -f \"bun.lock\" ] || [ -f \"bun.lockb\" ]; then\n echo \"Installing dependencies (bun)...\"\n bun install --frozen-lockfile 2>&1 | tail -3\n elif [ -f \"package-lock.json\" ]; then\n echo \"Installing dependencies (npm)...\"\n npm ci 2>&1 | tail -3\n elif [ -f \"yarn.lock\" ]; then\n echo \"Installing dependencies (yarn)...\"\n yarn install --frozen-lockfile 2>&1 | tail -3\n elif [ -f \"pnpm-lock.yaml\" ]; then\n echo \"Installing dependencies (pnpm)...\"\n pnpm install --frozen-lockfile 2>&1 | tail -3\n fi\n\n # ── 3. Git state ──────────────────────────────────────────\n echo \"BRANCH=$(git branch --show-current)\"\n echo \"GIT_ROOT=$(git rev-parse --show-toplevel)\"\n\n # ── 4. Output PRD context ─────────────────────────────────\n echo \"PRD_DIR=$PRD_DIR\"\n echo \"=== PRD_JSON_START ===\"\n cat \"$PRD_DIR/prd.json\"\n echo \"\"\n echo \"=== PRD_JSON_END ===\"\n echo \"=== PRD_MD_START ===\"\n cat \"$PRD_DIR/prd.md\"\n echo \"\"\n echo \"=== PRD_MD_END ===\"\n echo \"=== PROGRESS_START ===\"\n if [ -f \"$PRD_DIR/progress.txt\" ]; then\n cat \"$PRD_DIR/progress.txt\"\n else\n echo \"(no progress yet)\"\n fi\n echo \"\"\n echo \"=== PROGRESS_END ===\"\n\n # ── 5. 
Summary ────────────────────────────────────────────\n TOTAL=$(grep -c '\"passes\"' \"$PRD_DIR/prd.json\" || true)\n DONE=$(grep -c '\"passes\": true' \"$PRD_DIR/prd.json\" || true)\n TOTAL=${TOTAL:-0}\n DONE=${DONE:-0}\n echo \"STORIES_TOTAL=$TOTAL\"\n echo \"STORIES_DONE=$DONE\"\n echo \"STORIES_REMAINING=$(( TOTAL - DONE ))\"\n\n # ═══════════════════════════════════════════════════════════════\n # NODE 4: RALPH IMPLEMENTATION LOOP\n # Fresh context each iteration. Reads PRD state from disk.\n # One story per iteration. Validates before committing.\n # ═══════════════════════════════════════════════════════════════\n\n - id: implement\n depends_on: [validate-prd]\n idle_timeout: 600000\n model: opus[1m]\n loop:\n prompt: |\n # Ralph Agent — Autonomous Story Implementation\n\n You are an autonomous coding agent in a FRESH session — you have no memory of previous iterations.\n Your job: Read state from disk, implement ONE story, validate, commit, update tracking, exit.\n\n **Golden Rule**: If validation fails, fix it before committing. Never commit broken code. Never skip validation.\n\n ---\n\n ## Phase 0: CONTEXT — Load Project State\n\n The upstream setup node produced this context:\n\n $validate-prd.output\n\n **User message**: $USER_MESSAGE\n\n ---\n\n ### 0.1 Parse PRD Directory\n\n Extract the `PRD_DIR=...` line from the context above. This is the directory containing your PRD files.\n Store this path — use it for ALL file operations below.\n\n ### 0.2 Read Current State (from disk, not from context above)\n\n The context above is a snapshot from before the loop started. Previous iterations may have changed files.\n **You MUST re-read from disk to get the current state:**\n\n 1. **Read `{prd-dir}/progress.txt`** — your only link to previous iterations\n - Check the `## Codebase Patterns` section FIRST for learnings from prior iterations\n - Check recent entries for gotchas to avoid\n 2. 
**Read `{prd-dir}/prd.json`** — the source of truth for story completion state\n 3. **Read `{prd-dir}/prd.md`** — full requirements, technical patterns, acceptance criteria\n\n ### 0.3 Read Project Rules\n\n ```bash\n cat CLAUDE.md\n ```\n\n Note all coding standards, patterns, and rules. Follow them exactly.\n\n **PHASE_0_CHECKPOINT:**\n - [ ] PRD directory identified\n - [ ] progress.txt read (or noted as absent)\n - [ ] prd.json read — know which stories pass/fail\n - [ ] prd.md read — understand requirements\n - [ ] CLAUDE.md rules noted\n\n ---\n\n ## Phase 1: SELECT — Pick Next Story\n\n ### 1.1 Find Eligible Story\n\n From `prd.json`, find the **highest priority** story where:\n - `passes` is `false`\n - ALL stories in `dependsOn` have `passes: true`\n\n **If ALL stories have `passes: true`** → Skip to Phase 6 (Completion).\n\n **If no eligible stories exist** (all remaining are blocked):\n ```\n BLOCKED: No eligible stories. Remaining stories and their blockers:\n - {story-id}: blocked by {dep-id} (passes: false)\n ```\n End normally. The loop will terminate on max_iterations.\n\n ### 1.2 Announce Selection\n\n ```\n ── Story Selected ──────────────────────────────────\n ID: {story-id}\n Title: {story-title}\n Priority: {priority}\n Dependencies: {deps or \"none\"}\n\n Acceptance Criteria:\n - {criterion 1}\n - {criterion 2}\n - ...\n ────────────────────────────────────────────────────\n ```\n\n After announcing the selected story, emit the story started event:\n ```bash\n bun run cli workflow event emit --run-id $WORKFLOW_ID --type ralph_story_started --data '{\"story_id\":\"{story-id}\",\"title\":\"{story-title}\"}' || true\n ```\n\n **PHASE_1_CHECKPOINT:**\n - [ ] Eligible story found (or all complete / all blocked)\n - [ ] Acceptance criteria understood\n - [ ] Dependencies verified as complete\n\n ---\n\n ## Phase 2: IMPLEMENT — Code the Story\n\n ### 2.1 Explore Before Coding\n\n Before writing any code:\n 1. 
Read all files you plan to modify — understand current state\n 2. Check `## Codebase Patterns` in progress.txt for discovered patterns\n 3. Look for similar implementations in the codebase to mirror\n 4. Read the `technicalNotes` field from the story in prd.json\n\n ### 2.2 Implementation Rules\n\n **DO:**\n - Implement ONLY the selected story — one story per iteration\n - Follow existing code patterns exactly (naming, structure, imports, error handling)\n - Match the project's coding standards from CLAUDE.md\n - Write or update tests as required by acceptance criteria\n - Keep changes minimal and focused\n\n **DON'T:**\n - Refactor unrelated code\n - Add improvements not in the acceptance criteria\n - Change formatting of lines you didn't modify\n - Install new dependencies without justification from prd.md\n - Touch files unrelated to this story\n - Over-engineer — do the simplest thing that satisfies the criteria\n\n ### 2.3 Verify Types After Each File\n\n After modifying each file, run:\n ```bash\n bun run type-check\n ```\n\n **If types fail:**\n 1. Read the error carefully\n 2. Fix the type issue in your code\n 3. Re-run type-check\n 4. Do NOT proceed to the next file until types pass\n\n **PHASE_2_CHECKPOINT:**\n - [ ] Only the selected story was implemented\n - [ ] Types compile after each file change\n - [ ] Tests written/updated as needed\n - [ ] No unrelated changes\n\n ---\n\n ## Phase 3: VALIDATE — Full Verification\n\n ### 3.1 Static Analysis\n\n ```bash\n bun run type-check && bun run lint\n ```\n\n **Must pass with zero errors and zero warnings.**\n\n **If lint fails:**\n 1. Run `bun run lint:fix` for auto-fixable issues\n 2. Manually fix remaining issues\n 3. Re-run lint\n 4. Proceed only when clean\n\n ### 3.2 Tests\n\n ```bash\n bun run test\n ```\n\n **All tests must pass.**\n\n **If tests fail:**\n 1. Read the failure output\n 2. Determine: bug in your implementation or pre-existing failure?\n 3. 
If your bug → fix the implementation (not the test)\n 4. If pre-existing → note it but don't fix unrelated tests\n 5. Re-run tests\n 6. Repeat until green\n\n ### 3.3 Format Check\n\n ```bash\n bun run format:check\n ```\n\n **If formatting fails:**\n ```bash\n bun run format\n ```\n\n ### 3.4 Verify Acceptance Criteria\n\n Go through EACH acceptance criterion from the story:\n - Is it satisfied by your implementation?\n - Can you verify it (read the code, run a command, check a file)?\n\n If a criterion is NOT met, go back to Phase 2 and fix it.\n\n **PHASE_3_CHECKPOINT:**\n - [ ] Type-check passes\n - [ ] Lint passes (0 errors, 0 warnings)\n - [ ] All tests pass\n - [ ] Format is clean\n - [ ] Every acceptance criterion verified\n\n ---\n\n ## Phase 4: COMMIT — Save Changes\n\n ### 4.1 Review Staged Changes\n\n ```bash\n git add -A\n git status\n git diff --cached --stat\n ```\n\n Verify only expected files are staged. If unexpected files appear, investigate before committing.\n\n ### 4.2 Write Commit Message\n\n ```bash\n git commit -m \"$(cat <<'EOF'\n feat: {story-title}\n\n Implements {story-id} from PRD.\n\n Changes:\n - {change 1}\n - {change 2}\n - {change 3}\n EOF\n )\"\n ```\n\n **Commit message rules:**\n - Prefix: `feat:` for features, `fix:` for bugs, `refactor:` for refactors\n - Title: the story title (not the PRD name)\n - Body: list the actual changes made\n - Do NOT include AI attribution\n\n **PHASE_4_CHECKPOINT:**\n - [ ] Only expected files committed\n - [ ] Commit message is clear and accurate\n - [ ] Working directory is clean after commit\n\n ---\n\n ## Phase 5: TRACK — Update Progress Files\n\n ### 5.1 Update prd.json\n\n Set `passes: true` and add a note for the completed story:\n\n ```json\n {\n \"id\": \"{story-id}\",\n \"passes\": true,\n \"notes\": \"Implemented in iteration {N}. 
Files: {list}.\"\n }\n ```\n\n After updating prd.json, emit the story completed event:\n ```bash\n bun run cli workflow event emit --run-id $WORKFLOW_ID --type ralph_story_completed --data '{\"story_id\":\"{story-id}\",\"title\":\"{story-title}\"}' || true\n ```\n\n ### 5.2 Update progress.txt\n\n **Append** to `{prd-dir}/progress.txt`:\n\n ```\n ## {ISO Date} — {story-id}: {story-title}\n\n **Status**: PASSED\n **Files changed**:\n - {file1} — {what changed}\n - {file2} — {what changed}\n\n **Acceptance criteria verified**:\n - [x] {criterion 1}\n - [x] {criterion 2}\n\n **Learnings**:\n - {Any pattern discovered}\n - {Any gotcha encountered}\n - {Any deviation from expected approach}\n\n ---\n ```\n\n ### 5.3 Update Codebase Patterns (if applicable)\n\n If you discovered a **reusable pattern** that future iterations should know about, **prepend** it to the `## Codebase Patterns` section at the TOP of progress.txt.\n\n Format:\n ```\n ## Codebase Patterns\n\n ### {Pattern Name}\n - **Where**: `{file:lines}`\n - **Pattern**: {description}\n - **Example**: `{code snippet}`\n ```\n\n If the `## Codebase Patterns` section doesn't exist yet, create it at the top of the file.\n\n **PHASE_5_CHECKPOINT:**\n - [ ] prd.json updated with `passes: true`\n - [ ] progress.txt appended with iteration details\n - [ ] Codebase patterns updated (if applicable)\n\n ---\n\n ## Phase 6: COMPLETE — Check All Stories\n\n ### 6.1 Re-read prd.json\n\n ```bash\n cat {prd-dir}/prd.json\n ```\n\n Count stories where `passes: false`.\n\n ### 6.2 If ALL Stories Pass\n\n 1. **Push the branch:**\n ```bash\n git push -u origin HEAD\n ```\n\n 2. **Read the PR template:**\n Look for a PR template in the repo — check `.github/pull_request_template.md`, `.github/PULL_REQUEST_TEMPLATE.md`, and `docs/pull_request_template.md`. Read whichever one exists.\n\n If a template was found, fill in **every section** using the context from this implementation. 
Don't skip sections or leave placeholders — fill them honestly based on the actual changes (summary, architecture, validation evidence, security, compatibility, rollback, etc.).\n\n If no template was found, write a summary with: problem, what changed, stories table, and validation evidence.\n\n 3. **Create a draft PR** using `gh pr create --draft --base $BASE_BRANCH --title \"feat: {PRD feature name}\"` with the filled-in template as the body. Use a HEREDOC for the body.\n\n 4. **Output completion signal:**\n ```\n COMPLETE\n ```\n\n ### 6.3 If Stories Remain\n\n Report status and end normally:\n ```\n ── Iteration Complete ──────────────────────────────\n Story completed: {story-id} — {story-title}\n Stories remaining: {count}\n Next eligible: {next-story-id} — {next-story-title}\n ────────────────────────────────────────────────────\n ```\n\n The loop engine will start the next iteration with a fresh context.\n\n ---\n\n ## Handling Edge Cases\n\n ### Validation fails repeatedly\n - If type-check or tests fail 3+ times on the same error, step back\n - Re-read the acceptance criteria — you may be misunderstanding the requirement\n - Check if the story is too large (needs breaking down)\n - Note the blocker in progress.txt and end the iteration\n\n ### Story is too large for one iteration\n - Implement the minimum viable subset that satisfies the most critical acceptance criteria\n - Set `passes: true` only if ALL criteria are met\n - If you can't meet all criteria, leave `passes: false` and note what's done in progress.txt\n - The next iteration will pick it up and continue\n\n ### Pre-existing test failures\n - If tests were failing BEFORE your changes, note them but don't fix unrelated code\n - Run only the test files related to your changes if the full suite has pre-existing issues\n - Document pre-existing failures in progress.txt\n\n ### Dependency install fails\n - Check if `bun.lock` or equivalent exists\n - Try `bun install` without `--frozen-lockfile`\n 
- Note the issue in progress.txt\n\n ### Git state is dirty at iteration start\n - This shouldn't happen (fresh worktree), but if it does:\n - Run `git status` to understand what's dirty\n - If it's leftover from a failed previous iteration, commit or stash\n - Never discard changes silently\n\n ### Blocked stories — all remaining have unmet dependencies\n - Report the dependency chain in your output\n - Check if a dependency was incorrectly left as `passes: false`\n - If a dependency should be `passes: true` (the code exists and works), fix prd.json\n - Otherwise, end the iteration — the loop will exhaust max_iterations\n\n ---\n\n ## File Format Reference\n\n ### prd.json Schema\n\n ```json\n {\n \"feature\": \"Feature Name\",\n \"issueNumber\": 123,\n \"userStories\": [\n {\n \"id\": \"US-001\",\n \"title\": \"Short title\",\n \"description\": \"As a..., I want..., so that...\",\n \"acceptanceCriteria\": [\"criterion 1\", \"criterion 2\"],\n \"technicalNotes\": \"Implementation hints\",\n \"dependsOn\": [\"US-000\"],\n \"priority\": 1,\n \"passes\": false,\n \"notes\": \"\"\n }\n ]\n }\n ```\n\n ### progress.txt Format\n\n ```\n ## Codebase Patterns\n\n ### {Pattern Name}\n - Where: `file:lines`\n - Pattern: description\n - Example: `code`\n\n ---\n\n ## {Date} — {story-id}: {title}\n\n **Status**: PASSED\n **Files changed**: ...\n **Acceptance criteria verified**: ...\n **Learnings**: ...\n\n ---\n ```\n\n ---\n\n ## Success Criteria\n\n - **ONE_STORY**: Exactly one story implemented per iteration\n - **VALIDATED**: Type-check + lint + tests + format all pass before commit\n - **COMMITTED**: Changes committed with clear message\n - **TRACKED**: prd.json and progress.txt updated accurately\n - **PATTERNS_SHARED**: Discovered patterns added to progress.txt for future iterations\n - **NO_SCOPE_CREEP**: No unrelated changes, no refactoring, no \"improvements\"\n until: COMPLETE\n max_iterations: 15\n fresh_context: true\n\n # 
═══════════════════════════════════════════════════════════════\n # NODE 5: COMPLETION REPORT\n # Reads final state and produces a summary.\n # ═══════════════════════════════════════════════════════════════\n\n - id: report\n depends_on: [implement]\n prompt: |\n # Completion Report\n\n The Ralph implementation loop has finished. Generate a completion report.\n\n ## Context\n\n **Loop output (last iteration):**\n\n $implement.output\n\n **Setup context:**\n\n $validate-prd.output\n\n ---\n\n ## Instructions\n\n ### 1. Read Final State\n\n Extract the `PRD_DIR=...` from the setup context above.\n Read the CURRENT files from disk:\n\n ```bash\n cat {prd-dir}/prd.json\n cat {prd-dir}/progress.txt\n ```\n\n ### 2. Gather Git Info\n\n ```bash\n git log --oneline --no-merges $(git merge-base HEAD $BASE_BRANCH)..HEAD\n git diff --stat $(git merge-base HEAD $BASE_BRANCH)..HEAD\n ```\n\n ### 3. Check PR Status\n\n ```bash\n gh pr view HEAD --json url,number,state 2>/dev/null || echo \"No PR found\"\n ```\n\n ### 4. Generate Report\n\n Output this format:\n\n ```\n ═══════════════════════════════════════════════════════\n RALPH DAG — COMPLETION REPORT\n ═══════════════════════════════════════════════════════\n\n Feature: {feature name from prd.json}\n PRD: {prd-dir}\n Branch: {branch name}\n PR: {url or \"not created\"}\n\n ── Stories ─────────────────────────────────────────\n\n | ID | Title | Status |\n |----|-------|--------|\n {for each story from prd.json}\n\n Total: {N}/{M} stories passing\n\n ── Commits ─────────────────────────────────────────\n\n {git log output}\n\n ── Files Changed ─────────────────────────────────\n\n {git diff --stat output}\n\n ── Patterns Discovered ─────────────────────────────\n\n {from ## Codebase Patterns in progress.txt, or \"None\"}\n\n ═══════════════════════════════════════════════════════\n ```\n\n Keep it factual. 
No commentary — just the data.\n", - "archon-refactor-safely": "name: archon-refactor-safely\ndescription: |\n Use when: User wants to refactor code safely with continuous validation and behavior preservation.\n Triggers: \"refactor\", \"refactor safely\", \"split this file\", \"extract module\", \"break up\",\n \"decompose\", \"safe refactor\", \"split file\", \"extract into modules\".\n Does: Scans refactoring scope -> analyzes impact (read-only) -> plans ordered task list ->\n executes with type-check hooks after every edit -> validates full suite ->\n verifies behavior preservation (read-only) -> creates PR with before/after comparison.\n NOT for: Bug fixes (use archon-fix-github-issue), feature development (use archon-feature-development),\n general architecture sweeps (use archon-architect), PR reviews.\n\n Key safety features:\n - Analysis and verification nodes are read-only (denied_tools: [Write, Edit, Bash])\n - PreToolUse hooks check if each edit is in the plan\n - PostToolUse hooks force type-check after every file change\n - Behavior verification confirms no logic changes after refactoring\n\nprovider: claude\n\nnodes:\n # ═══════════════════════════════════════════════════════════════\n # PHASE 1: SCAN — Find files matching the refactoring target\n # ═══════════════════════════════════════════════════════════════\n\n - id: scan-scope\n bash: |\n echo \"=== REFACTORING TARGET ===\"\n echo \"User request: $ARGUMENTS\"\n echo \"\"\n\n echo \"=== FILE SIZE ANALYSIS (source files by size) ===\"\n find . -name '*.ts' -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*' -not -name '*.test.ts' -not -name '*.d.ts' \\\n -exec wc -l {} + 2>/dev/null | sort -rn | head -30\n echo \"\"\n\n echo \"=== FILES OVER 500 LINES ===\"\n find . 
-name '*.ts' -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*' -not -name '*.test.ts' -not -name '*.d.ts' \\\n -exec sh -c 'lines=$(wc -l < \"$1\"); if [ \"$lines\" -gt 500 ]; then echo \"$lines $1\"; fi' _ {} \\; 2>/dev/null | sort -rn\n echo \"\"\n\n echo \"=== FUNCTION COUNT PER FILE (top 20) ===\"\n for f in $(find . -name '*.ts' -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*' -not -name '*.test.ts' -not -name '*.d.ts'); do\n count=$(grep -cE '^\\s*(export\\s+)?(async\\s+)?function\\s|=>\\s*\\{' \"$f\" 2>/dev/null) || count=0\n if [ \"$count\" -gt 5 ]; then\n echo \"$count functions: $f\"\n fi\n done | sort -rn | head -20\n echo \"\"\n\n echo \"=== EXPORT ANALYSIS (files with many exports) ===\"\n for f in $(find . -name '*.ts' -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*' -not -name '*.test.ts' -not -name '*.d.ts'); do\n count=$(grep -c \"^export \" \"$f\" 2>/dev/null) || count=0\n if [ \"$count\" -gt 5 ]; then\n echo \"$count exports: $f\"\n fi\n done | sort -rn | head -20\n timeout: 60000\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 2: ANALYZE IMPACT — Read-only deep analysis\n # Maps call sites, identifies risk areas, understands dependencies\n # ═══════════════════════════════════════════════════════════════\n\n - id: analyze-impact\n prompt: |\n You are a senior software engineer analyzing code for a safe refactoring.\n\n ## Refactoring Request\n\n $ARGUMENTS\n\n ## Codebase Scan Results\n\n $scan-scope.output\n\n ## Instructions\n\n 1. Identify the PRIMARY file(s) targeted for refactoring based on the user's request\n and the scan results above\n 2. Read each target file thoroughly — understand every function, type, and export\n 3. 
For each target file, map ALL call sites:\n - Use Grep to find every import of the target file across the codebase\n - Track which specific exports are used and where\n - Note any dynamic imports or re-exports through index files\n 4. Identify risk areas:\n - Functions with complex internal dependencies (shared closures, module-level state)\n - Circular dependencies between functions in the file\n - Any module-level side effects (top-level `const`, initialization code)\n - Exports that are part of the public API vs internal-only\n 5. Check for existing tests:\n - Find test files for the target module(s)\n - Note what's tested and what isn't\n\n ## Output\n\n Write a thorough impact analysis to `$ARTIFACTS_DIR/impact-analysis.md` with:\n\n ### Target Files\n - File path, line count, function count\n - List of all exported symbols with brief descriptions\n\n ### Dependency Map\n - Which files import from the target (with specific imports used)\n - Which files the target imports from\n\n ### Risk Assessment\n - Module-level state or side effects\n - Complex internal dependencies between functions\n - Public API surface that must be preserved exactly\n\n ### Test Coverage\n - Existing test files and what they cover\n - Critical paths that must remain tested\n\n ### Recommended Decomposition Strategy\n - Suggested module boundaries (which functions group together)\n - Rationale for each grouping (cohesion, shared dependencies)\n depends_on: [scan-scope]\n context: fresh\n denied_tools: [Write, Edit, Bash]\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 3: PLAN REFACTOR — Ordered task list with rollback strategy\n # Read-only: produces the plan, does not execute it\n # ═══════════════════════════════════════════════════════════════\n\n - id: plan-refactor\n prompt: |\n You are planning a safe refactoring. 
You must produce a precise, ordered plan\n that another agent will follow literally.\n\n ## Impact Analysis\n\n $analyze-impact.output\n\n ## Refactoring Goal\n\n $ARGUMENTS\n\n ## Principles\n\n - **Behavior preservation**: The refactoring must NOT change any behavior — only structure\n - **Incremental**: Each step must leave the codebase in a compilable state\n - **Reversible**: Each step can be independently reverted\n - **No mixed concerns**: Do not combine refactoring with bug fixes or improvements\n - **Preserve public API**: All existing exports must remain accessible from the same import paths\n - **Maximum file size**: Target 500 lines or fewer per file after refactoring\n\n ## Instructions\n\n 1. Read the impact analysis from `$ARTIFACTS_DIR/impact-analysis.md`\n 2. Read the target file(s) to understand the current structure\n 3. Design the decomposition:\n - Group related functions into cohesive modules\n - Identify shared utilities, types, and constants\n - Plan the new file structure with descriptive names\n 4. 
Write an ordered task list where each task is:\n - Independent and leaves code compilable after completion\n - Specific about what to extract and where\n - Clear about import updates needed\n\n ## Output\n\n Write the plan to `$ARTIFACTS_DIR/refactor-plan.md` with:\n\n ### File Structure (Before)\n ```\n [current structure with line counts]\n ```\n\n ### File Structure (After)\n ```\n [planned structure with estimated line counts]\n ```\n\n ### Ordered Tasks\n\n For each task:\n ```\n ## Task N: [brief description]\n\n **Action**: CREATE | EXTRACT | UPDATE\n **Source**: [source file]\n **Target**: [target file]\n **What moves**:\n - function functionName (lines X-Y)\n - type TypeName (lines X-Y)\n\n **Import updates needed**:\n - [file]: change import from [old] to [new]\n\n **Rollback**: [how to undo this specific step]\n ```\n\n ### Validation Commands\n - Type check: `bun run type-check`\n - Lint: `bun run lint`\n - Tests: `bun run test`\n - Format: `bun run format:check`\n depends_on: [analyze-impact]\n context: fresh\n denied_tools: [Write, Edit, Bash]\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 4: EXECUTE REFACTOR — Implements the plan with guardrails\n # Hooks enforce type-check after every edit and plan adherence\n # ═══════════════════════════════════════════════════════════════\n\n - id: execute-refactor\n model: opus[1m]\n prompt: |\n You are executing a refactoring plan with strict safety guardrails.\n\n ## Plan\n\n Read the full plan from `$ARTIFACTS_DIR/refactor-plan.md` — follow it LITERALLY.\n\n ## Rules\n\n - **Follow the plan exactly** — do not add extra improvements or cleanups\n - **One task at a time** — complete each task fully before starting the next\n - **Type-check after every file change** — you'll be prompted to do this after each edit\n - **Preserve all behavior** — refactoring means moving code, not changing it\n - **Preserve the public API** — if the original file exported something, it 
must still be\n importable from the same path (use re-exports in the original file if needed)\n - **Update all import sites** — every file that imported from the original must be updated\n - **Commit after each logical task** — one commit per plan task with a clear message\n\n ## Process for Each Task\n\n 1. Read the plan task\n 2. Read the source file to understand current state\n 3. Create the new file (if extracting) with the functions/types being moved\n 4. Update the source file to remove the moved code and add imports from the new file\n 5. Update the original file's exports to re-export from the new module (API preservation)\n 6. Use Grep to find and update ALL import sites across the codebase\n 7. Run `bun run type-check` to verify (you'll be reminded by hooks)\n 8. Commit: `git add -A && git commit -m \"refactor: [task description]\"`\n 9. Move to next task\n\n ## Handling Problems\n\n - If type-check fails after a change: fix it immediately before proceeding\n - If a task is more complex than planned: complete it anyway, note the deviation\n - If you discover the plan missed an import site: update it and note it\n - NEVER skip a task — complete them in order\n depends_on: [plan-refactor]\n context: fresh\n hooks:\n PreToolUse:\n - matcher: \"Write|Edit\"\n response:\n hookSpecificOutput:\n hookEventName: PreToolUse\n additionalContext: >\n Before modifying this file: Is this file in your refactoring plan\n ($ARTIFACTS_DIR/refactor-plan.md)? If it's not a planned target file\n AND not a file that imports from the target, explain why you're touching it.\n Unplanned changes increase risk.\n PostToolUse:\n - matcher: \"Write|Edit\"\n response:\n systemMessage: >\n You just modified a file. STOP and do these things NOW before making any\n other changes:\n 1. Run `bun run type-check` to verify the change compiles\n 2. If type-check fails, fix the error immediately\n 3. 
Verify you preserved the exact same behavior — no logic changes, only structural moves\n Only proceed to the next change after type-check passes.\n - matcher: \"Bash\"\n response:\n hookSpecificOutput:\n hookEventName: PostToolUse\n additionalContext: >\n Check the exit code. If type-check or any validation failed, fix the issue\n before continuing. Do not accumulate broken state.\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 5: VALIDATE — Full test suite (bash, no AI escape hatch)\n # ═══════════════════════════════════════════════════════════════\n\n - id: validate\n bash: |\n echo \"=== TYPE CHECK ===\"\n bun run type-check 2>&1\n TC_EXIT=$?\n\n echo \"\"\n echo \"=== LINT ===\"\n bun run lint 2>&1\n LINT_EXIT=$?\n\n echo \"\"\n echo \"=== FORMAT CHECK ===\"\n bun run format:check 2>&1\n FMT_EXIT=$?\n\n echo \"\"\n echo \"=== TESTS ===\"\n bun run test 2>&1\n TEST_EXIT=$?\n\n echo \"\"\n echo \"=== FILE SIZE CHECK ===\"\n echo \"Files still over 500 lines:\"\n find . 
-name '*.ts' -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*' -not -name '*.test.ts' -not -name '*.d.ts' \\\n -exec sh -c 'lines=$(wc -l < \"$1\"); if [ \"$lines\" -gt 500 ]; then echo \"$lines $1\"; fi' _ {} \\; 2>/dev/null | sort -rn\n echo \"\"\n\n echo \"=== RESULTS ===\"\n echo \"Type check: $([ $TC_EXIT -eq 0 ] && echo 'PASS' || echo 'FAIL')\"\n echo \"Lint: $([ $LINT_EXIT -eq 0 ] && echo 'PASS' || echo 'FAIL')\"\n echo \"Format: $([ $FMT_EXIT -eq 0 ] && echo 'PASS' || echo 'FAIL')\"\n echo \"Tests: $([ $TEST_EXIT -eq 0 ] && echo 'PASS' || echo 'FAIL')\"\n\n if [ $TC_EXIT -eq 0 ] && [ $LINT_EXIT -eq 0 ] && [ $FMT_EXIT -eq 0 ] && [ $TEST_EXIT -eq 0 ]; then\n echo \"VALIDATION_STATUS: PASS\"\n else\n echo \"VALIDATION_STATUS: FAIL\"\n fi\n depends_on: [execute-refactor]\n timeout: 300000\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 6: FIX VALIDATION FAILURES (if any)\n # Only does real work if validation failed\n # ═══════════════════════════════════════════════════════════════\n\n - id: fix-failures\n prompt: |\n Review the validation output below.\n\n ## Validation Output\n\n $validate.output\n\n ## Instructions\n\n If the output ends with \"VALIDATION_STATUS: PASS\", respond with\n \"All checks passed — no fixes needed.\" and stop.\n\n If there are failures:\n\n 1. Read the validation failures carefully\n 2. Fix ONLY what's broken — do not make additional improvements\n 3. If a fix requires changing behavior (not just fixing a type/lint error),\n revert the original change instead\n 4. Run the specific failing check after each fix to confirm it passes\n 5. 
After all fixes, run the full validation suite: `bun run validate`\n\n If there are files still over 500 lines, note them but do NOT attempt further\n splitting in this node — that would require a new plan cycle.\n depends_on: [validate]\n context: fresh\n hooks:\n PostToolUse:\n - matcher: \"Write|Edit\"\n response:\n systemMessage: >\n You just made a fix. Run the specific failing validation check NOW\n to verify your fix works. Do not batch fixes — verify each one.\n PreToolUse:\n - matcher: \"Write|Edit\"\n response:\n hookSpecificOutput:\n hookEventName: PreToolUse\n additionalContext: >\n You are fixing validation failures only. Do not make any changes\n beyond what's needed to pass the failing checks. If in doubt, revert\n the original change that caused the failure.\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 7: VERIFY BEHAVIOR — Read-only confirmation\n # Ensures the refactoring preserved behavior by tracing call paths\n # ═══════════════════════════════════════════════════════════════\n\n - id: verify-behavior\n prompt: |\n You are a code reviewer verifying that a refactoring preserved exact behavior.\n You can ONLY read files — you cannot make any changes.\n\n ## Refactoring Plan\n\n Read the plan from `$ARTIFACTS_DIR/refactor-plan.md` to understand what was intended.\n\n ## Instructions\n\n 1. Use Grep and Glob to find all files in the new module locations listed in\n the plan, then Read each one. (Note: Bash is denied in this read-only node,\n so use Grep/Glob/Read to discover changes instead of git commands.)\n 2. For each new file created by the refactoring:\n - Verify the extracted functions match the originals exactly (no logic changes)\n - Check that all types and interfaces are preserved\n 3. For the original file(s):\n - Verify re-exports exist for all symbols that were previously exported\n - Confirm no function bodies were changed (only moved)\n 4. 
For all import sites updated:\n - Verify imports resolve to the correct new locations\n - Check that no import was missed\n 5. Verify the public API is preserved:\n - Any code that imported from the original file should still work unchanged\n - Re-exports in the original file should cover all moved symbols\n\n ## Output\n\n Write your verification report to `$ARTIFACTS_DIR/behavior-verification.md`:\n\n ### Verdict: PASS | FAIL\n\n ### Functions Verified\n | Function | Original Location | New Location | Behavior Preserved |\n |----------|------------------|--------------|-------------------|\n | funcName | file.ts:42 | new-file.ts:10 | Yes/No |\n\n ### Public API Check\n - [ ] All original exports still accessible from original import path\n - [ ] Re-exports correctly configured\n\n ### Import Sites Updated\n - [ ] All N import sites verified\n\n ### Issues Found\n [List any behavior changes detected, or \"None — refactoring is behavior-preserving\"]\n depends_on: [fix-failures]\n context: fresh\n denied_tools: [Write, Edit, Bash]\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 8: CREATE PR — Detailed description with before/after\n # ═══════════════════════════════════════════════════════════════\n\n - id: create-pr\n prompt: |\n Create a pull request for the refactoring.\n\n ## Context\n\n - **Refactoring goal**: $ARGUMENTS\n - **Impact analysis**: Read `$ARTIFACTS_DIR/impact-analysis.md`\n - **Refactoring plan**: Read `$ARTIFACTS_DIR/refactor-plan.md`\n - **Validation**: $validate.output\n - **Behavior verification**: Read `$ARTIFACTS_DIR/behavior-verification.md`\n\n ## Instructions\n\n 1. Stage all changes and create a final commit if there are uncommitted changes\n 2. Push the branch: `git push -u origin HEAD`\n 3. Check if a PR already exists: `gh pr list --head $(git branch --show-current)`\n 4. Create the PR with the format below\n 5. 
Save the PR URL to `$ARTIFACTS_DIR/.pr-url`\n\n ## PR Format\n\n - **Title**: `refactor: [concise description]` (under 70 chars)\n - **Body**:\n\n ```markdown\n ## Refactoring: [goal]\n\n ### Motivation\n\n [Why this refactoring was needed — file sizes, complexity, maintainability]\n\n ### Before\n\n ```\n [Original file structure with line counts from the plan]\n ```\n\n ### After\n\n ```\n [New file structure with line counts]\n ```\n\n ### Changes\n\n [For each new module: what was extracted and why it's a cohesive unit]\n\n ### Safety\n\n - [x] Type check passes\n - [x] Lint passes\n - [x] Tests pass (all existing tests still green)\n - [x] Public API preserved (re-exports maintain backward compatibility)\n - [x] Behavior verification passed (read-only audit confirmed no logic changes)\n - [x] Each task committed separately for easy review/revert\n\n ### Review Guide\n\n Each commit represents one extraction step. Review commits individually for easiest review.\n All commits are behavior-preserving structural moves.\n ```\n depends_on: [verify-behavior]\n context: fresh\n hooks:\n PreToolUse:\n - matcher: \"Write|Edit\"\n response:\n hookSpecificOutput:\n hookEventName: PreToolUse\n permissionDecision: deny\n permissionDecisionReason: \"PR creation node — do not modify source files. Use only git and gh commands.\"\n PostToolUse:\n - matcher: \"Bash\"\n response:\n hookSpecificOutput:\n hookEventName: PostToolUse\n additionalContext: >\n Verify this command succeeded. 
If git push or gh pr create failed,\n read the error message carefully before retrying.\n", + "archon-piv-loop": "name: archon-piv-loop\ndescription: |\n Use when: User wants guided Plan-Implement-Validate development with human-in-the-loop.\n Triggers: \"piv\", \"piv loop\", \"plan implement validate\", \"guided development\",\n \"structured development\", \"build a feature\", \"develop with review\".\n NOT for: Autonomous implementation without planning (use archon-feature-development).\n NOT for: PRD creation (use archon-interactive-prd).\n NOT for: Ralph story-based implementation (use archon-ralph-dag).\n\n Interactive PIV loop workflow — the foundational AI coding methodology:\n 1. EXPLORE: Iterative conversation with human to understand the problem (arbitrary rounds)\n 2. PLAN: Create structured plan -> iterative review & revision (arbitrary rounds)\n 3. IMPLEMENT: Autonomous task-by-task implementation from plan (Ralph loop)\n 4. VALIDATE: Automated code review -> iterative human feedback & fixes (arbitrary rounds)\n\n The PIV loop comes AFTER a PRD exists. 
Each PIV loop focuses on ONE granular feature or bug fix.\n Input: A description of what to build, a path to an existing plan, or a GitHub issue number.\n\nprovider: claude\ninteractive: true\n\nnodes:\n # ═══════════════════════════════════════════════════════════════\n # PHASE 1: EXPLORE — Iterative exploration with human\n # Understand the idea, explore the codebase, converge on approach\n # Loops until the user says they're ready to create the plan.\n # ═══════════════════════════════════════════════════════════════\n\n - id: explore\n loop:\n prompt: |\n # PIV Loop — Exploration\n\n You are a senior engineering partner in an iterative exploration session.\n Your goal: DEEPLY UNDERSTAND what to build before any code is written.\n\n **User's request**: $ARGUMENTS\n **User's latest input**: $LOOP_USER_INPUT\n\n ---\n\n ## If this is the FIRST iteration (no user input yet):\n\n ### Step 1: Parse the Input\n\n Determine what the user provided:\n\n **If it's a file path** (ends in `.md`, `.plan.md`, or `.prd.md`):\n - Read the file\n - If it's an existing plan → summarize it and ask if they want to refine or proceed\n - If it's a PRD → identify the specific phase/feature to focus on\n\n **If it's a GitHub issue** (`#123` format):\n - Fetch it: `gh issue view {number} --json title,body,labels,comments`\n - Summarize the issue context\n\n **If it's free text**:\n - This is a feature idea or bug description. Use it directly.\n\n ### Step 2: Explore the Codebase\n\n Before asking questions, DO YOUR HOMEWORK:\n\n 1. **Read CLAUDE.md** — understand project conventions, architecture, and constraints\n 2. **Search for related code** — find existing implementations similar to what the user wants\n 3. **Read key files** — understand the current state of code the user wants to change\n 4. 
**Check recent git history** — `git log --oneline -20` for recent changes in the area\n\n ### Step 3: Present Your Understanding\n\n ```\n ## What I Understand\n\n You want to: {restated understanding in 2-3 sentences}\n\n ## What Already Exists\n\n - {file:line} — {what it does and how it relates}\n - {file:line} — {what it does and how it relates}\n - {pattern/component} — {how it could be extended or reused}\n\n ## Initial Architecture Thoughts\n\n Based on what exists, I'm thinking:\n - {approach 1 — extend existing X}\n - {approach 2 — if approach 1 doesn't work}\n - {key architectural decision that needs your input}\n ```\n\n ### Step 4: Ask Targeted Questions\n\n Ask 4-6 questions focused on DECISIONS, not information gathering:\n - Scope boundaries, architecture preferences, tech decisions\n - Constraints, existing code extension vs fresh build, testing expectations\n - Reference actual code you found — don't ask generic questions\n\n ---\n\n ## If the user has provided input (subsequent iterations):\n\n ### Step 1: Process Their Response\n\n Read their answers carefully. 
Identify:\n - Decisions they've made\n - Areas they want you to explore further\n - Questions they asked YOU back (answer these with evidence!)\n\n ### Step 2: Do Targeted Research\n\n Based on their response:\n - If they mentioned specific technologies → research best practices\n - If they pointed you to specific code → read it thoroughly\n - If they asked you to explore an area → do a thorough investigation\n - If they made architecture decisions → validate against the codebase\n\n ### Step 3: Present Updated Understanding\n\n Show what you learned, answer their questions with file:line references,\n and present your refined architecture recommendation.\n\n ### Step 4: Converge or Continue\n\n **If there are still important open questions:**\n Ask 2-4 focused questions about remaining ambiguities.\n\n **If the picture is clear and you have enough to create a plan:**\n Present a final implementation summary:\n\n ```\n ## Implementation Summary\n\n ### What We're Building\n {Clear, specific description}\n\n ### Scope Boundary\n - IN: {what's included}\n - OUT: {what's explicitly excluded}\n\n ### Architecture\n - {key decisions}\n\n ### Files That Will Change\n - `{file}` — {what changes and why}\n\n ### Success Criteria\n - [ ] {specific, testable criterion}\n - [ ] All validation passes\n\n ### Key Risks\n - {risk — and mitigation}\n ```\n\n Then tell the user: \"I have a clear picture. Say **ready** and I'll create\n the structured implementation plan, or share any final thoughts.\"\n\n **CRITICAL — READ THIS CAREFULLY**:\n - NEVER output PLAN_READY unless the user's LATEST message contains\n an EXPLICIT phrase like \"ready\", \"create the plan\", \"let's go\", \"proceed\", or \"I'm done\".\n - If the user asked a question → do NOT emit the signal. Answer the question.\n - If the user gave feedback or requested changes → do NOT emit the signal. Address it.\n - If the user said \"also check X\" or \"one more thing\" → do NOT emit the signal. 
Explore it.\n - If you are unsure whether the user is approving → do NOT emit the signal. Ask them.\n - The ONLY correct time to emit the signal is when the user's message CLEARLY means\n \"stop exploring, I'm ready for you to create the plan.\"\n until: PLAN_READY\n max_iterations: 15\n interactive: true\n gate_message: |\n Answer the questions above, ask me to explore specific areas,\n or say \"ready\" when you're satisfied with the exploration.\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 2: PLAN — Create the structured implementation plan\n # ═══════════════════════════════════════════════════════════════\n\n - id: create-plan\n model: sonnet\n depends_on: [explore]\n context: fresh\n prompt: |\n # PIV Loop — Create Structured Plan\n\n You are creating a structured implementation plan from a completed exploration phase.\n This plan will be the SOLE GUIDE for the implementation agent — it must be complete,\n specific, and actionable.\n\n **Original request**: $ARGUMENTS\n **Final exploration summary**: $explore.output\n\n ---\n\n ## Step 1: Read the Codebase (Again)\n\n Before writing the plan, verify your understanding is current:\n\n 1. **Read CLAUDE.md** — capture all relevant conventions\n 2. **Read every file you plan to change** — note exact current state\n 3. **Read example test files** — understand testing patterns\n 4. **Check for any recent changes** — `git log --oneline -10`\n\n ## Step 2: Determine Plan Location\n\n Generate a kebab-case slug from the feature name.\n Save to `.claude/archon/plans/{slug}.plan.md`.\n\n ```bash\n mkdir -p .claude/archon/plans\n ```\n\n ## Step 3: Write the Plan\n\n Use this template. 
Fill EVERY section with specific, verified information.\n\n ```markdown\n # Feature: {Title}\n\n ## Summary\n {1-2 sentences: what changes and why}\n\n ## Mission\n {The core goal in one clear statement}\n\n ## Success Criteria\n - [ ] {Specific, testable criterion}\n - [ ] All validation passes (`bun run validate` or equivalent)\n - [ ] No regressions in existing tests\n\n ## Scope\n ### In Scope\n - {What we ARE building}\n ### Out of Scope\n - {What we are NOT building — and why}\n\n ## Codebase Context\n ### Key Files\n | File | Role | Action |\n |------|------|--------|\n | `{path}` | {what it does} | CREATE / UPDATE |\n\n ### Patterns to Follow\n {Actual code snippets from the codebase to mirror}\n\n ## Architecture\n - {Decision 1 — with rationale}\n - {Decision 2 — with rationale}\n\n ## Task List\n Execute in order. Each task is atomic and independently verifiable.\n\n ### Task 1: {ACTION} `{file path}`\n **Action**: CREATE / UPDATE\n **Details**: {Exact changes — specific enough for an agent with no context}\n **Pattern**: Follow `{source file}:{lines}`\n **Validate**: `{command to verify this task}`\n\n ## Testing Strategy\n | Test File | Test Cases | Validates |\n |-----------|-----------|-----------|\n | `{path}` | {cases} | {what it validates} |\n\n ## Validation Commands\n 1. Type check: `{command}`\n 2. Lint: `{command}`\n 3. Tests: `{command}`\n 4. Full validation: `{command}`\n\n ## Risks\n | Risk | Impact | Mitigation |\n |------|--------|------------|\n | {risk} | {HIGH/MED/LOW} | {specific mitigation} |\n ```\n\n ## Step 4: Verify the Plan\n\n 1. Check every file path referenced — verify they exist\n 2. Check every pattern cited — verify the code matches\n 3. Check task ordering — ensure dependencies are respected\n 4. 
Check completeness — could an agent with NO context implement this?\n\n ## Step 5: Report\n\n ```\n ## Plan Created\n\n **File**: `.claude/archon/plans/{slug}.plan.md`\n **Tasks**: {count}\n **Files to change**: {count}\n\n Key decisions:\n - {decision 1}\n - {decision 2}\n\n Please review the plan and provide feedback.\n ```\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 2b: PLAN — Iterative plan refinement\n # Review and revise the plan as many times as needed.\n # ═══════════════════════════════════════════════════════════════\n\n - id: refine-plan\n depends_on: [create-plan]\n loop:\n prompt: |\n # PIV Loop — Plan Refinement\n\n The user is reviewing the implementation plan and providing feedback.\n\n **User's feedback**: $LOOP_USER_INPUT\n\n ---\n\n ## Step 1: Find and Read the Plan\n\n ```bash\n ls -t .claude/archon/plans/*.plan.md 2>/dev/null | head -1\n ```\n\n Read the entire plan file. Also read CLAUDE.md for conventions.\n\n ## Step 2: Process Feedback\n\n **If there is no user feedback yet** (first iteration, $LOOP_USER_INPUT is empty):\n - Read the plan carefully\n - Present a summary of the plan's key decisions and task list\n - Ask the user to review and provide feedback\n - Do NOT emit the completion signal on the first iteration\n\n **If the user EXPLICITLY approved** (said \"approved\", \"looks good\", \"let's go\", etc.):\n - Make no changes\n - Output: \"Plan approved. 
Proceeding to implementation.\"\n - Signal completion: PLAN_APPROVED\n\n **If the user provided specific feedback:**\n - Parse each piece of feedback\n - Edit the plan file directly:\n - Add/remove/modify tasks as requested\n - Update success criteria if needed\n - Adjust testing strategy if needed\n - Re-verify file paths and patterns after changes\n\n **CRITICAL**: NEVER emit PLAN_APPROVED unless the user's latest\n message EXPLICITLY says \"approved\", \"looks good\", \"ship it\", or similar approval.\n Questions, feedback, and requests for changes are NOT approval.\n\n ## Step 3: Show Changes\n\n ```\n ## Plan Revised\n\n Changes made:\n - {change 1}\n - {change 2}\n\n Updated stats:\n - Tasks: {count}\n - Files to change: {count}\n\n Review the updated plan and provide more feedback, or say \"approved\" to proceed.\n ```\n until: PLAN_APPROVED\n max_iterations: 10\n interactive: true\n gate_message: |\n Review the plan document. Provide specific feedback on what to change,\n or say \"approved\" to begin implementation.\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 3: IMPLEMENT — Setup\n # Read the plan, prepare the environment\n # ═══════════════════════════════════════════════════════════════\n\n - id: implement-setup\n depends_on: [refine-plan]\n bash: |\n set -e\n\n PLAN_FILE=$(ls -t .claude/archon/plans/*.plan.md 2>/dev/null | head -1)\n\n if [ -z \"$PLAN_FILE\" ]; then\n echo \"ERROR: No plan file found in .claude/archon/plans/\"\n exit 1\n fi\n\n # Install dependencies if needed\n if [ -f \"bun.lock\" ] || [ -f \"bun.lockb\" ]; then\n echo \"Installing dependencies...\"\n bun install --frozen-lockfile 2>&1 | tail -3\n elif [ -f \"package-lock.json\" ]; then\n npm ci 2>&1 | tail -3\n elif [ -f \"yarn.lock\" ]; then\n yarn install --frozen-lockfile 2>&1 | tail -3\n elif [ -f \"pnpm-lock.yaml\" ]; then\n pnpm install --frozen-lockfile 2>&1 | tail -3\n fi\n\n echo \"BRANCH=$(git branch --show-current)\"\n echo 
\"GIT_ROOT=$(git rev-parse --show-toplevel)\"\n echo \"PLAN_FILE=$PLAN_FILE\"\n\n echo \"=== PLAN_START ===\"\n cat \"$PLAN_FILE\"\n echo \"\"\n echo \"=== PLAN_END ===\"\n\n TASK_COUNT=$(grep -c \"^### Task [0-9]\" \"$PLAN_FILE\" || true)\n echo \"TASK_COUNT=${TASK_COUNT:-0}\"\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 3b: IMPLEMENT — Task-by-Task Loop (Ralph pattern)\n # Fresh context each iteration. Reads plan from disk.\n # One task per iteration. Validates before committing.\n # ═══════════════════════════════════════════════════════════════\n\n - id: implement\n depends_on: [implement-setup]\n idle_timeout: 600000\n model: opus[1m]\n loop:\n prompt: |\n # PIV Loop — Implementation Agent\n\n You are an autonomous coding agent in a FRESH session — no memory of previous iterations.\n Your job: Read the plan from disk, implement ONE task, validate, commit, update tracking, exit.\n\n **Golden Rule**: If validation fails, fix it before committing. Never commit broken code.\n\n ---\n\n ## Phase 0: CONTEXT — Load State\n\n The setup node produced this context:\n\n $implement-setup.output\n\n **User's original request**: $USER_MESSAGE\n\n ---\n\n ### 0.1 Parse Plan File\n\n Extract the `PLAN_FILE=...` line from the context above.\n\n ### 0.2 Read Current State (from disk — not from context above)\n\n The context above is a snapshot from before the loop started. Previous iterations\n may have changed things. **You MUST re-read from disk:**\n\n 1. **Read the plan file** — your implementation guide\n 2. **Read progress tracking** — check if `.claude/archon/plans/progress.txt` exists\n 3. 
**Read CLAUDE.md** — project conventions and constraints\n\n ### 0.3 Check Git State\n\n ```bash\n git log --oneline -10\n git status\n ```\n\n ---\n\n ## Phase 1: SELECT — Pick Next Task\n\n From the plan file, identify tasks by `### Task N:` headers.\n Cross-reference with commits from previous iterations and progress tracking.\n\n **If ALL tasks are complete** → Skip to Phase 5 (Completion).\n\n ### Announce Selection\n\n ```\n -- Task Selected ------------------------------------------------\n Task: {N} — {task title}\n Action: {CREATE / UPDATE}\n File: {file path}\n -----------------------------------------------------------------\n ```\n\n ---\n\n ## Phase 2: IMPLEMENT — Execute the Task\n\n 1. Read the file you're about to change (if it exists)\n 2. Read the pattern file referenced in the plan\n 3. Make changes following the plan EXACTLY\n 4. Type-check after each file: `bun run type-check 2>&1 || true`\n\n ---\n\n ## Phase 3: VALIDATE — Verify the Task\n\n ```bash\n bun run type-check && bun run lint && bun run test && bun run format:check\n ```\n\n If validation fails: fix, re-run (up to 3 attempts). If unfixable, note in progress\n tracking and do NOT commit broken code.\n\n ---\n\n ## Phase 4: COMMIT — Save Changes\n\n ```bash\n git add -A\n git diff --cached --stat\n git commit -m \"$(cat <<'EOF'\n {type}: {task description}\n\n PIV Task {N}: {brief details}\n EOF\n )\"\n ```\n\n Track progress in `.claude/archon/plans/progress.txt`:\n ```\n ## Task {N}: {title} — COMPLETED\n Date: {ISO date}\n Files: {list}\n Commit: {short hash}\n ---\n ```\n\n ---\n\n ## Phase 5: COMPLETE — Check All Tasks\n\n If ALL tasks are done:\n 1. Run full validation: `bun run validate 2>&1`\n 2. Push: `git push -u origin HEAD`\n 3. Signal: `COMPLETE`\n\n If tasks remain, report status and end normally. 
The loop engine starts a fresh iteration.\n until: COMPLETE\n max_iterations: 15\n fresh_context: true\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 4: VALIDATE — Automated code review\n # Review all changes against the plan\n # ═══════════════════════════════════════════════════════════════\n\n - id: code-review\n model: sonnet\n depends_on: [implement]\n context: fresh\n prompt: |\n # PIV Loop — Automated Code Review\n\n The implementation phase is complete. Review ALL changes against the plan.\n\n **Implementation output**: $implement.output\n\n ---\n\n ## Step 1: Find and Read the Plan\n\n ```bash\n ls -t .claude/archon/plans/*.plan.md 2>/dev/null | head -1\n ```\n\n ## Step 2: Review All Changes\n\n ```bash\n git log --oneline --no-merges $(git merge-base HEAD $BASE_BRANCH)..HEAD\n git diff $BASE_BRANCH..HEAD --stat\n git diff $BASE_BRANCH..HEAD\n ```\n\n ## Step 3: Check Against Plan\n\n For EACH task: was it implemented correctly? Do success criteria hold?\n For EACH file: check quality, security, patterns, CLAUDE.md compliance.\n\n ## Step 4: Run Validation\n\n ```bash\n bun run validate 2>&1 || (bun run type-check && bun run lint && bun run test && bun run format:check)\n ```\n\n ## Step 5: Fix Obvious Issues\n\n Fix type errors, lint warnings, missing imports, formatting. 
Commit any fixes:\n ```bash\n git add -A && git commit -m \"fix: address code review findings\" 2>/dev/null || true\n ```\n\n ## Step 6: Present Review\n\n ```\n ## Code Review Complete\n\n ### Implementation Status\n | Task | Status | Notes |\n |------|--------|-------|\n | {task} | DONE / PARTIAL / MISSING | {notes} |\n\n ### Validation Results\n - Type-check: PASS / FAIL\n - Lint: PASS / FAIL\n - Tests: PASS / FAIL\n - Format: PASS / FAIL\n\n ### Code Quality Findings\n {Issues found, or \"No issues found.\"}\n\n ### Recommendation\n {READY FOR REVIEW / NEEDS FIXES}\n ```\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 4b: VALIDATE — Iterative human feedback & fixes\n # The user tests the implementation and provides feedback.\n # Loops until the user approves.\n # ═══════════════════════════════════════════════════════════════\n\n - id: fix-feedback\n depends_on: [code-review]\n loop:\n prompt: |\n # PIV Loop — Address Validation Feedback\n\n The human has reviewed the implementation and provided feedback.\n\n **Human's feedback**: $LOOP_USER_INPUT\n\n ---\n\n ## Step 1: Read Context\n\n ```bash\n ls -t .claude/archon/plans/*.plan.md 2>/dev/null | head -1\n ```\n\n Read the plan file and CLAUDE.md for conventions.\n\n ## Step 2: Process Feedback\n\n **If there is no user feedback yet** (first iteration, $LOOP_USER_INPUT is empty):\n - Present the code review results and ask the user to test the implementation\n - Do NOT emit the completion signal on the first iteration\n\n **If the user EXPLICITLY approved** (said \"approved\", \"looks good\", \"ship it\", etc.):\n - Output: \"Implementation approved!\"\n - Signal: VALIDATED\n\n **CRITICAL**: NEVER emit VALIDATED unless the user's latest\n message EXPLICITLY says \"approved\", \"looks good\", \"ship it\", or similar approval.\n\n **If the user provided specific feedback:**\n 1. Read the relevant files\n 2. Understand each issue\n 3. Make the fixes\n 4. 
Type-check after each change\n\n ## Step 3: Full Validation\n\n ```bash\n bun run validate 2>&1 || (bun run type-check && bun run lint && bun run test && bun run format:check)\n ```\n\n ## Step 4: Commit Fixes\n\n ```bash\n git add -A\n git commit -m \"$(cat <<'EOF'\n fix: address review feedback\n\n Changes:\n - {fix 1}\n - {fix 2}\n EOF\n )\"\n ```\n\n ## Step 5: Report\n\n ```\n ## Feedback Addressed\n\n Changes made:\n - {fix 1}\n - {fix 2}\n\n Validation: {PASS / FAIL with details}\n\n Review again, or say \"approved\" to finalize.\n ```\n until: VALIDATED\n max_iterations: 10\n interactive: true\n gate_message: |\n Test the implementation yourself and review the code changes.\n Provide specific feedback on what needs fixing, or say \"approved\" to finalize.\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 5: FINALIZE — Push, create PR, generate summary\n # ═══════════════════════════════════════════════════════════════\n\n - id: finalize\n model: sonnet\n depends_on: [fix-feedback]\n context: fresh\n prompt: |\n # PIV Loop — Finalize\n\n The implementation has been approved. 
Push changes and create a PR.\n\n ---\n\n ## Step 1: Push Changes\n\n ```bash\n git push -u origin HEAD 2>&1 || true\n ```\n\n ## Step 2: Generate Summary\n\n ```bash\n git log --oneline --no-merges $(git merge-base HEAD $BASE_BRANCH)..HEAD\n git diff --stat $(git merge-base HEAD $BASE_BRANCH)..HEAD\n ```\n\n Read the plan file and progress tracking for context.\n\n ## Step 3: Create PR (if not already created)\n\n ```bash\n gh pr view HEAD --json url 2>/dev/null || echo \"NO_PR\"\n ```\n\n If no PR exists:\n\n ```bash\n cat .github/pull_request_template.md 2>/dev/null || echo \"NO_TEMPLATE\"\n ```\n\n Create with `gh pr create --draft --base $BASE_BRANCH`:\n - Title from the plan's feature name\n - Body summarizing the implementation\n - Use a HEREDOC for the body\n\n ## Step 4: Output Summary\n\n ```\n ===============================================================\n PIV LOOP — COMPLETE\n ===============================================================\n\n Feature: {from plan}\n Plan: {plan file path}\n Branch: {branch name}\n PR: {url}\n\n -- Tasks Completed -----------------------------------------------\n {list from progress tracking}\n\n -- Commits -------------------------------------------------------\n {git log output}\n\n -- Files Changed -------------------------------------------------\n {git diff --stat output}\n\n -- Validation ----------------------------------------------------\n All checks passed.\n ===============================================================\n ```\n", + "archon-plan-to-pr": "name: archon-plan-to-pr\ndescription: |\n Use when: You have an existing implementation plan and want to execute it end-to-end.\n Input: Path to a plan file ($ARTIFACTS_DIR/plan.md or .agents/plans/*.md)\n Output: PR ready for merge with comprehensive review completed\n\n Full workflow:\n 1. Read plan, setup branch, extract scope limits\n 2. Verify plan research is still valid\n 3. Implement all tasks with type-checking\n 4. Run full validation suite\n 5. 
Create PR with template, mark ready\n 6. Comprehensive code review (5 parallel agents with scope limit awareness)\n 7. Synthesize and fix review findings\n 8. Final summary with decision matrix -> GitHub comment + follow-up recommendations\n\n NOT for: Creating plans from scratch (use archon-idea-to-pr), quick fixes, standalone reviews.\n\nnodes:\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 1: SETUP\n # ═══════════════════════════════════════════════════════════════════\n\n - id: plan-setup\n command: archon-plan-setup\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 2: CONFIRM PLAN\n # ═══════════════════════════════════════════════════════════════════\n\n - id: confirm-plan\n command: archon-confirm-plan\n depends_on: [plan-setup]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 3: IMPLEMENT\n # ═══════════════════════════════════════════════════════════════════\n\n - id: implement-tasks\n command: archon-implement-tasks\n depends_on: [confirm-plan]\n context: fresh\n model: opus[1m]\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 4: VALIDATE\n # ═══════════════════════════════════════════════════════════════════\n\n - id: validate\n command: archon-validate\n depends_on: [implement-tasks]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 5: FINALIZE PR\n # ═══════════════════════════════════════════════════════════════════\n\n - id: finalize-pr\n command: archon-finalize-pr\n depends_on: [validate]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 6: CODE REVIEW\n # ═══════════════════════════════════════════════════════════════════\n\n - id: review-scope\n command: archon-pr-review-scope\n depends_on: [finalize-pr]\n context: fresh\n\n - id: sync\n command: archon-sync-pr-with-main\n 
depends_on: [review-scope]\n context: fresh\n\n - id: code-review\n command: archon-code-review-agent\n depends_on: [sync]\n context: fresh\n\n - id: error-handling\n command: archon-error-handling-agent\n depends_on: [sync]\n context: fresh\n\n - id: test-coverage\n command: archon-test-coverage-agent\n depends_on: [sync]\n context: fresh\n\n - id: comment-quality\n command: archon-comment-quality-agent\n depends_on: [sync]\n context: fresh\n\n - id: docs-impact\n command: archon-docs-impact-agent\n depends_on: [sync]\n context: fresh\n\n - id: synthesize\n command: archon-synthesize-review\n depends_on: [code-review, error-handling, test-coverage, comment-quality, docs-impact]\n trigger_rule: one_success\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 7: FIX REVIEW ISSUES\n # ═══════════════════════════════════════════════════════════════════\n\n - id: implement-fixes\n command: archon-implement-review-fixes\n depends_on: [synthesize]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 8: FINAL SUMMARY & FOLLOW-UP\n # ═══════════════════════════════════════════════════════════════════\n\n - id: workflow-summary\n command: archon-workflow-summary\n depends_on: [implement-fixes]\n context: fresh\n", + "archon-ralph-dag": "name: archon-ralph-dag\ndescription: |\n Use when: User wants to run a Ralph implementation loop.\n Triggers: \"ralph\", \"run ralph\", \"ralph dag\", \"run ralph dag\".\n\n DAG workflow that:\n 1. Detects input: existing prd.json, existing prd.md (needs stories), or raw idea\n 2. Generates prd.md + prd.json if needed (explores codebase, breaks into stories)\n 3. Validates PRD files, reads project context, installs dependencies\n 4. Runs Ralph loop (fresh context per iteration) implementing one story per iteration\n 5. 
Creates PR and reports completion\n\n Accepts: An idea description, a path to an existing prd.md, or a directory with prd.md + prd.json\n\nprovider: claude\n\nnodes:\n # ═══════════════════════════════════════════════════════════════\n # NODE 1: DETECT INPUT\n # Determines what the user provided: full PRD, partial PRD, or idea\n # ═══════════════════════════════════════════════════════════════\n\n - id: detect-input\n model: haiku\n prompt: |\n # Detect Ralph Input\n\n **User input**: $ARGUMENTS\n\n Determine what the user provided and prepare the PRD directory. Follow these steps exactly:\n\n ## Step 1: Detect worktree\n\n Run `git worktree list --porcelain` to check if you're in a worktree.\n If you see multiple entries, you ARE in a worktree. The first entry (the one without \"branch\" pointing to your current branch) is the **main repo root**. Save it — you'll need it to find files.\n\n ## Step 2: Classify the input\n\n Look at the user input above. It's one of three things:\n\n **Case A — Ralph directory path** (contains `.archon/ralph/`):\n Extract the directory. Check if both `prd.json` and `prd.md` exist there (try locally first, then in the main repo root if in a worktree).\n\n **Case B — File path** (ends in `.md`):\n This is an external PRD file. Find it:\n 1. Try the path as-is (relative to cwd)\n 2. Try it as an absolute path\n 3. If in a worktree, try it relative to the **main repo root** from Step 1\n Once found, read the file to confirm it's a PRD.\n\n **Case C — Free text**:\n Not a file path — it's a feature idea.\n\n ## Step 3: Auto-discover existing ralph PRDs\n\n If the input didn't point to a specific path, check if `.archon/ralph/` contains any `prd.json` files:\n ```bash\n find .archon/ralph -name \"prd.json\" -type f 2>/dev/null\n ```\n\n ## Step 4: Take action based on classification\n\n **If Case A and both files exist** → output `ready` (no further action needed)\n\n **If Case B (external PRD found)**:\n 1. 
Derive a kebab-case slug from the PRD filename or title (e.g., `workflow-lifecycle-overhaul`)\n 2. Create the ralph directory: `mkdir -p .archon/ralph/{slug}`\n 3. Copy the PRD content to `.archon/ralph/{slug}/prd.md`\n 4. Output `external_prd` with the new prd_dir\n\n **If Case C or auto-discovered ralph dir has prd.md but no prd.json** → output `needs_generation`\n\n ## Output\n\n Your final output MUST be exactly one JSON object:\n ```json\n {\"input_type\": \"ready|external_prd|needs_generation\", \"prd_dir\": \".archon/ralph/{slug}\"}\n ```\n output_format:\n type: object\n properties:\n input_type:\n type: string\n enum: [ready, external_prd, needs_generation]\n prd_dir:\n type: string\n required: [input_type, prd_dir]\n\n # ═══════════════════════════════════════════════════════════════\n # NODE 2: GENERATE PRD\n # Scenario 1: User has an idea → generate prd.md + prd.json\n # Scenario 2: User has prd.md → generate prd.json with stories\n # Skipped if prd.json already exists\n # ═══════════════════════════════════════════════════════════════\n\n - id: generate-prd\n depends_on: [detect-input]\n when: \"$detect-input.output.input_type != 'ready'\"\n command: archon-ralph-generate\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # NODE 3: VALIDATE & SETUP\n # Finds PRD directory, reads all state files, installs deps,\n # verifies the environment is ready for implementation.\n # ═══════════════════════════════════════════════════════════════\n\n - id: validate-prd\n depends_on: [detect-input, generate-prd]\n trigger_rule: one_success\n bash: |\n set -e\n\n # ── 1. Find PRD directory (passed from detect-input) ──────\n PRD_DIR=$detect-input.output.prd_dir\n\n # If detect-input didn't know the PRD dir (generated from scratch), discover it\n if [ -z \"$PRD_DIR\" ] || [ ! 
-f \"$PRD_DIR/prd.json\" ]; then\n FOUND=$(find .archon/ralph -name \"prd.json\" -type f 2>/dev/null | head -1)\n if [ -n \"$FOUND\" ]; then\n PRD_DIR=$(dirname \"$FOUND\")\n fi\n fi\n\n if [ -z \"$PRD_DIR\" ] || [ ! -f \"$PRD_DIR/prd.json\" ]; then\n echo \"ERROR: No prd.json found after generation step.\"\n echo \"Check the generate-prd node output for errors.\"\n exit 1\n fi\n\n if [ ! -f \"$PRD_DIR/prd.md\" ]; then\n echo \"ERROR: prd.md not found in $PRD_DIR\"\n exit 1\n fi\n\n # ── 2. Install dependencies (worktrees lack node_modules) ──\n if [ -f \"bun.lock\" ] || [ -f \"bun.lockb\" ]; then\n echo \"Installing dependencies (bun)...\"\n bun install --frozen-lockfile 2>&1 | tail -3\n elif [ -f \"package-lock.json\" ]; then\n echo \"Installing dependencies (npm)...\"\n npm ci 2>&1 | tail -3\n elif [ -f \"yarn.lock\" ]; then\n echo \"Installing dependencies (yarn)...\"\n yarn install --frozen-lockfile 2>&1 | tail -3\n elif [ -f \"pnpm-lock.yaml\" ]; then\n echo \"Installing dependencies (pnpm)...\"\n pnpm install --frozen-lockfile 2>&1 | tail -3\n fi\n\n # ── 3. Git state ──────────────────────────────────────────\n echo \"BRANCH=$(git branch --show-current)\"\n echo \"GIT_ROOT=$(git rev-parse --show-toplevel)\"\n\n # ── 4. Output PRD context ─────────────────────────────────\n echo \"PRD_DIR=$PRD_DIR\"\n echo \"=== PRD_JSON_START ===\"\n cat \"$PRD_DIR/prd.json\"\n echo \"\"\n echo \"=== PRD_JSON_END ===\"\n echo \"=== PRD_MD_START ===\"\n cat \"$PRD_DIR/prd.md\"\n echo \"\"\n echo \"=== PRD_MD_END ===\"\n echo \"=== PROGRESS_START ===\"\n if [ -f \"$PRD_DIR/progress.txt\" ]; then\n cat \"$PRD_DIR/progress.txt\"\n else\n echo \"(no progress yet)\"\n fi\n echo \"\"\n echo \"=== PROGRESS_END ===\"\n\n # ── 5. 
Summary ────────────────────────────────────────────\n TOTAL=$(grep -c '\"passes\"' \"$PRD_DIR/prd.json\" || true)\n DONE=$(grep -c '\"passes\": true' \"$PRD_DIR/prd.json\" || true)\n TOTAL=${TOTAL:-0}\n DONE=${DONE:-0}\n echo \"STORIES_TOTAL=$TOTAL\"\n echo \"STORIES_DONE=$DONE\"\n echo \"STORIES_REMAINING=$(( TOTAL - DONE ))\"\n\n # ═══════════════════════════════════════════════════════════════\n # NODE 4: RALPH IMPLEMENTATION LOOP\n # Fresh context each iteration. Reads PRD state from disk.\n # One story per iteration. Validates before committing.\n # ═══════════════════════════════════════════════════════════════\n\n - id: implement\n depends_on: [validate-prd]\n idle_timeout: 600000\n model: opus[1m]\n loop:\n prompt: |\n # Ralph Agent — Autonomous Story Implementation\n\n You are an autonomous coding agent in a FRESH session — you have no memory of previous iterations.\n Your job: Read state from disk, implement ONE story, validate, commit, update tracking, exit.\n\n **Golden Rule**: If validation fails, fix it before committing. Never commit broken code. Never skip validation.\n\n ---\n\n ## Phase 0: CONTEXT — Load Project State\n\n The upstream setup node produced this context:\n\n $validate-prd.output\n\n **User message**: $USER_MESSAGE\n\n ---\n\n ### 0.1 Parse PRD Directory\n\n Extract the `PRD_DIR=...` line from the context above. This is the directory containing your PRD files.\n Store this path — use it for ALL file operations below.\n\n ### 0.2 Read Current State (from disk, not from context above)\n\n The context above is a snapshot from before the loop started. Previous iterations may have changed files.\n **You MUST re-read from disk to get the current state:**\n\n 1. **Read `{prd-dir}/progress.txt`** — your only link to previous iterations\n - Check the `## Codebase Patterns` section FIRST for learnings from prior iterations\n - Check recent entries for gotchas to avoid\n 2. 
**Read `{prd-dir}/prd.json`** — the source of truth for story completion state\n 3. **Read `{prd-dir}/prd.md`** — full requirements, technical patterns, acceptance criteria\n\n ### 0.3 Read Project Rules\n\n ```bash\n cat CLAUDE.md\n ```\n\n Note all coding standards, patterns, and rules. Follow them exactly.\n\n **PHASE_0_CHECKPOINT:**\n - [ ] PRD directory identified\n - [ ] progress.txt read (or noted as absent)\n - [ ] prd.json read — know which stories pass/fail\n - [ ] prd.md read — understand requirements\n - [ ] CLAUDE.md rules noted\n\n ---\n\n ## Phase 1: SELECT — Pick Next Story\n\n ### 1.1 Find Eligible Story\n\n From `prd.json`, find the **highest priority** story where:\n - `passes` is `false`\n - ALL stories in `dependsOn` have `passes: true`\n\n **If ALL stories have `passes: true`** → Skip to Phase 6 (Completion).\n\n **If no eligible stories exist** (all remaining are blocked):\n ```\n BLOCKED: No eligible stories. Remaining stories and their blockers:\n - {story-id}: blocked by {dep-id} (passes: false)\n ```\n End normally. The loop will terminate on max_iterations.\n\n ### 1.2 Announce Selection\n\n ```\n ── Story Selected ──────────────────────────────────\n ID: {story-id}\n Title: {story-title}\n Priority: {priority}\n Dependencies: {deps or \"none\"}\n\n Acceptance Criteria:\n - {criterion 1}\n - {criterion 2}\n - ...\n ────────────────────────────────────────────────────\n ```\n\n After announcing the selected story, emit the story started event:\n ```bash\n bun run cli workflow event emit --run-id $WORKFLOW_ID --type ralph_story_started --data '{\"story_id\":\"{story-id}\",\"title\":\"{story-title}\"}' || true\n ```\n\n **PHASE_1_CHECKPOINT:**\n - [ ] Eligible story found (or all complete / all blocked)\n - [ ] Acceptance criteria understood\n - [ ] Dependencies verified as complete\n\n ---\n\n ## Phase 2: IMPLEMENT — Code the Story\n\n ### 2.1 Explore Before Coding\n\n Before writing any code:\n 1. 
Read all files you plan to modify — understand current state\n 2. Check `## Codebase Patterns` in progress.txt for discovered patterns\n 3. Look for similar implementations in the codebase to mirror\n 4. Read the `technicalNotes` field from the story in prd.json\n\n ### 2.2 Implementation Rules\n\n **DO:**\n - Implement ONLY the selected story — one story per iteration\n - Follow existing code patterns exactly (naming, structure, imports, error handling)\n - Match the project's coding standards from CLAUDE.md\n - Write or update tests as required by acceptance criteria\n - Keep changes minimal and focused\n\n **DON'T:**\n - Refactor unrelated code\n - Add improvements not in the acceptance criteria\n - Change formatting of lines you didn't modify\n - Install new dependencies without justification from prd.md\n - Touch files unrelated to this story\n - Over-engineer — do the simplest thing that satisfies the criteria\n\n ### 2.3 Verify Types After Each File\n\n After modifying each file, run:\n ```bash\n bun run type-check\n ```\n\n **If types fail:**\n 1. Read the error carefully\n 2. Fix the type issue in your code\n 3. Re-run type-check\n 4. Do NOT proceed to the next file until types pass\n\n **PHASE_2_CHECKPOINT:**\n - [ ] Only the selected story was implemented\n - [ ] Types compile after each file change\n - [ ] Tests written/updated as needed\n - [ ] No unrelated changes\n\n ---\n\n ## Phase 3: VALIDATE — Full Verification\n\n ### 3.1 Static Analysis\n\n ```bash\n bun run type-check && bun run lint\n ```\n\n **Must pass with zero errors and zero warnings.**\n\n **If lint fails:**\n 1. Run `bun run lint:fix` for auto-fixable issues\n 2. Manually fix remaining issues\n 3. Re-run lint\n 4. Proceed only when clean\n\n ### 3.2 Tests\n\n ```bash\n bun run test\n ```\n\n **All tests must pass.**\n\n **If tests fail:**\n 1. Read the failure output\n 2. Determine: bug in your implementation or pre-existing failure?\n 3. 
If your bug → fix the implementation (not the test)\n 4. If pre-existing → note it but don't fix unrelated tests\n 5. Re-run tests\n 6. Repeat until green\n\n ### 3.3 Format Check\n\n ```bash\n bun run format:check\n ```\n\n **If formatting fails:**\n ```bash\n bun run format\n ```\n\n ### 3.4 Verify Acceptance Criteria\n\n Go through EACH acceptance criterion from the story:\n - Is it satisfied by your implementation?\n - Can you verify it (read the code, run a command, check a file)?\n\n If a criterion is NOT met, go back to Phase 2 and fix it.\n\n **PHASE_3_CHECKPOINT:**\n - [ ] Type-check passes\n - [ ] Lint passes (0 errors, 0 warnings)\n - [ ] All tests pass\n - [ ] Format is clean\n - [ ] Every acceptance criterion verified\n\n ---\n\n ## Phase 4: COMMIT — Save Changes\n\n ### 4.1 Review Staged Changes\n\n ```bash\n git add -A\n git status\n git diff --cached --stat\n ```\n\n Verify only expected files are staged. If unexpected files appear, investigate before committing.\n\n ### 4.2 Write Commit Message\n\n ```bash\n git commit -m \"$(cat <<'EOF'\n feat: {story-title}\n\n Implements {story-id} from PRD.\n\n Changes:\n - {change 1}\n - {change 2}\n - {change 3}\n EOF\n )\"\n ```\n\n **Commit message rules:**\n - Prefix: `feat:` for features, `fix:` for bugs, `refactor:` for refactors\n - Title: the story title (not the PRD name)\n - Body: list the actual changes made\n - Do NOT include AI attribution\n\n **PHASE_4_CHECKPOINT:**\n - [ ] Only expected files committed\n - [ ] Commit message is clear and accurate\n - [ ] Working directory is clean after commit\n\n ---\n\n ## Phase 5: TRACK — Update Progress Files\n\n ### 5.1 Update prd.json\n\n Set `passes: true` and add a note for the completed story:\n\n ```json\n {\n \"id\": \"{story-id}\",\n \"passes\": true,\n \"notes\": \"Implemented in iteration {N}. 
Files: {list}.\"\n }\n ```\n\n After updating prd.json, emit the story completed event:\n ```bash\n bun run cli workflow event emit --run-id $WORKFLOW_ID --type ralph_story_completed --data '{\"story_id\":\"{story-id}\",\"title\":\"{story-title}\"}' || true\n ```\n\n ### 5.2 Update progress.txt\n\n **Append** to `{prd-dir}/progress.txt`:\n\n ```\n ## {ISO Date} — {story-id}: {story-title}\n\n **Status**: PASSED\n **Files changed**:\n - {file1} — {what changed}\n - {file2} — {what changed}\n\n **Acceptance criteria verified**:\n - [x] {criterion 1}\n - [x] {criterion 2}\n\n **Learnings**:\n - {Any pattern discovered}\n - {Any gotcha encountered}\n - {Any deviation from expected approach}\n\n ---\n ```\n\n ### 5.3 Update Codebase Patterns (if applicable)\n\n If you discovered a **reusable pattern** that future iterations should know about, **prepend** it to the `## Codebase Patterns` section at the TOP of progress.txt.\n\n Format:\n ```\n ## Codebase Patterns\n\n ### {Pattern Name}\n - **Where**: `{file:lines}`\n - **Pattern**: {description}\n - **Example**: `{code snippet}`\n ```\n\n If the `## Codebase Patterns` section doesn't exist yet, create it at the top of the file.\n\n **PHASE_5_CHECKPOINT:**\n - [ ] prd.json updated with `passes: true`\n - [ ] progress.txt appended with iteration details\n - [ ] Codebase patterns updated (if applicable)\n\n ---\n\n ## Phase 6: COMPLETE — Check All Stories\n\n ### 6.1 Re-read prd.json\n\n ```bash\n cat {prd-dir}/prd.json\n ```\n\n Count stories where `passes: false`.\n\n ### 6.2 If ALL Stories Pass\n\n 1. **Push the branch:**\n ```bash\n git push -u origin HEAD\n ```\n\n 2. **Read the PR template:**\n Look for a PR template in the repo — check `.github/pull_request_template.md`, `.github/PULL_REQUEST_TEMPLATE.md`, and `docs/pull_request_template.md`. Read whichever one exists.\n\n If a template was found, fill in **every section** using the context from this implementation. 
Don't skip sections or leave placeholders — fill them honestly based on the actual changes (summary, architecture, validation evidence, security, compatibility, rollback, etc.).\n\n If no template was found, write a summary with: problem, what changed, stories table, and validation evidence.\n\n 3. **Create a draft PR** using `gh pr create --draft --base $BASE_BRANCH --title \"feat: {PRD feature name}\"` with the filled-in template as the body. Use a HEREDOC for the body.\n\n 4. **Output completion signal:**\n ```\n COMPLETE\n ```\n\n ### 6.3 If Stories Remain\n\n Report status and end normally:\n ```\n ── Iteration Complete ──────────────────────────────\n Story completed: {story-id} — {story-title}\n Stories remaining: {count}\n Next eligible: {next-story-id} — {next-story-title}\n ────────────────────────────────────────────────────\n ```\n\n The loop engine will start the next iteration with a fresh context.\n\n ---\n\n ## Handling Edge Cases\n\n ### Validation fails repeatedly\n - If type-check or tests fail 3+ times on the same error, step back\n - Re-read the acceptance criteria — you may be misunderstanding the requirement\n - Check if the story is too large (needs breaking down)\n - Note the blocker in progress.txt and end the iteration\n\n ### Story is too large for one iteration\n - Implement the minimum viable subset that satisfies the most critical acceptance criteria\n - Set `passes: true` only if ALL criteria are met\n - If you can't meet all criteria, leave `passes: false` and note what's done in progress.txt\n - The next iteration will pick it up and continue\n\n ### Pre-existing test failures\n - If tests were failing BEFORE your changes, note them but don't fix unrelated code\n - Run only the test files related to your changes if the full suite has pre-existing issues\n - Document pre-existing failures in progress.txt\n\n ### Dependency install fails\n - Check if `bun.lock` or equivalent exists\n - Try `bun install` without `--frozen-lockfile`\n 
- Note the issue in progress.txt\n\n ### Git state is dirty at iteration start\n - This shouldn't happen (fresh worktree), but if it does:\n - Run `git status` to understand what's dirty\n - If it's leftover from a failed previous iteration, commit or stash\n - Never discard changes silently\n\n ### Blocked stories — all remaining have unmet dependencies\n - Report the dependency chain in your output\n - Check if a dependency was incorrectly left as `passes: false`\n - If a dependency should be `passes: true` (the code exists and works), fix prd.json\n - Otherwise, end the iteration — the loop will exhaust max_iterations\n\n ---\n\n ## File Format Reference\n\n ### prd.json Schema\n\n ```json\n {\n \"feature\": \"Feature Name\",\n \"issueNumber\": 123,\n \"userStories\": [\n {\n \"id\": \"US-001\",\n \"title\": \"Short title\",\n \"description\": \"As a..., I want..., so that...\",\n \"acceptanceCriteria\": [\"criterion 1\", \"criterion 2\"],\n \"technicalNotes\": \"Implementation hints\",\n \"dependsOn\": [\"US-000\"],\n \"priority\": 1,\n \"passes\": false,\n \"notes\": \"\"\n }\n ]\n }\n ```\n\n ### progress.txt Format\n\n ```\n ## Codebase Patterns\n\n ### {Pattern Name}\n - Where: `file:lines`\n - Pattern: description\n - Example: `code`\n\n ---\n\n ## {Date} — {story-id}: {title}\n\n **Status**: PASSED\n **Files changed**: ...\n **Acceptance criteria verified**: ...\n **Learnings**: ...\n\n ---\n ```\n\n ---\n\n ## Success Criteria\n\n - **ONE_STORY**: Exactly one story implemented per iteration\n - **VALIDATED**: Type-check + lint + tests + format all pass before commit\n - **COMMITTED**: Changes committed with clear message\n - **TRACKED**: prd.json and progress.txt updated accurately\n - **PATTERNS_SHARED**: Discovered patterns added to progress.txt for future iterations\n - **NO_SCOPE_CREEP**: No unrelated changes, no refactoring, no \"improvements\"\n until: COMPLETE\n max_iterations: 15\n fresh_context: true\n\n # 
═══════════════════════════════════════════════════════════════\n # NODE 5: COMPLETION REPORT\n # Reads final state and produces a summary.\n # ═══════════════════════════════════════════════════════════════\n\n - id: report\n depends_on: [implement]\n prompt: |\n # Completion Report\n\n The Ralph implementation loop has finished. Generate a completion report.\n\n ## Context\n\n **Loop output (last iteration):**\n\n $implement.output\n\n **Setup context:**\n\n $validate-prd.output\n\n ---\n\n ## Instructions\n\n ### 1. Read Final State\n\n Extract the `PRD_DIR=...` from the setup context above.\n Read the CURRENT files from disk:\n\n ```bash\n cat {prd-dir}/prd.json\n cat {prd-dir}/progress.txt\n ```\n\n ### 2. Gather Git Info\n\n ```bash\n git log --oneline --no-merges $(git merge-base HEAD $BASE_BRANCH)..HEAD\n git diff --stat $(git merge-base HEAD $BASE_BRANCH)..HEAD\n ```\n\n ### 3. Check PR Status\n\n ```bash\n gh pr view HEAD --json url,number,state 2>/dev/null || echo \"No PR found\"\n ```\n\n ### 4. Generate Report\n\n Output this format:\n\n ```\n ═══════════════════════════════════════════════════════\n RALPH DAG — COMPLETION REPORT\n ═══════════════════════════════════════════════════════\n\n Feature: {feature name from prd.json}\n PRD: {prd-dir}\n Branch: {branch name}\n PR: {url or \"not created\"}\n\n ── Stories ─────────────────────────────────────────\n\n | ID | Title | Status |\n |----|-------|--------|\n {for each story from prd.json}\n\n Total: {N}/{M} stories passing\n\n ── Commits ─────────────────────────────────────────\n\n {git log output}\n\n ── Files Changed ─────────────────────────────────\n\n {git diff --stat output}\n\n ── Patterns Discovered ─────────────────────────────\n\n {from ## Codebase Patterns in progress.txt, or \"None\"}\n\n ═══════════════════════════════════════════════════════\n ```\n\n Keep it factual. 
No commentary — just the data.\n", + "archon-refactor-safely": "name: archon-refactor-safely\ndescription: |\n Use when: User wants to refactor code safely with continuous validation and behavior preservation.\n Triggers: \"refactor\", \"refactor safely\", \"split this file\", \"extract module\", \"break up\",\n \"decompose\", \"safe refactor\", \"split file\", \"extract into modules\".\n Does: Scans refactoring scope -> analyzes impact (read-only) -> plans ordered task list ->\n executes with type-check hooks after every edit -> validates full suite ->\n verifies behavior preservation (read-only) -> creates PR with before/after comparison.\n NOT for: Bug fixes (use archon-fix-github-issue), feature development (use archon-feature-development),\n general architecture sweeps (use archon-architect), PR reviews.\n\n Key safety features:\n - Analysis and verification nodes are read-only (denied_tools: [Write, Edit, Bash])\n - PreToolUse hooks check if each edit is in the plan\n - PostToolUse hooks force type-check after every file change\n - Behavior verification confirms no logic changes after refactoring\n\nprovider: claude\n\nnodes:\n # ═══════════════════════════════════════════════════════════════\n # PHASE 1: SCAN — Find files matching the refactoring target\n # ═══════════════════════════════════════════════════════════════\n\n - id: scan-scope\n bash: |\n echo \"=== REFACTORING TARGET ===\"\n echo \"User request: $ARGUMENTS\"\n echo \"\"\n\n echo \"=== FILE SIZE ANALYSIS (source files by size) ===\"\n find . -name '*.ts' -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*' -not -name '*.test.ts' -not -name '*.d.ts' \\\n -exec wc -l {} + 2>/dev/null | sort -rn | head -30\n echo \"\"\n\n echo \"=== FILES OVER 500 LINES ===\"\n find . 
-name '*.ts' -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*' -not -name '*.test.ts' -not -name '*.d.ts' \\\n -exec sh -c 'lines=$(wc -l < \"$1\"); if [ \"$lines\" -gt 500 ]; then echo \"$lines $1\"; fi' _ {} \\; 2>/dev/null | sort -rn\n echo \"\"\n\n echo \"=== FUNCTION COUNT PER FILE (top 20) ===\"\n for f in $(find . -name '*.ts' -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*' -not -name '*.test.ts' -not -name '*.d.ts'); do\n count=$(grep -cE '^\\s*(export\\s+)?(async\\s+)?function\\s|=>\\s*\\{' \"$f\" 2>/dev/null) || count=0\n if [ \"$count\" -gt 5 ]; then\n echo \"$count functions: $f\"\n fi\n done | sort -rn | head -20\n echo \"\"\n\n echo \"=== EXPORT ANALYSIS (files with many exports) ===\"\n for f in $(find . -name '*.ts' -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*' -not -name '*.test.ts' -not -name '*.d.ts'); do\n count=$(grep -c \"^export \" \"$f\" 2>/dev/null) || count=0\n if [ \"$count\" -gt 5 ]; then\n echo \"$count exports: $f\"\n fi\n done | sort -rn | head -20\n timeout: 60000\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 2: ANALYZE IMPACT — Read-only deep analysis\n # Maps call sites, identifies risk areas, understands dependencies\n # ═══════════════════════════════════════════════════════════════\n\n - id: analyze-impact\n prompt: |\n You are a senior software engineer analyzing code for a safe refactoring.\n\n ## Refactoring Request\n\n $ARGUMENTS\n\n ## Codebase Scan Results\n\n $scan-scope.output\n\n ## Instructions\n\n 1. Identify the PRIMARY file(s) targeted for refactoring based on the user's request\n and the scan results above\n 2. Read each target file thoroughly — understand every function, type, and export\n 3. 
For each target file, map ALL call sites:\n - Use Grep to find every import of the target file across the codebase\n - Track which specific exports are used and where\n - Note any dynamic imports or re-exports through index files\n 4. Identify risk areas:\n - Functions with complex internal dependencies (shared closures, module-level state)\n - Circular dependencies between functions in the file\n - Any module-level side effects (top-level `const`, initialization code)\n - Exports that are part of the public API vs internal-only\n 5. Check for existing tests:\n - Find test files for the target module(s)\n - Note what's tested and what isn't\n\n ## Output\n\n Write a thorough impact analysis to `$ARTIFACTS_DIR/impact-analysis.md` with:\n\n ### Target Files\n - File path, line count, function count\n - List of all exported symbols with brief descriptions\n\n ### Dependency Map\n - Which files import from the target (with specific imports used)\n - Which files the target imports from\n\n ### Risk Assessment\n - Module-level state or side effects\n - Complex internal dependencies between functions\n - Public API surface that must be preserved exactly\n\n ### Test Coverage\n - Existing test files and what they cover\n - Critical paths that must remain tested\n\n ### Recommended Decomposition Strategy\n - Suggested module boundaries (which functions group together)\n - Rationale for each grouping (cohesion, shared dependencies)\n depends_on: [scan-scope]\n context: fresh\n denied_tools: [Write, Edit, Bash]\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 3: PLAN REFACTOR — Ordered task list with rollback strategy\n # Read-only: produces the plan, does not execute it\n # ═══════════════════════════════════════════════════════════════\n\n - id: plan-refactor\n prompt: |\n You are planning a safe refactoring. 
You must produce a precise, ordered plan\n that another agent will follow literally.\n\n ## Impact Analysis\n\n $analyze-impact.output\n\n ## Refactoring Goal\n\n $ARGUMENTS\n\n ## Principles\n\n - **Behavior preservation**: The refactoring must NOT change any behavior — only structure\n - **Incremental**: Each step must leave the codebase in a compilable state\n - **Reversible**: Each step can be independently reverted\n - **No mixed concerns**: Do not combine refactoring with bug fixes or improvements\n - **Preserve public API**: All existing exports must remain accessible from the same import paths\n - **Maximum file size**: Target 500 lines or fewer per file after refactoring\n\n ## Instructions\n\n 1. Read the impact analysis from `$ARTIFACTS_DIR/impact-analysis.md`\n 2. Read the target file(s) to understand the current structure\n 3. Design the decomposition:\n - Group related functions into cohesive modules\n - Identify shared utilities, types, and constants\n - Plan the new file structure with descriptive names\n 4. 
Write an ordered task list where each task is:\n - Independent and leaves code compilable after completion\n - Specific about what to extract and where\n - Clear about import updates needed\n\n ## Output\n\n Write the plan to `$ARTIFACTS_DIR/refactor-plan.md` with:\n\n ### File Structure (Before)\n ```\n [current structure with line counts]\n ```\n\n ### File Structure (After)\n ```\n [planned structure with estimated line counts]\n ```\n\n ### Ordered Tasks\n\n For each task:\n ```\n ## Task N: [brief description]\n\n **Action**: CREATE | EXTRACT | UPDATE\n **Source**: [source file]\n **Target**: [target file]\n **What moves**:\n - function functionName (lines X-Y)\n - type TypeName (lines X-Y)\n\n **Import updates needed**:\n - [file]: change import from [old] to [new]\n\n **Rollback**: [how to undo this specific step]\n ```\n\n ### Validation Commands\n - Type check: `bun run type-check`\n - Lint: `bun run lint`\n - Tests: `bun run test`\n - Format: `bun run format:check`\n depends_on: [analyze-impact]\n context: fresh\n denied_tools: [Write, Edit, Bash]\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 4: EXECUTE REFACTOR — Implements the plan with guardrails\n # Hooks enforce type-check after every edit and plan adherence\n # ═══════════════════════════════════════════════════════════════\n\n - id: execute-refactor\n model: opus[1m]\n prompt: |\n You are executing a refactoring plan with strict safety guardrails.\n\n ## Plan\n\n Read the full plan from `$ARTIFACTS_DIR/refactor-plan.md` — follow it LITERALLY.\n\n ## Rules\n\n - **Follow the plan exactly** — do not add extra improvements or cleanups\n - **One task at a time** — complete each task fully before starting the next\n - **Type-check after every file change** — you'll be prompted to do this after each edit\n - **Preserve all behavior** — refactoring means moving code, not changing it\n - **Preserve the public API** — if the original file exported something, it must still 
be\n importable from the same path (use re-exports in the original file if needed)\n - **Update all import sites** — every file that imported from the original must be updated\n - **Commit after each logical task** — one commit per plan task with a clear message\n\n ## Process for Each Task\n\n 1. Read the plan task\n 2. Read the source file to understand current state\n 3. Create the new file (if extracting) with the functions/types being moved\n 4. Update the source file to remove the moved code and add imports from the new file\n 5. Update the original file's exports to re-export from the new module (API preservation)\n 6. Use Grep to find and update ALL import sites across the codebase\n 7. Run `bun run type-check` to verify (you'll be reminded by hooks)\n 8. Commit: `git add -A && git commit -m \"refactor: [task description]\"`\n 9. Move to next task\n\n ## Handling Problems\n\n - If type-check fails after a change: fix it immediately before proceeding\n - If a task is more complex than planned: complete it anyway, note the deviation\n - If you discover the plan missed an import site: update it and note it\n - NEVER skip a task — complete them in order\n depends_on: [plan-refactor]\n context: fresh\n hooks:\n PreToolUse:\n - matcher: \"Write|Edit\"\n response:\n hookSpecificOutput:\n hookEventName: PreToolUse\n additionalContext: >\n Before modifying this file: Is this file in your refactoring plan\n ($ARTIFACTS_DIR/refactor-plan.md)? If it's not a planned target file\n AND not a file that imports from the target, explain why you're touching it.\n Unplanned changes increase risk.\n PostToolUse:\n - matcher: \"Write|Edit\"\n response:\n systemMessage: >\n You just modified a file. STOP and do these things NOW before making any\n other changes:\n 1. Run `bun run type-check` to verify the change compiles\n 2. If type-check fails, fix the error immediately\n 3. 
Verify you preserved the exact same behavior — no logic changes, only structural moves\n Only proceed to the next change after type-check passes.\n - matcher: \"Bash\"\n response:\n hookSpecificOutput:\n hookEventName: PostToolUse\n additionalContext: >\n Check the exit code. If type-check or any validation failed, fix the issue\n before continuing. Do not accumulate broken state.\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 5: VALIDATE — Full test suite (bash, no AI escape hatch)\n # ═══════════════════════════════════════════════════════════════\n\n - id: validate\n bash: |\n echo \"=== TYPE CHECK ===\"\n bun run type-check 2>&1\n TC_EXIT=$?\n\n echo \"\"\n echo \"=== LINT ===\"\n bun run lint 2>&1\n LINT_EXIT=$?\n\n echo \"\"\n echo \"=== FORMAT CHECK ===\"\n bun run format:check 2>&1\n FMT_EXIT=$?\n\n echo \"\"\n echo \"=== TESTS ===\"\n bun run test 2>&1\n TEST_EXIT=$?\n\n echo \"\"\n echo \"=== FILE SIZE CHECK ===\"\n echo \"Files still over 500 lines:\"\n find . 
-name '*.ts' -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*' -not -name '*.test.ts' -not -name '*.d.ts' \\\n -exec sh -c 'lines=$(wc -l < \"$1\"); if [ \"$lines\" -gt 500 ]; then echo \"$lines $1\"; fi' _ {} \\; 2>/dev/null | sort -rn\n echo \"\"\n\n echo \"=== RESULTS ===\"\n echo \"Type check: $([ $TC_EXIT -eq 0 ] && echo 'PASS' || echo 'FAIL')\"\n echo \"Lint: $([ $LINT_EXIT -eq 0 ] && echo 'PASS' || echo 'FAIL')\"\n echo \"Format: $([ $FMT_EXIT -eq 0 ] && echo 'PASS' || echo 'FAIL')\"\n echo \"Tests: $([ $TEST_EXIT -eq 0 ] && echo 'PASS' || echo 'FAIL')\"\n\n if [ $TC_EXIT -eq 0 ] && [ $LINT_EXIT -eq 0 ] && [ $FMT_EXIT -eq 0 ] && [ $TEST_EXIT -eq 0 ]; then\n echo \"VALIDATION_STATUS: PASS\"\n else\n echo \"VALIDATION_STATUS: FAIL\"\n fi\n depends_on: [execute-refactor]\n timeout: 300000\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 6: FIX VALIDATION FAILURES (if any)\n # Only does real work if validation failed\n # ═══════════════════════════════════════════════════════════════\n\n - id: fix-failures\n prompt: |\n Review the validation output below.\n\n ## Validation Output\n\n $validate.output\n\n ## Instructions\n\n If the output ends with \"VALIDATION_STATUS: PASS\", respond with\n \"All checks passed — no fixes needed.\" and stop.\n\n If there are failures:\n\n 1. Read the validation failures carefully\n 2. Fix ONLY what's broken — do not make additional improvements\n 3. If a fix requires changing behavior (not just fixing a type/lint error),\n revert the original change instead\n 4. Run the specific failing check after each fix to confirm it passes\n 5. 
After all fixes, run the full validation suite: `bun run validate`\n\n If there are files still over 500 lines, note them but do NOT attempt further\n splitting in this node — that would require a new plan cycle.\n depends_on: [validate]\n context: fresh\n hooks:\n PostToolUse:\n - matcher: \"Write|Edit\"\n response:\n systemMessage: >\n You just made a fix. Run the specific failing validation check NOW\n to verify your fix works. Do not batch fixes — verify each one.\n PreToolUse:\n - matcher: \"Write|Edit\"\n response:\n hookSpecificOutput:\n hookEventName: PreToolUse\n additionalContext: >\n You are fixing validation failures only. Do not make any changes\n beyond what's needed to pass the failing checks. If in doubt, revert\n the original change that caused the failure.\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 7: VERIFY BEHAVIOR — Read-only confirmation\n # Ensures the refactoring preserved behavior by tracing call paths\n # ═══════════════════════════════════════════════════════════════\n\n - id: verify-behavior\n prompt: |\n You are a code reviewer verifying that a refactoring preserved exact behavior.\n You can ONLY read files — you cannot make any changes.\n\n ## Refactoring Plan\n\n Read the plan from `$ARTIFACTS_DIR/refactor-plan.md` to understand what was intended.\n\n ## Instructions\n\n 1. Use Grep and Glob to find all files in the new module locations listed in\n the plan, then Read each one. (Note: Bash is denied in this read-only node,\n so use Grep/Glob/Read to discover changes instead of git commands.)\n 2. For each new file created by the refactoring:\n - Verify the extracted functions match the originals exactly (no logic changes)\n - Check that all types and interfaces are preserved\n 3. For the original file(s):\n - Verify re-exports exist for all symbols that were previously exported\n - Confirm no function bodies were changed (only moved)\n 4. 
For all import sites updated:\n - Verify imports resolve to the correct new locations\n - Check that no import was missed\n 5. Verify the public API is preserved:\n - Any code that imported from the original file should still work unchanged\n - Re-exports in the original file should cover all moved symbols\n\n ## Output\n\n Write your verification report to `$ARTIFACTS_DIR/behavior-verification.md`:\n\n ### Verdict: PASS | FAIL\n\n ### Functions Verified\n | Function | Original Location | New Location | Behavior Preserved |\n |----------|------------------|--------------|-------------------|\n | funcName | file.ts:42 | new-file.ts:10 | Yes/No |\n\n ### Public API Check\n - [ ] All original exports still accessible from original import path\n - [ ] Re-exports correctly configured\n\n ### Import Sites Updated\n - [ ] All N import sites verified\n\n ### Issues Found\n [List any behavior changes detected, or \"None — refactoring is behavior-preserving\"]\n depends_on: [fix-failures]\n context: fresh\n denied_tools: [Write, Edit, Bash]\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 8: CREATE PR — Detailed description with before/after\n # ═══════════════════════════════════════════════════════════════\n\n - id: create-pr\n prompt: |\n Create a pull request for the refactoring.\n\n ## Context\n\n - **Refactoring goal**: $ARGUMENTS\n - **Impact analysis**: Read `$ARTIFACTS_DIR/impact-analysis.md`\n - **Refactoring plan**: Read `$ARTIFACTS_DIR/refactor-plan.md`\n - **Validation**: $validate.output\n - **Behavior verification**: Read `$ARTIFACTS_DIR/behavior-verification.md`\n\n ## Instructions\n\n 1. Stage all changes and create a final commit if there are uncommitted changes\n 2. Push the branch: `git push -u origin HEAD`\n 3. Check if a PR already exists: `gh pr list --head $(git branch --show-current)`\n 4. Create the PR with the format below\n 5. 
Save the PR URL to `$ARTIFACTS_DIR/.pr-url`\n\n ## PR Format\n\n - **Title**: `refactor: [concise description]` (under 70 chars)\n - **Body**:\n\n ```markdown\n ## Refactoring: [goal]\n\n ### Motivation\n\n [Why this refactoring was needed — file sizes, complexity, maintainability]\n\n ### Before\n\n ```\n [Original file structure with line counts from the plan]\n ```\n\n ### After\n\n ```\n [New file structure with line counts]\n ```\n\n ### Changes\n\n [For each new module: what was extracted and why it's a cohesive unit]\n\n ### Safety\n\n - [x] Type check passes\n - [x] Lint passes\n - [x] Tests pass (all existing tests still green)\n - [x] Public API preserved (re-exports maintain backward compatibility)\n - [x] Behavior verification passed (read-only audit confirmed no logic changes)\n - [x] Each task committed separately for easy review/revert\n\n ### Review Guide\n\n Each commit represents one extraction step. Review commits individually for easiest review.\n All commits are behavior-preserving structural moves.\n ```\n depends_on: [verify-behavior]\n context: fresh\n hooks:\n PreToolUse:\n - matcher: \"Write|Edit\"\n response:\n hookSpecificOutput:\n hookEventName: PreToolUse\n permissionDecision: deny\n permissionDecisionReason: \"PR creation node — do not modify source files. Use only git and gh commands.\"\n PostToolUse:\n - matcher: \"Bash\"\n response:\n hookSpecificOutput:\n hookEventName: PostToolUse\n additionalContext: >\n Verify this command succeeded. 
If git push or gh pr create failed,\n read the error message carefully before retrying.\n", "archon-remotion-generate": "name: archon-remotion-generate\ndescription: |\n Use when: User wants to generate or modify a Remotion video composition using AI.\n Triggers: \"create a video\", \"generate video\", \"remotion\", \"make an animation\",\n \"video about\", \"animate\".\n Does: AI writes Remotion React code -> renders preview stills -> renders full video ->\n summarizes the output.\n Requires: A Remotion project in the working directory (src/index.ts, src/Root.tsx).\n Optional: Install the remotion-best-practices skill for higher quality output:\n npx skills add remotion-dev/skills\n\nnodes:\n # ── Layer 0: Check project structure ──────────────────────────────────\n - id: check-project\n bash: |\n if [ ! -f \"src/index.ts\" ] || [ ! -f \"src/Root.tsx\" ]; then\n echo \"ERROR: Not a Remotion project. Expected src/index.ts and src/Root.tsx.\"\n echo \"Run 'npx create-video@latest' first, then run this workflow from that directory.\"\n exit 1\n fi\n echo \"Remotion project detected.\"\n npx remotion compositions src/index.ts 2>&1 | tail -5\n echo \"\"\n echo \"PROJECT_READY\"\n timeout: 60000\n\n # ── Layer 1: Generate composition code ────────────────────────────────\n - id: generate\n prompt: |\n You are working in a Remotion video project. 
The project root is the current directory.\n\n Find and read the existing composition files to understand the project structure.\n Look in src/ for Root.tsx and any composition components.\n\n Now create or modify the composition to match this request:\n\n $ARGUMENTS\n\n Rules:\n - Use useCurrentFrame() and interpolate()/spring() for ALL animations\n - Never use CSS transitions, Math.random(), setTimeout, or Date.now()\n - Use AbsoluteFill for layout, Sequence for scene timing\n - Use the <Img> component from 'remotion' (not native <img>) for images\n - Keep dimensions 1920x1080 at 30 fps unless the user specifies otherwise\n - Update the Zod schema and defaultProps in Root.tsx if you change props\n - Use even numbers for width/height (required for MP4)\n - Always clamp interpolations: extrapolateLeft: 'clamp', extrapolateRight: 'clamp'\n\n After writing the code, read it back to verify it looks correct.\n depends_on: [check-project]\n skills:\n - remotion-best-practices\n allowed_tools:\n - Read\n - Write\n - Edit\n - Glob\n\n # ── Layer 2: Render preview stills ────────────────────────────────────\n - id: render-preview\n bash: |\n mkdir -p out\n COMP_ID=$(npx remotion compositions src/index.ts 2>&1 | grep -E '^\\S' | head -1 | awk '{print $1}')\n if [ -z \"$COMP_ID\" ]; then\n echo \"RENDER_FAILED: Could not detect composition ID\"\n exit 1\n fi\n echo \"Composition: $COMP_ID\"\n\n DURATION=$(npx remotion compositions src/index.ts 2>&1 | grep -E '^\\S' | head -1 | awk '{print $4}')\n MID_FRAME=$(( ${DURATION:-150} / 2 ))\n LATE_FRAME=$(( ${DURATION:-150} * 3 / 4 ))\n\n echo \"Rendering preview stills at frames 1, $MID_FRAME, $LATE_FRAME...\"\n npx remotion still src/index.ts \"$COMP_ID\" out/preview-early.png --frame=1 2>&1 | tail -2\n npx remotion still src/index.ts \"$COMP_ID\" out/preview-mid.png --frame=$MID_FRAME 2>&1 | tail -2\n npx remotion still src/index.ts \"$COMP_ID\" out/preview-late.png --frame=$LATE_FRAME 2>&1 | tail -2\n RESULT=$?\n\n if [ $RESULT -eq 0 ]; 
then\n echo \"\"\n echo \"RENDER_SUCCESS\"\n ls -la out/preview-*.png\n else\n echo \"RENDER_FAILED\"\n fi\n depends_on: [generate]\n timeout: 120000\n\n # ── Layer 3: Render full video ────────────────────────────────────────\n - id: render-video\n bash: |\n COMP_ID=$(npx remotion compositions src/index.ts 2>&1 | grep -E '^\\S' | head -1 | awk '{print $1}')\n echo \"Rendering full video: $COMP_ID\"\n npx remotion render src/index.ts \"$COMP_ID\" out/video.mp4 --codec=h264 --crf=18 2>&1 | tail -10\n RESULT=$?\n\n if [ $RESULT -eq 0 ]; then\n echo \"\"\n echo \"VIDEO_RENDER_SUCCESS\"\n ls -la out/video.mp4\n else\n echo \"VIDEO_RENDER_FAILED\"\n fi\n depends_on: [render-preview]\n timeout: 300000\n\n # ── Layer 4: Summary ──────────────────────────────────────────────────\n - id: summary\n prompt: |\n A Remotion video was generated and rendered.\n\n Original request: $ARGUMENTS\n\n Preview render: $render-preview.output\n Video render: $render-video.output\n\n Read the generated composition code and the preview stills (out/preview-early.png,\n out/preview-mid.png, out/preview-late.png) to verify the output.\n\n Summarize:\n 1. What the video contains (based on code and stills)\n 2. Whether the renders succeeded\n 3. 
Where the output file is (out/video.mp4)\n depends_on: [render-video]\n allowed_tools:\n - Read\n model: haiku\n", "archon-resolve-conflicts": "name: archon-resolve-conflicts\ndescription: |\n Use when: PR has merge conflicts that need resolution.\n Triggers: \"resolve conflicts\", \"fix merge conflicts\", \"rebase this PR\", \"resolve this\",\n \"fix conflicts\", \"merge conflicts\", \"rebase and fix\".\n Does: Fetches latest base branch -> analyzes conflicts -> auto-resolves simple conflicts ->\n presents options for complex conflicts -> commits and pushes resolution.\n NOT for: PRs without conflicts, general rebasing without conflicts, squashing commits.\n\n This workflow helps resolve merge conflicts by analyzing the conflicting changes,\n automatically resolving where intent is clear, and presenting options for complex conflicts.\n\nnodes:\n - id: resolve\n command: archon-resolve-merge-conflicts\n", "archon-smart-pr-review": "name: archon-smart-pr-review\ndescription: |\n Use when: User wants a smart, efficient PR review that adapts to PR complexity.\n Triggers: \"smart review\", \"review this PR\", \"review PR #123\", \"efficient review\",\n \"smart PR review\", \"quick review\".\n Does: Gathers PR scope -> classifies complexity -> routes to only relevant review agents ->\n synthesizes findings -> auto-fixes CRITICAL/HIGH issues.\n NOT for: When you explicitly want ALL review agents (use archon-comprehensive-pr-review instead).\n\n Unlike the comprehensive review, this workflow classifies the PR first and only runs\n the review agents that are relevant. A 3-line typo fix skips test-coverage and docs-impact.\n\nnodes:\n - id: scope\n command: archon-pr-review-scope\n\n - id: sync\n command: archon-sync-pr-with-main\n depends_on: [scope]\n\n - id: classify\n prompt: |\n You are a PR complexity classifier. 
Analyze the PR scope below and determine\n which review agents should run.\n\n ## PR Scope\n $scope.output\n\n ## Rules\n - **Code review**: Always run unless the diff is empty or only touches non-code files\n (e.g. README-only, config-only, or .yaml-only changes).\n - **Error handling**: Run if the diff touches code with try/catch, error handling,\n async/await, or adds new failure paths.\n - **Test coverage**: Run if the diff touches source code (not just tests, docs, or config).\n - **Comment quality**: Run if the diff adds or modifies comments, docstrings, JSDoc,\n or significant documentation within code files.\n - **Docs impact**: Run if the diff adds/removes/renames public APIs, commands, CLI flags,\n environment variables, or user-facing features.\n\n Classify the PR complexity:\n - **trivial**: Typo fixes, formatting, single-line changes, version bumps\n - **small**: 1-3 files, straightforward logic, no architectural changes\n - **medium**: 4-10 files, moderate logic changes, some cross-cutting concerns\n - **large**: 10+ files, architectural changes, new subsystems, complex refactors\n\n Provide your reasoning for each decision.\n depends_on: [scope]\n model: haiku\n allowed_tools: []\n output_format:\n type: object\n properties:\n run_code_review:\n type: string\n enum: [\"true\", \"false\"]\n run_error_handling:\n type: string\n enum: [\"true\", \"false\"]\n run_test_coverage:\n type: string\n enum: [\"true\", \"false\"]\n run_comment_quality:\n type: string\n enum: [\"true\", \"false\"]\n run_docs_impact:\n type: string\n enum: [\"true\", \"false\"]\n complexity:\n type: string\n enum: [\"trivial\", \"small\", \"medium\", \"large\"]\n reasoning:\n type: string\n required:\n - run_code_review\n - run_error_handling\n - run_test_coverage\n - run_comment_quality\n - run_docs_impact\n - complexity\n - reasoning\n\n - id: code-review\n command: archon-code-review-agent\n depends_on: [classify, sync]\n when: \"$classify.output.run_code_review == 'true'\"\n\n 
- id: error-handling\n command: archon-error-handling-agent\n depends_on: [classify, sync]\n when: \"$classify.output.run_error_handling == 'true'\"\n\n - id: test-coverage\n command: archon-test-coverage-agent\n depends_on: [classify, sync]\n when: \"$classify.output.run_test_coverage == 'true'\"\n\n - id: comment-quality\n command: archon-comment-quality-agent\n depends_on: [classify, sync]\n when: \"$classify.output.run_comment_quality == 'true'\"\n\n - id: docs-impact\n command: archon-docs-impact-agent\n depends_on: [classify, sync]\n when: \"$classify.output.run_docs_impact == 'true'\"\n\n - id: synthesize\n command: archon-synthesize-review\n depends_on: [code-review, error-handling, test-coverage, comment-quality, docs-impact]\n trigger_rule: one_success\n\n - id: implement-fixes\n command: archon-implement-review-fixes\n depends_on: [synthesize]\n\n # Optional: push notification when review completes.\n # To enable, create .archon/mcp/ntfy.json — see docs/mcp-servers.md\n - id: check-ntfy\n bash: \"test -f .archon/mcp/ntfy.json && echo 'true' || echo 'false'\"\n depends_on: [implement-fixes]\n\n - id: notify\n depends_on: [check-ntfy, synthesize, implement-fixes]\n when: \"$check-ntfy.output == 'true'\"\n trigger_rule: all_success\n mcp: .archon/mcp/ntfy.json\n allowed_tools: []\n prompt: |\n Send a push notification summarizing the PR review results.\n\n Review synthesis:\n $synthesize.output\n\n Fix results:\n $implement-fixes.output\n\n Send with:\n - title: \"PR Review Complete\"\n - message: 1-2 sentence summary — verdict and issue count. 
Short enough for a lock screen.\n - priority: 3 if ready to merge, 4 if needs fixes, 5 if critical issues remain\n", "archon-test-loop-dag": "name: archon-test-loop-dag\ndescription: |\n Use when: User explicitly says \"test-loop-dag\" or \"run test-loop-dag\".\n IMPORTANT: This is a DAG workflow with a loop node that iterates until completion.\n NOT for: General testing questions or debugging.\n Does: Initializes a counter, iterates until it reaches 3, then reports completion.\n\nnodes:\n - id: setup\n bash: |\n echo \"0\" > .archon/test-loop-dag-counter.txt\n echo \"Counter initialized to 0\"\n\n - id: loop-counter\n depends_on: [setup]\n loop:\n prompt: |\n You are testing the loop node functionality within a DAG workflow.\n\n ## Your Task\n\n 1. Read the file `.archon/test-loop-dag-counter.txt`\n 2. Parse the current counter value\n 3. Increment it by 1\n 4. Write the new value back to the file\n 5. Report the current iteration\n\n ## User Intent\n\n $USER_MESSAGE\n\n ## Completion Criteria\n\n - If the counter reaches 3 or higher, output: COMPLETE\n - Otherwise, just report your progress and end normally\n\n ## Important\n\n Be concise. Just do the task and report the counter value.\n until: COMPLETE\n max_iterations: 5\n fresh_context: false\n\n - id: report\n depends_on: [loop-counter]\n prompt: |\n The loop counter test has completed. The loop node output was:\n\n $loop-counter.output\n\n Read `.archon/test-loop-dag-counter.txt` and confirm the final counter value.\n Report: \"Test loop DAG completed successfully. 
Final counter: {value}\"\n", "archon-validate-pr": "name: archon-validate-pr\ndescription: |\n Use when: User wants a thorough PR validation that tests both main (bug present) and feature branch (bug fixed).\n Triggers: \"validate PR\", \"validate pr #123\", \"test this PR\", \"verify PR\", \"full PR validation\",\n \"validate pull request\", \"test PR end-to-end\".\n Does: Fetches PR info -> finds free ports -> parallel code review (main vs feature) ->\n E2E test on main (reproduce bug) -> E2E test on feature (verify fix) -> final verdict report.\n NOT for: Quick code-only reviews (use archon-smart-pr-review), fixing issues, general exploration.\n\n This workflow is designed for running in parallel — each instance finds its own free ports\n to avoid conflicts. Produces artifacts in $ARTIFACTS_DIR/ and posts a validation report.\n\nprovider: claude\nmodel: opus\n\nnodes:\n # ═══════════════════════════════════════════════════════════════\n # PHASE 1: SETUP — Fetch PR info and allocate ports\n # ═══════════════════════════════════════════════════════════════\n\n - id: fetch-pr\n bash: |\n # Extract PR number from arguments\n PR_NUMBER=$(echo \"$ARGUMENTS\" | grep -oE '/pull/[0-9]+' | grep -oE '[0-9]+' | head -1)\n # Fallback: extract first number if no URL path found (e.g., \"validate PR 42\")\n if [ -z \"$PR_NUMBER\" ]; then\n PR_NUMBER=$(echo \"$ARGUMENTS\" | grep -oE '[0-9]+' | head -1)\n fi\n if [ -z \"$PR_NUMBER\" ]; then\n # Try getting PR from current branch\n PR_NUMBER=$(gh pr view --json number -q '.number' 2>/dev/null)\n fi\n\n if [ -z \"$PR_NUMBER\" ]; then\n echo \"ERROR: No PR number found in arguments: $ARGUMENTS\"\n exit 1\n fi\n\n echo \"$PR_NUMBER\" > \"$ARTIFACTS_DIR/.pr-number\"\n\n # Fetch full PR details\n gh pr view \"$PR_NUMBER\" --json number,title,body,url,headRefName,baseRefName,files,additions,deletions,changedFiles,state,author,labels,isDraft\n\n - id: find-ports\n bash: |\n # Use Bun to let the OS pick truly free ports (cross-platform: 
Linux, macOS, Windows)\n BACKEND_PORT=$(bun -e \"const s = Bun.serve({port: 0, fetch: () => new Response('')}); console.log(s.port); s.stop()\")\n FRONTEND_PORT=$(bun -e \"const s = Bun.serve({port: 0, fetch: () => new Response('')}); console.log(s.port); s.stop()\")\n\n echo \"$BACKEND_PORT\" > \"$ARTIFACTS_DIR/.backend-port\"\n echo \"$FRONTEND_PORT\" > \"$ARTIFACTS_DIR/.frontend-port\"\n\n echo \"BACKEND_PORT=$BACKEND_PORT\"\n echo \"FRONTEND_PORT=$FRONTEND_PORT\"\n\n - id: resolve-paths\n bash: |\n # Resolve canonical repo path (main branch) vs worktree path (feature branch)\n CANONICAL_REPO=$(git rev-parse --path-format=absolute --git-common-dir 2>/dev/null | sed 's|/\\.git$||')\n WORKTREE_PATH=$(pwd)\n FEATURE_BRANCH=$(git branch --show-current)\n\n # Get PR branch info\n PR_NUMBER=$(cat \"$ARTIFACTS_DIR/.pr-number\")\n PR_HEAD=$(gh pr view \"$PR_NUMBER\" --json headRefName -q '.headRefName')\n PR_BASE=$(gh pr view \"$PR_NUMBER\" --json baseRefName -q '.baseRefName')\n\n echo \"$CANONICAL_REPO\" > \"$ARTIFACTS_DIR/.canonical-repo\"\n echo \"$WORKTREE_PATH\" > \"$ARTIFACTS_DIR/.worktree-path\"\n echo \"$FEATURE_BRANCH\" > \"$ARTIFACTS_DIR/.feature-branch\"\n echo \"$PR_HEAD\" > \"$ARTIFACTS_DIR/.pr-head\"\n echo \"$PR_BASE\" > \"$ARTIFACTS_DIR/.pr-base\"\n\n echo \"CANONICAL_REPO=$CANONICAL_REPO\"\n echo \"WORKTREE_PATH=$WORKTREE_PATH\"\n echo \"FEATURE_BRANCH=$FEATURE_BRANCH\"\n echo \"PR_HEAD=$PR_HEAD\"\n echo \"PR_BASE=$PR_BASE\"\n depends_on: [fetch-pr]\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 2: CODE REVIEW — Parallel analysis of main vs feature\n # ═══════════════════════════════════════════════════════════════\n\n - id: code-review-main\n command: archon-validate-pr-code-review-main\n depends_on: [fetch-pr, resolve-paths]\n context: fresh\n\n - id: code-review-feature\n command: archon-validate-pr-code-review-feature\n depends_on: [fetch-pr, resolve-paths, code-review-main]\n context: fresh\n\n # 
═══════════════════════════════════════════════════════════════\n # PHASE 3: E2E TESTING — Sequential (after code reviews finish)\n # ═══════════════════════════════════════════════════════════════\n\n - id: classify-testability\n prompt: |\n You are a PR testability classifier. Determine whether this PR's changes can be\n validated via browser E2E testing, or if it requires code-review-only validation.\n\n ## PR Details\n\n $fetch-pr.output\n\n ## Rules\n\n - **e2e_testable**: Changes affect the Web UI (components, hooks, styles, API routes\n that serve the frontend, SSE streaming, layout, user-visible behavior). These can be\n validated by starting Archon and using agent-browser to interact with the UI.\n - **code_review_only**: Changes are purely backend logic, CLI-only, workflow engine,\n database schemas, git operations, build tooling, tests, documentation, or other\n non-UI code. No visual validation possible.\n\n Consider: even if a change is backend, if it affects what the frontend displays\n (e.g., API response format changes, SSE event changes), it IS e2e_testable.\n depends_on: [fetch-pr]\n model: haiku\n allowed_tools: []\n output_format:\n type: object\n properties:\n testable:\n type: string\n enum: [\"e2e_testable\", \"code_review_only\"]\n reasoning:\n type: string\n test_plan:\n type: string\n required: [testable, reasoning, test_plan]\n\n - id: e2e-test-main\n command: archon-validate-pr-e2e-main\n depends_on: [classify-testability, find-ports, resolve-paths, code-review-main, code-review-feature]\n when: \"$classify-testability.output.testable == 'e2e_testable'\"\n context: fresh\n idle_timeout: 1800000\n\n - id: e2e-test-feature\n command: archon-validate-pr-e2e-feature\n depends_on: [e2e-test-main, find-ports, resolve-paths]\n when: \"$classify-testability.output.testable == 'e2e_testable'\"\n context: fresh\n idle_timeout: 1800000\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 4: FINAL REPORT — Synthesize all 
findings\n # ═══════════════════════════════════════════════════════════════\n\n - id: cleanup-processes\n bash: |\n # Safety net: kill any orphaned processes from E2E testing\n # This runs after E2E nodes complete (or timeout/fail) to prevent process accumulation\n BACKEND_PORT=$(cat \"$ARTIFACTS_DIR/.backend-port\" 2>/dev/null | tr -d '\\n')\n FRONTEND_PORT=$(cat \"$ARTIFACTS_DIR/.frontend-port\" 2>/dev/null | tr -d '\\n')\n\n if [ -z \"$BACKEND_PORT\" ] || [ -z \"$FRONTEND_PORT\" ]; then\n echo \"No port files found — skipping cleanup\"\n exit 0\n fi\n\n echo \"Cleaning up ports $BACKEND_PORT and $FRONTEND_PORT...\"\n\n # Kill by all recorded PID files\n for pidfile in \"$ARTIFACTS_DIR\"/.e2e-*-pid; do\n if [ -f \"$pidfile\" ]; then\n PID=$(cat \"$pidfile\" | tr -d '\\n')\n echo \"Killing PID $PID from $pidfile\"\n kill \"$PID\" 2>/dev/null || taskkill //F //T //PID \"$PID\" 2>/dev/null || true\n fi\n done\n\n # Kill by port (cross-platform fallback)\n for PORT in $BACKEND_PORT $FRONTEND_PORT; do\n fuser -k \"$PORT/tcp\" 2>/dev/null || true\n lsof -ti:\"$PORT\" 2>/dev/null | xargs kill -9 2>/dev/null || true\n netstat -ano 2>/dev/null | grep \":$PORT \" | grep LISTENING | awk '{print $5}' | sort -u | while read pid; do\n taskkill //F //T //PID \"$pid\" 2>/dev/null || true\n done\n done\n\n # pkill fallback: catch processes that escaped PID/port cleanup\n pkill -f \"PORT=$BACKEND_PORT.*bun\" 2>/dev/null || true\n pkill -f \"vite.*port.*$FRONTEND_PORT\" 2>/dev/null || true\n\n # Close this workflow's browser session only (scoped by session ID)\n BROWSER_SESSION=$(cat \"$ARTIFACTS_DIR/.browser-session\" 2>/dev/null | tr -d '\\n')\n if [ -n \"$BROWSER_SESSION\" ]; then\n agent-browser --session \"$BROWSER_SESSION\" close 2>/dev/null || true\n fi\n\n # Remove main E2E worktree if it still exists (safety net)\n CANONICAL_REPO=$(cat \"$ARTIFACTS_DIR/.canonical-repo\" 2>/dev/null | tr -d '\\n')\n MAIN_E2E_PATH=$(cat \"$ARTIFACTS_DIR/.e2e-main-worktree\" 2>/dev/null | tr 
-d '\\n')\n if [ -n \"$MAIN_E2E_PATH\" ] && [ -n \"$CANONICAL_REPO\" ] && [ -d \"$MAIN_E2E_PATH\" ]; then\n echo \"Removing leftover main E2E worktree: $MAIN_E2E_PATH\"\n git -C \"$CANONICAL_REPO\" worktree remove \"$MAIN_E2E_PATH\" --force 2>/dev/null || rm -rf \"$MAIN_E2E_PATH\"\n fi\n\n sleep 1\n echo \"Process cleanup complete\"\n depends_on: [e2e-test-main, e2e-test-feature]\n trigger_rule: all_done\n\n - id: final-report\n command: archon-validate-pr-report\n depends_on: [code-review-main, code-review-feature, e2e-test-main, e2e-test-feature, classify-testability, cleanup-processes]\n trigger_rule: all_done\n context: fresh\n", - "archon-workflow-builder": "name: archon-workflow-builder\ndescription: |\n Use when: User wants to create a new custom workflow for their project.\n Triggers: \"build me a workflow\", \"create a workflow\", \"generate a workflow\",\n \"new workflow\", \"make a workflow for\", \"workflow builder\".\n Does: Scans codebase -> extracts intent (JSON) -> generates YAML -> validates -> saves.\n NOT for: Editing existing workflows or creating non-workflow files.\n\nnodes:\n - id: scan-codebase\n bash: |\n echo \"=== Existing Commands ===\"\n if [ -d \".archon/commands\" ]; then\n find .archon/commands -type f -name \"*.md\" 2>/dev/null | head -30\n else\n echo \"(no .archon/commands/ directory)\"\n fi\n\n echo \"\"\n echo \"=== Existing Workflows ===\"\n if [ -d \".archon/workflows\" ]; then\n find .archon/workflows -type f \\( -name \"*.yaml\" -o -name \"*.yml\" \\) 2>/dev/null | head -30\n else\n echo \"(no .archon/workflows/ directory)\"\n fi\n\n echo \"\"\n echo \"=== Package Info ===\"\n if [ -f \"package.json\" ]; then\n grep -E '\"name\"|\"scripts\"' package.json | head -10\n else\n echo \"(no package.json)\"\n fi\n\n echo \"\"\n echo \"=== Project Context (CLAUDE.md first 50 lines) ===\"\n if [ -f \"CLAUDE.md\" ]; then\n head -50 CLAUDE.md\n else\n echo \"(no CLAUDE.md)\"\n fi\n\n - id: extract-intent\n prompt: |\n You are a 
workflow design classifier. Given a user's description of what they want\n a workflow to do, extract structured intent.\n\n ## User's Request\n $ARGUMENTS\n\n ## Codebase Context\n $scan-codebase.output\n\n ## Instructions\n\n Analyze the user's request and the existing codebase to determine:\n 1. A kebab-case workflow name (e.g., \"lint-and-test\", \"deploy-staging\")\n 2. A description following the Archon pattern (Use when / Triggers / Does / NOT for)\n 3. Trigger phrases the router should match\n 4. A list of proposed nodes with their types and purposes\n 5. Whether this should be a simple DAG or include a loop node\n\n Be specific and concrete. Each proposed node should have a clear type\n (bash, prompt, command, or loop) and a one-line description of what it does.\n model: haiku\n allowed_tools: []\n output_format:\n type: object\n properties:\n workflow_name:\n type: string\n description:\n type: string\n trigger_phrases:\n type: string\n proposed_nodes:\n type: string\n execution_mode:\n type: string\n enum: [\"dag\", \"loop\"]\n required: [workflow_name, description, trigger_phrases, proposed_nodes, execution_mode]\n depends_on: [scan-codebase]\n\n - id: generate-yaml\n prompt: |\n You are an Archon workflow author. 
Generate a complete, valid workflow YAML file\n based on the structured intent provided.\n\n ## Intent\n - **Name**: $extract-intent.output.workflow_name\n - **Description**: $extract-intent.output.description\n - **Trigger Phrases**: $extract-intent.output.trigger_phrases\n - **Proposed Nodes**: $extract-intent.output.proposed_nodes\n - **Execution Mode**: $extract-intent.output.execution_mode\n\n ## Original User Request\n $ARGUMENTS\n\n ## Archon Workflow YAML Schema Reference\n\n A workflow YAML file has this structure:\n\n ```yaml\n name: workflow-name\n description: |\n Use when: ...\n Triggers: ...\n Does: ...\n NOT for: ...\n\n # Optional top-level settings:\n # provider: claude (or codex)\n # model: sonnet (or haiku, opus, etc.)\n # interactive: true (forces foreground execution in web UI)\n\n nodes:\n - id: node-id-kebab-case\n # Choose ONE of: prompt, bash, command, loop\n\n # --- prompt node (AI-executed) ---\n prompt: |\n Instructions for the AI...\n # Optional: model, allowed_tools, denied_tools, output_format, context, idle_timeout\n\n # --- bash node (shell script, no AI, stdout = $.output) ---\n bash: |\n #!/bin/bash\n set -e\n echo \"result\"\n\n # --- command node (references a .archon/commands/ file) ---\n command: command-name\n\n # --- loop node (iterative AI execution) ---\n loop:\n prompt: |\n Instructions repeated each iteration...\n until: COMPLETION_SIGNAL\n max_iterations: 10\n fresh_context: true # optional: reset context each iteration\n\n # Common options for all node types:\n depends_on: [other-node-id] # DAG edges\n when: \"$.output == 'value'\" # conditional execution\n trigger_rule: all_success # all_success | one_success | all_done\n timeout: 120000 # ms, for bash nodes\n ```\n\n ## Variable Reference\n - `$ARGUMENTS` — user's input text\n - `$ARTIFACTS_DIR` — pre-created directory for workflow artifacts\n - `$.output` — stdout from a bash node or AI response from a prompt node\n - `$.output.field` — JSON field from a node with 
output_format\n - `$BASE_BRANCH` — base git branch\n\n ## Rules\n 1. The `name:` field MUST match: $extract-intent.output.workflow_name\n 2. The `description:` MUST follow the \"Use when / Triggers / Does / NOT for\" pattern\n 3. Every node MUST have a unique kebab-case `id`\n 4. Use `depends_on` to define execution order\n 5. Use `bash` nodes for deterministic operations (file checks, git commands, installs)\n 6. Use `prompt` nodes for AI reasoning tasks\n 7. Use `output_format` on prompt nodes when downstream nodes need structured data\n 8. Use `allowed_tools: []` on classification/analysis nodes that don't need tools\n 9. Use `denied_tools: [Edit, Bash]` when a node should only use Write (not edit existing files)\n 10. Prefer `model: haiku` for simple classification tasks to save cost\n\n ## Output\n\n Write the complete workflow YAML to: `$ARTIFACTS_DIR/generated-workflow.yaml`\n\n Use the Write tool. Do NOT use Edit or Bash. The file must be valid YAML and follow\n all the patterns above.\n denied_tools: [Edit, Bash]\n depends_on: [extract-intent]\n\n - id: validate-yaml\n bash: |\n FILE=\"$ARTIFACTS_DIR/generated-workflow.yaml\"\n\n if [ ! -f \"$FILE\" ]; then\n echo \"ERROR: generated-workflow.yaml not found at $FILE\"\n exit 1\n fi\n\n if [ ! -s \"$FILE\" ]; then\n echo \"ERROR: generated-workflow.yaml is empty\"\n exit 1\n fi\n\n if ! grep -q \"^name:\" \"$FILE\"; then\n echo \"ERROR: missing 'name:' field\"\n exit 1\n fi\n\n if ! grep -q \"^nodes:\" \"$FILE\"; then\n echo \"ERROR: missing 'nodes:' field\"\n exit 1\n fi\n\n echo \"VALID\"\n depends_on: [generate-yaml]\n\n - id: save-or-report\n prompt: |\n You are a workflow installer. Save the generated workflow and report to the user.\n\n ## Workflow Details\n - **Name**: $extract-intent.output.workflow_name\n - **Trigger Phrases**: $extract-intent.output.trigger_phrases\n\n ## Instructions\n\n 1. Read the generated workflow from `$ARTIFACTS_DIR/generated-workflow.yaml`\n 2. 
Create the directory `.archon/workflows/` if it doesn't exist (use Bash: `mkdir -p .archon/workflows/`)\n 3. Save the workflow to `.archon/workflows/$extract-intent.output.workflow_name.yaml`\n Use the Write tool to write the file.\n 4. Report to the user:\n - Workflow name and file location\n - Trigger phrases that will invoke it\n - How to run it: `bun run cli workflow run $extract-intent.output.workflow_name \"your input\"`\n - How to test it: `bun run cli validate workflows $extract-intent.output.workflow_name`\n depends_on: [validate-yaml]\n", + "archon-workflow-builder": "name: archon-workflow-builder\ndescription: |\n Use when: User wants to create a new custom workflow for their project.\n Triggers: \"build me a workflow\", \"create a workflow\", \"generate a workflow\",\n \"new workflow\", \"make a workflow for\", \"workflow builder\".\n Does: Scans codebase -> extracts intent (JSON) -> generates YAML -> validates -> saves.\n NOT for: Editing existing workflows or creating non-workflow files.\n\nnodes:\n - id: scan-codebase\n bash: |\n echo \"=== Existing Commands ===\"\n if [ -d \".archon/commands\" ]; then\n find .archon/commands -type f -name \"*.md\" 2>/dev/null | head -30\n else\n echo \"(no .archon/commands/ directory)\"\n fi\n\n echo \"\"\n echo \"=== Existing Workflows ===\"\n if [ -d \".archon/workflows\" ]; then\n find .archon/workflows -type f \\( -name \"*.yaml\" -o -name \"*.yml\" \\) 2>/dev/null | head -30\n else\n echo \"(no .archon/workflows/ directory)\"\n fi\n\n echo \"\"\n echo \"=== Package Info ===\"\n if [ -f \"package.json\" ]; then\n grep -E '\"name\"|\"scripts\"' package.json | head -10\n else\n echo \"(no package.json)\"\n fi\n\n echo \"\"\n echo \"=== Project Context (CLAUDE.md first 50 lines) ===\"\n if [ -f \"CLAUDE.md\" ]; then\n head -50 CLAUDE.md\n else\n echo \"(no CLAUDE.md)\"\n fi\n\n - id: extract-intent\n prompt: |\n You are a workflow design classifier. 
Given a user's description of what they want\n a workflow to do, extract structured intent.\n\n ## User's Request\n $ARGUMENTS\n\n ## Codebase Context\n $scan-codebase.output\n\n ## Instructions\n\n Analyze the user's request and the existing codebase to determine:\n 1. A kebab-case workflow name (e.g., \"lint-and-test\", \"deploy-staging\")\n 2. A description following the Archon pattern (Use when / Triggers / Does / NOT for)\n 3. Trigger phrases the router should match\n 4. A list of proposed nodes with their types and purposes\n 5. Whether this should be a simple DAG or include a loop node\n\n Be specific and concrete. Each proposed node should have a clear type\n (bash, prompt, command, script, loop, or approval) and a one-line\n description of what it does.\n model: haiku\n allowed_tools: []\n output_format:\n type: object\n properties:\n workflow_name:\n type: string\n description:\n type: string\n trigger_phrases:\n type: string\n proposed_nodes:\n type: string\n execution_mode:\n type: string\n enum: [\"dag\", \"loop\"]\n required: [workflow_name, description, trigger_phrases, proposed_nodes, execution_mode]\n depends_on: [scan-codebase]\n\n - id: generate-yaml\n prompt: |\n You are an Archon workflow author. 
Generate a complete, valid workflow YAML file\n based on the structured intent provided.\n\n ## Intent\n - **Name**: $extract-intent.output.workflow_name\n - **Description**: $extract-intent.output.description\n - **Trigger Phrases**: $extract-intent.output.trigger_phrases\n - **Proposed Nodes**: $extract-intent.output.proposed_nodes\n - **Execution Mode**: $extract-intent.output.execution_mode\n\n ## Original User Request\n $ARGUMENTS\n\n ## Archon Workflow YAML Schema Reference\n\n A workflow YAML file has this structure:\n\n ```yaml\n name: workflow-name\n description: |\n Use when: ...\n Triggers: ...\n Does: ...\n NOT for: ...\n\n # Optional top-level settings:\n # provider: claude (or codex)\n # model: sonnet (or haiku, opus, etc.)\n # interactive: true (forces foreground execution in web UI)\n\n nodes:\n - id: node-id-kebab-case\n # Choose ONE of: prompt, bash, command, script, loop, approval\n\n # --- prompt node (AI-executed) ---\n prompt: |\n Instructions for the AI...\n # Optional: model, allowed_tools, denied_tools, output_format, context, idle_timeout\n\n # --- bash node (shell script, no AI, stdout = $.output) ---\n bash: |\n #!/bin/bash\n set -e\n echo \"result\"\n\n # --- command node (references a .archon/commands/ file) ---\n command: command-name\n\n # --- script node (TypeScript via bun, or Python via uv — no AI, stdout = $.output) ---\n # Use for deterministic data transforms the shell would mangle (JSON parsing, etc.)\n script: |\n const raw = String.raw`$other-node.output`;\n const data = JSON.parse(raw);\n console.log(JSON.stringify({ count: data.items.length }));\n runtime: bun # required: 'bun' (.ts/.js) or 'uv' (.py)\n # deps: [requests] # uv only\n # Or reference a named script in .archon/scripts/:\n # script: extract-labels # no extension; bun resolves .ts/.js, uv resolves .py\n\n # --- loop node (iterative AI execution) ---\n loop:\n prompt: |\n Instructions repeated each iteration...\n until: COMPLETION_SIGNAL\n max_iterations: 10\n 
fresh_context: true # optional: reset context each iteration\n\n # --- approval node (human gate — pauses workflow) ---\n approval:\n message: \"Review the plan above. Approve to continue.\"\n # capture_response: true # store reviewer comment as $.output\n\n # Common options for all node types:\n depends_on: [other-node-id] # DAG edges\n when: \"$.output == 'value'\" # conditional execution\n trigger_rule: all_success # all_success | one_success | all_done\n timeout: 120000 # ms, for bash and script nodes\n ```\n\n ## Variable Reference\n - `$ARGUMENTS` — user's input text\n - `$ARTIFACTS_DIR` — pre-created directory for workflow artifacts\n - `$.output` — stdout from a bash/script node or AI response from a prompt node\n - `$.output.field` — JSON field from a node with output_format\n - `$BASE_BRANCH` — base git branch\n\n ## Rules\n 1. The `name:` field MUST match: $extract-intent.output.workflow_name\n 2. The `description:` MUST follow the \"Use when / Triggers / Does / NOT for\" pattern\n 3. Every node MUST have a unique kebab-case `id`\n 4. Use `depends_on` to define execution order\n 5. Use `bash` nodes for deterministic shell operations (file checks, git commands, installs)\n 6. Use `script` nodes for typed data transforms (TypeScript JSON parsing, Python with deps) — stdout is captured as output, stderr is forwarded as a warning. $nodeId.output is NOT shell-quoted in script bodies — parse with JSON.parse / json.loads, not shell interpolation\n 7. Use `prompt` nodes for AI reasoning tasks\n 8. Use `approval` nodes to pause for human review at risky gates (plan→execute boundary, destructive actions)\n 9. Use `output_format` on prompt nodes when downstream nodes need structured data\n 10. Use `allowed_tools: []` on classification/analysis nodes that don't need tools\n 11. Use `denied_tools: [Edit, Bash]` when a node should only use Write (not edit existing files)\n 12. 
Prefer `model: haiku` for simple classification tasks to save cost\n\n ## Output\n\n Write the complete workflow YAML to: `$ARTIFACTS_DIR/generated-workflow.yaml`\n\n Use the Write tool. Do NOT use Edit or Bash. The file must be valid YAML and follow\n all the patterns above.\n denied_tools: [Edit, Bash]\n depends_on: [extract-intent]\n\n - id: validate-yaml\n bash: |\n FILE=\"$ARTIFACTS_DIR/generated-workflow.yaml\"\n\n if [ ! -f \"$FILE\" ]; then\n echo \"ERROR: generated-workflow.yaml not found at $FILE\"\n exit 1\n fi\n\n if [ ! -s \"$FILE\" ]; then\n echo \"ERROR: generated-workflow.yaml is empty\"\n exit 1\n fi\n\n if ! grep -q \"^name:\" \"$FILE\"; then\n echo \"ERROR: missing 'name:' field\"\n exit 1\n fi\n\n if ! grep -q \"^nodes:\" \"$FILE\"; then\n echo \"ERROR: missing 'nodes:' field\"\n exit 1\n fi\n\n echo \"VALID\"\n depends_on: [generate-yaml]\n\n - id: save-or-report\n prompt: |\n You are a workflow installer. Save the generated workflow and report to the user.\n\n ## Workflow Details\n - **Name**: $extract-intent.output.workflow_name\n - **Trigger Phrases**: $extract-intent.output.trigger_phrases\n\n ## Instructions\n\n 1. Read the generated workflow from `$ARTIFACTS_DIR/generated-workflow.yaml`\n 2. Create the directory `.archon/workflows/` if it doesn't exist (use Bash: `mkdir -p .archon/workflows/`)\n 3. Save the workflow to `.archon/workflows/$extract-intent.output.workflow_name.yaml`\n Use the Write tool to write the file.\n 4. 
Report to the user:\n - Workflow name and file location\n - Trigger phrases that will invoke it\n - How to run it: `bun run cli workflow run $extract-intent.output.workflow_name \"your input\"`\n - How to test it: `bun run cli validate workflows $extract-intent.output.workflow_name`\n depends_on: [validate-yaml]\n", }; diff --git a/packages/workflows/src/logger.test.ts b/packages/workflows/src/logger.test.ts index 24cee58524..e8182268a6 100644 --- a/packages/workflows/src/logger.test.ts +++ b/packages/workflows/src/logger.test.ts @@ -30,6 +30,7 @@ import { logValidation, logWorkflowError, logWorkflowComplete, + logNodeComplete, type WorkflowEvent, } from './logger'; @@ -209,6 +210,20 @@ Line 3`; }); }); + describe('logNodeComplete', () => { + it('should preserve provider cache token fields', async () => { + await logNodeComplete(testDir, 'token-test', 'review', 'archon-code-review-agent', { + durationMs: 123, + tokens: { input: 100, output: 25, cacheRead: 80 }, + }); + + const events = await readLogFile('token-test'); + expect(events).toHaveLength(1); + expect(events[0].type).toBe('node_complete'); + expect(events[0].tokens).toEqual({ input: 100, output: 25, cacheRead: 80 }); + }); + }); + describe('full workflow logging scenario', () => { it('should log complete workflow execution', async () => { const runId = 'full-workflow-test';