diff --git a/.claude/skills/archon/SKILL.md b/.claude/skills/archon/SKILL.md index f36e7391b8..1995248174 100644 --- a/.claude/skills/archon/SKILL.md +++ b/.claude/skills/archon/SKILL.md @@ -37,17 +37,60 @@ Determine the user's intent and dispatch to the appropriate guide: | **Config / settings** | Read `guides/config.md` — interactive config editor | | **Initialize .archon/ in a repo** | Read `references/repo-init.md` | | **Create a workflow** | Read `references/workflow-dag.md` — the complete workflow authoring guide | +| **Quick parameter lookup — which field works on which node type** | Read `references/parameter-matrix.md` — master matrix, intent-based lookup, silent-failure catalog | | **Advanced features (hooks/MCP/skills)** | Read `references/dag-advanced.md` | | **Create a command file** | Read `references/authoring-commands.md` | | **Variable substitution reference** | Read `references/variables.md` | | **CLI command reference** | Read `references/cli-commands.md` | | **Run an interactive workflow** | Read `references/interactive-workflows.md` — transparent relay protocol | +| **Workflow good practices / anti-patterns** | Read `references/good-practices.md` — read before designing a non-trivial workflow | +| **Troubleshoot a failing / stuck workflow** | Read `references/troubleshooting.md` — log locations, common failure modes | | **Run a workflow (default)** | Continue with "Running Workflows" below | If the intent is ambiguous, ask the user to clarify. --- +## Richer Context: [archon.diy](https://archon.diy) + +The references in this skill are a distilled subset. The full, canonical docs live at **[archon.diy](https://archon.diy)** (Starlight site from `packages/docs-web/`). If the skill's reference pages don't cover what you need — an edge case, a worked example, a diagram, a deeper section on a feature — fetch the matching page from archon.diy. 
+ +### When to reach for the live docs + +- You need an end-to-end example that's longer than what the skill shows (e.g. full patterns for hooks, MCP config, sandbox schema, approval flows) +- You're explaining a concept to the user and want the most readable framing (the `book/` series is written as a tutorial, not a reference) +- You hit a feature the skill only mentions in passing (e.g. `agents:` inline sub-agents, advanced Codex options, the full SyncHookJSONOutput schema) +- The user asks "where is this documented?" — point them at the archon.diy URL, not a skill file path + +### URL map + +| Topic | URL | +|-------|-----| +| Landing + install | [archon.diy](https://archon.diy) | +| Getting started (installation, quick start, concepts) | [archon.diy/getting-started/](https://archon.diy/getting-started/overview/) | +| The book (tutorial-style walkthrough) | [archon.diy/book/](https://archon.diy/book/) | +| Workflow authoring guide | [archon.diy/guides/authoring-workflows/](https://archon.diy/guides/authoring-workflows/) | +| Command authoring guide | [archon.diy/guides/authoring-commands/](https://archon.diy/guides/authoring-commands/) | +| Node type guides | [archon.diy/guides/loop-nodes/](https://archon.diy/guides/loop-nodes/), [/approval-nodes/](https://archon.diy/guides/approval-nodes/), [/script-nodes/](https://archon.diy/guides/script-nodes/) | +| Per-node features (Claude only) | [/hooks/](https://archon.diy/guides/hooks/), [/mcp-servers/](https://archon.diy/guides/mcp-servers/), [/skills/](https://archon.diy/guides/skills/) | +| Global workflows/commands/scripts | [archon.diy/guides/global-workflows/](https://archon.diy/guides/global-workflows/) | +| Variables reference | [archon.diy/reference/variables/](https://archon.diy/reference/variables/) | +| CLI reference | [archon.diy/reference/cli/](https://archon.diy/reference/cli/) | +| Security model (env, sandbox, target-repo `.env` stripping) | 
[archon.diy/reference/security/](https://archon.diy/reference/security/) | +| Architecture | [archon.diy/reference/architecture/](https://archon.diy/reference/architecture/) | +| Configuration (`.archon/config.yaml` full schema) | [archon.diy/reference/configuration/](https://archon.diy/reference/configuration/) | +| Troubleshooting | [archon.diy/reference/troubleshooting/](https://archon.diy/reference/troubleshooting/) | +| Adapter setup (Slack/Telegram/GitHub/Web/Discord/Gitea/GitLab) | [archon.diy/adapters/](https://archon.diy/adapters/) | +| Deployment (Docker, cloud, Windows) | [archon.diy/deployment/](https://archon.diy/deployment/) | + +URL shape is `archon.diy/
//` — the paths mirror the filenames under `packages/docs-web/src/content/docs/`. + +### Precedence + +This skill's reference pages are the primary source for routine workflow authoring, CLI use, and setup. Reach for archon.diy when the skill is incomplete for your case — don't go to the live docs first by default (skill refs load into context faster and are tuned for agents). + +--- + ## Running Workflows ### Core Command @@ -188,6 +231,29 @@ Each node has exactly ONE of: `command`, `prompt`, `bash`, or `loop`. until_bash: "bun run test" # Optional: exit 0 = done ``` +**Approval node** — pauses the workflow for human review. Requires `interactive: true` at the workflow level for Web UI delivery: +```yaml +interactive: true # workflow level — required for web UI + +nodes: + - id: review-gate + approval: + message: "Review the plan above before proceeding." + capture_response: true # Optional: user's comment → $review-gate.output + on_reject: # Optional: AI rework on rejection instead of cancel + prompt: "Revise based on feedback: $REJECTION_REASON" + max_attempts: 3 # Range 1-10, default 3 + depends_on: [plan] +``` + +**Cancel node** — terminates the workflow with a reason. Typically gated with `when:`: +```yaml +- id: stop-if-unsafe + cancel: "Refusing to proceed: input flagged UNSAFE." + depends_on: [classify] + when: "$classify.output != 'SAFE'" +``` + For the full authoring guide with all fields, conditions, trigger rules, and patterns: Read `references/workflow-dag.md` ### Creating a Command File diff --git a/.claude/skills/archon/references/authoring-commands.md b/.claude/skills/archon/references/authoring-commands.md index 0b1240da6b..603dd3e4a3 100644 --- a/.claude/skills/archon/references/authoring-commands.md +++ b/.claude/skills/archon/references/authoring-commands.md @@ -4,14 +4,29 @@ Commands are plain Markdown files containing AI prompt templates. 
They are the a ## File Location +Commands are discovered from three scopes, highest-precedence first: + ``` -.archon/commands/ -├── my-command.md # Custom command -├── review-code.md # Another custom command -└── defaults/ # Optional: override bundled defaults - └── archon-assist.md # Overrides the bundled archon-assist +/.archon/commands/ # 1. Repo-scoped (wins) +├── my-command.md # Custom command for this repo +├── archon-assist.md # Overrides the bundled archon-assist +└── triage/ # Subfolders allowed, 1 level deep + └── review.md # Resolves as 'review', not 'triage/review' + +~/.archon/commands/ # 2. Home-scoped (user-level, shared across all repos) +├── review-checklist.md # Personal helper available in every repo +└── pr-style-guide.md + + # 3. Shipped with Archon (archon-assist, etc.) ``` +**Resolution rules:** + +- Filename-without-extension is the command name (e.g. `my-command.md` → `my-command`). +- 1-level subfolders are supported for grouping; resolution is still by filename (`triage/review.md` → `review`). +- Repo scope overrides home scope overrides bundled, by name. +- Duplicate basenames **within a scope** (e.g. two different `review.md` files in `triage/` and `security/`) are a user error — keep names unique within each scope. + Commands are referenced by name (without `.md`) in workflow YAML files. ## File Format @@ -78,11 +93,14 @@ Command names must: ## Discovery and Priority When a workflow references `command: my-command`, Archon searches in this order: -1. `.archon/commands/my-command.md` (repo custom) -2. `.archon/commands/defaults/my-command.md` (repo default overrides) + +1. `/.archon/commands/my-command.md` (repo scope) +2. `~/.archon/commands/my-command.md` (home scope — shared across every repo on the machine) 3. Bundled defaults (shipped with Archon) -First match wins. To override a bundled command, create a file with the same name in your repo. +First match wins. 
To override a bundled command, drop a file with the same name at either scope. To override a home-scoped command for a specific repo, drop a file with the same name in that repo's `.archon/commands/`. + +> **Web UI note**: Home-scoped commands appear in the workflow builder's node palette under a dedicated "Global (~/.archon/commands/)" section, distinct from project and bundled entries. ## Referencing Commands from Workflows diff --git a/.claude/skills/archon/references/cli-commands.md b/.claude/skills/archon/references/cli-commands.md index 157eacb713..0cc1a0ee06 100644 --- a/.claude/skills/archon/references/cli-commands.md +++ b/.claude/skills/archon/references/cli-commands.md @@ -32,7 +32,7 @@ archon workflow run archon-fix-github-issue --resume | `--branch <name>` / `-b` | Branch name for worktree. Reuses existing worktree if healthy | | `--from <branch>` / `--from-branch <branch>` | Start-point branch for new worktree (default: repo default branch) | | `--no-worktree` | Skip isolation — run in the live checkout | -| `--resume` | Resume the last failed run of this workflow (skips completed steps/nodes) | +| `--resume` | Resume the last failed run of this workflow at this cwd (skips completed nodes) | | `--cwd <path>` | Working directory override | **Flag conflicts** (errors): @@ -42,6 +42,87 @@ archon workflow run archon-fix-github-issue --resume **Default behavior** (no flags): Auto-creates a worktree with branch name `{workflow-name}-{timestamp}`. +**Auto-resume without `--resume`**: If a prior invocation of the same workflow at the same cwd failed, the next invocation automatically skips completed nodes. `--resume` is only needed when you want to force resume a specific failed run or to reuse the worktree from that run. + +### `archon workflow status` + +Show the currently running workflow (if any) with its run ID, state, and last activity. 
+ +```bash +archon workflow status +archon workflow status --json # Machine-readable output +``` + +### `archon workflow approve <run-id> [comment]` + +Approve a paused approval-node workflow. Auto-resumes the workflow. + +```bash +archon workflow approve abc123 +archon workflow approve abc123 --comment "Plan looks good" +archon workflow approve abc123 "Plan looks good" # positional form +``` + +For interactive loop nodes, the comment becomes `$LOOP_USER_INPUT` on the next iteration. For approval nodes with `capture_response: true`, the comment becomes `$<node-id>.output` for downstream nodes. + +### `archon workflow reject <run-id> [reason]` + +Reject a paused approval gate. Without `on_reject` on the node, cancels the workflow. With `on_reject`, runs the rework prompt with `$REJECTION_REASON` substituted and re-pauses. + +```bash +archon workflow reject abc123 +archon workflow reject abc123 --reason "Plan misses test coverage" +archon workflow reject abc123 "Plan misses test coverage" +``` + +### `archon workflow abandon <run-id>` + +Mark a non-terminal workflow run as cancelled. Use when a `running` row is stuck after a server crash or when you want to discard a paused run without rejecting. This does NOT kill an in-flight subprocess — it only transitions the DB row. + +```bash +archon workflow abandon abc123 +``` + +> **There is no `archon workflow cancel` CLI subcommand.** To actively cancel a running workflow (terminate its subprocess), use the chat slash command `/workflow cancel <run-id>` on the platform that started it (Web UI, Slack, Telegram, etc.), or the Cancel button on the Web UI dashboard. The CLI only offers `abandon`, which is the right tool for orphan cleanup but does not interrupt a live subprocess. + +### `archon workflow resume <run-id> [message]` + +Explicitly re-run a failed run. Most workflows auto-resume without this — use it when you want to force a specific run ID. 
+ +```bash +archon workflow resume abc123 +archon workflow resume abc123 "continue with the plan" +``` + +### `archon workflow cleanup [days]` + +**Deletes** old terminal workflow runs (`completed`/`failed`/`cancelled`) from the database for disk hygiene. Does NOT transition `running` rows — use `abandon` (or the chat `/workflow cancel`) for those. + +```bash +archon workflow cleanup # Default: 7 days +archon workflow cleanup 30 # Custom: 30 days +``` + +### `archon workflow event emit --run-id <run-id> --type <type> [--data <json>]` + +Emit a workflow event to a running workflow. Used inside loop prompts to signal state (e.g. "checkpoint written") for observability. Rarely invoked from the shell directly. + +```bash +archon workflow event emit --run-id abc123 --type checkpoint --data '{"step":"plan"}' +``` + +### `archon continue <branch> [flags] [message]` + +Continue work on a branch with prior context. Defaults to `archon-assist`; use `--workflow` to pick a different workflow. Useful for iterative sessions on the same worktree without typing the full `workflow run` incantation. + +```bash +archon continue feat/auth "Add password reset" +archon continue feat/auth --workflow archon-feature-development "Continue from step 3" +archon continue feat/auth --no-context "Start fresh without loading prior artifacts" +``` + +Flags: `--workflow <name>`, `--no-context`. + ## Isolation Commands ### `archon isolation list` @@ -59,11 +140,20 @@ Outputs: branch name, path, workflow type, platform, last activity age. Ghost en Remove stale worktree environments. 
```bash -archon isolation cleanup # Default: 7 days -archon isolation cleanup 14 # Custom: 14 days -archon isolation cleanup --merged # Remove branches merged into main (+ remote branches) +archon isolation cleanup # Default: 7 days +archon isolation cleanup 14 # Custom: 14 days +archon isolation cleanup --merged # Also remove worktrees whose branches merged into main (deletes remote branches too) +archon isolation cleanup --merged --include-closed # Also remove worktrees whose PRs were closed without merging ``` +**Flags:** + +| Flag | Description | +|------|-------------| +| `[days]` | Positional — age threshold in days. Environments untouched for longer than this are removed. Default: 7 | +| `--merged` | Union of three signals — ancestry (`git branch --merged`), patch equivalence (`git cherry`), and PR state (`gh`) — safely catches squash-merges | +| `--include-closed` | With `--merged`, also remove worktrees whose PRs were closed (abandoned, not merged) | + ## Validate Commands ### `archon validate workflows [name]` diff --git a/.claude/skills/archon/references/good-practices.md b/.claude/skills/archon/references/good-practices.md new file mode 100644 index 0000000000..e731a2583d --- /dev/null +++ b/.claude/skills/archon/references/good-practices.md @@ -0,0 +1,241 @@ +# Workflow Good Practices and Anti-Patterns + +Guidance for authoring workflows that survive first contact with a real codebase. Written for an agent or human writing their first non-trivial workflow. + +## Good Practices + +### 1. Use deterministic nodes for deterministic work + +AI nodes are expensive, non-reproducible, and can hallucinate. Use `bash:` or `script:` for anything that has a right answer a computer can produce. + +- **Run tests** with `bash: "bun run test"`, not `prompt: "run the tests and tell me if they passed"`. +- **Parse JSON** with `script:` (bun/uv), not a `prompt:` that re-derives structure from free text. 
+- **Read files with known paths** via `bash: "cat path/to/file"` or `Read` in an AI node where the agent actually needs to reason about the content. +- **Git state checks** (current branch, uncommitted changes, merge-base) → `bash:`. + +### 2. Use `output_format` for every node whose output downstream `when:` reads + +`when:` conditions do best-effort JSON parsing on `$nodeId.output` for `.field` access. If the upstream node doesn't enforce a shape, you're pattern-matching free-form AI text — fragile. + +```yaml +# GOOD +- id: classify + prompt: "Classify as BUG or FEATURE" + output_format: # enforces the JSON shape + type: object + properties: + type: { type: string, enum: [BUG, FEATURE] } + required: [type] + +- id: investigate + command: investigate-bug + depends_on: [classify] + when: "$classify.output.type == 'BUG'" # safe field access + +# BAD +- id: classify + prompt: "Is this a bug or a feature?" + # no output_format; AI might reply "it looks like a bug", "BUG", or "This is a bug.\n\n..." + +- id: investigate + command: investigate-bug + depends_on: [classify] + when: "$classify.output == 'BUG'" # fragile string match +``` + +### 3. `trigger_rule: none_failed_min_one_success` after conditional branches + +After `when:`-gated branches, the downstream merge node will see one or more **skipped** dependencies. Skipped ≠ success. Default `all_success` fails. 
+ +```yaml +- id: investigate + command: investigate-bug + depends_on: [classify] + when: "$classify.output.type == 'BUG'" + +- id: plan + command: plan-feature + depends_on: [classify] + when: "$classify.output.type == 'FEATURE'" + +- id: implement + command: implement + depends_on: [investigate, plan] + trigger_rule: none_failed_min_one_success # CORRECT — exactly one ran + # trigger_rule: all_success ← would fail here (one dep skipped) +``` + +Use `one_success` when any dep succeeding is enough; `none_failed_min_one_success` when no dep should have failed AND at least one must have succeeded; `all_done` for "run cleanup regardless" patterns with `cancel:` or notification nodes. + +### 4. `context: fresh` requires artifacts for state passing + +A node with `context: fresh` starts with no memory of prior nodes in the same workflow. The only way state moves is via files. Default is `fresh` for parallel layers and `shared` for sequential — explicit `context: fresh` is common when you want cost isolation. + +```yaml +- id: investigate + command: investigate-bug + # Investigator WRITES to $ARTIFACTS_DIR/investigation.md + +- id: implement + command: implement-fix + depends_on: [investigate] + context: fresh + # Implementer MUST read $ARTIFACTS_DIR/investigation.md — it has no memory + # of what the investigator found. +``` + +Command files should lead with "read artifacts from `$ARTIFACTS_DIR/...`" when they're downstream of a fresh node. This is the single biggest quality lever on multi-node workflows. + +### 5. Cheap models for glue, strong models for substance + +Classification, routing, formatting, and short summaries don't need Opus. Use `model: haiku` for these and reserve `sonnet`/`opus` for the nodes that actually produce code or long-form analysis. Combined with `allowed_tools: []` on pure-text nodes, this cuts cost dramatically. 
+ +```yaml +- id: classify + prompt: "Classify this issue" + model: haiku # fast + cheap + allowed_tools: [] # no tool overhead + output_format: { ... } + +- id: implement + command: implement-fix + model: sonnet # where the thinking happens +``` + +### 6. Write the workflow description for routing + +Archon's orchestrator routes user intent to workflows by description. Write descriptions that make routing obvious. + +- Start with the imperative action: "Fix a GitHub issue end-to-end", "Generate a Remotion video composition". +- Mention triggers: "Use when the user asks to review a PR", "Use when there's a failing test run". +- Mention what it does NOT do: "Does not create a PR — use `archon-plan-to-pr` for that". + +### 7. Validate before shipping + +Never declare a workflow "done" without: + +```bash +archon validate workflows # YAML + DAG structure + resource refs +``` + +This checks: YAML syntax, node ID uniqueness, no cycles, all `depends_on` exist, all `$nodeId.output` refs point to known nodes, all `command:` files exist, all `mcp:` configs parse, all `skills:` directories exist, provider/model compatibility, named script existence, runtime availability. Fix everything it reports before first run. + +For brand-new workflows, also: +1. Run once against a trivial input (`archon workflow run my-workflow --branch test/sanity "hello"`) +2. Check the run log at `~/.archon/workspaces///logs/.jsonl` +3. Check artifacts at `~/.archon/workspaces///artifacts/runs//` + +See `references/troubleshooting.md` for how to read those. + +### 8. Design the artifact chain before writing command files + +In a multi-node workflow, each node's artifact IS the specification for the next node. 
Before writing any command body, map out: + +| Node | Reads | Writes | +|------|-------|--------| +| `investigate-issue` | GitHub issue via `gh` | `$ARTIFACTS_DIR/issues/issue-{n}.md` | +| `implement-issue` | Artifact from `investigate-issue` | Code files, tests | +| `create-pr` | Git diff | GitHub PR, `$ARTIFACTS_DIR/pr-body.md` | + +If a downstream agent can't execute from just its artifact, the artifact is incomplete. This is the single most common failure mode in multi-node workflows. + +### 9. Keep workflows reversible + +Use `worktree.enabled: true` at the workflow level for anything that modifies the codebase. The CLI `--no-worktree` flag will hard-error, forcing users into isolation. The cost is a one-time cp of the worktree; the benefit is never having a failed workflow corrupt a live checkout. + +For read-only workflows (triage, reporting, code analysis), pin `worktree.enabled: false` instead — saves the worktree setup cost. + +--- + +## Anti-Patterns + +### ❌ Asking AI to run deterministic checks + +```yaml +# BAD +- id: test + prompt: "Run bun run test and tell me if it passed" + +# GOOD +- id: test + bash: "bun run test 2>&1" + +- id: react-to-tests + prompt: "Fix any failures: $test.output" + depends_on: [test] + trigger_rule: all_done # run even if tests failed +``` + +### ❌ Pattern-matching free-form AI output in `when:` + +```yaml +# BAD — brittle +- id: decide + prompt: "Should we proceed? Answer yes or no." +- id: do-thing + depends_on: [decide] + when: "$decide.output == 'yes'" # AI says "Yes!" or "Yes, because..." — no match + +# GOOD +- id: decide + prompt: "Should we proceed?" + output_format: + type: object + properties: { proceed: { type: boolean } } + required: [proceed] +- id: do-thing + depends_on: [decide] + when: "$decide.output.proceed == 'true'" +``` + +### ❌ Commands that assume prior-node memory in a `context: fresh` chain + +```markdown + +Fix the bug we discussed in the investigation phase. 
+ + +Read the investigation at `$ARTIFACTS_DIR/issues/issue-{n}.md`. +Extract the root cause, affected files, and implementation plan. +Implement the changes exactly as specified in the plan. +``` + +### ❌ Long flat layers of AI nodes + +Ten sibling `prompt:` nodes in one layer all depending on one upstream is a $N/run cost bomb and a latency trap. If the work is parallel and similar, use the `agents:` inline sub-agent map-reduce pattern with a cheap model per item and a single stronger reducer. See `references/dag-advanced.md` and the [Inline sub-agents section on archon.diy](https://archon.diy/guides/authoring-workflows/#inline-sub-agents) for a worked example. + +### ❌ Hardcoding secrets in YAML or MCP configs + +Use `$ENV_VAR` expansion in MCP configs and the `env:` block in `.archon/config.yaml` (or Web UI Settings → Projects → Env Vars). See `references/repo-init.md` §Per-Project Env Injection. + +### ❌ `retry` on a loop node + +Loop nodes manage their own iteration via `max_iterations`. Setting `retry:` on a loop is a **hard parse error** — the workflow fails to load. If a loop iteration is flaky, handle it inside the loop prompt (the AI can retry tool calls) or use `until_bash` to gate completion on a deterministic check. + +### ❌ Tiny `max_iterations` on open-ended loops + +A loop with `max_iterations: 3` that's supposed to implement N stories from a PRD will silently stop after 3 iterations and leave the work half-done. Think about the worst case — multi-story PRDs need 10–20, fix-iterate cycles need 5–8, refinement loops need 3–5. + +### ❌ Missing `interactive: true` at workflow level for approval/loop gates on web + +Web UI dispatches non-interactive workflows to a background worker that cannot deliver chat messages. Approval-gate messages and loop `gate_message` prompts will never reach the user. If the workflow has `approval:` nodes OR `loop.interactive: true`, set workflow-level `interactive: true`. 
+ +### ❌ Tool-restricted nodes without the MCP wildcard + +```yaml +# BAD — no tools available, including MCP +- id: analyze + prompt: "Use the Postgres MCP to query users" + mcp: .archon/mcp/postgres.json + allowed_tools: [] # OOPS — disables EVERYTHING, including MCP tools + +# FIXED — Archon auto-adds mcp____* wildcards when mcp: is set, +# so this actually works out of the box. The anti-pattern is forgetting +# and manually adding Read/Write/Bash/etc. when you only want MCP. +- id: analyze + prompt: "Use Postgres MCP to query users" + mcp: .archon/mcp/postgres.json + allowed_tools: [] # correct — MCP tools auto-attached +``` + +Caveat: this only helps Claude. Codex gets MCP config from `~/.codex/config.toml` globally, not per-node. diff --git a/.claude/skills/archon/references/interactive-workflows.md b/.claude/skills/archon/references/interactive-workflows.md index 243cfdb7b0..856d50afd1 100644 --- a/.claude/skills/archon/references/interactive-workflows.md +++ b/.claude/skills/archon/references/interactive-workflows.md @@ -103,4 +103,4 @@ archon workflow reject "reason for rejection" - **Workflow shows `running` for a long time**: The AI is doing research/implementation. Be patient — check again in a few minutes. - **Log file not found**: The log is at `~/.archon/workspaces///logs/.jsonl` -- **User wants to cancel**: Run `archon workflow reject ` or `archon workflow cancel ` +- **User wants to cancel**: Run `archon workflow reject ` to stop at an approval gate, or `archon workflow abandon ` to mark the run cancelled without killing any subprocess. 
To actively terminate a still-live subprocess, use the chat slash command `/workflow cancel ` on the platform that started it — there is no `archon workflow cancel` CLI subcommand diff --git a/.claude/skills/archon/references/parameter-matrix.md b/.claude/skills/archon/references/parameter-matrix.md new file mode 100644 index 0000000000..2e2a4bbb15 --- /dev/null +++ b/.claude/skills/archon/references/parameter-matrix.md @@ -0,0 +1,192 @@ +# Parameter Matrix (Quick Reference) + +One-page lookup for Archon workflow parameters: which field works on which node type, how to pick the right parameter for a given intent, and the gotchas that don't fail loudly. + +This is a **lookup reference**. For the full explanation of any field, follow the cross-references at the bottom to the detailed guides. + +## Master Matrix: Parameters × Node Types + +There are seven node types. Exactly one of `command`, `prompt`, `bash`, `script`, `loop`, `approval`, or `cancel` must appear per node. + +| Parameter | command | prompt | bash | script | loop | approval | cancel | +| -------------------------------------------- | :-----: | :-----: | :-----: | :-----: | :--------------------------: | :------------: | :-----: | +| `id` | yes | yes | yes | yes | yes | yes | yes | +| `depends_on` | yes | yes | yes | yes | yes | yes | yes | +| `when` | yes | yes | yes | yes | yes | yes | yes | +| `trigger_rule` | yes | yes | yes | yes | yes | yes | yes | +| `idle_timeout` | yes | yes | ignored (use `timeout`) | ignored (use `timeout`) | yes (per-iter) | yes | yes | +| `timeout` (total, not idle) | — | — | yes | yes | — | — | — | +| `model` / `provider` | yes | yes | ignored | ignored | **ignored at runtime** | ignored | ignored | +| `context: fresh` \| `shared` | yes | yes | ignored | ignored | ignored (use `loop.fresh_context`) | ignored | ignored | +| `output_format` | yes | yes | ignored | ignored | ignored | ignored | ignored | +| `allowed_tools` / `denied_tools` | yes | yes | ignored | ignored | 
ignored | ignored | ignored | +| `hooks` | yes | yes | ignored | ignored | ignored | ignored | ignored | +| `mcp` | yes | yes | ignored | ignored | ignored | ignored | ignored | +| `skills` | yes | yes | ignored | ignored | ignored | ignored | ignored | +| `agents` | yes | yes | ignored | ignored | ignored | ignored | ignored | +| `retry` | yes | yes | yes | yes | **hard error** | yes (`on_reject`) | yes | +| `effort` / `thinking` / `fallbackModel` / `betas` / `sandbox` / `maxBudgetUsd` / `systemPrompt` | yes | yes | ignored | ignored | ignored | ignored | ignored | +| `bash` / `script` / `runtime` / `deps` | — | — | `bash` required | `script` + `runtime` required | — | — | — | +| `loop` (nested config) | — | — | — | — | **required** | — | — | +| `approval` (nested config) | — | — | — | — | — | **required** | — | +| `cancel` (reason string) | — | — | — | — | — | — | **required** | + +**Reading the matrix:** +- **yes** — field works as expected on this node type. +- **ignored** — field is accepted by the parser but has no effect at runtime. Loader emits a warning (`_node_ai_fields_ignored`). +- **hard error** — workflow fails to load. Only `retry` on a loop node does this. + +Most AI features work on `command` and `prompt` nodes. Loop nodes are thin controllers — the AI fields inside `loop.prompt` are what actually run. `bash` and `script` nodes silently ignore AI fields. `approval` and `cancel` nodes don't invoke AI at all. + +## Parameter Selection by Intent + +Organized by what you're trying to do, not by field name. Useful when you know the outcome you want but aren't sure which parameter gets you there. + +| You want to... 
| Use | +| ------------------------------------------------ | ------------------------------------------------------------ | +| Control cost per node | `model: haiku`, `maxBudgetUsd: 0.50`, `effort: low` | +| Force pure reasoning (no tools) | `allowed_tools: []` | +| Read-only analysis phase | `denied_tools: [Write, Edit, Bash]` | +| Route based on upstream output | Upstream `output_format: {...}` + downstream `when:` | +| Join after mutually-exclusive routes | `trigger_rule: none_failed_min_one_success` or `one_success` | +| Run two independent branches in parallel | Two nodes with no shared `depends_on` | +| Iterate until tests pass | `loop: {until_bash: "bun run test", max_iterations: N}` | +| Iterate through a backlog without memory bleed | `loop: {fresh_context: true}`, state written to `$ARTIFACTS_DIR` | +| Iterate with human feedback between iterations | `loop: {interactive: true, gate_message: "..."}` + workflow `interactive: true` | +| Single human approval gate | `approval:` node with `on_reject: {prompt, max_attempts}` | +| Fail fast if upstream output is wrong | `cancel:` node with `when:` | +| Enforce a rule on every file edit | `hooks.PostToolUse` with `matcher: "Write\|Edit"` | +| Deny dangerous commands | `hooks.PreToolUse` with `permissionDecision: deny` | +| Give a node domain knowledge | `skills: [skill-name]` | +| Give a node external tools | `mcp: .archon/mcp/server.json` | +| Retry flaky API calls | `retry: {max_attempts: 3, delay_ms: 2000}` | +| Run Python in a node | `script:` node with `runtime: uv`, `deps: [...]` | +| Run TypeScript in a node | `script:` node with `runtime: bun` | +| Mix providers in one workflow | Workflow-level `provider: claude`, per-node `provider: codex` | +| Use a non-default model for one node | Node-level `model:` override | +| Run on a 1M context window | `model: opus[1m]` + `betas: ['context-1m-2025-08-07']` | +| Increase per-iteration timeout on a long loop | `idle_timeout: 600000` on the loop node | +| Pass 
large artifacts between nodes | Write to `$ARTIFACTS_DIR/...`, read in downstream node | +| Pass small structured data | `output_format` + `$nodeId.output.field` access | +| Block workflow on an external condition | `bash:` polling loop or `approval:` node | +| Spawn parallel sub-tasks inside one node | Inline `agents:` map (see below) | +| Force isolation regardless of CLI flags | Workflow-level `worktree: {enabled: true}` | +| Force live checkout for read-only workflows | Workflow-level `worktree: {enabled: false}` | + +## Silent Failures (what gets ignored without erroring) + +Things that don't fail parsing but don't do what you'd expect: + +1. **`model` / `provider` on a loop node** → silently ignored. Logged as `loop_node_ai_fields_ignored`. The loop is a controller; set model at workflow level or inside the loop prompt body. +2. **`hooks` / `mcp` / `skills` / `output_format` / `allowed_tools` / `denied_tools` on a loop, bash, script, approval, or cancel node** → silently ignored. +3. **`context: fresh` on a loop** → ignored. Use `loop.fresh_context: true` instead. +4. **`output_format` on a bash or script node** → schema is accepted but bash/script output is whatever stdout says; no JSON coercion. +5. **Unknown `$nodeId.output` reference** → resolves to empty string + warning; does not fail the workflow. +6. **Invalid `when:` expression** → node silently skipped (fail-closed). +7. **`allowed_tools` / `denied_tools` on Codex nodes** → ignored. Use Codex CLI config (`~/.codex/config.toml`). +8. **`hooks` on Codex nodes** → ignored + warning logged. +9. **`mcp` or `skills` per-node on Codex** → ignored. Configure globally in `~/.codex/config.toml` or `~/.agents/skills/`. +10. **`trigger_rule: all_success` after `when:`-gated fan-out** → branches that didn't run count as "not succeeded"; the join node will never fire. Use `none_failed_min_one_success` or `one_success`. +11. 
**Node-level `interactive: true` on an approval node or loop, without workflow-level `interactive: true`** → on the Web UI, gate messages never reach the user. The workflow dispatches to a background worker that can't deliver chat messages. +12. **Missing env var in MCP config** → warning logged, node continues with empty string substitution. +13. **`retry` on a loop node** → this one is a **hard parse error** (not silent). Use the loop's own `max_iterations` and `until_bash` for finish-line detection. + +The pattern across these: if you set an AI feature on a non-AI node, it's silently ignored. Watch loader logs for `_ignored` warnings when debugging. + +## Inline `agents:` (Task-tool sub-agents) + +A node can define named sub-agents that Claude invokes via the `Task` tool. Useful for map-reduce patterns: one node spawns N parallel sub-tasks with a cheap model, then a reducer summarizes. + +```yaml +- id: analysis + prompt: | + For each area of the codebase, delegate to the appropriate sub-agent + via the Task tool. Summarize all findings into a single report. 
+ agents: + security-scanner: # kebab-case id + description: "Scan for common web vulnerabilities" + prompt: "Run OWASP top-10 style checks on the given files" + model: haiku + tools: [Read, Grep, Glob] # tool whitelist for this sub-agent + disallowedTools: [Write, Edit, Bash] + maxTurns: 5 + test-coverage-auditor: + description: "Report untested or weakly-tested surfaces" + prompt: "Identify code paths without corresponding tests" + model: haiku + tools: [Read, Grep, Glob] + skills: [test-coverage-patterns] # skill injection per sub-agent + maxTurns: 5 +``` + +**Fields per agent:** + +| Field | Required | Description | +| ------------------ | :------: | --------------------------------------------------------- | +| `description` | yes | Shown when Claude decides which agent to delegate to | +| `prompt` | yes | System prompt the sub-agent runs under | +| `model` | no | Per-agent model override | +| `tools` | no | Tool whitelist for the sub-agent | +| `disallowedTools` | no | Tool blacklist | +| `skills` | no | Skills to inject into the sub-agent | +| `maxTurns` | no | Max conversation turns for the sub-agent | + +**Naming rule:** lowercase kebab-case. No leading or trailing hyphens, no double hyphens, no digits-only ids. + +**When to use `agents:` vs fan-out at the workflow level:** +- Use `agents:` when the number of sub-tasks is dynamic or decided by the orchestrator node at runtime. +- Use workflow-level fan-out (parallel nodes with `depends_on: [setup]`) when the sub-tasks are known ahead of time and each needs its own artifact. + +See [archon.diy/guides/authoring-workflows/#inline-sub-agents](https://archon.diy/guides/authoring-workflows/#inline-sub-agents) for a worked end-to-end example. + +## Cross-References to Detailed Guides + +Use this matrix to find the right parameter. Use these references for the full explanation of how it works. 
+ +| Topic | Detailed reference | +| ------------------------------------------------ | ----------------------------------------------------------------------- | +| Workflow authoring overview, node base fields | `workflow-dag.md` | +| Loop nodes in depth (completion, session patterns) | `workflow-dag.md` § Loop Nodes | +| Approval / cancel nodes | `workflow-dag.md` § Approval Nodes, § Cancel Nodes | +| Hooks (events, matchers, response shapes) | `dag-advanced.md` § Hooks | +| MCP (transports, env expansion, wildcards) | `dag-advanced.md` § MCP | +| Skills (injection, discovery, combining with MCP) | `dag-advanced.md` § Skills | +| Retry classification (FATAL / TRANSIENT / UNKNOWN) | `dag-advanced.md` § Retry Configuration | +| Variable reference (`$ARGUMENTS`, `$ARTIFACTS_DIR`, etc) | `variables.md` | +| CLI flags and commands | `cli-commands.md` | +| Command file authoring | `authoring-commands.md` | +| Repo initialization, `.archon/config.yaml` schema | `repo-init.md` | +| Good practices and anti-patterns | `good-practices.md` | +| Interactive workflow relay protocol | `interactive-workflows.md` | +| Debugging and log locations | `troubleshooting.md` | +| Full schema reference | [archon.diy/reference/configuration/](https://archon.diy/reference/configuration/) | + +## Providers at a Glance + +| Feature | Claude | Codex | Pi (community) | +| ------------------------------- | :-----------: | :-------------------------------------: | :----------------------------------: | +| `command` / `prompt` / `loop` | yes | yes | yes | +| `bash` / `script` | yes | yes | yes | +| `output_format` | reliable | reliable | best-effort | +| `allowed_tools` / `denied_tools` | yes | ignored (use Codex CLI config) | ignored | +| `hooks` | yes | **ignored + warn** | not available | +| `mcp` (per-node) | yes | global `~/.codex/config.toml` only | not available | +| `skills` (per-node) | yes | global `~/.agents/skills/` only | not available | +| Model naming | `haiku`, `sonnet`, `opus`, 
`opus[1m]` | Codex model ID (e.g. `gpt-5.2`) | `<provider>/<model>` (e.g. `anthropic/claude-opus-4-5`, `openai/gpt-4o`, `groq/llama-3-70b`) |
+| `effort` / `thinking` | yes | use `modelReasoningEffort` for reasoning models | via `effort:` (maps to thinking level) |
+| Session resume / `--resume` | yes | yes | yes |
+
+Mixing providers in one workflow: set workflow-level `provider: claude`, then override per-node with `provider: codex` or `provider: pi`. Cross-provider `$nodeId.output` substitution works as expected.
+
+## Ten Principles for Safe Workflow Design
+
+1. Always use `--branch <name>` (or `worktree: {enabled: true}`) for workflows that modify the codebase.
+2. Validate before running: `archon validate workflows <name>`.
+3. Tier your models. Haiku for routing and glue; Sonnet for reasoning and review; Opus only where the context is deep.
+4. Use `output_format` for every node whose output downstream `when:` reads. Never pattern-match free-form AI text.
+5. On Ralph-style loops, use `loop.fresh_context: true` and treat `$ARTIFACTS_DIR` as the source of truth. Command bodies should re-read state at the top of every iteration.
+6. Use interactive loops for iterative refinement with the human. Use `approval:` nodes for single-point checkpoints.
+7. Read-only analysis phases use `denied_tools: [Write, Edit, Bash]`. Separation of concerns.
+8. Use `hooks.PostToolUse` to enforce post-change validation (type-check, lint). Tighter feedback loop than end-of-workflow review.
+9. Large artifacts go through `$ARTIFACTS_DIR`. Small structured data goes through `$nodeId.output.field`.
+10. AI can scaffold a workflow. Only a human can verify it. Read the YAML before running.
diff --git a/.claude/skills/archon/references/repo-init.md b/.claude/skills/archon/references/repo-init.md index 66be6375f5..e44907fd2e 100644 --- a/.claude/skills/archon/references/repo-init.md +++ b/.claude/skills/archon/references/repo-init.md @@ -10,14 +10,27 @@ Create the following in your repository root: .archon/ ├── commands/ # Custom command files (.md) ├── workflows/ # Workflow definitions (.yaml) +├── scripts/ # Named scripts for script: nodes (.ts/.js for bun, .py for uv) — optional ├── mcp/ # MCP server config files (.json) — optional -└── config.yaml # Repo-specific configuration — optional +├── state/ # Cross-run workflow state — gitignored, never committed +├── config.yaml # Repo-specific configuration — optional +└── .env # Repo-scoped Archon env (optional; do NOT commit) ``` ```bash -mkdir -p .archon/commands .archon/workflows +mkdir -p .archon/commands .archon/workflows .archon/scripts ``` +**What each directory is for:** + +- `commands/` — Reusable prompt templates used by `command:` workflow nodes. Committed to git. +- `workflows/` — YAML workflow definitions. Committed to git. +- `scripts/` — Named TypeScript/JavaScript (bun) or Python (uv) scripts referenced by `script:` nodes. Extension determines runtime: `.ts`/`.js` → bun, `.py` → uv. Committed to git. +- `mcp/` — MCP server JSON configs. Usually checked in with `$ENV_VAR` references; avoid hardcoding secrets. Some teams gitignore this and rely entirely on env expansion. +- `state/` — Workflow-written cross-run state (e.g. the `repo-triage` dedup log). **Always gitignore** — these are runtime artifacts, not source. +- `config.yaml` — Repo-specific defaults (assistant, worktree settings, etc.). Committed to git. +- `.env` — Repo-scoped Archon env (loaded with `override: true` at boot). 
**Do NOT commit.** This is different from the target repo's top-level `.env` — that file belongs to the target project, and Archon strips its auto-loaded keys from subprocess env before spawning AI to prevent leakage. See **Three-Path Env Model** below. + ## Minimal config.yaml Create `.archon/config.yaml` only if you need to override defaults: @@ -52,11 +65,59 @@ Archon ships with built-in commands and workflows (like `archon-assist`, `archon Add to your `.gitignore`: ```gitignore -# Archon runtime artifacts (never commit) -.archon/mcp/ # May contain env var references +# Archon runtime artifacts — NEVER commit +.archon/state/ # Cross-run workflow state, runtime-only +.archon/.env # Repo-scoped Archon env (secrets) + +# Optional — gitignore if your MCP configs hardcode secrets +.archon/mcp/ +``` + +`.archon/commands/`, `.archon/workflows/`, and `.archon/scripts/` **should be committed** — they are part of your project's workflow definitions. `.archon/config.yaml` should be committed unless it contains secrets (use `.archon/.env` for those instead). + +## Three-Path Env Model + +Archon loads env from three distinct paths at boot, with different trust levels and precedence: + +| Path | Scope | Trust | Loaded? 
|
+|------|-------|-------|---------|
+| `~/.archon/.env` | User (home) | Trusted — user owns it | Yes, with `override: true` |
+| `<repo>/.archon/.env` | Repo (per-project, Archon-owned) | Trusted — user owns it | Yes, with `override: true` (overrides home) |
+| `<repo>/.env` | Target repo | **Untrusted** — belongs to the project being worked on | **Stripped from `process.env`** before subprocess spawn to prevent secret leakage (see [archon.diy/reference/security/](https://archon.diy/reference/security/#target-repo-env-isolation) for the full trust model) |
+
+Boot behavior emits observable log lines:
+
+```
+[archon] loaded N keys from ~/.archon/.env
+[archon] loaded M keys from /path/to/repo/.archon/.env
+[archon] stripped K keys from /path/to/repo (ANTHROPIC_API_KEY, OPENAI_API_KEY, ...)
```
-The `.archon/commands/` and `.archon/workflows/` directories should be committed — they are part of your project's workflow definitions.
+**Where should you put what?**
+
+- **API keys for Archon itself** (`ANTHROPIC_API_KEY`, `CLAUDE_CODE_OAUTH_TOKEN`, `DATABASE_URL`, `SLACK_BOT_TOKEN`, etc.) → `~/.archon/.env` (shared across all repos) or `<repo>/.archon/.env` (per-repo override).
+- **Target-project env that a workflow needs** (`GH_TOKEN`, `DOTENV_PRIVATE_KEY`, etc.) → see [Per-Project Env Injection](#per-project-env-injection) below.
+- **Target-project env that Archon should NOT touch** → leave it in `<repo>/.env` where the project already expects it. Archon strips it from subprocess env but doesn't delete the file.
+
+The `archon setup --scope home|project [--force]` wizard writes to the right file for you and produces a timestamped backup on every rewrite.
+ +## Per-Project Env Injection + +For env vars a workflow's `bash:` and `script:` subprocesses need (`GH_TOKEN` for `gh` calls, `DATABASE_URL` for a migration script, etc.), use one of the two **managed injection** surfaces — both inject into subprocess env at workflow execution time, after the target-repo `.env` strip: + +**Option 1: `.archon/config.yaml` `env:` block** (checked into git; values can be `$REF_NAME` expansions from Archon env): + +```yaml +env: + GH_TOKEN: $GH_TOKEN # expanded from ~/.archon/.env at runtime + BUILD_TARGET: production # literal value +``` + +**Option 2: Web UI Settings → Projects → Env Vars** — per-codebase, stored in the Archon DB, values never returned over the API (only keys are listed). Use this for values that should NOT appear in git. + +Both surfaces inject into: Claude/Codex/Pi subprocess env, `bash:` node subprocess env, `script:` node subprocess env, and direct chat messages that run against the codebase. The worktree isolation layer propagates them as well. + +> **About keys in the target repo's `/.env`**: Archon unconditionally strips the keys auto-loaded from `/.env` out of `process.env` at boot (see the Three-Path Env Model above) and the Bun subprocess is invoked with `--no-env-file`, so those values do NOT reach AI / bash / script subprocesses. If a workflow needs a value that currently lives in the target repo's `.env`, surface it through one of the two managed injection options above — don't expect the target `.env` to leak through. ## Global Configuration diff --git a/.claude/skills/archon/references/troubleshooting.md b/.claude/skills/archon/references/troubleshooting.md new file mode 100644 index 0000000000..099cccd928 --- /dev/null +++ b/.claude/skills/archon/references/troubleshooting.md @@ -0,0 +1,162 @@ +# Troubleshooting Workflows + +Where to look when a workflow fails, hangs, or does the wrong thing. 
+
+## Log Locations
+
+Workflow run logs are written as JSONL per run:
+
+```
+~/.archon/workspaces/<project>/<workflow>/logs/<run-id>.jsonl
+```
+
+Each line is a structured event. The discriminator is the `type` field. Values (see `packages/workflows/src/logger.ts` for the canonical list):
+
+| `type` | Meaning |
+|--------|---------|
+| `workflow_start` / `workflow_complete` / `workflow_error` | Run lifecycle |
+| `node_start` / `node_complete` / `node_error` / `node_skipped` | Node lifecycle |
+| `assistant` | AI assistant message — has `content` field with the full AI output |
+| `tool` | SDK tool invocation — has `tool_name`, `tool_input`, `duration_ms`, and optionally `tokens` |
+| `validation` | Workflow-level validation event — has `check` and `result` (`pass` / `fail` / `warn` / `unknown`) |
+
+> **Loop iterations and per-attempt retry events are NOT in the JSONL file.** They go through the workflow event emitter (WebSocket / `workflow_events` DB table) under `loop_iteration_started` / `loop_iteration_completed` etc. To see them, query the DB or the Web UI dashboard — not the JSONL log.
+
+Find the run ID from `archon workflow status` (most recent run). Then:
+
+```bash
+# Last assistant message (what the AI said before failure)
+jq 'select(.type == "assistant") | .content' <run-log>.jsonl | tail -1
+
+# All error events (node failures + workflow-level failures)
+jq 'select(.type == "node_error" or .type == "workflow_error")' <run-log>.jsonl
+
+# Full event stream
+cat <run-log>.jsonl | jq .
+```
+
+Adapter logs (Slack / Telegram / Web / GitHub) are emitted to stderr when `LOG_LEVEL=debug` is set on the server.
+
+## Artifact Locations
+
+```
+~/.archon/workspaces/<project>/<workflow>/artifacts/runs/<run-id>/
+```
+
+Inspect artifacts when a multi-node workflow produces wrong output. The failing node's upstream artifact is usually where the problem originated.
+
+```bash
+ls ~/.archon/workspaces/<project>/<workflow>/artifacts/runs/<run-id>/
+cat ~/.archon/workspaces/<project>/<workflow>/artifacts/runs/<run-id>/issues/issue-42.md
+```
+
+Artifacts are **external** to the repo on purpose — they don't pollute git.
+
+## Common Failure Modes
+
+### "No base branch could be resolved"
+
+A node references `$BASE_BRANCH` in its prompt, but neither git auto-detection nor `worktree.baseBranch` in `.archon/config.yaml` produced a branch.
+
+**Fix:**
+1. Set `worktree.baseBranch: main` (or `dev`, or whatever) in `.archon/config.yaml`.
+2. Or pass `--from <branch>` on `archon workflow run`.
+3. Or remove the `$BASE_BRANCH` reference if the node doesn't actually need it.
+
+### "Claude Code not found" / "Codex CLI binary not found"
+
+Compiled-binary builds of Archon no longer embed Claude Code / Codex — you install them separately and Archon resolves the binary via env var or config.
+
+**Fix (Claude):**
+- Install: `curl -fsSL https://claude.ai/install.sh | bash` (or `npm install -g @anthropic-ai/claude-code`)
+- Set `CLAUDE_BIN_PATH=/path/to/claude` in `~/.archon/.env`, OR
+- Set `assistants.claude.claudeBinaryPath: /absolute/path` in `.archon/config.yaml`
+- Autodetect covers `$HOME/.local/bin/claude` (native installer) — no config needed if you used that path
+
+**Fix (Codex):**
+- Install: `npm install -g @openai/codex` (or platform-specific instructions)
+- Set `CODEX_CLI_PATH=/path/to/codex` or `assistants.codex.codexBinaryPath` in config
+- Autodetect covers the standard npm / Homebrew locations per platform
+
+See [archon.diy/getting-started/installation/](https://archon.diy/getting-started/installation/) for full platform-specific install paths.
+
+### Workflow shows `running` for a long time but nothing happens
+
+Three possibilities:
+
+1. **The AI is actually working.** Check `~/.archon/workspaces/<project>/<workflow>/logs/<run-id>.jsonl` — if you see recent `tool` or `assistant` events in the tail, it's fine. Wait.
+2. 
**The server crashed and left an orphan row.** Server startup no longer auto-fails orphaned `running` rows (per the "No Autonomous Lifecycle Mutation" rule — `CLAUDE.md`). Transition it manually:
+   - Web UI: Dashboard → Abandon or Cancel button on the run card
+   - CLI: `archon workflow abandon <run-id>` — marks the DB row cancelled without killing any subprocess. Right tool for orphans since the subprocess is already gone
+   - Chat (Slack / Telegram / Web): `/workflow cancel <run-id>` — actively terminates the subprocess. Use for a still-live run that needs to be interrupted (there is no `archon workflow cancel` CLI subcommand)
+3. **A node is past its `idle_timeout`.** The default is 5 minutes. Override with per-node `idle_timeout: 600000` (10 min) for long-running nodes.
+
+### Workflow fails mid-way; how do I resume?
+
+Auto-resume is default — just re-invoke the same workflow at the same cwd:
+
+```bash
+archon workflow run my-workflow "original message"
+# → "Resuming workflow — skipping N already-completed node(s)"
+```
+
+Use `--resume` only when you want to force-reuse the same worktree from a specific failed run. Use `archon workflow resume <run-id>` to force a specific run ID.
+
+**Caveat:** AI session context from prior nodes is NOT restored on resume. If a `context: shared` node depended on in-session memory, re-running it will have fresh context. Artifact-based handoff survives; in-context memory does not.
+
+### Approval gate not appearing on web UI
+
+You set `interactive: true` on the approval node but the workflow still runs in the background and no chat message appears.
+
+**Fix:** Set `interactive: true` at the **workflow level** too. Node-level `interactive` is ignored on web without workflow-level `interactive`. See `references/workflow-dag.md` §Approval Nodes and §Interactive Loops.
+
+### `MCP server connection failed: <name>` noise in chat
+
+User-level Claude plugin MCPs (e.g. `telegram`, `notion`) inherited from `~/.claude/` fail to connect in the headless subprocess. 
This is normal — they're not configured for Archon's worktree context. Archon filters these to debug logs (`dag.mcp_plugin_connection_suppressed`) and surfaces only workflow-configured MCP failures.
+
+If you see a failure for an MCP you DID configure via `mcp:` in the workflow: check the config JSON path, the MCP server's `command`/`args`, and any referenced env vars.
+
+### Node output is empty / `$nodeId.output.field` resolves to empty string
+
+Common causes:
+
+1. Upstream node is an AI node without `output_format` — the output is free-form text, JSON parsing fails, field access returns empty.
+2. Upstream node was **skipped** (its `when:` evaluated false). Downstream `when:` with `==` comparisons against a specific value will fail-closed.
+3. Bash/script node printed to stderr, not stdout. Only stdout is captured.
+4. For script nodes, non-zero exit on a non-existent file / missing import silently drops the output. Check the run log for `node_error` entries.
+
+## Useful Diagnostic Commands
+
+```bash
+# All active runs as JSON (running / paused / recently finished, depending on retention)
+archon workflow status --json | jq '.runs[]'
+
+# Human-readable status of any active runs
+archon workflow status
+
+# Active worktrees and their last activity
+archon isolation list
+
+# Validate a specific workflow before running
+archon validate workflows my-workflow
+
+# Validate a specific command
+archon validate commands my-command
+
+# Dump the last 50 lines of a workflow's log
+tail -n 50 ~/.archon/workspaces/<project>/<workflow>/logs/<run-id>.jsonl | jq .
+
+# Increase log verbosity (workflow run)
+archon workflow run my-workflow --verbose "..."
+
+# Increase server log verbosity
+LOG_LEVEL=debug bun run start
+```
+
+## Escalation: when nothing makes sense
+
+1. Run `archon version` and note the version.
+2. Run `archon validate workflows <name>` and capture the output.
+3. Grab the last ~50 lines of the run's JSONL log.
+4. 
Check the `CHANGELOG.md` for known issues / recent changes to the subsystem you're hitting. +5. File an issue at https://github.com/coleam00/Archon/issues with version, validate output, log tail, and the YAML. diff --git a/.claude/skills/archon/references/workflow-dag.md b/.claude/skills/archon/references/workflow-dag.md index aacf5aeca5..9ec01b6c7e 100644 --- a/.claude/skills/archon/references/workflow-dag.md +++ b/.claude/skills/archon/references/workflow-dag.md @@ -20,7 +20,89 @@ nodes: depends_on: [other-node] # Node IDs that must complete first ``` -## Four Node Types (Mutually Exclusive) +## Workflow-Level Fields + +Top-level YAML fields on a workflow object. Per-node overrides (same name under a node) win over workflow-level defaults. + +### Core + +| Field | Type | Description | +|-------|------|-------------| +| `name` | string (required) | Workflow identifier (used in `archon workflow run `) | +| `description` | string (required) | Human-readable summary. Used for routing; see [Workflow Description Best Practices](https://archon.diy/guides/authoring-workflows/#workflow-description-best-practices) | +| `provider` | string | AI provider (e.g. `claude`, `codex`, `pi`). Default: from `.archon/config.yaml` | +| `model` | string | Model override. Claude: `sonnet` \| `opus` \| `haiku` \| `claude-*` \| `inherit`. Codex: any non-Claude model ID | +| `interactive` | boolean | **Required for web UI** when the workflow has approval gates or `loop.interactive` nodes. Forces foreground execution so gate messages reach the user's chat. Default: `false` (background on web) | + +### Isolation + +| Field | Type | Description | +|-------|------|-------------| +| `worktree.enabled` | boolean | Pin isolation regardless of caller. `false` = always live checkout (CLI `--branch`/`--from` hard-error). `true` = always worktree (CLI `--no-worktree` hard-errors). Omit = caller decides. 
Use `false` for read-only workflows (triage, reporting) | + +Other worktree config (`baseBranch`, `copyFiles`, `initSubmodules`, `path`) lives in `.archon/config.yaml`, not the workflow YAML — see `references/repo-init.md`. + +### Claude SDK Advanced Options + +These fields apply to Claude nodes workflow-wide; each can be overridden per-node. Codex nodes ignore them with a warning. + +| Field | Type | Description | +|-------|------|-------------| +| `effort` | `'low'` \| `'medium'` \| `'high'` \| `'max'` | Claude Agent SDK reasoning depth. Different from Codex `modelReasoningEffort` below | +| `thinking` | string \| object | Extended thinking. String shorthand: `'adaptive'` \| `'enabled'` \| `'disabled'`. Object form: `{ type: 'enabled', budgetTokens: 8000 }` | +| `fallbackModel` | string | Model to use if the primary model fails (e.g. `claude-haiku-4-5-20251001`) | +| `betas` | string[] | SDK beta feature flags (non-empty array). Example: `['context-1m-2025-08-07']` for 1M-context Claude | +| `sandbox` | object | OS-level filesystem/network restrictions. Nested `network` / `filesystem` sub-objects — see [archon.diy/guides/authoring-workflows/#claude-sdk-advanced-options](https://archon.diy/guides/authoring-workflows/#claude-sdk-advanced-options) for the full schema. Layers on top of worktree isolation | + +Per-node-only (NOT valid at workflow level): `maxBudgetUsd`, `systemPrompt`. + +### Codex-Specific Options + +| Field | Type | Description | +|-------|------|-------------| +| `modelReasoningEffort` | `'minimal'` \| `'low'` \| `'medium'` \| `'high'` \| `'xhigh'` | Codex reasoning depth. Separate field from Claude's `effort` | +| `webSearchMode` | `'disabled'` \| `'cached'` \| `'live'` | Codex web search behavior. 
Default: `disabled` | +| `additionalDirectories` | string[] | Absolute paths Codex can read outside the codebase (shared libraries, docs repos) | + +### Complete workflow-level example + +```yaml +name: careful-migration +description: | + Plan a migration, get explicit approval, then implement under strict + sandbox and cost limits. Used by the ops team before destructive work. +provider: claude +model: sonnet +interactive: true # required — this workflow has an approval gate + +worktree: + enabled: true # always isolate; reject --no-worktree + +effort: high +thinking: adaptive +fallbackModel: claude-haiku-4-5-20251001 +betas: ['context-1m-2025-08-07'] +sandbox: + enabled: true + network: + allowedDomains: ['api.github.com'] + allowManagedDomainsOnly: true + filesystem: + denyWrite: ['/etc', '/usr'] + +nodes: + - id: plan + command: plan-migration + - id: review + approval: + message: "Review the migration plan above." + depends_on: [plan] + - id: implement + command: implement-migration + depends_on: [review] +``` + +## Node Types (Mutually Exclusive) Each node must have exactly ONE of these fields: @@ -129,14 +211,53 @@ nodes: ## Conditions (`when:`) +Gate whether a node runs based on upstream output. A condition that evaluates to `false` skips the node (fail-closed — skipped nodes propagate their skipped state to dependants). 
+ +### Operators + +**String comparison** (literal string equality): ```yaml -- id: investigate - command: investigate-bug - depends_on: [classify] - when: "$classify.output.issue_type == 'bug'" +when: "$nodeId.output == 'VALUE'" +when: "$nodeId.output != 'VALUE'" +when: "$nodeId.output.field == 'VALUE'" # JSON dot notation (requires output_format) +``` + +**Numeric comparison** (both sides auto-parsed as numbers; fail-closed if either side is not finite): +```yaml +when: "$score.output > '80'" +when: "$score.output >= '0.9'" +when: "$score.output < '100'" +when: "$score.output <= '5'" +when: "$score.output.confidence >= '0.9'" ``` -**Syntax**: `$nodeId.output OPERATOR 'value'` — operators: `==`, `!=` only. Values single-quoted. Invalid expressions skip the node (fail-closed). +All six operators — `==`, `!=`, `<`, `>`, `<=`, `>=` — are supported. Values are single-quoted strings (even for numeric comparisons). + +### Compound Expressions + +Combine conditions with `&&` (AND) and `||` (OR). **`&&` binds tighter than `||`.** No parentheses supported — structure expressions with that precedence in mind. + +```yaml +when: "$a.output == 'X' && $b.output != 'Y'" +when: "$a.output == 'X' || $b.output == 'Y'" +when: "$score.output > '80' && $flag.output == 'true'" + +# Precedence: (A && B) || C +when: "$a.output == 'X' && $b.output == 'Y' || $c.output == 'Z'" +``` + +Short-circuit evaluation: `&&` stops at the first false, `||` stops at the first true. + +### Dot Notation (JSON Field Access) + +`$nodeId.output.field` parses the upstream output as JSON and extracts the named field. Returns empty string if parsing fails or the field is absent — which then fails-closed against any literal value. Requires the upstream node to have `output_format` set (for AI nodes) or to print valid JSON (for bash/script nodes). 
+ +### Fail-Closed Rules + +- Invalid or unparseable expression → node skipped, warning logged +- Numeric operator with a non-numeric side → node skipped +- `$nodeId.output.field` on non-JSON output → field is empty → comparison fails +- Referenced node did not run (skipped upstream) → substitution is empty → comparison fails ## Node Output Substitution @@ -211,15 +332,53 @@ Loop nodes iterate an AI prompt until a completion condition is met. Use them fo max_iterations: 10 # Required. Integer >= 1. Fails if exceeded fresh_context: true # Optional. Default: false until_bash: "..." # Optional. Exit 0 = complete + interactive: true # Optional. Pauses between iterations for user input + gate_message: "..." # Required when interactive: true ``` | Field | Type | Required | Description | |-------|------|----------|-------------| -| `prompt` | string | Yes | Prompt template. Supports all variable substitution (`$ARGUMENTS`, `$nodeId.output`, etc.) | +| `prompt` | string | Yes | Prompt template. Supports all variable substitution (`$ARGUMENTS`, `$nodeId.output`, `$LOOP_USER_INPUT`, etc.) | | `until` | string | Yes | Completion signal to detect in AI output | | `max_iterations` | number | Yes | Hard limit. Node **fails** if exceeded | | `fresh_context` | boolean | No | Default `false`. `true` = fresh AI session each iteration | -| `until_bash` | string | No | Shell script run after each iteration. Exit 0 = complete | +| `until_bash` | string | No | Shell script run after each iteration. Exit 0 = complete. Variable substitution applies; `$nodeId.output` IS shell-quoted here | +| `interactive` | boolean | No | Default `false`. `true` = pause after each non-completing iteration for user feedback via `/workflow approve ` | +| `gate_message` | string | **Required when `interactive: true`** | Message shown to the user at each pause. 
Validated at parse time — a loop with `interactive: true` and no `gate_message` fails to load | + +### Interactive Loops + +Interactive loops pause between iterations so a human can provide feedback that feeds the next iteration. Use them for guided writing/refinement (e.g. PRD co-authoring, iterative design). + +```yaml +name: guided-refine +description: Refine an output with human feedback between iterations +interactive: true # REQUIRED at the workflow level for web UI + +nodes: + - id: refine + loop: + prompt: | + Review the current draft and improve it based on this feedback: + $LOOP_USER_INPUT + + When the output is satisfactory, output: DONE + until: DONE + max_iterations: 5 + interactive: true # node level — enables the pause + gate_message: | + Review the output above. Reply with feedback, or type DONE to finish. +``` + +The flow: +1. Iteration N runs. AI produces output. +2. If AI signalled completion (`DONE`) or `until_bash` exited 0, loop ends. +3. Otherwise: `gate_message` is sent to the user, workflow pauses (status = `paused`). +4. User runs `archon workflow approve ""` (or replies naturally in chat platforms). +5. Iteration N+1 runs with `$LOOP_USER_INPUT` substituted to the user's feedback — but **only on that first resumed iteration**. Subsequent iterations in the same resumed session see `$LOOP_USER_INPUT` as empty string. +6. Repeat. + +**Workflow-level `interactive: true` is required** for the gate message to reach the user on the web UI (otherwise the workflow dispatches to a background worker that can't deliver chat messages). The loader emits a warning if a node has `interactive: true` without workflow-level `interactive: true`. ### Completion Detection @@ -279,6 +438,148 @@ First iteration is always fresh regardless. --- +## Approval Nodes + +Approval nodes **pause the workflow** until a human approves or rejects the gate. 
Use them to insert review steps between AI-driven nodes — for example, reviewing a generated plan before committing to expensive implementation work. + +### Configuration + +```yaml +- id: review-gate + approval: + message: "Review the plan above before proceeding with implementation." + capture_response: false # Optional. true = user's comment stored as $review-gate.output + on_reject: # Optional. AI rework on rejection instead of cancel + prompt: "Revise based on feedback: $REJECTION_REASON" + max_attempts: 3 # Range 1–10, default 3. After max, workflow is cancelled. + depends_on: [plan] +``` + +### Fields + +| Field | Required | Description | +|-------|----------|-------------| +| `approval.message` | **Yes** | The message shown to the user when the workflow pauses | +| `approval.capture_response` | No | `true` = user's approval comment stored as `$.output` for downstream nodes. Default: `false` (downstream `$.output` is empty string) | +| `approval.on_reject.prompt` | No | Prompt run via AI when the user rejects. `$REJECTION_REASON` is substituted with the reject reason. After running, the workflow re-pauses at the same gate | +| `approval.on_reject.max_attempts` | No | Max times the on_reject prompt runs before the workflow is cancelled. Range: 1–10. Default: 3 | + +### Web UI Requirement + +Approval gates delivered on the Web UI require `interactive: true` at the **workflow level** — otherwise the workflow dispatches to a background worker and the gate message never reaches the user's chat window. + +```yaml +name: plan-approve-implement +interactive: true # REQUIRED for approval gates on web UI +nodes: + - id: plan + command: plan-feature + - id: review-gate + approval: + message: "Approve the plan to proceed." 
+ depends_on: [plan] + - id: implement + command: implement + depends_on: [review-gate] +``` + +### Approve and Reject Commands + +```bash +# From the CLI +archon workflow approve +archon workflow approve --comment "looks good" +archon workflow reject +archon workflow reject --reason "plan needs more test coverage" + +# Cross-platform (Slack / Telegram / Web / GitHub chat) +/workflow approve +/workflow reject + +# Natural language (all platforms except CLI — auto-detects paused workflow) +User: "Looks good, proceed" +# → auto-approves. With capture_response: true, the message becomes $review-gate.output +``` + +### What Does NOT Work on Approval Nodes + +AI-specific fields (`model`, `provider`, `hooks`, `mcp`, `skills`, `output_format`, `allowed_tools`, `denied_tools`, `context`, `effort`, `thinking`, etc.) are accepted by the parser but emit a loader warning and are ignored — no AI runs during the pause. (Note: `on_reject.prompt` DOES run AI, using the workflow's default provider/model.) + +`retry`, `when`, `trigger_rule`, `depends_on`, `idle_timeout` all work. + +--- + +## Cancel Nodes + +Cancel nodes **terminate the workflow run** with a reason string. Useful for guarded exits — a `cancel:` node with a `when:` condition stops the workflow cleanly when preconditions aren't met. + +### Configuration + +```yaml +- id: gate-branch + cancel: "Refusing to run on main — this workflow modifies files." 
+ when: "$check-branch.output == 'main'" + depends_on: [check-branch] +``` + +When a cancel node runs, Archon: +- Marks the workflow run as `cancelled` (not `failed`) +- Stops in-flight parallel nodes via the existing cancellation plumbing +- Records the reason string in the run's metadata +- Emits a `node_completed` event for the cancel node itself + +### Fields + +| Field | Required | Description | +|-------|----------|-------------| +| `cancel` | **Yes** | Non-empty reason string shown to the user and recorded in metadata | + +Standard DAG fields (`id`, `depends_on`, `when`, `trigger_rule`, `idle_timeout`) all work. AI-specific fields emit a loader warning and are ignored — cancel nodes don't invoke AI. + +### When to use `cancel` vs failing a `bash:` check + +- **Use `cancel:`** when the precondition failure is **expected** (e.g., wrong branch, required file missing, feature flag disabled). The run shows as `cancelled`, which doesn't trigger the DAG auto-resume path. +- **Use a `bash:` node that exits non-zero** when the check itself fails (e.g., network error, tool missing). The run shows as `failed`, which auto-resumes on the next invocation. + +### Typical Patterns + +**Gate on upstream classification:** +```yaml +- id: classify + prompt: "Is the input safe to proceed? Output 'SAFE' or 'UNSAFE'." + allowed_tools: [] + +- id: stop-if-unsafe + cancel: "Refusing to proceed: input flagged UNSAFE by classifier." + depends_on: [classify] + when: "$classify.output != 'SAFE'" + +- id: do-work + command: the-work + depends_on: [classify] + when: "$classify.output == 'SAFE'" +``` + +**Stop before expensive step unless precondition met:** +```yaml +- id: check-budget + bash: | + spent=$(gh api /meta --jq '.rate.used // 0') + echo "$spent" + +- id: abort-if-over + cancel: "Aborting — GH API quota exhausted." 
+ depends_on: [check-budget] + when: "$check-budget.output > '4500'" + +- id: run-api-heavy-work + command: heavy-work + depends_on: [check-budget] + when: "$check-budget.output <= '4500'" +``` + +--- + ## Validate Before Finishing Before declaring a workflow complete, validate it: @@ -304,6 +605,9 @@ Use `--json` for machine-readable output. Use `archon validate commands ` - `$nodeId.output` refs in `when:`, `prompt:`, `loop.prompt:` must point to known IDs - Exactly one of `command`, `prompt`, `bash`, `loop` per node - `retry` on loop node = hard error +- `approval.message` required and non-empty +- `cancel` reason required and non-empty +- Approval `on_reject.max_attempts` must be 1–10 if set - `steps:` format rejected (deprecated — use `nodes:` only) ## Complete Example diff --git a/CHANGELOG.md b/CHANGELOG.md index eee59c21ea..0ebf973fe0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,8 +7,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Changed + +- **Setup wizard simplified to AI + skippable adapters flow** (cherry-picked from upstream `5e61faf0`). The interactive `archon setup` no longer prompts for `Which database do you want to use?` (SQLite is now implicit; PostgreSQL still works — set `DATABASE_URL` in `.env` instead) and no longer prompts for Discord (the Discord adapter still ships and runs at runtime when `DISCORD_BOT_TOKEN` is set in `.env`; only the wizard step is gone). Users on existing `.env` files keep their database/Discord configuration unchanged. The wizard flow is now: AI provider → optional Telegram/Slack/GitHub adapters → confirm. New users wanting Postgres or Discord configure those manually. + ### Fixed +- **Cherry-pick batch 6 from upstream — Tier 5 setup overhaul + skill docs (3 commits).** The deferred `5e61faf0` from PR #4 is now picked, along with two prerequisite docs commits that ship the skill files `5e61faf0`'s expanded `bundled-skill.ts` references. 
+ - `2c154396` — Skill docs hardening: fixes inaccuracies, fills workflow/CLI/env gaps, adds `references/good-practices.md` and `references/troubleshooting.md`. Also expands `references/workflow-dag.md` with a Workflow-Level Fields section and updates `book/dag-workflows.md` + `book/quick-reference.md` to document seven node types (was four) (#1363). + - `91226735` — Adds `references/parameter-matrix.md` quick-lookup reference and registers it in the SKILL.md routing table. + - `5e61faf0` — Setup wizard overhaul, new `archon doctor` command, and complete bundled skill (#1494, #1566). Three concrete improvements: + 1. **`archon doctor` command** — a green/red checklist for Claude binary, `gh auth`, database, workspace writability, bundled defaults, and adapter token pings (best-effort). Returns exit 0 if all checks pass, exit 1 if any fail. Wired into `cli.ts` as `noGitCommands` (no repo required) and registered alongside peer commands like `setup`, `serve`, `version`. + 2. **`bundled-skill.ts` now embeds 21 skill files** (was 18 — adds `good-practices.md`, `parameter-matrix.md`, `troubleshooting.md` from the prerequisite picks above). New `scripts/check-bundled-skill.ts` CI guard fails when `bundled-skill.ts` drifts from the source files in `.claude/skills/archon/`. Wired into `bun run validate` as `check:bundled-skill`. + 3. **Setup wizard overhaul** — drops the database prompt (SQLite implicit), drops Discord (still runtime-supported, just not in the wizard), validates the Claude binary via a spawn test (returns `{ok, reason}` so the warning shows the actual spawn error: ENOENT, timeout, permissions), probes `gh auth status` and optionally runs `gh auth login` (interactive OAuth flow gated to TTY), adds a Telegram security note + empty-allowlist warning, and offers to run `archon doctor` at the end of setup. 
Tightens production correctness: `bootstrapProjectConfig` uses `writeFileSync` flag `'wx'` to eliminate the TOCTOU window between `existsSync` and the write; `gh auth login` now checks `.status !== 0` so cancelled OAuth surfaces instead of silently succeeding; `checkDatabase` separates module-load vs query try-catches so a missing `@archon/core` stops masquerading as "Database not reachable". + - **Cherry-pick batch 5 from upstream — Tier 4 paths/env unification (5 commits).** Five commits picked from `coleam00/archon` upstream/dev. The deferred `e33e0de6` from PR #8 (archon-assist worktree opt-out) is now included because its prerequisite (`5ed38dc7`'s `worktree:` schema) lands in this batch. One candidate (`cc78071f` worktree timeout 5m) was skipped as already-absorbed in earlier picks. - `28908f0c` — Unifies env load + write on a three-path model (`/.env` stripped at boot, `/.archon/.env` loaded at repo scope and wins, `~/.archon/.env` loaded at home scope). New `loadArchonEnv(cwd)` helper in `@archon/paths/env-loader` shared by CLI and server entry points (replaces the old `dotenv` invocations that always lied "(0 keys injected)" about stripped files). `archon setup` gains `--scope home|project` (default home) targeting exactly one archon-owned file, with merge-only-by-default behavior and a `--force` opt-out. `/.env` is never written to (it would be incoherent — `stripCwdEnv` deletes those keys on every run anyway). User-facing log lines are now actionable: `[archon] stripped N keys from ` and `[archon] loaded N keys from `, emitted only when N > 0 (#1302, #1303, #1304). - `7be4d0a3` — Collapses the awkward `~/.archon/.archon/workflows/` convention to a direct `~/.archon/workflows/` child (matching `workspaces/`, `archon.db`, etc.); adds home-scoped commands (`~/.archon/commands/`) and scripts (`~/.archon/scripts/`) with the same loading story; kills the opt-in `globalSearchPath` parameter so every call site gets home-scope for free. 
New paths helpers: `getHomeWorkflowsPath()`, `getHomeCommandsPath()`, `getHomeScriptsPath()`, plus `getLegacyHomeWorkflowsPath()` for migration detection. `discoverWorkflowsWithConfig(cwd, loadConfig)` reads home-scope internally; `discoverScriptsForCwd(cwd)` merges home + repo scripts. Command resolution is now walked-by-basename in each scope so `.archon/commands/triage/review.md` resolves as `review` (closes the latent bug where subfolder commands were listed but unresolvable). Closes #1136 — supersedes the tactical fix because the bug was the primitive itself: an easy-to-forget parameter that five of six call sites on dev dropped (#1315). diff --git a/CLAUDE.md b/CLAUDE.md index 5d4c9ce89f..3a8add54fd 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -150,7 +150,7 @@ bun run format:check bun run validate ``` -This runs `check:bundled`, type-check, lint, format check, and tests. All five must pass for CI to succeed. +This runs `check:bundled`, `check:bundled-skill`, type-check, lint, format check, and tests. All six must pass for CI to succeed. ### ESLint Guidelines @@ -255,6 +255,9 @@ bun run cli serve --download-only # Download without starting bun run cli skill install bun run cli skill install /path/to/project +# Verify your Archon setup (Claude binary, gh auth, DB, adapters) +bun run cli doctor + # Show version bun run cli version ``` @@ -718,7 +721,7 @@ async function createSession(conversationId: string, codebaseId: string) { - Source builds: Loaded from filesystem at runtime - Merged with repo-specific commands/workflows (repo overrides defaults by name) - Opt-out: Set `defaults.loadDefaultCommands: false` or `defaults.loadDefaultWorkflows: false` in `.archon/config.yaml` -- **After adding, removing, or editing a default file, run `bun run generate:bundled`** to refresh the embedded bundle. `bun run validate` (and CI) run `check:bundled` and will fail loudly if the generated file is stale. 
+- **After adding, removing, or editing a default file, run `bun run generate:bundled`** to refresh the embedded bundle. `bun run validate` (and CI) run `check:bundled` and `check:bundled-skill` and will fail loudly if either generated file is stale. **Home-scoped ("global") workflows, commands, and scripts** (user-level, applies to every project): - Workflows: `~/.archon/workflows/` (or `$ARCHON_HOME/workflows/`) diff --git a/package.json b/package.json index 736a97ac4c..b4c6288aef 100644 --- a/package.json +++ b/package.json @@ -16,6 +16,7 @@ "build:checksums": "bash scripts/checksums.sh", "generate:bundled": "bun run scripts/generate-bundled-defaults.ts", "check:bundled": "bun run scripts/generate-bundled-defaults.ts --check", + "check:bundled-skill": "bun run scripts/check-bundled-skill.ts --check", "test": "bun --filter '*' --parallel test", "test:watch": "bun --filter @archon/server test:watch", "type-check": "bun --filter '*' type-check && bun x tsc --noEmit -p scripts/tsconfig.json", @@ -27,7 +28,7 @@ "build:web": "bun --filter @archon/web build", "dev:docs": "bun --filter @archon/docs-web dev", "build:docs": "bun --filter @archon/docs-web build", - "validate": "bun run check:bundled && bun run type-check && bun run lint --max-warnings 0 && bun run format:check && bun run test", + "validate": "bun run check:bundled && bun run check:bundled-skill && bun run type-check && bun run lint --max-warnings 0 && bun run format:check && bun run test", "prepare": "husky", "setup-auth": "bun --filter @archon/server setup-auth" }, diff --git a/packages/cli/package.json b/packages/cli/package.json index 454e5c4d3c..0867a195c1 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -8,7 +8,7 @@ }, "scripts": { "cli": "bun src/cli.ts", - "test": "bun test src/commands/version.test.ts src/commands/setup.test.ts src/commands/skill.test.ts && bun test src/commands/workflow.test.ts && bun test src/commands/isolation.test.ts && bun test 
src/commands/chat.test.ts && bun test src/commands/serve.test.ts", + "test": "bun test src/commands/version.test.ts src/commands/setup.test.ts src/commands/skill.test.ts src/commands/doctor.test.ts && bun test src/commands/workflow.test.ts && bun test src/commands/isolation.test.ts && bun test src/commands/chat.test.ts && bun test src/commands/serve.test.ts", "type-check": "bun x tsc --noEmit" }, "dependencies": { diff --git a/packages/cli/src/bundled-skill.ts b/packages/cli/src/bundled-skill.ts index ca1cd3bee1..a822d9a660 100644 --- a/packages/cli/src/bundled-skill.ts +++ b/packages/cli/src/bundled-skill.ts @@ -9,7 +9,7 @@ */ // ============================================================================= -// Skill Files (18 total) +// Skill Files (21 total) // ============================================================================= import skillMd from '../../../.claude/skills/archon/SKILL.md' with { type: 'text' }; @@ -26,8 +26,11 @@ import telegramGuide from '../../../.claude/skills/archon/guides/telegram.md' wi import authoringCommands from '../../../.claude/skills/archon/references/authoring-commands.md' with { type: 'text' }; import cliCommands from '../../../.claude/skills/archon/references/cli-commands.md' with { type: 'text' }; import dagAdvanced from '../../../.claude/skills/archon/references/dag-advanced.md' with { type: 'text' }; +import goodPractices from '../../../.claude/skills/archon/references/good-practices.md' with { type: 'text' }; import interactiveWorkflows from '../../../.claude/skills/archon/references/interactive-workflows.md' with { type: 'text' }; +import parameterMatrix from '../../../.claude/skills/archon/references/parameter-matrix.md' with { type: 'text' }; import repoInit from '../../../.claude/skills/archon/references/repo-init.md' with { type: 'text' }; +import troubleshooting from '../../../.claude/skills/archon/references/troubleshooting.md' with { type: 'text' }; import variables from 
'../../../.claude/skills/archon/references/variables.md' with { type: 'text' }; import workflowDag from '../../../.claude/skills/archon/references/workflow-dag.md' with { type: 'text' }; @@ -53,8 +56,11 @@ export const BUNDLED_SKILL_FILES: Record = { 'references/authoring-commands.md': authoringCommands, 'references/cli-commands.md': cliCommands, 'references/dag-advanced.md': dagAdvanced, + 'references/good-practices.md': goodPractices, 'references/interactive-workflows.md': interactiveWorkflows, + 'references/parameter-matrix.md': parameterMatrix, 'references/repo-init.md': repoInit, + 'references/troubleshooting.md': troubleshooting, 'references/variables.md': variables, 'references/workflow-dag.md': workflowDag, }; diff --git a/packages/cli/src/cli.ts b/packages/cli/src/cli.ts index fb00e1de4e..afd89ae681 100755 --- a/packages/cli/src/cli.ts +++ b/packages/cli/src/cli.ts @@ -64,6 +64,7 @@ import { setupCommand } from './commands/setup'; import { skillInstallCommand } from './commands/skill'; import { validateWorkflowsCommand, validateCommandsCommand } from './commands/validate'; import { serveCommand } from './commands/serve'; +import { doctorCommand } from './commands/doctor'; import { closeDatabase } from '@archon/core'; import { setLogLevel, @@ -105,6 +106,7 @@ Commands: complete [...] 
Complete branch lifecycle (remove worktree + branches) serve Start the web UI server (downloads web UI on first run) skill install [path] Install the bundled Archon skill into .claude/skills/archon + doctor Verify your Archon setup (Claude binary, gh auth, DB, adapters) validate workflows [name] Validate workflow definitions and their references validate commands [name] Validate command files version, --version, -V Show version info (also -v when used alone) @@ -267,7 +269,16 @@ async function main(): Promise { const subcommand = positionals[1]; // Commands that don't require git repo validation - const noGitCommands = ['version', 'help', 'setup', 'chat', 'continue', 'serve', 'skill']; + const noGitCommands = [ + 'version', + 'help', + 'setup', + 'chat', + 'continue', + 'serve', + 'skill', + 'doctor', + ]; const requiresGitRepo = !noGitCommands.includes(command ?? ''); try { @@ -600,6 +611,10 @@ async function main(): Promise { return await serveCommand({ port: servePort, downloadOnly }); } + case 'doctor': { + return await doctorCommand(); + } + case 'skill': { switch (subcommand) { case 'install': { diff --git a/packages/cli/src/commands/doctor.test.ts b/packages/cli/src/commands/doctor.test.ts new file mode 100644 index 0000000000..f6c40549d1 --- /dev/null +++ b/packages/cli/src/commands/doctor.test.ts @@ -0,0 +1,342 @@ +/** + * Tests for `archon doctor` check functions. + * + * Uses spyOn for `@archon/git.execFileAsync` and `globalThis.fetch`. + * `BUNDLED_IS_BINARY` is a static const re-export and cannot be spied at + * runtime — `checkClaudeBinary` accepts it as an injectable parameter for + * testability. Avoids `mock.module()` because it is process-global and + * irreversible in Bun, which would pollute other test files in this package. 
+ */ +import { describe, it, expect, spyOn, afterEach, beforeEach } from 'bun:test'; +import { tmpdir } from 'os'; +import { join } from 'path'; +import { mkdirSync, rmSync } from 'fs'; +import * as git from '@archon/git'; +import { + checkClaudeBinary, + checkDatabase, + checkGhAuth, + checkWorkspaceWritable, + checkBundledDefaults, + checkSlack, + checkTelegram, + doctorCommand, + type DatabaseDeps, +} from './doctor'; + +describe('checkClaudeBinary', () => { + let execSpy: ReturnType>; + + beforeEach(() => { + execSpy = spyOn(git, 'execFileAsync'); + }); + + afterEach(() => { + execSpy.mockRestore(); + }); + + it('returns skip when not in binary mode', async () => { + const result = await checkClaudeBinary({}, false); + expect(result.status).toBe('skip'); + expect(result.label).toBe('Claude binary'); + expect(execSpy).not.toHaveBeenCalled(); + }); + + it('returns fail in binary mode when CLAUDE_BIN_PATH is unset', async () => { + const result = await checkClaudeBinary({}, true); + expect(result.status).toBe('fail'); + expect(result.message).toContain('CLAUDE_BIN_PATH'); + expect(execSpy).not.toHaveBeenCalled(); + }); + + it('returns pass in binary mode when binary spawns successfully', async () => { + execSpy.mockResolvedValue({ stdout: '1.0.0', stderr: '' }); + const result = await checkClaudeBinary({ CLAUDE_BIN_PATH: '/opt/claude' }, true); + expect(result.status).toBe('pass'); + expect(result.message).toContain('/opt/claude'); + expect(execSpy).toHaveBeenCalledWith('/opt/claude', ['--version'], expect.any(Object)); + }); + + it('returns fail in binary mode when spawn throws', async () => { + execSpy.mockRejectedValue(new Error('ENOENT')); + const result = await checkClaudeBinary({ CLAUDE_BIN_PATH: '/opt/claude' }, true); + expect(result.status).toBe('fail'); + expect(result.message).toContain('did not spawn'); + expect(result.message).toContain('ENOENT'); + }); +}); + +describe('checkGhAuth', () => { + let execSpy: ReturnType>; + + beforeEach(() => { + 
execSpy = spyOn(git, 'execFileAsync'); + }); + + afterEach(() => { + execSpy.mockRestore(); + }); + + it('returns skip when no GitHub token is set', async () => { + const result = await checkGhAuth({}); + expect(result.status).toBe('skip'); + expect(result.message).toContain('GitHub not configured'); + expect(execSpy).not.toHaveBeenCalled(); + }); + + it('runs gh auth check when only GH_TOKEN is set', async () => { + execSpy.mockResolvedValue({ stdout: 'Logged in as @user', stderr: '' }); + const result = await checkGhAuth({ GH_TOKEN: 'ghp_y' }); + expect(result.status).toBe('pass'); + expect(execSpy).toHaveBeenCalledWith('gh', ['auth', 'status'], expect.any(Object)); + }); + + it('returns pass when gh auth status succeeds', async () => { + execSpy.mockResolvedValue({ stdout: 'Logged in as @user', stderr: '' }); + const result = await checkGhAuth({ GITHUB_TOKEN: 'ghp_x' }); + expect(result.status).toBe('pass'); + expect(execSpy).toHaveBeenCalledWith('gh', ['auth', 'status'], expect.any(Object)); + }); + + it('returns fail when gh auth status throws', async () => { + execSpy.mockRejectedValue(new Error('not logged in')); + const result = await checkGhAuth({ GH_TOKEN: 'ghp_y' }); + expect(result.status).toBe('fail'); + expect(result.message).toContain('not logged in'); + }); +}); + +describe('checkDatabase', () => { + it('returns pass when query succeeds', async () => { + const deps: DatabaseDeps = { + pool: { query: async () => undefined }, + getDatabaseType: () => 'sqlite', + }; + const result = await checkDatabase(async () => deps); + expect(result.status).toBe('pass'); + expect(result.message).toContain('sqlite'); + }); + + it('reports postgres dbType when configured', async () => { + const deps: DatabaseDeps = { + pool: { query: async () => undefined }, + getDatabaseType: () => 'postgres', + }; + const result = await checkDatabase(async () => deps); + expect(result.status).toBe('pass'); + expect(result.message).toContain('postgres'); + }); + + it('returns fail 
with "not reachable" when query throws', async () => { + const deps: DatabaseDeps = { + pool: { + query: async () => { + throw new Error('connection refused'); + }, + }, + getDatabaseType: () => 'postgres', + }; + const result = await checkDatabase(async () => deps); + expect(result.status).toBe('fail'); + expect(result.message).toContain('not reachable'); + expect(result.message).toContain('connection refused'); + }); + + it('returns fail with "failed to load" when module load throws', async () => { + const result = await checkDatabase(async () => { + throw new Error('Cannot find module @archon/core'); + }); + expect(result.status).toBe('fail'); + expect(result.message).toContain('failed to load database module'); + expect(result.message).toContain('Cannot find module'); + }); +}); + +describe('checkWorkspaceWritable', () => { + const TMP = join(tmpdir(), 'archon-doctor-test-' + Date.now()); + let originalHome: string | undefined; + + beforeEach(() => { + mkdirSync(TMP, { recursive: true }); + originalHome = process.env.ARCHON_HOME; + process.env.ARCHON_HOME = TMP; + }); + + afterEach(() => { + if (originalHome === undefined) { + delete process.env.ARCHON_HOME; + } else { + process.env.ARCHON_HOME = originalHome; + } + try { + rmSync(TMP, { recursive: true, force: true }); + } catch { + // Ignore cleanup errors + } + }); + + it('returns pass when directory is writable', async () => { + const result = await checkWorkspaceWritable(); + expect(result.status).toBe('pass'); + expect(result.message).toContain('writable'); + }); + + it('returns pass when directory does not exist (creates it)', async () => { + rmSync(TMP, { recursive: true, force: true }); + const result = await checkWorkspaceWritable(); + expect(result.status).toBe('pass'); + }); +}); + +describe('checkBundledDefaults', () => { + it('returns pass with workflow and command counts in dev mode', async () => { + const result = await checkBundledDefaults(); + expect(result.status).toBe('pass'); + 
expect(result.label).toBe('Bundled defaults'); + expect(result.message).toMatch(/\d+ workflow/); + expect(result.message).toMatch(/\d+ command/); + }); +}); + +describe('checkSlack', () => { + let fetchSpy: ReturnType>; + + beforeEach(() => { + fetchSpy = spyOn(globalThis, 'fetch'); + }); + + afterEach(() => { + fetchSpy.mockRestore(); + }); + + it('returns skip when SLACK_BOT_TOKEN not set', async () => { + const result = await checkSlack({}); + expect(result.status).toBe('skip'); + expect(result.message).toContain('SLACK_BOT_TOKEN'); + expect(fetchSpy).not.toHaveBeenCalled(); + }); + + it('returns pass when auth.test responds ok', async () => { + fetchSpy.mockResolvedValue( + new Response(JSON.stringify({ ok: true }), { status: 200 }) as unknown as Response + ); + const result = await checkSlack({ SLACK_BOT_TOKEN: 'xoxb-x' }); + expect(result.status).toBe('pass'); + }); + + it('returns fail when auth.test rejects with body.ok=false', async () => { + fetchSpy.mockResolvedValue( + new Response(JSON.stringify({ ok: false, error: 'invalid_auth' }), { + status: 200, + }) as unknown as Response + ); + const result = await checkSlack({ SLACK_BOT_TOKEN: 'xoxb-x' }); + expect(result.status).toBe('fail'); + expect(result.message).toContain('invalid_auth'); + }); + + it('returns skip on network error (best-effort by design)', async () => { + fetchSpy.mockRejectedValue(new Error('ECONNREFUSED')); + const result = await checkSlack({ SLACK_BOT_TOKEN: 'xoxb-x' }); + expect(result.status).toBe('skip'); + expect(result.message).toContain('ECONNREFUSED'); + }); +}); + +describe('checkTelegram', () => { + let fetchSpy: ReturnType>; + + beforeEach(() => { + fetchSpy = spyOn(globalThis, 'fetch'); + }); + + afterEach(() => { + fetchSpy.mockRestore(); + }); + + it('returns skip when TELEGRAM_BOT_TOKEN not set', async () => { + const result = await checkTelegram({}); + expect(result.status).toBe('skip'); + expect(result.message).toContain('TELEGRAM_BOT_TOKEN'); + 
expect(fetchSpy).not.toHaveBeenCalled(); + }); + + it('returns pass when getMe responds ok', async () => { + fetchSpy.mockResolvedValue( + new Response(JSON.stringify({ ok: true }), { status: 200 }) as unknown as Response + ); + const result = await checkTelegram({ TELEGRAM_BOT_TOKEN: '123:abc' }); + expect(result.status).toBe('pass'); + }); + + it('returns fail when getMe responds ok=false', async () => { + fetchSpy.mockResolvedValue( + new Response(JSON.stringify({ ok: false, description: 'Unauthorized' }), { + status: 401, + }) as unknown as Response + ); + const result = await checkTelegram({ TELEGRAM_BOT_TOKEN: '123:abc' }); + expect(result.status).toBe('fail'); + expect(result.message).toContain('Unauthorized'); + }); + + it('returns skip on network error (best-effort by design)', async () => { + fetchSpy.mockRejectedValue(new Error('ETIMEDOUT')); + const result = await checkTelegram({ TELEGRAM_BOT_TOKEN: '123:abc' }); + expect(result.status).toBe('skip'); + expect(result.message).toContain('ETIMEDOUT'); + }); +}); + +describe('doctorCommand', () => { + let logSpy: ReturnType>; + + beforeEach(() => { + logSpy = spyOn(console, 'log').mockImplementation(() => {}); + }); + + afterEach(() => { + logSpy.mockRestore(); + }); + + const passing = (label: string) => async () => + ({ label, status: 'pass', message: 'ok' }) as const; + const failing = (label: string) => async () => + ({ label, status: 'fail', message: 'broken' }) as const; + const skipping = (label: string) => async () => + ({ label, status: 'skip', message: 'no token' }) as const; + const throwing = (label: string) => async (): Promise => { + throw new Error(`${label} blew up`); + }; + + it('returns 0 when every check passes', async () => { + const exit = await doctorCommand([passing('A'), passing('B')]); + expect(exit).toBe(0); + }); + + it('returns 0 when checks are pass + skip (skip is not a failure)', async () => { + const exit = await doctorCommand([passing('A'), skipping('B')]); + 
expect(exit).toBe(0); + }); + + it('returns 1 when any check fails', async () => { + const exit = await doctorCommand([passing('A'), failing('B')]); + expect(exit).toBe(1); + }); + + it('counts a thrown check as a failure (allSettled rejection branch)', async () => { + const exit = await doctorCommand([passing('A'), throwing('B')]); + expect(exit).toBe(1); + }); + + it('continues after a thrown check (Promise.allSettled does not short-circuit)', async () => { + const exit = await doctorCommand([throwing('A'), passing('B'), failing('C')]); + // 1 throw + 1 fail = 2 failures, but exit code is still 1. + expect(exit).toBe(1); + // Verify all three were rendered (one per ✓/✗/unknown line). + const renderedLines = logSpy.mock.calls + .map(args => String(args[0] ?? '')) + .filter(s => s.startsWith('✓') || s.startsWith('✗') || s.startsWith('○')); + expect(renderedLines.length).toBeGreaterThanOrEqual(2); + }); +}); diff --git a/packages/cli/src/commands/doctor.ts b/packages/cli/src/commands/doctor.ts new file mode 100644 index 0000000000..d50723deed --- /dev/null +++ b/packages/cli/src/commands/doctor.ts @@ -0,0 +1,259 @@ +/** + * Doctor command - Verifies the local Archon setup. + * + * Also invoked from the end of `archon setup`; the setup wizard discards the + * return value so a doctor failure does not abort setup (the env file was + * already written successfully). 
+ */ +import { mkdirSync, writeFileSync, rmSync } from 'fs'; +import { join } from 'path'; +import { execFileAsync } from '@archon/git'; +import { BUNDLED_IS_BINARY, getArchonHome, createLogger } from '@archon/paths'; + +let cachedLog: ReturnType | undefined; +function getLog(): ReturnType { + if (!cachedLog) cachedLog = createLogger('cli.doctor'); + return cachedLog; +} + +export interface CheckResult { + label: string; + status: 'pass' | 'fail' | 'skip'; + message: string; +} + +export async function checkClaudeBinary( + env: NodeJS.ProcessEnv, + // Injected so tests can drive the binary-mode branch — `BUNDLED_IS_BINARY` + // is a static const re-export and cannot be spied at runtime. + isBinary: boolean = BUNDLED_IS_BINARY +): Promise { + const label = 'Claude binary'; + if (!isBinary) { + return { label, status: 'skip', message: 'dev mode (SDK resolves via node_modules)' }; + } + const path = env.CLAUDE_BIN_PATH; + if (!path) { + return { + label, + status: 'fail', + message: 'CLAUDE_BIN_PATH is not set. Run `archon setup` to configure.', + }; + } + try { + await execFileAsync(path, ['--version'], { timeout: 5000 }); + return { label, status: 'pass', message: `${path} (spawns OK)` }; + } catch (err) { + return { + label, + status: 'fail', + message: `${path} did not spawn: ${(err as Error).message}`, + }; + } +} + +export async function checkGhAuth(env: NodeJS.ProcessEnv): Promise { + const label = 'gh CLI'; + // Skip for users without GitHub configured — gh auth is irrelevant + // to a CLI-only or Slack/Telegram setup, so reporting fail would be noise. + if (!env.GITHUB_TOKEN && !env.GH_TOKEN) { + return { label, status: 'skip', message: 'GitHub not configured (no GITHUB_TOKEN)' }; + } + try { + await execFileAsync('gh', ['auth', 'status'], { timeout: 10_000 }); + return { label, status: 'pass', message: 'authenticated' }; + } catch (err) { + return { + label, + status: 'fail', + message: `gh auth status failed: ${(err as Error).message}. 
Run \`gh auth login\`.`, + }; + } +} + +export interface DatabaseDeps { + pool: { query: (sql: string) => Promise }; + getDatabaseType: () => string; +} + +export async function checkDatabase( + // Injected so tests can drive both code paths without mocking the dynamic + // import. Falls back to the lazy `@archon/core` import in production. + loadDeps: () => Promise = defaultLoadDatabaseDeps +): Promise { + const label = 'Database'; + let deps: DatabaseDeps; + try { + deps = await loadDeps(); + } catch (err) { + // Distinguish module-load failure from query failure — surfacing + // "not reachable" for an import error misleads the user into running + // `archon setup` when the real fix is a binary rebuild. + getLog().error({ err }, 'doctor.db_module_load_failed'); + return { + label, + status: 'fail', + message: `failed to load database module: ${(err as Error).message}`, + }; + } + try { + const dbType = deps.getDatabaseType(); + await deps.pool.query('SELECT 1'); + return { label, status: 'pass', message: `reachable (${dbType})` }; + } catch (err) { + getLog().error({ err }, 'doctor.db_query_failed'); + return { label, status: 'fail', message: `not reachable: ${(err as Error).message}` }; + } +} + +async function defaultLoadDatabaseDeps(): Promise { + // Lazy import so doctor doesn't pull in the full @archon/core graph just to + // print --help or run a different check. 
+ const { pool, getDatabaseType } = await import('@archon/core'); + return { pool, getDatabaseType }; +} + +export async function checkWorkspaceWritable(): Promise { + const label = 'Workspace'; + const home = getArchonHome(); + const probe = join(home, `.doctor-probe-${process.pid}-${Date.now()}`); + try { + mkdirSync(home, { recursive: true }); + writeFileSync(probe, 'ok'); + } catch (err) { + return { label, status: 'fail', message: `${home} not writable: ${(err as Error).message}` }; + } + try { + rmSync(probe, { force: true }); + } catch (err) { + // Deletion failure is cosmetic — the write succeeded, so the dir is + // writable. Log so repeated failures leave a diagnostic trace instead of + // silently accumulating .doctor-probe-* files in ARCHON_HOME. + getLog().warn({ probe, err }, 'doctor.workspace_probe_delete_failed'); + } + return { label, status: 'pass', message: `${home} is writable` }; +} + +export async function checkBundledDefaults(): Promise { + const label = 'Bundled defaults'; + try { + const { BUNDLED_COMMANDS, BUNDLED_WORKFLOWS } = await import('@archon/workflows/defaults'); + const commands = Object.keys(BUNDLED_COMMANDS).length; + const workflows = Object.keys(BUNDLED_WORKFLOWS).length; + return { + label, + status: 'pass', + message: `${workflows} workflow(s), ${commands} command(s) loaded`, + }; + } catch (err) { + return { label, status: 'fail', message: `failed to load: ${(err as Error).message}` }; + } +} + +export async function checkSlack(env: NodeJS.ProcessEnv): Promise { + const label = 'Slack'; + const token = env.SLACK_BOT_TOKEN; + if (!token) { + return { label, status: 'skip', message: 'no SLACK_BOT_TOKEN set' }; + } + try { + const res = await fetch('https://slack.com/api/auth.test', { + method: 'POST', + headers: { Authorization: `Bearer ${token}` }, + signal: AbortSignal.timeout(5000), + }); + const body = (await res.json()) as { ok?: boolean; error?: string }; + if (body.ok) { + return { label, status: 'pass', message: 
'auth.test OK' }; + } + return { label, status: 'fail', message: `auth.test rejected: ${body.error ?? 'unknown'}` }; + } catch (err) { + // Network errors → skip, not fail — best-effort by design. + return { + label, + status: 'skip', + message: `ping skipped (${(err as Error).message})`, + }; + } +} + +export async function checkTelegram(env: NodeJS.ProcessEnv): Promise { + const label = 'Telegram'; + const token = env.TELEGRAM_BOT_TOKEN; + if (!token) { + return { label, status: 'skip', message: 'no TELEGRAM_BOT_TOKEN set' }; + } + try { + const res = await fetch(`https://api.telegram.org/bot${token}/getMe`, { + signal: AbortSignal.timeout(5000), + }); + const body = (await res.json()) as { ok?: boolean; description?: string }; + if (body.ok) { + return { label, status: 'pass', message: 'getMe OK' }; + } + return { + label, + status: 'fail', + message: `getMe rejected: ${body.description ?? 'unknown'}`, + }; + } catch (err) { + return { + label, + status: 'skip', + message: `ping skipped (${(err as Error).message})`, + }; + } +} + +function renderResult(r: CheckResult): string { + const icon = r.status === 'pass' ? '✓' : r.status === 'fail' ? '✗' : '○'; + return `${icon} ${r.label}: ${r.message}`; +} + +export async function doctorCommand( + // Injected so tests can drive the exit-code contract and the + // Promise.allSettled rejection branch with synthetic checks. + checks?: (() => Promise)[] +): Promise { + console.log('archon doctor — verifying your setup\n'); + getLog().info('doctor.run_started'); + const env = process.env; + + const promises = checks + ? checks.map(fn => fn()) + : [ + checkClaudeBinary(env), + checkGhAuth(env), + checkDatabase(), + checkWorkspaceWritable(), + checkBundledDefaults(), + checkSlack(env), + checkTelegram(env), + ]; + + // Promise.allSettled so one unexpected rejection doesn't skip remaining checks. 
+ const settled = await Promise.allSettled(promises); + + let failures = 0; + for (const s of settled) { + if (s.status === 'rejected') { + failures++; + const msg = s.reason instanceof Error ? s.reason.message : String(s.reason); + console.log(`✗ unknown: check threw: ${msg}`); + getLog().error({ reason: s.reason }, 'doctor.check_threw_unexpectedly'); + continue; + } + if (s.value.status === 'fail') failures++; + console.log(renderResult(s.value)); + } + + console.log(''); + if (failures === 0) { + console.log('All checks passed.'); + getLog().info('doctor.run_completed'); + return 0; + } + console.log(`${failures} check(s) failed. Run \`archon setup\` to reconfigure.`); + getLog().warn({ failures }, 'doctor.run_failed'); + return 1; +} diff --git a/packages/cli/src/commands/setup.test.ts b/packages/cli/src/commands/setup.test.ts index bb73eec09a..c64cb064dc 100644 --- a/packages/cli/src/commands/setup.test.ts +++ b/packages/cli/src/commands/setup.test.ts @@ -6,6 +6,7 @@ import { existsSync, readFileSync, mkdirSync, writeFileSync, rmSync } from 'fs'; import { join } from 'path'; import { tmpdir } from 'os'; import { + bootstrapProjectConfig, checkExistingConfig, generateEnvContent, generateWebhookSecret, @@ -99,30 +100,6 @@ CODEX_ACCOUNT_ID=account1 expect(result?.platforms.telegram).toBe(true); expect(result?.platforms.github).toBe(false); expect(result?.platforms.slack).toBe(false); - expect(result?.platforms.discord).toBe(false); - expect(result?.hasDatabase).toBe(false); - - if (originalHome === undefined) { - delete process.env.ARCHON_HOME; - } else { - process.env.ARCHON_HOME = originalHome; - } - }); - - it('should detect PostgreSQL database configuration', () => { - const envDir = join(TEST_DIR, '.archon2'); - mkdirSync(envDir, { recursive: true }); - const envPath = join(envDir, '.env'); - - writeFileSync(envPath, 'DATABASE_URL=postgresql://localhost:5432/test'); - - const originalHome = process.env.ARCHON_HOME; - process.env.ARCHON_HOME = envDir; - - 
const result = checkExistingConfig(); - - expect(result).not.toBeNull(); - expect(result?.hasDatabase).toBe(true); if (originalHome === undefined) { delete process.env.ARCHON_HOME; @@ -135,7 +112,6 @@ CODEX_ACCOUNT_ID=account1 describe('generateEnvContent', () => { it('should generate valid .env content for SQLite configuration', () => { const content = generateEnvContent({ - database: { type: 'sqlite' }, ai: { claude: true, claudeAuthType: 'global', @@ -146,7 +122,6 @@ CODEX_ACCOUNT_ID=account1 github: false, telegram: false, slack: false, - discord: false, }, botDisplayName: 'Archon', }); @@ -157,36 +132,13 @@ CODEX_ACCOUNT_ID=account1 // PORT is intentionally commented out — server and Vite both default to 3090 when unset (#1152). expect(content).toContain('# PORT=3090'); expect(content).not.toMatch(/^PORT=/m); - expect(content).not.toContain('DATABASE_URL='); - }); - - it('should generate valid .env content for PostgreSQL configuration', () => { - const content = generateEnvContent({ - database: { type: 'postgresql', url: 'postgresql://localhost:5432/archon' }, - ai: { - claude: true, - claudeAuthType: 'apiKey', - claudeApiKey: 'sk-test-key', - codex: false, - defaultAssistant: 'claude', - }, - platforms: { - github: false, - telegram: false, - slack: false, - discord: false, - }, - botDisplayName: 'Archon', - }); - - expect(content).toContain('DATABASE_URL=postgresql://localhost:5432/archon'); - expect(content).toContain('CLAUDE_USE_GLOBAL_AUTH=false'); - expect(content).toContain('CLAUDE_API_KEY=sk-test-key'); + // Sanity: never emit an active DATABASE_URL line. The "# Set DATABASE_URL=..." + // hint is a comment and is fine — only an unprefixed assignment would be wrong. 
+ expect(content).not.toMatch(/^DATABASE_URL=/m); }); it('emits CLAUDE_BIN_PATH when claudeBinaryPath is configured', () => { const content = generateEnvContent({ - database: { type: 'sqlite' }, ai: { claude: true, claudeAuthType: 'global', @@ -194,7 +146,7 @@ CODEX_ACCOUNT_ID=account1 codex: false, defaultAssistant: 'claude', }, - platforms: { github: false, telegram: false, slack: false, discord: false }, + platforms: { github: false, telegram: false, slack: false }, botDisplayName: 'Archon', }); @@ -205,14 +157,13 @@ CODEX_ACCOUNT_ID=account1 it('omits CLAUDE_BIN_PATH when not configured', () => { const content = generateEnvContent({ - database: { type: 'sqlite' }, ai: { claude: true, claudeAuthType: 'global', codex: false, defaultAssistant: 'claude', }, - platforms: { github: false, telegram: false, slack: false, discord: false }, + platforms: { github: false, telegram: false, slack: false }, botDisplayName: 'Archon', }); @@ -221,7 +172,6 @@ CODEX_ACCOUNT_ID=account1 it('should include platform configurations', () => { const content = generateEnvContent({ - database: { type: 'sqlite' }, ai: { claude: true, claudeAuthType: 'global', @@ -232,7 +182,6 @@ CODEX_ACCOUNT_ID=account1 github: true, telegram: true, slack: false, - discord: false, }, github: { token: 'ghp_testtoken', @@ -259,7 +208,6 @@ CODEX_ACCOUNT_ID=account1 it('should include Codex tokens when configured', () => { const content = generateEnvContent({ - database: { type: 'sqlite' }, ai: { claude: false, codex: true, @@ -275,7 +223,6 @@ CODEX_ACCOUNT_ID=account1 github: false, telegram: false, slack: false, - discord: false, }, botDisplayName: 'Archon', }); @@ -289,7 +236,6 @@ CODEX_ACCOUNT_ID=account1 it('should include custom bot display name', () => { const content = generateEnvContent({ - database: { type: 'sqlite' }, ai: { claude: true, claudeAuthType: 'global', @@ -300,7 +246,6 @@ CODEX_ACCOUNT_ID=account1 github: false, telegram: false, slack: false, - discord: false, }, botDisplayName: 
'MyCustomBot', }); @@ -310,7 +255,6 @@ CODEX_ACCOUNT_ID=account1 it('should not include bot display name when default', () => { const content = generateEnvContent({ - database: { type: 'sqlite' }, ai: { claude: true, claudeAuthType: 'global', @@ -321,7 +265,6 @@ CODEX_ACCOUNT_ID=account1 github: false, telegram: false, slack: false, - discord: false, }, botDisplayName: 'Archon', }); @@ -331,7 +274,6 @@ CODEX_ACCOUNT_ID=account1 it('should include Slack configuration', () => { const content = generateEnvContent({ - database: { type: 'sqlite' }, ai: { claude: true, claudeAuthType: 'global', @@ -342,7 +284,6 @@ CODEX_ACCOUNT_ID=account1 github: false, telegram: false, slack: true, - discord: false, }, slack: { botToken: 'xoxb-test', @@ -357,33 +298,6 @@ CODEX_ACCOUNT_ID=account1 expect(content).toContain('SLACK_ALLOWED_USER_IDS=U123'); expect(content).toContain('SLACK_STREAMING_MODE=batch'); }); - - it('should include Discord configuration', () => { - const content = generateEnvContent({ - database: { type: 'sqlite' }, - ai: { - claude: true, - claudeAuthType: 'global', - codex: false, - defaultAssistant: 'claude', - }, - platforms: { - github: false, - telegram: false, - slack: false, - discord: true, - }, - discord: { - botToken: 'discord-bot-token-test', - allowedUserIds: '123456789', - }, - botDisplayName: 'Archon', - }); - - expect(content).toContain('DISCORD_BOT_TOKEN=discord-bot-token-test'); - expect(content).toContain('DISCORD_ALLOWED_USER_IDS=123456789'); - expect(content).toContain('DISCORD_STREAMING_MODE=batch'); - }); }); describe('spawnTerminalWithSetup', () => { @@ -460,6 +374,65 @@ CODEX_ACCOUNT_ID=account1 expect(existsSync(join(target, '.claude', 'skills', 'archon', 'SKILL.md'))).toBe(true); }); }); + + describe('bootstrapProjectConfig', () => { + it('creates .archon/config.yaml when it does not exist', () => { + const target = join(TEST_DIR, 'bootstrap-target'); + mkdirSync(target, { recursive: true }); + + const result = 
bootstrapProjectConfig(target); + + expect(result.state).toBe('created'); + expect(result.path).toBe(join(target, '.archon', 'config.yaml')); + expect(existsSync(result.path)).toBe(true); + const content = readFileSync(result.path, 'utf-8'); + // Must be valid YAML — comment lines only — so loaders treat it as empty. + expect(content.split('\n').every(line => line === '' || line.startsWith('#'))).toBe(true); + expect(content).toContain('Project-scoped Archon config'); + expect(content).toContain('archon.diy/reference/configuration'); + }); + + it('creates the .archon directory if missing (idempotent on parent)', () => { + const target = join(TEST_DIR, 'bootstrap-no-archon-dir'); + mkdirSync(target, { recursive: true }); + // Do NOT pre-create .archon — bootstrap must create it + + const result = bootstrapProjectConfig(target); + + expect(result.state).toBe('created'); + expect(existsSync(join(target, '.archon'))).toBe(true); + }); + + it('is idempotent — leaves an existing config untouched', () => { + const target = join(TEST_DIR, 'bootstrap-existing'); + const archonDir = join(target, '.archon'); + mkdirSync(archonDir, { recursive: true }); + const userContent = '# my custom config\nassistants:\n claude:\n model: opus\n'; + writeFileSync(join(archonDir, 'config.yaml'), userContent); + + const result = bootstrapProjectConfig(target); + + expect(result.state).toBe('existed'); + const after = readFileSync(join(archonDir, 'config.yaml'), 'utf-8'); + expect(after).toBe(userContent); + }); + + it('returns failed state without throwing when the target path is unwritable', () => { + // Pointing at a path inside a non-existent parent that mkdirSync can + // create succeeds. Use a deeply-nested path inside a regular file + // (which fs cannot mkdir into) to force a real failure. 
+ const blocker = join(TEST_DIR, 'blocker-file'); + writeFileSync(blocker, 'not a directory'); + // mkdir under a file path fails with ENOTDIR — that's the failure mode + // we want to model (read-only FS, permission denied, etc.). + const result = bootstrapProjectConfig(blocker); + + expect(result.state).toBe('failed'); + if (result.state === 'failed') { + expect(result.error.length).toBeGreaterThan(0); + } + }); + }); }); describe('detectClaudeExecutablePath probe order', () => { diff --git a/packages/cli/src/commands/setup.ts b/packages/cli/src/commands/setup.ts index 42ca63e3a4..eca05654fa 100644 --- a/packages/cli/src/commands/setup.ts +++ b/packages/cli/src/commands/setup.ts @@ -2,9 +2,11 @@ * Setup command - Interactive CLI wizard for Archon credential configuration * * Guides users through configuring: - * - Database (SQLite default vs PostgreSQL) * - AI assistants (Claude and/or Codex) - * - Platform connections (GitHub, Telegram, Slack, Discord) + * - Platform connections (GitHub, Telegram, Slack — all skippable) + * + * SQLite is the implicit default; no database prompt. PostgreSQL users set + * DATABASE_URL by hand (documented separately). 
* * Writes configuration to one archon-owned env file, chosen by --scope: * - 'home' (default) → ~/.archon/.env @@ -38,7 +40,8 @@ import { join, dirname } from 'path'; import { copyArchonSkill } from './skill'; import { homedir } from 'os'; import { randomBytes } from 'crypto'; -import { spawn, execSync, type ChildProcess } from 'child_process'; +import { spawn, execSync, spawnSync, type ChildProcess } from 'child_process'; +import { execFileAsync } from '@archon/git'; import { getRegisteredProviders } from '@archon/providers'; import { getArchonEnvPath as pathsGetArchonEnvPath, @@ -50,10 +53,6 @@ import { // ============================================================================= interface SetupConfig { - database: { - type: 'sqlite' | 'postgresql'; - url?: string; - }; ai: { claude: boolean; claudeAuthType?: 'global' | 'apiKey' | 'oauthToken'; @@ -70,12 +69,10 @@ interface SetupConfig { github: boolean; telegram: boolean; slack: boolean; - discord: boolean; }; github?: GitHubConfig; telegram?: TelegramConfig; slack?: SlackConfig; - discord?: DiscordConfig; botDisplayName: string; } @@ -97,11 +94,6 @@ interface SlackConfig { allowedUserIds: string; } -interface DiscordConfig { - botToken: string; - allowedUserIds: string; -} - interface CodexTokens { idToken: string; accessToken: string; @@ -110,14 +102,12 @@ interface CodexTokens { } interface ExistingConfig { - hasDatabase: boolean; hasClaude: boolean; hasCodex: boolean; platforms: { github: boolean; telegram: boolean; slack: boolean; - discord: boolean; }; } @@ -343,7 +333,6 @@ export function checkExistingConfig(envPath?: string): ExistingConfig | null { const content = readFileSync(path, 'utf-8'); return { - hasDatabase: hasEnvValue(content, 'DATABASE_URL'), hasClaude: hasEnvValue(content, 'CLAUDE_API_KEY') || hasEnvValue(content, 'CLAUDE_CODE_OAUTH_TOKEN') || @@ -357,7 +346,6 @@ export function checkExistingConfig(envPath?: string): ExistingConfig | null { github: hasEnvValue(content, 'GITHUB_TOKEN') || 
hasEnvValue(content, 'GH_TOKEN'), telegram: hasEnvValue(content, 'TELEGRAM_BOT_TOKEN'), slack: hasEnvValue(content, 'SLACK_BOT_TOKEN') && hasEnvValue(content, 'SLACK_APP_TOKEN'), - discord: hasEnvValue(content, 'DISCORD_BOT_TOKEN'), }, }; } @@ -366,53 +354,6 @@ export function checkExistingConfig(envPath?: string): ExistingConfig | null { // Data Collection Functions // ============================================================================= -/** - * Collect database configuration - */ -async function collectDatabaseConfig(): Promise { - const dbType = await select({ - message: 'Which database do you want to use?', - options: [ - { - value: 'sqlite', - label: 'SQLite (default - no setup needed)', - hint: 'Recommended for single user', - }, - { value: 'postgresql', label: 'PostgreSQL', hint: 'For server deployments' }, - ], - }); - - if (isCancel(dbType)) { - cancel('Setup cancelled.'); - process.exit(0); - } - - if (dbType === 'postgresql') { - const url = await text({ - message: 'Enter your PostgreSQL connection string:', - placeholder: 'postgresql://user:pass@localhost:5432/archon', - validate: value => { - if (!value) { - return 'Connection string is required'; - } - if (!value.startsWith('postgresql://') && !value.startsWith('postgres://')) { - return 'Must be a valid PostgreSQL URL (postgresql:// or postgres://)'; - } - return undefined; - }, - }); - - if (isCancel(url)) { - cancel('Setup cancelled.'); - process.exit(0); - } - - return { type: 'postgresql', url }; - } - - return { type: 'sqlite' }; -} - /** * Try to read Codex tokens from ~/.codex/auth.json */ @@ -455,8 +396,22 @@ function tryReadCodexAuth(): CodexTokens | null { } /** - * Collect Claude authentication method + * Try to spawn the Claude binary with `--version` to confirm it actually runs. + * Returns `{ ok: true }` on success or `{ ok: false, reason }` with the spawn + * error message so the caller can show it to the user. Bounded to 5s so a hung + * process can't stall setup. 
*/ +async function probeClaudeBinarySpawns( + path: string +): Promise<{ ok: true } | { ok: false; reason: string }> { + try { + await execFileAsync(path, ['--version'], { timeout: 5000 }); + return { ok: true }; + } catch (err) { + return { ok: false, reason: (err as Error).message }; + } +} + /** * Resolve the Claude Code executable path for CLAUDE_BIN_PATH. * Auto-detects common install locations and falls back to prompting the user. @@ -467,8 +422,10 @@ async function collectClaudeBinaryPath(): Promise { const detected = detectClaudeExecutablePath(); if (detected) { + const probe = await probeClaudeBinarySpawns(detected); + const suffix = probe.ok ? '(spawns OK)' : `(could not spawn: ${probe.reason})`; const useDetected = await confirm({ - message: `Found Claude Code at ${detected}. Write this to CLAUDE_BIN_PATH?`, + message: `Found Claude Code at ${detected} ${suffix}. Write this to CLAUDE_BIN_PATH?`, initialValue: true, }); if (isCancel(useDetected)) { @@ -509,10 +466,21 @@ async function collectClaudeBinaryPath(): Promise { log.warning( `Path does not exist: ${trimmed}. Saving anyway — the compiled binary will error on first use until this is correct.` ); + return trimmed; + } + + const probe = await probeClaudeBinarySpawns(trimmed); + if (!probe.ok) { + log.warning( + `Could not spawn ${trimmed} --version: ${probe.reason}. Saving anyway — verify the binary works (try running it directly).` + ); } return trimmed; } +/** + * Collect Claude authentication method (API key, OAuth token, or global auth). + */ async function collectClaudeAuth(): Promise<{ authType: 'global' | 'apiKey' | 'oauthToken'; apiKey?: string; @@ -884,12 +852,12 @@ After upgrading, run 'archon setup' again.`, */ async function collectPlatforms(): Promise { const platforms = await multiselect({ - message: 'Which platforms do you want to connect? (↑↓ navigate, space select, enter confirm)', + message: + 'Which chat adapters do you want to connect? 
(all optional — Archon works as CLI + skill without any)\n(↑↓ navigate, space select, enter confirm)', options: [ { value: 'github', label: 'GitHub', hint: 'Respond to issues/PRs via webhooks' }, { value: 'telegram', label: 'Telegram', hint: 'Chat bot via BotFather' }, { value: 'slack', label: 'Slack', hint: 'Workspace app with Socket Mode' }, - { value: 'discord', label: 'Discord', hint: 'Server bot' }, ], required: false, }); @@ -903,7 +871,6 @@ async function collectPlatforms(): Promise { github: platforms.includes('github'), telegram: platforms.includes('telegram'), slack: platforms.includes('slack'), - discord: platforms.includes('discord'), }; } @@ -939,6 +906,58 @@ async function collectGitHubConfig(): Promise { process.exit(0); } + // Probe `gh` CLI auth — workflows that shell out to `gh` (e.g. `gh issue + // create`, `gh pr edit`) need this even if the PAT is set, because they call + // the local `gh` binary, not the API directly. + const ghSpin = spinner(); + ghSpin.start('Checking gh CLI authentication...'); + let ghAuthOk = false; + let ghAuthError: string | undefined; + try { + await execFileAsync('gh', ['auth', 'status'], { timeout: 10_000 }); + ghAuthOk = true; + ghSpin.stop('gh CLI is authenticated'); + } catch (err) { + const e = err as NodeJS.ErrnoException; + ghAuthError = + e.code === 'ENOENT' + ? 'gh not found in PATH — install it first (https://cli.github.com)' + : (e.message ?? 'unknown error'); + ghSpin.stop('gh CLI check failed'); + } + + if (!ghAuthOk) { + log.warning( + `gh auth check failed: ${ghAuthError}\n` + + (ghAuthError?.includes('not found') ? '' : 'Run: gh auth login') + ); + // gh auth login is an interactive OAuth flow — only offer it from a TTY. + if (process.stdout.isTTY) { + const runGhLogin = await confirm({ + message: 'Run `gh auth login` now?', + initialValue: true, + }); + if (!isCancel(runGhLogin) && runGhLogin) { + // spawnSync with inherited stdio so the OAuth prompt reaches the terminal. 
+ const ghLoginResult = spawnSync('gh', ['auth', 'login'], { stdio: 'inherit' }); + if (ghLoginResult.error) { + log.warning( + `Could not run gh auth login: ${ghLoginResult.error.message}. ` + + 'Install the gh CLI from https://cli.github.com/ and run it manually.' + ); + } else if (ghLoginResult.status !== 0) { + // gh exited non-zero (user cancelled, OAuth callback failed, etc.). + // .error is only set on spawn failure, so without this the wizard + // would proceed as if auth succeeded. + log.warning( + `gh auth login exited with code ${ghLoginResult.status ?? 'null'}. ` + + 'Authentication may not have completed — re-run `gh auth login` manually if needed.' + ); + } + } + } + } + const allowedUsers = await text({ message: 'Enter allowed GitHub usernames (comma-separated, or leave empty for all):', placeholder: 'username1,username2', @@ -994,6 +1013,15 @@ async function collectGitHubConfig(): Promise { * Collect Telegram credentials */ async function collectTelegramConfig(): Promise { + note( + 'SECURITY: Telegram bots are public by default — anyone can DM your bot.\n' + + 'Set TELEGRAM_ALLOWED_USER_IDS to restrict access to your user ID only.\n\n' + + 'To find your user ID:\n' + + '1. Open Telegram and search for @userinfobot\n' + + '2. Send any message — it replies with your user ID (a number)', + 'Telegram Security' + ); + note( 'Telegram Bot Setup\n\n' + 'Step 1: Create your bot\n' + @@ -1001,11 +1029,7 @@ async function collectTelegramConfig(): Promise { '2. Send /newbot\n' + '3. Choose a display name (e.g., "My Archon Bot")\n' + '4. Choose a username (must end in "bot")\n' + - '5. Copy the token BotFather gives you\n\n' + - 'Step 2: Get your user ID\n' + - '1. Search for @userinfobot on Telegram\n' + - '2. Send any message\n' + - '3. It will reply with your user ID (a number)', + '5. 
Copy the token BotFather gives you', 'Telegram Setup' ); @@ -1024,8 +1048,11 @@ async function collectTelegramConfig(): Promise { process.exit(0); } + // Do NOT set required: true — clack's text() blocks the enter key when + // required is true and the value is empty, which traps the user. Validate + // post-hoc with a warning instead. const allowedUserIds = await text({ - message: 'Enter allowed Telegram user IDs (comma-separated, or leave empty for all):', + message: 'Enter allowed Telegram user IDs (comma-separated):', placeholder: '123456789,987654321', }); @@ -1034,6 +1061,13 @@ async function collectTelegramConfig(): Promise { process.exit(0); } + if (!allowedUserIds?.trim()) { + log.warning( + 'No allowlist set — your Telegram bot will accept messages from ANYONE.\n' + + 'Add TELEGRAM_ALLOWED_USER_IDS to ~/.archon/.env after setup to restrict access.' + ); + } + return { botToken, allowedUserIds: allowedUserIds || '', @@ -1110,58 +1144,6 @@ async function collectSlackConfig(): Promise { }; } -/** - * Collect Discord credentials - */ -async function collectDiscordConfig(): Promise { - note( - 'Discord Bot Setup\n\n' + - '1. Go to discord.com/developers/applications\n' + - '2. Click "New Application" and name it\n' + - '3. Go to "Bot" in sidebar:\n' + - ' - Click "Reset Token" and copy it\n' + - ' - Enable "MESSAGE CONTENT INTENT"\n' + - '4. 
Go to "OAuth2" -> "URL Generator":\n' + - ' - Select scope: bot\n' + - ' - Select permissions: Send Messages, Read Message History\n' + - ' - Open generated URL to add bot to your server\n\n' + - 'Get your user ID:\n' + - '- Discord Settings -> Advanced -> Enable Developer Mode\n' + - '- Right-click yourself -> Copy User ID', - 'Discord Setup' - ); - - const botToken = await password({ - message: 'Enter your Discord Bot Token:', - validate: value => { - if (!value || value.length < 50) { - return 'Please enter a valid Discord bot token'; - } - return undefined; - }, - }); - - if (isCancel(botToken)) { - cancel('Setup cancelled.'); - process.exit(0); - } - - const allowedUserIds = await text({ - message: 'Enter allowed Discord user IDs (comma-separated, or leave empty for all):', - placeholder: '123456789012345678,987654321098765432', - }); - - if (isCancel(allowedUserIds)) { - cancel('Setup cancelled.'); - process.exit(0); - } - - return { - botToken, - allowedUserIds: allowedUserIds || '', - }; -} - /** * Collect bot display name */ @@ -1213,11 +1195,8 @@ export function generateEnvContent(config: SetupConfig): string { // Database lines.push('# Database'); - if (config.database.type === 'postgresql' && config.database.url) { - lines.push(`DATABASE_URL=${config.database.url}`); - } else { - lines.push('# Using SQLite (default) - no DATABASE_URL needed'); - } + lines.push('# Using SQLite (default) - no DATABASE_URL needed'); + lines.push('# Set DATABASE_URL=postgresql://... 
to use PostgreSQL instead.'); lines.push(''); // AI Assistants @@ -1293,17 +1272,6 @@ export function generateEnvContent(config: SetupConfig): string { lines.push(''); } - // Discord - if (config.platforms.discord && config.discord) { - lines.push('# Discord'); - lines.push(`DISCORD_BOT_TOKEN=${config.discord.botToken}`); - if (config.discord.allowedUserIds) { - lines.push(`DISCORD_ALLOWED_USER_IDS=${config.discord.allowedUserIds}`); - } - lines.push('DISCORD_STREAMING_MODE=batch'); - lines.push(''); - } - // Bot Display Name if (config.botDisplayName !== 'Archon') { lines.push('# Bot Display Name'); @@ -1338,6 +1306,63 @@ export function resolveScopedEnvPath(scope: 'home' | 'project', repoPath: string return pathsGetArchonEnvPath(); } +/** + * Result of attempting to bootstrap project-scoped Archon config. + * - `created`: `.archon/config.yaml` did not exist; we wrote a starter. + * - `existed`: file already present; left untouched (idempotent re-run). + * - `failed`: mkdir or write failed (permissions, read-only FS, etc.). + * Setup continues — the user can hand-create the file later. + */ +export type BootstrapProjectConfigResult = + | { state: 'created'; path: string } + | { state: 'existed'; path: string } + | { state: 'failed'; path: string; error: string }; + +/** + * Create `/.archon/config.yaml` with a commented-out template if + * absent. Pairs with the skill install — gives the user a place to put + * per-project overrides without manual mkdir. Workflows/commands/scripts + * subdirs are intentionally not created; empty directories would clutter + * users' trees and Archon's loaders handle their absence cleanly. + */ +export function bootstrapProjectConfig(projectPath: string): BootstrapProjectConfigResult { + const archonDir = join(projectPath, '.archon'); + const configPath = join(archonDir, 'config.yaml'); + try { + mkdirSync(archonDir, { recursive: true }); + // `wx` flag = exclusive create. 
Atomic against a concurrent create between + // a check and a write, so an in-flight user edit is never overwritten. + writeFileSync( + configPath, + [ + '# Project-scoped Archon config', + '# Inherits defaults from ~/.archon/config.yaml.', + '# Reference: https://archon.diy/reference/configuration/', + '#', + '# Examples:', + '# assistants:', + '# claude:', + '# model: sonnet', + '# docs:', + '# path: docs', + '', + ].join('\n'), + { mode: 0o644, flag: 'wx' } + ); + return { state: 'created', path: configPath }; + } catch (err) { + const e = err as NodeJS.ErrnoException; + if (e.code === 'EEXIST') { + return { state: 'existed', path: configPath }; + } + return { + state: 'failed', + path: configPath, + error: e.message, + }; + } +} + /** * Serialize a key/value map back to `KEY=value` lines. Values with whitespace, * `#`, `"`, `'`, `\n`, or `\r` are double-quoted with `\\`, `"`, `\n`, `\r` @@ -1648,10 +1673,8 @@ export async function setupCommand(options: SetupOptions): Promise { if (existing.platforms.github) configuredPlatforms.push('GitHub'); if (existing.platforms.telegram) configuredPlatforms.push('Telegram'); if (existing.platforms.slack) configuredPlatforms.push('Slack'); - if (existing.platforms.discord) configuredPlatforms.push('Discord'); const summary = [ - `Database: ${existing.hasDatabase ? 'PostgreSQL' : 'SQLite'}`, `Claude: ${existing.hasClaude ? 'Configured' : 'Not configured'}`, `Codex: ${existing.hasCodex ? 'Configured' : 'Not configured'}`, `Platforms: ${configuredPlatforms.length > 0 ? configuredPlatforms.join(', ') : 'None'}`, @@ -1687,7 +1710,6 @@ export async function setupCommand(options: SetupOptions): Promise { // Read existing config values - for simplicity, start with defaults and merge config = { - database: { type: 'sqlite' }, ai: { claude: existing?.hasClaude ?? false, codex: existing?.hasCodex ?? false, @@ -1697,7 +1719,6 @@ export async function setupCommand(options: SetupOptions): Promise { github: existing?.platforms.github ?? 
false, telegram: existing?.platforms.telegram ?? false, slack: existing?.platforms.slack ?? false, - discord: existing?.platforms.discord ?? false, }, botDisplayName: 'Archon', }; @@ -1713,7 +1734,6 @@ export async function setupCommand(options: SetupOptions): Promise { github: config.platforms.github || newPlatforms.github, telegram: config.platforms.telegram || newPlatforms.telegram, slack: config.platforms.slack || newPlatforms.slack, - discord: config.platforms.discord || newPlatforms.discord, }; // Collect credentials for new platforms only @@ -1726,17 +1746,11 @@ export async function setupCommand(options: SetupOptions): Promise { if (newPlatforms.slack && !existing?.platforms.slack) { config.slack = await collectSlackConfig(); } - if (newPlatforms.discord && !existing?.platforms.discord) { - config.discord = await collectDiscordConfig(); - } } else { - // Fresh or update mode - collect everything - const database = await collectDatabaseConfig(); const ai = await collectAIConfig(); const platforms = await collectPlatforms(); config = { - database, ai, platforms, botDisplayName: 'Archon', @@ -1752,9 +1766,6 @@ export async function setupCommand(options: SetupOptions): Promise { if (platforms.slack) { config.slack = await collectSlackConfig(); } - if (platforms.discord) { - config.discord = await collectDiscordConfig(); - } // Collect bot display name config.botDisplayName = await collectBotDisplayName(); @@ -1808,6 +1819,7 @@ export async function setupCommand(options: SetupOptions): Promise { } let skillInstalledPath: string | null = null; + let projectConfigCreatedPath: string | null = null; if (shouldCopySkill) { const skillTargetRaw = await text({ @@ -1832,6 +1844,16 @@ export async function setupCommand(options: SetupOptions): Promise { } s.stop('Archon skill installed'); skillInstalledPath = join(skillTarget, '.claude', 'skills', 'archon'); + + const bootstrapResult = bootstrapProjectConfig(skillTarget); + if (bootstrapResult.state === 'created') { + 
log.info(`Created project config: ${bootstrapResult.path}`); + projectConfigCreatedPath = bootstrapResult.path; + } else if (bootstrapResult.state === 'failed') { + // Non-fatal — log so silent permission errors don't masquerade as a + // successful setup. The user can hand-create the file later. + log.warn(`Could not create ${bootstrapResult.path}: ${bootstrapResult.error}`); + } } // Optional: configure docs directory @@ -1873,7 +1895,6 @@ export async function setupCommand(options: SetupOptions): Promise { if (config.platforms.github) configuredPlatforms.push('GitHub'); if (config.platforms.telegram) configuredPlatforms.push('Telegram'); if (config.platforms.slack) configuredPlatforms.push('Slack'); - if (config.platforms.discord) configuredPlatforms.push('Discord'); const aiConfigured: string[] = []; if (config.ai.claude) { @@ -1890,10 +1911,9 @@ export async function setupCommand(options: SetupOptions): Promise { } const summaryLines = [ - `Database: ${config.database.type === 'postgresql' ? 'PostgreSQL' : 'SQLite (default)'}`, `AI: ${aiConfigured.length > 0 ? aiConfigured.join(', ') : 'None configured'}`, `Default: ${config.ai.defaultAssistant}`, - `Platforms: ${configuredPlatforms.length > 0 ? configuredPlatforms.join(', ') : 'None'}`, + `Platforms: ${configuredPlatforms.length > 0 ? 
configuredPlatforms.join(', ') : 'None (CLI + skill only)'}`, '', `File written (${scope} scope):`, ` ${writeResult.targetPath}`, @@ -1910,6 +1930,11 @@ export async function setupCommand(options: SetupOptions): Promise { summaryLines.push(''); summaryLines.push('Archon skill installed:'); summaryLines.push(` ${skillInstalledPath}`); + if (projectConfigCreatedPath) { + summaryLines.push(''); + summaryLines.push('Project config created:'); + summaryLines.push(` ${projectConfigCreatedPath}`); + } } note(summaryLines.join('\n'), 'Configuration Complete'); @@ -1924,5 +1949,22 @@ export async function setupCommand(options: SetupOptions): Promise { 'Additional Options' ); - outro('Setup complete! Run `archon version` to verify.'); + note( + 'To update Archon:\n' + + ' Homebrew: brew upgrade coleam00/archon/archon\n' + + ' curl: curl -fsSL https://raw.githubusercontent.com/coleam00/Archon/main/scripts/install.sh | bash\n' + + ' Docker: docker pull ghcr.io/coleam00/archon:latest', + 'Update Instructions' + ); + + const runDoctor = await confirm({ + message: 'Run `archon doctor` now to verify your setup?', + initialValue: true, + }); + if (!isCancel(runDoctor) && runDoctor) { + const { doctorCommand } = await import('./doctor'); + await doctorCommand(); + } + + outro('Setup complete!'); } diff --git a/packages/docs-web/src/content/docs/book/dag-workflows.md b/packages/docs-web/src/content/docs/book/dag-workflows.md index 2a66702584..558df2590f 100644 --- a/packages/docs-web/src/content/docs/book/dag-workflows.md +++ b/packages/docs-web/src/content/docs/book/dag-workflows.md @@ -230,20 +230,23 @@ The classify-and-route example uses `none_failed_min_one_success` on `implement` ## Node Types -Archon supports four node types: +Archon supports seven node types. Exactly one mode field is required per node: | Type | Syntax | When to use | |------|--------|-------------| | **Command** | `command: my-command` | Load a command from `.archon/commands/my-command.md`. 
The standard choice. | | **Prompt** | `prompt: "inline instructions..."` | Quick, one-off instructions that don't need a reusable command file. | | **Bash** | `bash: "shell command"` | Run a shell script without AI. Stdout is captured as `$nodeId.output`. Deterministic operations only. | +| **Script** | `script: "..." ` + `runtime: bun \| uv` | Run TypeScript/JavaScript (bun) or Python (uv) without AI. Inline code or named reference to `.archon/scripts/`. Stdout captured as `$nodeId.output`. See [Script Nodes](/guides/script-nodes/). | | **Loop** | `loop: { prompt: "...", until: SIGNAL }` | Repeat an AI prompt until a completion signal appears in the output. See [Loop Nodes](/guides/loop-nodes/). | +| **Approval** | `approval: { message: "..." }` | Pause the workflow for a human approve/reject decision. See [Approval Nodes](/guides/approval-nodes/). | +| **Cancel** | `cancel: "reason string"` | Terminate the workflow run (status: cancelled, not failed). Usually gated with `when:`. | **Command** is the most common. Use it for anything you'll reuse across workflows. **Prompt** is convenient for glue nodes — summarizing outputs, formatting data — where the logic is simple and workflow-specific. -**Bash** is powerful for deterministic operations: running tests, checking git status, reading a file, fetching an API. The AI doesn't run the bash command; your shell does. The output becomes a variable for downstream nodes: +**Bash** is powerful for deterministic shell operations: running tests, checking git status, reading a file, fetching an API. The AI doesn't run the bash command; your shell does. The output becomes a variable for downstream nodes: ```yaml - id: check-tests @@ -255,6 +258,22 @@ Archon supports four node types: prompt: "Test output: $check-tests.output\n\nFix any failures." ``` +**Script** is for deterministic work that needs a real programming language — parsing JSON, transforming data between AI nodes, calling typed HTTP clients. 
Use `runtime: bun` for TypeScript/JavaScript and `runtime: uv` for Python: + +```yaml +- id: transform + script: | + const raw = process.env.UPSTREAM ?? '{}'; + const items = JSON.parse(raw).items ?? []; + console.log(JSON.stringify({ count: items.length })); + runtime: bun + +- id: analyze + script: analyze-metrics # Named script: .archon/scripts/analyze-metrics.py + runtime: uv + deps: ["pandas>=2.0"] # uv-only; bun auto-installs imports +``` + **Loop** is for iterative tasks where you don't know how many steps it will take. The AI runs until it emits a completion signal: ```yaml @@ -269,6 +288,32 @@ Archon supports four node types: fresh_context: true ``` +**Approval** pauses the workflow for human review. The downstream nodes don't run until the user approves in chat, CLI, or web UI: + +```yaml +interactive: true # required at workflow level for web UI delivery + +nodes: + - id: plan + command: plan-feature + - id: review-gate + approval: + message: "Review the plan above." + depends_on: [plan] + - id: implement + command: implement + depends_on: [review-gate] +``` + +**Cancel** terminates the workflow with a reason string. Pair with `when:` for guarded exits — the run shows as `cancelled` rather than `failed`: + +```yaml +- id: gate-branch + cancel: "Refusing to run on main — this workflow modifies files." 
+ when: "$check-branch.output == 'main'" + depends_on: [check-branch] +``` + --- ## Best Practices diff --git a/packages/docs-web/src/content/docs/book/quick-reference.md b/packages/docs-web/src/content/docs/book/quick-reference.md index f6cc613b2f..6275f5487d 100644 --- a/packages/docs-web/src/content/docs/book/quick-reference.md +++ b/packages/docs-web/src/content/docs/book/quick-reference.md @@ -124,7 +124,10 @@ All nodes share these base fields: | `command` | One of | string | Name of a command file in `.archon/commands/` | | `prompt` | One of | string | Inline AI instructions | | `bash` | One of | string | Shell script (runs without AI; stdout captured as `$nodeId.output`) | +| `script` | One of | string | TypeScript/JavaScript (bun) or Python (uv) — inline or named ref to `.archon/scripts/`. Requires `runtime`. See [Script Nodes](/guides/script-nodes/) | | `loop` | One of | object | Loop configuration (see Loop Options below) | +| `approval` | One of | object | Pause for human review; see [Approval Nodes](/guides/approval-nodes/) | +| `cancel` | One of | string | Reason string; terminates the run with `cancelled` status (not `failed`). Usually gated with `when:` | | `depends_on` | No | string[] | Node IDs that must complete before this node runs | | `when` | No | string | Condition expression; node is skipped if false | | `trigger_rule` | No | string | Join semantics when multiple upstreams exist (see Trigger Rules) | @@ -135,12 +138,30 @@ All nodes share these base fields: | `allowed_tools` | No | string[] | Restrict available tools to this list (Claude only) | | `denied_tools` | No | string[] | Remove specific tools from this node's context (Claude only) | | `idle_timeout` | No | number | Per-node idle timeout in milliseconds (default: 5 minutes) | -| `retry` | No | object | Retry configuration for transient failures (see Retry Options) | +| `retry` | No | object | Retry configuration for transient failures (see Retry Options). 
**Hard error on loop nodes** | | `hooks` | No | object | SDK hook callbacks (Claude only; see Hook Schema) | | `mcp` | No | string | Path to MCP server config JSON file (Claude only) | | `skills` | No | string[] | Skill names to preload into this node's context (Claude only) | +| `agents` | No | object | Inline sub-agent definitions keyed by kebab-case ID. Claude only | -> **bash node timeout**: The `timeout` field on bash nodes is in **milliseconds** (default: 120000). This differs from hook `timeout`, which is in seconds. +**Script-specific fields** (apply when `script:` is set): + +| Field | Required | Type | Description | +|-------|----------|------|-------------| +| `runtime` | Yes | `'bun'` \| `'uv'` | Which runtime executes the script. Must match file extension for named scripts (`.ts`/`.js` → bun, `.py` → uv) | +| `deps` | No | string[] | Python dependencies for `uv run --with`. Ignored for bun (bun auto-installs) | +| `timeout` | No | number | Hard kill in ms. Default: 120000 (2 min). Same semantics as `bash` timeout | + +**Approval-specific fields** (apply when `approval:` is set): + +| Field | Required | Type | Description | +|-------|----------|------|-------------| +| `approval.message` | Yes | string | The message shown to the user when the workflow pauses | +| `approval.capture_response` | No | boolean | `true` = user's comment becomes `$nodeId.output`. Default: `false` | +| `approval.on_reject.prompt` | No | string | AI rework prompt when the user rejects. `$REJECTION_REASON` substituted | +| `approval.on_reject.max_attempts` | No | number | Max rework iterations before cancel. Range 1-10, default 3 | + +> **bash and script node timeout**: The `timeout` field is in **milliseconds** (default: 120000). This differs from hook `timeout`, which is in seconds. 
### Trigger Rules diff --git a/packages/docs-web/src/content/docs/getting-started/overview.md b/packages/docs-web/src/content/docs/getting-started/overview.md index e10f9c5f1b..0962382ce7 100644 --- a/packages/docs-web/src/content/docs/getting-started/overview.md +++ b/packages/docs-web/src/content/docs/getting-started/overview.md @@ -304,6 +304,7 @@ archon workflow run --cwd /path/to/repo "" |---------|-------------| | `archon chat <message>` | Send a message to the orchestrator | | `archon setup` | Interactive setup wizard for credentials and config | +| `archon doctor` | Verify your setup (Claude binary, gh auth, DB, adapters) | | `archon workflow list` | List available workflows | | `archon workflow run <workflow> [msg]` | Run a workflow | | `archon workflow status` | Show running workflows | diff --git a/packages/docs-web/src/content/docs/reference/cli.md b/packages/docs-web/src/content/docs/reference/cli.md index 37790374cf..5717e51b5c 100644 --- a/packages/docs-web/src/content/docs/reference/cli.md +++ b/packages/docs-web/src/content/docs/reference/cli.md @@ -50,7 +50,7 @@ archon workflow run plan --cwd /path/to/repo --branch feature-auth "Add OAuth support" archon workflow run assist --cwd /path/to/repo --no-worktree "Quick question" ``` -**Note:** Workflow and isolation commands require running from within a git repository. Running from subdirectories automatically resolves to the repo root. The `version`, `help`, `chat`, `setup`, and `serve` commands work anywhere. +**Note:** Workflow and isolation commands require running from within a git repository. Running from subdirectories automatically resolves to the repo root. The `version`, `help`, `chat`, `setup`, `serve`, and `doctor` commands work anywhere. ## Commands @@ -84,6 +84,18 @@ archon setup --spawn # open in a new terminal window **Write safety**: `archon setup` never writes to `<repo>/.env` — that file belongs to you. 
The wizard always targets one archon-owned file chosen by `--scope`, merges into existing content (so user-added keys survive), and writes a timestamped backup before every rewrite (e.g. `~/.archon/.env.archon-backup-2026-04-20T09-28-11-000Z`). +### `doctor` + +Verify your Archon setup. Runs a checklist of common failure points: Claude binary spawn, gh CLI auth, database reachability, workspace writability, bundled defaults, and adapter token pings (Slack/Telegram, best-effort). + +```bash +archon doctor +``` + +Exit code 0 if all checks pass or are skipped; 1 if any critical check fails. Adapter pings degrade to `skip` on network errors — a flaky connection does not flip the result red. + +Also runs automatically at the end of `archon setup` (optional). + ### `workflow list` List workflows available in target directory. diff --git a/packages/docs-web/src/content/docs/reference/troubleshooting.md b/packages/docs-web/src/content/docs/reference/troubleshooting.md index 5e9b032293..b1e503156c 100644 --- a/packages/docs-web/src/content/docs/reference/troubleshooting.md +++ b/packages/docs-web/src/content/docs/reference/troubleshooting.md @@ -311,7 +311,7 @@ assistants: claudeBinaryPath: /absolute/path/to/claude ``` -`archon setup` auto-detects and writes `CLAUDE_BIN_PATH` for you. Docker users do not need to do anything — the image pre-sets the variable. +`archon setup` auto-detects and writes `CLAUDE_BIN_PATH` for you. After setup, run `archon doctor` to confirm the binary actually spawns. Docker users do not need to do anything — the image pre-sets the variable. See the [AI Assistants → Binary path configuration](/getting-started/ai-assistants/#binary-path-configuration-compiled-binaries-only) guide for the full install matrix. 
diff --git a/scripts/check-bundled-skill.ts b/scripts/check-bundled-skill.ts new file mode 100644 index 0000000000..90cade23eb --- /dev/null +++ b/scripts/check-bundled-skill.ts @@ -0,0 +1,49 @@ +#!/usr/bin/env bun +/** + * Verifies that packages/cli/src/bundled-skill.ts embeds every file from + * .claude/skills/archon/. The bundled-skill.ts file is hand-maintained + * (uses Bun's `with { type: 'text' }` import attributes, which the + * generator approach in scripts/generate-bundled-defaults.ts cannot + * reproduce for the binary build). This script is the safety net. + * + * Usage: + * bun run scripts/check-bundled-skill.ts # exit 1 if missing + * bun run scripts/check-bundled-skill.ts --check # exit 2 if missing (CI) + * + * Exit codes: + * 0 bundled-skill.ts covers every file under .claude/skills/archon/ + * 1 missing files (default mode) + * 2 missing files (--check mode, used by `bun run validate`) + */ +import { readdirSync, readFileSync, statSync } from 'fs'; +import { join, relative, resolve } from 'path'; + +const REPO_ROOT = resolve(import.meta.dir, '..'); +const SKILL_ROOT = join(REPO_ROOT, '.claude', 'skills', 'archon'); +const BUNDLED_SKILL_PATH = join(REPO_ROOT, 'packages', 'cli', 'src', 'bundled-skill.ts'); + +const CHECK_ONLY = process.argv.includes('--check'); + +function listSkillFiles(dir: string, base: string = dir): string[] { + return readdirSync(dir).flatMap(entry => { + const full = join(dir, entry); + return statSync(full).isDirectory() ? listSkillFiles(full, base) : [relative(base, full)]; + }); +} + +const skillFiles = listSkillFiles(SKILL_ROOT).sort(); +const bundledSrc = readFileSync(BUNDLED_SKILL_PATH, 'utf-8'); +// NOTE: This is a substring check — a filename that appears in a comment or +// stale string literal will also pass. It's a safety net against missing imports, +// not a structural verification of the export map. 
+const missing = skillFiles.filter(f => !bundledSrc.includes(f)); + +if (missing.length > 0) { + console.error( + `bundled-skill.ts is missing these files:\n${missing.map(f => ` - ${f}`).join('\n')}\n\n` + + `Add a corresponding import + BUNDLED_SKILL_FILES entry to\n ${relative(REPO_ROOT, BUNDLED_SKILL_PATH)}` + ); + process.exit(CHECK_ONLY ? 2 : 1); +} + +console.log(`bundled-skill.ts is up to date (${skillFiles.length} files).`);