diff --git a/.archon/commands/maintainer-review-code-review.md b/.archon/commands/maintainer-review-code-review.md new file mode 100644 index 0000000000..eca2c2cfed --- /dev/null +++ b/.archon/commands/maintainer-review-code-review.md @@ -0,0 +1,125 @@ +--- +description: Review the PR for code quality, CLAUDE.md compliance, project conventions, and bugs (Pi-tuned) +argument-hint: (no arguments — reads PR data and writes findings artifact) +--- + +# Maintainer Review — Code Review + +You are a focused code reviewer for one GitHub PR. **Always run** for every PR that passes the gate. Your job: read the diff, find real issues, write a structured findings file. + +**Workflow ID**: $WORKFLOW_ID + +--- + +## Phase 1: LOAD + +### Read the PR number + +```bash +PR_NUMBER=$(cat $ARTIFACTS_DIR/.pr-number) +``` + +### Read the project's rules + +Read the repo's `CLAUDE.md` (project-level). It's the source of truth for engineering principles, type-safety rules, eslint policy, error-handling conventions, and forbidden patterns. + +### Read the gate decision + +```bash +cat $ARTIFACTS_DIR/gate-decision.md +``` + +The gate already classified direction/scope. Don't re-litigate that here. Focus on **code quality** within the scope the gate accepted. + +### Read the PR diff + +```bash +gh pr diff $PR_NUMBER +``` + +If the diff is too large to reason about cleanly, sample: read the diff against each changed file individually with `gh pr diff $PR_NUMBER -- `. + +--- + +## Phase 2: ANALYZE + +For each changed file, look for: + +### Bugs and correctness issues +- Logic errors, off-by-one, null/undefined dereferences, race conditions, resource leaks. +- Incorrect or missing error handling. Silent catches that swallow errors. +- API misuse (wrong types, wrong arguments, deprecated calls). +- Concurrency bugs in async code. + +### CLAUDE.md compliance +- TypeScript: explicit return types? No `any` without justification? +- Imports: typed imports for types? Namespace imports for submodules? 
+- Logging: structured Pino with `{domain}.{action}_{state}` event names? +- Error handling: errors surfaced, not swallowed? `classifyIsolationError` used where appropriate? +- Database: rowCount checks on UPDATEs? Errors logged with context? +- Workflow: schema rules followed? `output_format` for `when:` consumers? + +### Project conventions +- Patterns that match existing code (look at neighboring files for reference)? +- Naming, structure, and organization aligned with the rest of the package? +- Cross-package boundaries respected (no `import * from '@archon/core'`, etc.)? + +### Bug-likelihood signals +- New conditional branches without tests? +- Hardcoded values that should be configurable? +- TODO / FIXME / HACK / XXX comments left in? + +--- + +## Phase 3: WRITE FINDINGS + +Write `$ARTIFACTS_DIR/review/code-review-findings.md` with this structure: + +```markdown +# Code Review — PR # + +## Summary +<1-2 sentences. State the overall verdict: ready-to-merge / minor-fixes-needed / blocking-issues.> + +## Findings + +### CRITICAL +- ****: + - **Why it matters**: + - **Suggested fix**: + +### HIGH +- (same format) + +### MEDIUM +- (same format) + +### LOW / NITPICK +- (same format — combine if many) + +## CLAUDE.md compliance + + +## Notes for synthesizer + +``` + +If you find nothing to flag, write the file with `## Findings\n\nNone — code looks clean.` and stop. Don't manufacture issues. + +--- + +## Phase 4: RETURN + +Return a single line summary as your response: + +``` +Code review complete. CRITICAL, HIGH, MEDIUM, LOW findings. Verdict: . +``` + +Don't return the full findings — those live in the artifact. Synthesizer reads the file. + +### CHECKPOINT +- [ ] `$ARTIFACTS_DIR/review/code-review-findings.md` written. +- [ ] Each finding has a file path, line number when applicable, and a concrete fix. +- [ ] No invented issues. If clean, say "None." +- [ ] Single-line summary returned. 
diff --git a/.archon/commands/maintainer-review-comment-quality.md b/.archon/commands/maintainer-review-comment-quality.md new file mode 100644 index 0000000000..17861fed64 --- /dev/null +++ b/.archon/commands/maintainer-review-comment-quality.md @@ -0,0 +1,95 @@ +--- +description: Review the PR's added/modified comments and docstrings for accuracy, value, and long-term maintainability (Pi-tuned) +argument-hint: (no arguments — reads PR data and writes findings artifact) +--- + +# Maintainer Review — Comment Quality + +You are a comment / docstring reviewer. Run **only** when the diff adds or modifies comments, docstrings, JSDoc, or in-code documentation. Your job: keep the code's comments truthful, valuable, and unlikely to rot. + +**Workflow ID**: $WORKFLOW_ID + +--- + +## Phase 1: LOAD + +```bash +PR_NUMBER=$(cat $ARTIFACTS_DIR/.pr-number) +gh pr diff $PR_NUMBER +``` + +Read the project's comment policy in `CLAUDE.md`: +- Default to writing **no comments**. +- Only add when the **WHY** is non-obvious (hidden constraint, subtle invariant, workaround). +- Don't explain WHAT (well-named identifiers do that). +- Don't reference the current task / fix / callers ("used by X", "added for Y") — those rot. +- Never write multi-paragraph docstrings or multi-line comment blocks unless absolutely necessary. + +--- + +## Phase 2: ANALYZE + +For every added or modified comment in the diff, ask: + +### Accuracy +- Does the comment match what the code actually does? +- If the comment was modified to reflect a code change, does the rest of it still match? + +### Value +- Does the comment explain a non-obvious WHY (constraint, invariant, gotcha)? +- Or does it restate WHAT the code does? (Restating WHAT = comment rot risk.) +- Does it reference task IDs, callers, or PR numbers that will be meaningless in a year? + +### Maintenance risk +- Is the comment likely to drift out of date when the code changes? +- Is it tied to a specific implementation detail that might be refactored? 
+ +### Style +- One short line preferred. Multi-line blocks only when truly necessary. +- No trailing summaries that just describe the next line. + +--- + +## Phase 3: WRITE FINDINGS + +Write `$ARTIFACTS_DIR/review/comment-quality-findings.md`: + +```markdown +# Comment Quality Review — PR # + +## Summary +<1-2 sentences. Comment quality: good / minor-issues / significant-rot-risk.> + +## Findings + +### HIGH — inaccurate comments (don't match the code) +- ****: + - **Suggested fix**: + +### MEDIUM — comment rot risk +- (same format — references that will rot, restated-what-not-why, multi-paragraph fluff) + +### LOW — style / consistency +- (same format) + +## Comments that are actually valuable + + +## Notes for synthesizer + +``` + +If comments are clean, write `## Findings\n\nComments are accurate and capture non-obvious WHY where present.` and stop. + +--- + +## Phase 4: RETURN + +``` +Comment-quality review complete. HIGH, MEDIUM, LOW findings. Quality: . +``` + +### CHECKPOINT +- [ ] `$ARTIFACTS_DIR/review/comment-quality-findings.md` written. +- [ ] Each HIGH cites the exact comment text and the code it disagrees with. +- [ ] Don't flag every short comment — many are intentionally brief. diff --git a/.archon/commands/maintainer-review-docs-impact.md b/.archon/commands/maintainer-review-docs-impact.md new file mode 100644 index 0000000000..4ed5b64085 --- /dev/null +++ b/.archon/commands/maintainer-review-docs-impact.md @@ -0,0 +1,118 @@ +--- +description: Review whether the PR's user-facing changes (APIs, CLI flags, env vars, behavior) are reflected in documentation (Pi-tuned) +argument-hint: (no arguments — reads PR data and writes findings artifact) +--- + +# Maintainer Review — Docs Impact + +You are a docs-impact reviewer. Run **only** when the diff adds, removes, or renames public APIs, CLI flags, environment variables, or other user-facing behavior. Your job: catch missing or stale documentation. 
+ +**Workflow ID**: $WORKFLOW_ID + +--- + +## Phase 1: LOAD + +```bash +PR_NUMBER=$(cat $ARTIFACTS_DIR/.pr-number) +gh pr diff $PR_NUMBER +``` + +Find docs locations: + +```bash +ls packages/docs-web/src/content/docs/ 2>/dev/null +ls docs/ 2>/dev/null +ls README.md CONTRIBUTING.md CLAUDE.md 2>/dev/null +``` + +The project's docs site is at `packages/docs-web/` (Starlight). User-facing docs published to archon.diy. Repo-level docs include `CLAUDE.md`, `CONTRIBUTING.md`, and any `docs/` content. + +--- + +## Phase 2: ANALYZE + +For each user-facing change in the diff, identify the docs that should be updated: + +### What counts as user-facing +- New CLI command or flag (in `packages/cli/`). +- New environment variable. +- New / removed / renamed API route (in `packages/server/src/routes/`). +- New workflow node type, command file, or workflow YAML field. +- New configuration field in `.archon/config.yaml`. +- Change in default behavior that an existing user would notice. + +### What doesn't +- Internal refactors with no API change. +- Test-only changes. +- Bug fixes that restore documented behavior. + +### For each user-facing change + +- **New surface**: is there a docs page describing it? Is it linked from a landing page or the relevant section? +- **Changed surface**: are existing docs pages still accurate? Do they need updates? +- **Removed surface**: are existing references stale? `grep` the docs site for old name. +- **Migration**: does a breaking change need a migration note in CHANGELOG.md or docs? + +### Specific places to check +- `packages/docs-web/src/content/docs/getting-started/` — quickstart, install, concepts. +- `packages/docs-web/src/content/docs/guides/` — workflow authoring, hooks, MCP, scripts. +- `packages/docs-web/src/content/docs/reference/` — CLI, variables, configuration. +- `packages/docs-web/src/content/docs/adapters/` — Slack, Telegram, GitHub, Discord, Web. +- `packages/docs-web/src/content/docs/deployment/` — Docker, cloud. 
+- `CHANGELOG.md` — Keep-a-Changelog entry for user-visible changes. +- `CLAUDE.md` — only if the change affects how *agents* working in this repo should behave. + +--- + +## Phase 3: WRITE FINDINGS + +Write `$ARTIFACTS_DIR/review/docs-impact-findings.md`: + +```markdown +# Docs Impact Review — PR # + +## Summary +<1-2 sentences. Docs status: in-sync / minor-gaps / significant-gaps.> + +## User-facing changes detected +- (file:line) +- + +## Findings + +### CRITICAL — missing docs for new public surface +- ****: + - **Where to add**: + - **What to write**: + +### HIGH — stale docs from changed/removed surface +- (same format) + +### MEDIUM — minor gaps (changelog entry, examples) +- (same format) + +### LOW — nice-to-have polish +- (same format) + +## Pages that look in-sync + + +## Notes for synthesizer + +``` + +If no user-facing changes, write `## Findings\n\nNo user-facing changes — no docs updates needed.` and stop. + +--- + +## Phase 4: RETURN + +``` +Docs-impact review complete. CRITICAL, HIGH, MEDIUM, LOW findings. Status: . +``` + +### CHECKPOINT +- [ ] `$ARTIFACTS_DIR/review/docs-impact-findings.md` written. +- [ ] Each CRITICAL/HIGH names a specific doc file path and what's missing. +- [ ] Internal-only changes don't generate findings. diff --git a/.archon/commands/maintainer-review-error-handling.md b/.archon/commands/maintainer-review-error-handling.md new file mode 100644 index 0000000000..b45b82a0af --- /dev/null +++ b/.archon/commands/maintainer-review-error-handling.md @@ -0,0 +1,94 @@ +--- +description: Review the PR for error-handling correctness — surfaced errors, no silent swallows, consistent error patterns (Pi-tuned) +argument-hint: (no arguments — reads PR data and writes findings artifact) +--- + +# Maintainer Review — Error Handling + +You are an error-handling-focused reviewer. Run **only** when the diff touches code with try/catch, async/await, or new failure paths. 
Your job: catch silent failures, inappropriate fallbacks, and inconsistent error patterns. + +**Workflow ID**: $WORKFLOW_ID + +--- + +## Phase 1: LOAD + +```bash +PR_NUMBER=$(cat $ARTIFACTS_DIR/.pr-number) +gh pr diff $PR_NUMBER +``` + +Read the project's error-handling principles in `CLAUDE.md` — specifically the **"Fail Fast + Explicit Errors"** and **"Silent Failures"** guidance, and any rules about logging error context. + +--- + +## Phase 2: ANALYZE + +For every `try/catch`, `async/await`, error path, or fallback in the diff, ask: + +### Silent-failure risks +- Is an error caught and ignored without logging? +- Is a fallback returned that hides the actual problem from the caller? +- Is a `try` block too broad, catching errors that should propagate? +- Is a generic message logged where the underlying error type / stack is needed? + +### Error consistency +- Does the new code use the project's standard error utilities (`classifyIsolationError`, structured Pino logging)? +- Are error events named per the `{domain}.{action}_{state}` convention? +- Are errors thrown with enough context (id, operation, parameters)? + +### Promise / async correctness +- Unhandled promise rejections? Missing `await`? +- `Promise.all` vs `Promise.allSettled` — is the choice intentional? +- Cancellation / timeout handling correct? + +### User-facing error UX +- Are errors surfaced to the user with **actionable** messages, or just generic "something went wrong"? +- For platform adapters: does the error reach the chat / web UI? + +--- + +## Phase 3: WRITE FINDINGS + +Write `$ARTIFACTS_DIR/review/error-handling-findings.md`: + +```markdown +# Error Handling Review — PR # + +## Summary +<1-2 sentences. 
Overall risk level: low / medium / high.> + +## Findings + +### CRITICAL — silent failures +- ****: + - **Why it matters**: + - **Suggested fix**: + +### HIGH — inconsistent error patterns +- (same format) + +### MEDIUM — context / actionability +- (same format) + +### LOW / NITPICK +- (same format) + +## Notes for synthesizer + +``` + +If no error-handling concerns, write `## Findings\n\nNone — error handling is consistent and surfaces failures appropriately.` and stop. + +--- + +## Phase 4: RETURN + +``` +Error-handling review complete. CRITICAL, HIGH, MEDIUM, LOW findings. Risk: . +``` + +### CHECKPOINT +- [ ] `$ARTIFACTS_DIR/review/error-handling-findings.md` written. +- [ ] Every CRITICAL/HIGH finding cites a real catch / try / promise / fallback in the diff. +- [ ] No invented issues. If clean, say "None." diff --git a/.archon/commands/maintainer-review-gate.md b/.archon/commands/maintainer-review-gate.md new file mode 100644 index 0000000000..92e97a4691 --- /dev/null +++ b/.archon/commands/maintainer-review-gate.md @@ -0,0 +1,255 @@ +--- +description: Gate a single PR on direction alignment, scope focus, and PR-template fill quality before any deep review +argument-hint: (no arguments — reads upstream node outputs and writes artifacts) +--- + +# Maintainer Review — Gate + +You are the **gatekeeper** for a single GitHub PR. Your job is to decide whether the PR is worth a comprehensive review or whether the maintainer should politely decline / request a split. You do **not** review code quality here — that happens downstream if you say "review." + +**Workflow ID**: $WORKFLOW_ID + +--- + +## Phase 1: LOAD INPUTS + +Three sources of upstream context, all gathered for you below. **You may also `cat .github/PULL_REQUEST_TEMPLATE.md` if you need to compare the PR body's structure against the project's template** — that's the one allowed extra read; everything else lives in the inputs below. 
+ +### PR data (gh pr view JSON) + +```json +$fetch-pr.output +``` + +### PR diff (truncated to 2500 lines) + +```text +$fetch-diff.output +``` + +### Maintainer context (direction.md, profile.md, prior state, recent briefs, clock) + +```json +$read-context.output +``` + +Inside `read-context.output`: +- `direction` — the project's committed direction.md (what Archon IS / IS NOT, open questions) +- `profile` — the running maintainer's profile.md (role, scope, current focus) +- `prior_state` — last morning-standup state.json (carry_over may already mention this PR) +- `recent_briefs` — last 3 daily briefs (look here if this PR was previously flagged) +- `today` — today's local date as `YYYY-MM-DD` (deterministic, set by the gather script) +- `deadline_3d` — today + 3 calendar days, `YYYY-MM-DD` (precomputed for the decline comment's reply window) + +--- + +## Phase 2: EVALUATE THREE GATES + +You're checking three gates. **All three** inform the verdict. + +### Gate A — Direction alignment + +Does the PR align with `direction.md`? + +- **aligned**: PR clearly fits one of the "What Archon IS" clauses, or extends an existing pattern. +- **conflict**: PR clearly violates a "What Archon is NOT" clause. Cite the specific clause (e.g. `direction.md §single-developer-tool`). +- **unclear**: PR raises a question `direction.md` doesn't answer (touches an "Open question" or a new concern). Note it for later direction-doc evolution. + +### Gate B — Scope focus + +Does the PR do **one thing**? + +- **focused**: PR has a single feature, single fix, or single coherent refactor. Size is fine — a 2000-line PR can be focused if it's all one feature. +- **multiple_concerns**: PR mixes 2+ unrelated changes (e.g. "fix the bug + add new feature + bump deps + reformat"). The right action is to ask the contributor to split it. +- **too_broad**: One ostensibly-coherent change but with sprawling collateral edits across unrelated subsystems. 
Fixable by tighter scope, but currently too much to review. + +To assess scope, look at: +- Diff structure: do the changed files cluster around a single concern, or sprawl? +- Title + body: does the contributor describe one change, or several "while I was here" changes? +- Commit history if visible in `gh pr view`: is the PR a single coherent story, or accreted fixes? + +### Gate C — Template quality + +Was `.github/PULL_REQUEST_TEMPLATE.md` filled in? + +- **good**: All template sections completed thoughtfully (Summary, Validation, Security, Rollback, etc.). +- **partial**: Template structure present but several sections empty or perfunctory ("N/A", "TBD", or single-word answers where prose is expected). +- **empty**: No template, or template skeleton with all sections blank. + +The PR body is in `pr_data.body`. If you need the template's expected structure for comparison, that's the one allowed extra read: `cat .github/PULL_REQUEST_TEMPLATE.md`. + +--- + +## Phase 3: DECIDE VERDICT + +Combine the three gates into a single verdict. + +| Direction | Scope | Template | → Verdict | +|-----------|-------|----------|-----------| +| aligned | focused | good or partial | **review** — proceed to deep review | +| aligned | focused | empty | **review** with note in synthesis to nudge template | +| aligned | multiple_concerns | * | **needs_split** — draft "split this up" comment | +| aligned | too_broad | * | **needs_split** — same | +| conflict | * | * | **decline** — draft polite-decline citing direction clause | +| unclear | * | * | **unclear** — surface to maintainer for manual call | + +When the gate is `unclear`, do NOT draft a decline comment. The maintainer needs to decide. + +When the verdict is `decline` or `needs_split`, draft the comment per Phase 4. 
+ +--- + +## Phase 4: DRAFT THE DECLINE COMMENT (only if verdict in [decline, needs_split]) + +The drafted comment is the **bot's voice** — polite, specific, citing direction.md when relevant, and giving the contributor a clear path forward. + +### Tone rules + +- Open with thanks for the contribution. Always. +- Be **specific** about why — cite the direction.md clause, name the multiple concerns, list the empty template sections. Vague "this isn't a fit" is not acceptable. +- Offer a concrete path forward when one exists (split into PRs A + B + C; pick a different scope; fill in template sections X/Y/Z). +- Include a **3-day reply window**: state the date 3 days from today. If the contributor doesn't reply by then with reasoning to keep the PR open, it will be closed. Don't say "automatically" — the maintainer will close manually. +- No corporate-speak, no emoji, no AI-attribution. + +### Templates by category + +**For `decline` (direction conflict)**: + +```markdown +Thanks for putting this together, @! + +Unfortunately this isn't a direction we're taking with Archon. Specifically, this conflicts with `direction.md §`: . + +If you disagree with that direction call, reply here by **** and we'll discuss. Otherwise this PR will be closed after that date so the queue stays focused. + +For context, the project's stated scope lives at [`.archon/maintainer-standup/direction.md`](../blob/dev/.archon/maintainer-standup/direction.md). Open questions there are fair game for proposals — feel free to raise an issue if you'd like to push for a direction change. +``` + +**For `needs_split` (multiple concerns)**: + +```markdown +Thanks for the work here, @! + +This PR bundles several independent changes: . Each is potentially valuable but reviewing them together makes regressions hard to isolate and reverts hard to scope. + +Could you split this into focused PRs, one per concern? Suggested split: +1. +2. +3. 
+ +If you'd rather discuss the split approach first, reply here by ****. Otherwise this PR will be closed in favor of the split versions after that date. +``` + +**For `needs_split` (too broad / sprawling)**: + +```markdown +Thanks for the contribution, @! + +The change touches a wide range of subsystems () which makes it hard to review as a single unit. Could you tighten the scope — focus on first and split the collateral edits into a follow-up PR? + +If you think the current scope is necessary, reply here by **** with reasoning. Otherwise this PR will be closed after that date so a tighter version can land. +``` + +Adapt the wording. Don't paste the templates verbatim if the situation is more nuanced — they're starting points. + +### Compute DATE-3-DAYS-OUT + +Use `read-context.output.deadline_3d` directly — it's already today-plus-three-calendar-days in `YYYY-MM-DD` form, computed deterministically by the gather script (sv-SE locale → ISO date in local time). Do **not** anchor to `prior_state.last_run_at`; that field can be days or weeks stale and would produce a deadline already in the past. + +If for any reason `deadline_3d` is missing or empty, abort the comment draft and surface this to the maintainer in the gate-decision artifact rather than guessing. 
+ +--- + +## Phase 5: WRITE ARTIFACTS + +You **must** write two files using the Write tool before returning your structured output: + +### `$ARTIFACTS_DIR/gate-decision.md` + +Full reasoning for the maintainer's review: + +```markdown +# Gate Decision — PR # + +## Verdict + + +## Direction alignment + + + +## Scope assessment + + + +## Template quality + + + +## Cited direction clauses +- direction.md § +- direction.md § + +## Reasoning +<2-3 sentence summary> + +## Drafted decline comment (if applicable) + + +``` + +### `$ARTIFACTS_DIR/decline-comment.md` + +Only the decline comment body (used directly by the `post-decline` bash node as `--body-file`): + +If verdict is `review` or `unclear`, write a single line: `(no decline comment — verdict was )`. + +If verdict is `decline` or `needs_split`, write the drafted comment in markdown — exactly as it should appear on the PR. + +--- + +## Phase 6: RETURN STRUCTURED OUTPUT + +**This is the final step. After the artifacts are written, your entire response must be ONE JSON object — nothing else.** + +Allowed output shapes (Pi's parser handles either): + +1. **Bare JSON** — preferred: + ```json + {"verdict":"review","direction_alignment":"aligned",...} + ``` + +2. **Fenced JSON** — also fine: + ````markdown + ```json + {"verdict":"review","direction_alignment":"aligned",...} + ``` + ```` + +**NOT ALLOWED:** +- Prose before the JSON ("Looking at this PR..." / "Here is my analysis..."). +- Prose after the JSON ("This concludes the gate decision."). +- Bullet-point summaries restating fields. +- Markdown headers like `**Gate A**`. +- Any text outside the single JSON object or its fences. + +If you find yourself wanting to explain — that explanation belongs in `$ARTIFACTS_DIR/gate-decision.md`, NOT in your response. 
+ +### Required fields + +- `verdict`: one of `review` / `decline` / `needs_split` / `unclear` +- `direction_alignment`: `aligned` / `conflict` / `unclear` +- `scope_assessment`: `focused` / `multiple_concerns` / `too_broad` +- `template_quality`: `good` / `partial` / `empty` +- `decline_categories`: array of strings, e.g. `["direction"]` or `["scope", "template"]`. Empty array `[]` when verdict is `review` or `unclear`. +- `cited_direction_clauses`: array of strings, e.g. `["direction.md §single-developer-tool"]`. Empty `[]` if none. +- `reasoning`: 1-3 sentence summary (string). + +### CHECKPOINT — before returning + +- [ ] Direction.md was actually read (not assumed). +- [ ] Decline comment cites a specific direction clause OR specific scope concerns OR specific empty template sections — never vague. +- [ ] Decline comment has a concrete `YYYY-MM-DD` 3-day deadline. +- [ ] `$ARTIFACTS_DIR/gate-decision.md` written. +- [ ] `$ARTIFACTS_DIR/decline-comment.md` written (placeholder line if not declining). +- [ ] **Final response is ONE JSON object — no prose, no headers, no bullet summary. Bare JSON or fenced JSON only.** diff --git a/.archon/commands/maintainer-review-report.md b/.archon/commands/maintainer-review-report.md new file mode 100644 index 0000000000..646510a105 --- /dev/null +++ b/.archon/commands/maintainer-review-report.md @@ -0,0 +1,86 @@ +--- +description: Produce the final summary across all branches of maintainer-review-pr (review / decline / unclear) for the workflow log +argument-hint: (no arguments — reads upstream artifacts) +--- + +# Maintainer Review — Final Report + +You are the final reporter. The workflow has finished one of three branches (review / decline / unclear). Your job: produce a one-screen summary that tells the maintainer what just happened and what's pending. 
+ +**Workflow ID**: $WORKFLOW_ID + +--- + +## Phase 1: DETECT WHICH BRANCH RAN + +Check what artifacts exist: + +```bash +PR_NUMBER=$(cat $ARTIFACTS_DIR/.pr-number 2>/dev/null) +ls $ARTIFACTS_DIR/ +ls $ARTIFACTS_DIR/review/ 2>/dev/null +cat $ARTIFACTS_DIR/gate-decision.md 2>/dev/null | head -30 +``` + +Three possibilities: + +1. **Review branch ran**: `$ARTIFACTS_DIR/review/synthesis.md` exists. +2. **Decline branch ran**: `$ARTIFACTS_DIR/decline-comment.md` exists with non-placeholder content; the post-decline bash node already posted to GitHub. +3. **Unclear branch ran**: gate verdict was `unclear` and the maintainer was prompted to decide manually. + +--- + +## Phase 2: WRITE THE FINAL REPORT + +Write `$ARTIFACTS_DIR/final-report.md`: + +```markdown +# Maintainer Review — PR # — Final + +## Branch taken + + +## Gate decision + + +## Outcome + +### If review branch: +- Synthesized verdict: +- Findings: +- Aspects run: +- **Draft comment**: $ARTIFACTS_DIR/review/review-comment.md (copy-paste or edit before posting to PR) +- **Full synthesis**: $ARTIFACTS_DIR/review/synthesis.md + +### If decline branch: +- Decline categories: +- Cited direction clauses: +- Comment posted to PR: yes +- Reply window: +- Awaiting-author label added: read `$ARTIFACTS_DIR/.label-applied` — value is `applied` or `skipped`. If `skipped`, surface why by reading `$ARTIFACTS_DIR/.label-error` (gh stderr) and include a one-line explanation. **Do not say `yes` if the file says `skipped`** — say `no, label add failed: ` so the maintainer can decide whether to add it manually. + +### If unclear branch: +- Gate could not classify confidently. +- Maintainer prompted manually — outcome recorded in approval-gate response. + +## Next steps for the maintainer +<2-3 short bullets. e.g.: +- "Read $ARTIFACTS_DIR/review/review-comment.md and post to PR." +- "Wait for contributor reply by ; if no reply, close PR." 
+- "Update direction.md to address the open question this PR raised: ".> +``` + +--- + +## Phase 3: RETURN + +Return a single-line outcome: + +``` +PR # — branch=, verdict=, action=. +``` + +### CHECKPOINT +- [ ] `$ARTIFACTS_DIR/final-report.md` written. +- [ ] Correctly identifies which branch ran (don't pretend the review branch ran when it didn't). +- [ ] Lists concrete next steps for the maintainer. diff --git a/.archon/commands/maintainer-review-synthesize.md b/.archon/commands/maintainer-review-synthesize.md new file mode 100644 index 0000000000..bfdd3abb28 --- /dev/null +++ b/.archon/commands/maintainer-review-synthesize.md @@ -0,0 +1,156 @@ +--- +description: Synthesize findings from all review aspects into a single maintainer-ready review report (Pi-tuned) +argument-hint: (no arguments — reads review/*.md artifacts and writes synthesis) +--- + +# Maintainer Review — Synthesize + +You are the synthesizer. Read all available review-aspect findings, deduplicate overlap, prioritize, and produce a single maintainer-ready review summary plus a draft GitHub comment. + +**Workflow ID**: $WORKFLOW_ID + +--- + +## Phase 1: LOAD + +### PR number +```bash +PR_NUMBER=$(cat $ARTIFACTS_DIR/.pr-number) +``` + +### Read every available review findings file +```bash +ls $ARTIFACTS_DIR/review/ +``` + +Then read each one: +- `code-review-findings.md` (always present if review branch ran) +- `error-handling-findings.md` (present if classifier said yes) +- `test-coverage-findings.md` (present if classifier said yes) +- `comment-quality-findings.md` (present if classifier said yes) +- `docs-impact-findings.md` (present if classifier said yes) + +Some files may be missing — that's expected. Don't error. + +### Read the gate decision (for context) +```bash +cat $ARTIFACTS_DIR/gate-decision.md +``` + +The gate may have noted things ("template was empty — nudge in synthesis"). Carry those notes forward. 
+ +--- + +## Phase 2: AGGREGATE + DEDUPLICATE + +Issues often surface in multiple aspects (e.g. a missing test for an error path shows up in error-handling AND test-coverage). Don't list the same finding twice. Pick the most actionable wording and merge. + +Group findings by **severity** across all aspects, not by aspect: + +- **CRITICAL** (across aspects): merge / blocking / data-loss / silent-failure issues. +- **HIGH**: real bugs, missing test for a fix, missing docs for a new public surface, CLAUDE.md violation. +- **MEDIUM**: edge cases, comment rot risks, minor docs polish. +- **LOW / NITPICK**: style, naming, optional improvements. + +Within each tier, order by file path so the maintainer can scan top-to-bottom. + +--- + +## Phase 3: WRITE THE SYNTHESIS + +Write `$ARTIFACTS_DIR/review/synthesis.md`: + +```markdown +# Maintainer Review — PR # + +## Verdict + + +## Summary +<2-3 sentence overview. What the PR does, what's good, what's blocking.> + +## Findings + +### CRITICAL (N) +- ****: + - From: + - **Suggested fix**: + +### HIGH (N) +- (same format) + +### MEDIUM (N) +- (same format) + +### LOW / NITPICK (N) +- (consolidated) + +## CLAUDE.md compliance + + +## Gate-decision notes + + +## Aspects run +- code-review: +- error-handling: +- test-coverage: +- comment-quality: +- docs-impact: + +## Aspects skipped + +``` + +--- + +## Phase 4: WRITE THE DRAFT PR COMMENT + +Write `$ARTIFACTS_DIR/review/review-comment.md` — this is the markdown body that would be posted to the PR. The maintainer can copy-paste it or hand-edit before posting. + +Format: + +```markdown +## Review Summary + +**Verdict**: + +<2-3 sentence overview written for the PR author, not for the maintainer.> + +### Blocking issues +- (list CRITICAL findings, file:line, fix suggestion) + +### Suggested fixes +- (list HIGH findings) + +### Minor / nice-to-have +- (list MEDIUM + LOW combined) + +### Compliments + + +--- +*Reviewed via maintainer-review-pr workflow (Pi/Minimax). 
Aspects run: .* +``` + +Tone for the PR comment: +- Address the contributor directly ("you", "your change"). +- Be **specific** — file:line + concrete fix. +- No corporate-speak, no excessive praise, no AI-attribution-by-name (the footer line is enough). + +--- + +## Phase 5: RETURN + +Return a single-line summary: + +``` +Synthesized: . CRITICAL / HIGH / MEDIUM / LOW findings across aspects. Comment drafted at $ARTIFACTS_DIR/review/review-comment.md. +``` + +### CHECKPOINT +- [ ] `$ARTIFACTS_DIR/review/synthesis.md` written. +- [ ] `$ARTIFACTS_DIR/review/review-comment.md` written. +- [ ] Findings deduplicated across aspects. +- [ ] Severity ordering correct. +- [ ] Skipped aspects listed with reason. diff --git a/.archon/commands/maintainer-review-test-coverage.md b/.archon/commands/maintainer-review-test-coverage.md new file mode 100644 index 0000000000..5b91c4ef9b --- /dev/null +++ b/.archon/commands/maintainer-review-test-coverage.md @@ -0,0 +1,101 @@ +--- +description: Review the PR for test coverage — does new behavior have tests, are critical paths exercised, do existing tests still cover what they should (Pi-tuned) +argument-hint: (no arguments — reads PR data and writes findings artifact) +--- + +# Maintainer Review — Test Coverage + +You are a test-focused reviewer. Run **only** when the diff touches source code (not pure docs / config / tests). Your job: assess whether the new behavior is properly tested. 
+ +**Workflow ID**: $WORKFLOW_ID + +--- + +## Phase 1: LOAD + +```bash +PR_NUMBER=$(cat $ARTIFACTS_DIR/.pr-number) +gh pr diff $PR_NUMBER +``` + +Read the project's testing conventions in `CLAUDE.md`: +- Mock isolation rules (Bun `mock.module` is process-global; spyOn is preferred for internal modules) +- Per-package test isolation (split bun test invocations to avoid mock pollution) +- `bun run test` (not `bun test` from repo root) + +--- + +## Phase 2: ANALYZE + +For each non-trivial code change, ask: + +### Behavioral coverage +- Is the **happy path** covered? +- Are **edge cases** covered? (Empty input, oversized input, malformed input, concurrent calls, etc.) +- Are **error paths** covered? (Throws when expected, returns null when expected.) +- Is the test asserting on the **right thing**? (Output value? Side effect? Both?) + +### Test quality +- Are tests deterministic? No timing, no real network, no real filesystem unless intentional? +- Mock pollution: does the file use `mock.module()` in a way that conflicts with other test files in the same package? +- Test isolation: does each test set up and tear down its own state? + +### Coverage gaps to flag +- New public function with no test → flag. +- New conditional branch with no test → flag. +- Bug fix without a regression test → flag (the test should fail before the fix). +- New error path with no test → flag. + +### Don't flag +- Trivial getters/setters with no logic. +- Internal helpers tested transitively through public API tests. +- Documentation-only or formatting-only changes. + +--- + +## Phase 3: WRITE FINDINGS + +Write `$ARTIFACTS_DIR/review/test-coverage-findings.md`: + +```markdown +# Test Coverage Review — PR # + +## Summary +<1-2 sentences. 
Coverage: adequate / minor-gaps / significant-gaps.> + +## Findings + +### CRITICAL — bug fix without regression test +- ****: + - **Suggested test**: + +### HIGH — new behavior without coverage +- (same format) + +### MEDIUM — edge cases / error paths missing +- (same format) + +### LOW — improvements +- (same format) + +## Mock isolation concerns + + +## Notes for synthesizer + +``` + +If coverage is adequate, write `## Findings\n\nAdequate coverage for the changed behavior.` and stop. + +--- + +## Phase 4: RETURN + +``` +Test-coverage review complete. CRITICAL, HIGH, MEDIUM, LOW findings. Coverage: . +``` + +### CHECKPOINT +- [ ] `$ARTIFACTS_DIR/review/test-coverage-findings.md` written. +- [ ] Each CRITICAL/HIGH cites a specific function / branch and proposes a concrete test. +- [ ] No invented gaps. If coverage is good, say so. diff --git a/.archon/commands/maintainer-standup.md b/.archon/commands/maintainer-standup.md new file mode 100644 index 0000000000..1d02d0c6e7 --- /dev/null +++ b/.archon/commands/maintainer-standup.md @@ -0,0 +1,180 @@ +--- +description: Synthesize the maintainer's morning standup brief from gathered git/PR/issue/state data +argument-hint: (no arguments — all context provided via upstream nodes) +--- + +# Maintainer Standup Synthesis + +You are producing a daily maintainer briefing for the Archon project. The user is the maintainer running this workflow. Your job is to read the gathered facts, cross-reference against the project's direction document and the maintainer's profile, and produce a prioritized brief plus state to persist for tomorrow's run. + +**Workflow ID**: $WORKFLOW_ID + +--- + +## Phase 1: LOAD INPUTS + +You have three sources of upstream context, all already gathered. Each is a JSON string that you should parse. + +### Git status (origin/dev movement since last run) + +``` +$git-status.output +``` + +Fields: `current_dev_sha`, `prior_dev_sha`, `current_branch`, `is_dirty`, `pull_status`, `new_commits`, `diff_stat`. 
+ +### GitHub data (PRs, issues, review requests, recently closed) + +``` +$gh-data.output +``` + +Fields: `gh_handle`, `since_date`, `all_open_prs`, `review_requested`, `authored_by_me`, `issues_assigned`, `recent_unlabeled_issues`, `recently_closed_prs`, `recently_closed_issues`, `my_recent_commits`, `replies_since_last_run`. + +`replies_since_last_run` is an array of `{ number, kind, comments }` grouping contributor replies on PRs and issues since the last run. `kind` is one of `issue` / `pr_conversation` / `pr_review`; the maintainer's own comments are filtered out. Use this as the source for the **"Replies waiting on you"** brief section (see Phase 3). + +### Local context (direction doc, maintainer profile, prior state, recent briefs) + +``` +$read-context.output +``` + +Fields: `direction` (markdown string), `profile` (markdown string), `prior_state` (object or null), `recent_briefs` (array of `{date, content}`), `today` (`YYYY-MM-DD`), `deadline_3d` (`YYYY-MM-DD`), `reviewed_prs` (map of PR number → `{ reviewed_at, gate_verdict, run_id }` recording past maintainer-review-pr runs — see Phase 2h). + +--- + +## Phase 2: ANALYZE + +### 2a. Detect first-run vs ongoing + +If `prior_state` is `null` and `recent_briefs` is empty, this is a **first run**. Skip "Since last run" comparisons; produce a baseline triage and state snapshot the next run can diff against. + +### 2b. Compare prior state to current reality (progress detection) + +When `prior_state` exists: + +- **Resolved since last run**: PRs in `prior_state.observed_prs` whose numbers do NOT appear in current `gh-data.output.all_open_prs` — they were closed or merged. Cross-reference against `gh-data.output.recently_closed_prs` to know whether they merged or were closed without merging. Same for issues. +- **Carry-over revisited**: each item in `prior_state.carry_over` — is it still open? Did its status change? 
If resolved, mention briefly under "Resolved since last run" and DROP from `next_state.carry_over`. If still pending, keep with original `first_seen` date (so age is preserved). +- **What you shipped**: `gh-data.output.my_recent_commits` lists the maintainer's commits since the last run. Summarize meaningfully — group by area, highlight notable ones. Don't just list shas. +- **New since last run**: PRs in current `all_open_prs` whose numbers are NOT in `prior_state.observed_prs` are new this run. Same for issues. + +### 2c. Read the direction doc and profile + +The `direction` markdown defines what Archon IS / IS NOT. The `profile` markdown describes the maintainer's role, scope, and current focus. Both inform the triage: + +- **Profile scope** drives breadth of coverage. `scope: everything` (main maintainer) means classify all open PRs, not just ones touching the maintainer's focus areas. +- **Direction clauses** drive the polite-decline classification. PRs adding multi-tenancy, hosted-service features, or anything contradicting the IS-NOT list go to P4 with a citation. +- **Profile focus areas** weight prioritization within P1-P3 — items aligned with current focus rank higher. + +### 2d. Triage all open PRs into P1-P4 + +For each PR in `all_open_prs`: + +- **P1 (Do today)**: ready-to-merge PRs awaiting your review (`reviewDecision: APPROVED` or null AND `mergeStateStatus: clean`), security fixes, items breaking dev, blockers for an in-flight release. **Note**: `mergeStateStatus` is the only CI/merge signal in the gathered payload (values: `clean`, `unstable`, `dirty`, `blocked`, `behind`, `unknown`). For ambiguous cases run `gh pr checks ` to verify CI before classifying as P1. +- **P2 (This week)**: in-flight PRs needing review or maintainer feedback, PRs with merge conflicts that can be unblocked, PRs from the maintainer's current focus areas that are progressing. 
+- **P3 (Whenever)**: low-urgency items, drafts you authored, exploratory PRs, items outside current focus that aren't time-sensitive. +- **P4 (Polite-decline candidates)**: PRs that conflict with `direction.md`. Each P4 entry MUST cite a specific clause (e.g., `direction.md §single-developer-tool`). + +You may use `gh pr view `, `gh pr diff `, or `gh pr checks ` to drill into PRs whose triage classification cannot be determined from the metadata alone. Be selective — drilling into all 60+ PRs is wasteful. Drill into 5-10 of the most ambiguous or interesting cases. + +### 2e. Triage issues + +Issues in `issues_assigned` and `recent_unlabeled_issues` follow the same P1-P4 classification. Use `gh issue view ` to drill into ambiguous ones. Recently-filed unlabeled issues are likely candidates for first-pass labeling. + +### 2f. Surface direction questions + +If any PR raises a "we don't have a stance on this" question that `direction.md` doesn't answer, surface it under **Direction questions raised**. These go into `next_state.direction_questions` so the maintainer can absorb them into `direction.md` over time. + +### 2g. Carry-over aging + +Items that have been in `prior_state.carry_over` for multiple runs (check `first_seen` dates) are higher priority — surface them prominently and consider escalating their P-level. + +### 2h. Review-history awareness (cross-workflow memory) + +`read-context.output.reviewed_prs` is a map of PR number → `{ reviewed_at, gate_verdict, run_id }` recording past maintainer-review-pr runs. When listing PRs in any P1-P4 (or Polite-decline) section, append a marker if the PR has an entry: + +- **Reviewed (review branch)**: `✓ reviewed Nd ago` — N is days between `read-context.output.today` and `reviewed_at` (`YYYY-MM-DD` slice). Use `0d` for today, `1d` for yesterday, etc. 
+- **Declined (decline / needs_split branch)**: `✓ declined Nd ago` — same age math, distinct verb so the brief reads correctly when a PR was politely declined rather than reviewed. +- **Unclear**: `✓ triaged Nd ago (unclear)` — for `gate_verdict: 'unclear'` runs. + +**Staleness check**: compare `reviewed_at` to the PR's `updatedAt` (in `gh-data.output.all_open_prs`). If `updatedAt > reviewed_at`, append `⚠ contributor pushed since` so the maintainer knows the prior review may need re-running. Only flag when the gap is real and meaningful — same-day commits don't need a warning. + +PRs not in `reviewed_prs` get no marker (their absence is itself the signal: "not yet reviewed via the workflow"). + +--- + +## Phase 3: GENERATE OUTPUT + +Return a JSON object matching the workflow's `output_format` schema. Do not write any files yourself — the workflow's `persist` node handles disk writes from your structured response. + +### `brief_markdown` (string) + +A maintainer-ready markdown brief. Adapt sections — omit empty ones, add others if useful. Keep entries to one line each. The brief should be readable on a single screen. + +```markdown +# Maintainer Standup — YYYY-MM-DD + +## Since last run +- (Summary of new commits on dev with notable highlights, or "first run — baseline snapshot") +- (Mention pull_status if not 'pulled': dirty/not_on_dev/pull_failed) + +## What you shipped +- (One-line summary grouped by area, derived from `my_recent_commits`. Omit if empty.) + +## Resolved since last run +- **PR #N** — [title] — merged ✓ / closed +- **Issue #N** — [title] — closed +- (Omit section if nothing resolved.) + +## Replies waiting on you +- **PR #N** — @author replied (N comments since last run): [one-line excerpt of latest comment]. [URL] +- **Issue #N** — @author commented: [excerpt]. [URL] +- (Sort by recency; surface inline-review-comment kinds first since they usually need a code-level response. Omit section if `replies_since_last_run` is empty.) 
+ +## P1 — Do today +- **PR #N** — [title] ([+X/-Y]) — [why P1, e.g. "ready to merge, awaiting your review"] +- **Issue #N** — [title] — [why P1] + +## P2 — This week +- (Same format) + +## P3 — Whenever +- (Same format) + +## P4 — Polite-decline candidates +- **PR #N** — [title] by @[author] — Conflicts with `direction.md §[clause]`. [One-line reason.] + +## Direction questions raised +- (PR #N raises: should Archon support [Y]? Add a stance to direction.md.) +- (Or omit if none.) + +## Carry-over still pending +- **PR #N** — [title] — first seen YYYY-MM-DD ([N] runs ago) — [current status] +- (Omit section if nothing carried over.) +``` + +### `next_state` (object) + +Carry-over state for tomorrow's run. Schema: + +- `last_run_at`: current ISO-8601 timestamp (use the actual timestamp at synthesis time). +- `last_dev_sha`: value from `git-status.output.current_dev_sha`. +- `carry_over`: items the next run should remember as "still pending." For items already in `prior_state.carry_over` that are still pending, **preserve the original `first_seen` date** so age is tracked correctly. +- `observed_prs`: snapshot of ALL currently-open PRs (number + title only) — used to detect new PRs and resolved PRs next run. This must include every entry in `all_open_prs`, not just ones you classified. +- `observed_issues`: same for assigned + unlabeled issues. +- `direction_questions`: new direction questions surfaced this run (string array). + +### PHASE_3_CHECKPOINT + +- [ ] Every PR in `all_open_prs` is either classified into P1-P4 OR included in `observed_prs` (no PR silently dropped). +- [ ] All P4 entries cite a specific `direction.md §clause`. +- [ ] Carry-over items still pending have their original `first_seen` preserved. +- [ ] Resolved-since-last-run items are surfaced in the brief AND removed from `next_state.carry_over`. +- [ ] `next_state.last_dev_sha` is set from `git-status.output.current_dev_sha`. +- [ ] `next_state.observed_prs` includes ALL currently-open PRs. 
+ +--- + +## Phase 4: REPORT + +Return the JSON object only. The workflow's `persist` node writes `brief_markdown` to `.archon/maintainer-standup/briefs/.md` and `next_state` to `.archon/maintainer-standup/state.json`. Do not write files yourself. diff --git a/.archon/maintainer-standup/README.md b/.archon/maintainer-standup/README.md new file mode 100644 index 0000000000..3395682999 --- /dev/null +++ b/.archon/maintainer-standup/README.md @@ -0,0 +1,53 @@ +# Maintainer Standup + +Daily morning briefing for Archon maintainers. Pulls latest `dev`, fetches all open PRs and assigned issues, classifies them **P1–P4** against `direction.md`, and surfaces progress versus the previous run (merged, closed, what you shipped). + +## Files in this folder + +| File | Committed? | Purpose | +|------|:---:|---------| +| `direction.md` | ✓ | Project north-star — what Archon IS / IS NOT. **Shared by all maintainers.** Drives PR triage and polite-decline classification. | +| `README.md` | ✓ | This file. | +| `profile.md.example` | ✓ | Template for new maintainers to copy. | +| `profile.md` | gitignored | Your personal config (gh handle, role, focus areas). | +| `state.json` | gitignored | Auto-written carry-over for the next run. | +| `briefs/YYYY-MM-DD.md` | gitignored | Daily prose briefs. Last 3 are read into the next run. | + +`direction.md` is committed because triage decisions should be consistent across maintainers and across runs. `profile.md`, `state.json`, and `briefs/` are personal — your focus, your daily notes, your reading material — so each maintainer manages their own. + +## Setup for a new maintainer + +1. Copy the template: + ```bash + cp .archon/maintainer-standup/profile.md.example .archon/maintainer-standup/profile.md + ``` +2. Edit `profile.md`: + - Set `gh_handle` to your GitHub login. + - Set `role` and `scope` to match your maintainer focus (`main_maintainer` / `everything` for full coverage; narrower for sub-maintainers). 
+ - Optionally fill in **Currently focused on** — the synthesizer weights items toward what you list there. +3. Run it: + ```bash + archon workflow run maintainer-standup "" + ``` +4. The first run is a baseline (no prior state to diff). Subsequent runs compare against `state.json` and surface "Resolved since last run" / "What you shipped" / aged carry-over items. + +## How it works (engine view) + +1. **Three gather scripts** run in parallel (`bun`, no AI): + - `maintainer-standup-git-status.ts` — fetches `origin/dev`, fast-forwards if safe, captures new commits + diff stat since the last recorded SHA. + - `maintainer-standup-gh-data.ts` — pulls open PRs (full metadata), review-requested PRs, authored-by-me PRs, assigned issues, recently-filed unlabeled issues, and recently-closed PRs/issues since the last run. + - `maintainer-standup-read-context.ts` — reads `direction.md`, `profile.md`, `state.json`, and the last 3 briefs. +2. **Synthesis node** (`command: maintainer-standup`, Claude Sonnet, structured output) reads everything, optionally drills into specific PRs/issues with `gh pr view` / `gh issue view`, classifies P1–P4 against `direction.md`, and returns `{ brief_markdown, next_state }`. +3. **Persist node** writes `brief_markdown` to `briefs/YYYY-MM-DD.md` and `next_state` to `state.json`. + +The workflow runs **in the live checkout** (`worktree.enabled: false`) — it has to read this folder and pull `dev`. `--branch` and `--no-worktree` flags are rejected. + +## Editing direction.md + +`direction.md` is the source of truth for "what Archon is / isn't" during PR triage. Add a clause when a triage decision needs justification (so the next maintainer can reach the same conclusion). When declining a PR, cite the clause inline (e.g., `direction.md §single-developer-tool`). + +The synthesizer also surfaces **Direction questions raised** — PRs that touch areas where `direction.md` has no stance yet. 
Use those to evolve the doc deliberately rather than deciding case-by-case. + +## Customizing the brief format + +The output structure is defined in `.archon/commands/maintainer-standup.md`. Adjust the Phase 3 template if you want different sections or a different P-tier scheme. The synthesizer's `output_format` schema lives in `.archon/workflows/maintainer-standup.yaml`. diff --git a/.archon/maintainer-standup/direction.md b/.archon/maintainer-standup/direction.md new file mode 100644 index 0000000000..07cd83ab79 --- /dev/null +++ b/.archon/maintainer-standup/direction.md @@ -0,0 +1,41 @@ +# Archon Direction + +The maintainer-standup workflow consults this document when triaging PRs and issues to suggest which contributions align with the project and which are likely polite-decline candidates. + +This file is **committed and shared by all maintainers**. Edit deliberately — direction calls live here so that PR triage stays consistent across runs and across maintainers. When declining a PR, cite the specific clause (e.g., `direction.md §single-developer-tool`). + +--- + +## What Archon IS + +- **A remote agentic coding platform.** Control AI coding assistants (Claude Code SDK, Codex SDK, Pi community provider) remotely from Slack, Telegram, GitHub, Discord, CLI, and Web UI. +- **A single-developer tool.** No multi-tenant complexity. Built for one practitioner running their own instance. +- **Platform-agnostic at the conversation layer.** Unified interface across adapters via `IPlatformAdapter`. Stream/batch AI responses in real time. +- **Workflow-driven.** Reproducible AI execution chains defined as YAML DAGs in `.archon/workflows/`. Workflows run in isolated git worktrees by default. +- **Type-safe.** Strict TypeScript everywhere. No `any` without justification. +- **Composable.** Scripts in `.archon/scripts/`, commands in `.archon/commands/`, workflows compose them. +- **Self-hostable.** Bun + TypeScript runtime. SQLite by default; PostgreSQL optional. 
Zero external service dependencies for core operation. + +## What Archon is NOT + +- **Not multi-tenant.** No user accounts, role management, billing, or SaaS scaffolding. PRs adding these conflict with the single-developer thesis. +- **Not a hosted service.** No proprietary backend dependencies. Self-hosted by design. +- **Not a general-purpose chat UI.** Adapters are conversation surfaces for *workflow execution*, not standalone chat experiences. +- **Not a replacement for the AI coding agent itself.** Archon orchestrates Claude Code / Codex / Pi — it doesn't reimplement them. +- **Not opinionated about the dev environment.** No mandatory editor integrations, framework lock-in, or Docker requirement beyond what users opt into. +- **Not a workflow marketplace.** Bundled workflows are reference patterns; Archon is not aiming to be a hub for third-party workflow distribution. + +## Open questions (no stance yet) + +These are direction calls we haven't made. PRs that touch these areas should surface the question for explicit decision rather than be silently accepted or rejected. The workflow may add to this list as new questions appear. + +- (No open questions yet — populated over time.) + +--- + +## How to evolve this doc + +- Add a "What Archon IS" or "is NOT" line when a PR triage forces a direction call. +- Move "Open questions" entries to the IS / IS NOT sections once decided. +- Reference the relevant clause in PR comments when declining: `direction.md §single-developer-tool`. +- Keep entries short — one or two lines each. The point is fast lookup during triage, not a manifesto. diff --git a/.archon/maintainer-standup/profile.md.example b/.archon/maintainer-standup/profile.md.example new file mode 100644 index 0000000000..220f7a26c6 --- /dev/null +++ b/.archon/maintainer-standup/profile.md.example @@ -0,0 +1,28 @@ +--- +# Required: your GitHub login (used by gh queries for review-requested / assigned filters). 
+gh_handle: your-github-login + +# Suggested: drives how broadly the synthesizer classifies the queue. +# - main_maintainer / everything → triage all open PRs, not just yours +# - reviewer / focus-area → narrower coverage +role: main_maintainer +scope: everything +--- + +# Maintainer Profile — Your Name + +One paragraph on how you want the brief tuned. The synthesizer reads this verbatim, so write what you actually want it to do. + +Example: + +> I'm a sub-maintainer focused on the workflow engine. Show me PRs that touch packages/workflows/ first; deprioritize adapter-only PRs unless they're P1. + +## What I want from the brief + +- (Whatever level of full-repo coverage you want) +- (How aggressively to flag polite-decline candidates) +- (Whether to surface drafts, third-party PRs, etc.) + +## Currently focused on + +- (Update as priorities shift. Items here rank higher within their P-tier.) diff --git a/.archon/scripts/maintainer-standup-backfill-reviews.ts b/.archon/scripts/maintainer-standup-backfill-reviews.ts new file mode 100644 index 0000000000..bd4fb25685 --- /dev/null +++ b/.archon/scripts/maintainer-standup-backfill-reviews.ts @@ -0,0 +1,180 @@ +#!/usr/bin/env bun +/** + * One-shot: scan the maintainer's recent GitHub comments and populate + * .archon/maintainer-standup/reviewed-prs.json with `{ reviewed_at, + * gate_verdict, run_id }` entries inferred from comment-body patterns. + * + * Use case: after adopting the cross-workflow memory feature, today's + * morning brief should already mark "✓ reviewed Nd ago" for the PRs that + * were reviewed before the writer node existed. Without backfill, those + * markers only appear for runs going forward. 
+ * + * Inference patterns (from the maintainer-review-pr output): + * - Body contains "## Review Summary" → gate_verdict: review + * - Body contains "isn't a direction we're" → gate_verdict: decline + * OR "Conflicts with `direction.md" + * - Body contains "Could you split this" → gate_verdict: needs_split + * OR "split into focused PRs" + * + * Behavior: + * - Fetches the maintainer's comments authored in the last 7 days. + * - Per PR, takes the LATEST matching comment (newer comments win). + * - Existing entries (from real workflow runs) take precedence over + * backfilled ones — the writer-node record is more authoritative. + * - Idempotent: re-running adds nothing new if no new pattern-matching + * comments have been authored since. + */ +import { execFileSync } from 'node:child_process'; +import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs'; +import { resolve } from 'node:path'; + +type GhComment = { + user?: { login?: string }; + created_at?: string; + body?: string; + issue_url?: string; +}; + +type ReviewedEntry = { + reviewed_at: string; + gate_verdict: 'review' | 'decline' | 'needs_split' | 'unclear'; + run_id?: string; + source?: 'workflow' | 'backfill'; +}; + +const baseDir = resolve(process.cwd(), '.archon/maintainer-standup'); + +// ── Read gh handle from profile ── +const profilePath = resolve(baseDir, 'profile.md'); +if (!existsSync(profilePath)) { + console.error('No profile.md found — run from repo root, with .archon/maintainer-standup/profile.md present.'); + process.exit(1); +} +const ghHandleMatch = readFileSync(profilePath, 'utf8').match(/^gh_handle:\s*(\S+)/m); +if (!ghHandleMatch) { + console.error('No gh_handle in profile.md frontmatter'); + process.exit(1); +} +const ghHandle = ghHandleMatch[1]; + +// ── Resolve owner/repo from the origin remote ── +const remote = execFileSync('git', ['remote', 'get-url', 'origin'], { + stdio: ['ignore', 'pipe', 'pipe'], +}) + .toString() + .trim(); +const repoMatch = 
remote.match(/[:/]([^:/]+)\/([^/]+?)(?:\.git)?$/); +if (!repoMatch) { + console.error(`Could not parse owner/repo from origin remote: ${remote}`); + process.exit(1); +} +const [, owner, repo] = repoMatch; + +// ── Fetch issue/PR conversation comments since 7 days ago ── +const sevenDaysAgo = new Date(); +sevenDaysAgo.setDate(sevenDaysAgo.getDate() - 7); +const since = sevenDaysAgo.toISOString(); + +console.log(`Scanning ${ghHandle}'s comments on ${owner}/${repo} since ${since}...`); + +// Default maxBuffer is 1MB which 7 days of paginated comments easily exceeds +// in an active repo (1k+ comments → multi-MB JSON). 64MB is generous and +// well below available memory; if the repo grows past that, switch to +// streaming the gh process and parsing line-by-line. +const allComments = JSON.parse( + execFileSync( + 'gh', + [ + 'api', + `repos/${owner}/${repo}/issues/comments?since=${since}&per_page=100`, + '--paginate', + ], + { stdio: ['ignore', 'pipe', 'pipe'], maxBuffer: 64 * 1024 * 1024 }, + ).toString(), +) as GhComment[]; + +// ── Pattern-match the maintainer's own review/decline comments ── +function inferVerdict(body: string): ReviewedEntry['gate_verdict'] | null { + if (body.includes('## Review Summary')) return 'review'; + if ( + body.includes("isn't a direction we're") || + body.includes('Conflicts with `direction.md') || + body.includes('direction.md §') + ) + return 'decline'; + if ( + body.includes('Could you split this') || + body.includes('Could you two coordinate') || + /split into \d+ focused PRs/.test(body) + ) + return 'needs_split'; + return null; +} + +function extractPrNumber(issueUrl: string | undefined): string | null { + if (!issueUrl) return null; + const m = issueUrl.match(/\/(\d+)$/); + return m ? 
m[1] : null; +} + +const inferred: Record = {}; +let scanned = 0; +let mineMatching = 0; + +for (const c of allComments) { + scanned++; + const author = c.user?.login; + if (!author || author.toLowerCase() !== ghHandle.toLowerCase()) continue; + const body = c.body ?? ''; + const verdict = inferVerdict(body); + if (!verdict) continue; + const prNumber = extractPrNumber(c.issue_url); + if (!prNumber) continue; + const createdAt = c.created_at ?? ''; + // Latest comment per PR wins (newer reviews supersede older). + if (!inferred[prNumber] || createdAt > inferred[prNumber].reviewed_at) { + inferred[prNumber] = { + reviewed_at: createdAt, + gate_verdict: verdict, + source: 'backfill', + }; + } + mineMatching++; +} + +console.log( + `Scanned ${scanned} comments. ${mineMatching} authored by ${ghHandle} matched a review/decline pattern. Unique PRs: ${Object.keys(inferred).length}.`, +); + +// ── Merge with existing reviewed-prs.json ── +// Existing entries (especially those without source: 'backfill', i.e. written +// by the workflow's record-review node) take precedence — they're more +// authoritative than pattern-matched bodies. +if (!existsSync(baseDir)) mkdirSync(baseDir, { recursive: true }); +const outPath = resolve(baseDir, 'reviewed-prs.json'); +let existing: Record = {}; +if (existsSync(outPath)) { + try { + existing = JSON.parse(readFileSync(outPath, 'utf8')); + } catch { + existing = {}; + } +} + +let added = 0; +let skipped = 0; +for (const [num, entry] of Object.entries(inferred)) { + if (existing[num]) { + skipped++; + continue; + } + existing[num] = entry; + added++; +} + +writeFileSync(outPath, JSON.stringify(existing, null, 2) + '\n'); + +console.log( + `Backfilled ${added} new entries (skipped ${skipped} that already had workflow-recorded entries). 
Total tracked: ${Object.keys(existing).length}.`, +); +console.log(`Written to: ${outPath}`); diff --git a/.archon/scripts/maintainer-standup-gh-data.ts b/.archon/scripts/maintainer-standup-gh-data.ts new file mode 100644 index 0000000000..53e842d8e4 --- /dev/null +++ b/.archon/scripts/maintainer-standup-gh-data.ts @@ -0,0 +1,324 @@ +#!/usr/bin/env bun +/** + * Fetches GitHub data for the maintainer-standup synthesis: all open PRs + * (light metadata), review-requested PRs, authored-by-me PRs, assigned issues, + * recent unlabeled issues, and recently-closed PRs/issues since the last run. + * + * Reads gh_handle from .archon/maintainer-standup/profile.md frontmatter. + * + * Output: JSON to stdout. + */ +import { execFileSync } from 'node:child_process'; +import { existsSync, readFileSync } from 'node:fs'; +import { resolve } from 'node:path'; + +// execFileSync with argv arrays — avoids shell-string interpolation and the +// associated quoting hazards (esp. for handles loaded from profile.md). 
+function exec(file: string, args: string[]): string { + try { + return execFileSync(file, args, { stdio: ['ignore', 'pipe', 'pipe'] }).toString(); + } catch (e) { + process.stderr.write(`${file} command failed: ${file} ${args.join(' ')}\n${(e as Error).message}\n`); + return '[]'; + } +} + +function parseJson(s: string, fallback: T): T { + try { + return JSON.parse(s) as T; + } catch { + return fallback; + } +} + +// ── Load gh_handle from profile.md frontmatter ── +let ghHandle = ''; +const profilePath = resolve(process.cwd(), '.archon/maintainer-standup/profile.md'); +if (existsSync(profilePath)) { + const profile = readFileSync(profilePath, 'utf8'); + const match = profile.match(/^gh_handle:\s*(\S+)\s*$/m); + if (match) ghHandle = match[1]; +} +if (!ghHandle) { + process.stderr.write('Warning: no gh_handle found in profile.md frontmatter\n'); +} + +// ── Load prior state to scope "recently closed" lookups ── +let lastRunAt = ''; +const statePath = resolve(process.cwd(), '.archon/maintainer-standup/state.json'); +if (existsSync(statePath)) { + try { + const state = JSON.parse(readFileSync(statePath, 'utf8')) as { last_run_at?: string }; + lastRunAt = state.last_run_at ?? ''; + } catch { + // ignore corrupt state + } +} + +// ── Open PRs (full metadata for triage) ── +const prFields = [ + 'number', + 'title', + 'author', + 'labels', + 'createdAt', + 'updatedAt', + 'isDraft', + 'mergeable', + 'mergeStateStatus', + 'reviewDecision', + 'headRefName', + 'baseRefName', + 'additions', + 'deletions', + 'changedFiles', + 'reviewRequests', +].join(','); + +// `gh pr list --json` does NOT auto-paginate beyond `--limit`. 1000 is the +// practical ceiling for a single GraphQL call and gives ~15× headroom over +// today's open-PR count. The next-run-diff invariant in the synthesis +// command (observed_prs must include every entry in all_open_prs) requires +// completeness here, so we warn loudly if we ever hit the cap. 
+const PR_LIMIT = 1000; +const allOpenPrs = parseJson( + exec('gh', ['pr', 'list', '--state', 'open', '--limit', String(PR_LIMIT), '--json', prFields]), + [], +); +if (allOpenPrs.length === PR_LIMIT) { + process.stderr.write( + `Warning: hit --limit ${PR_LIMIT} on all_open_prs. Some PRs may be silently truncated; ` + + `next-run "resolved since last run" detection will misclassify the dropped tail. ` + + `Switch to gh api graphql --paginate when this becomes a persistent issue.\n`, + ); +} + +let reviewRequested: unknown[] = []; +let authoredByMe: unknown[] = []; +let issuesAssigned: unknown[] = []; + +if (ghHandle) { + reviewRequested = parseJson( + exec('gh', [ + 'pr', 'list', + '--search', `is:open is:pr review-requested:${ghHandle}`, + '--json', 'number,title,author,createdAt,updatedAt', + ]), + [], + ); + authoredByMe = parseJson( + exec('gh', [ + 'pr', 'list', + '--author', ghHandle, + '--state', 'open', + '--json', 'number,title,createdAt,updatedAt,reviewDecision,mergeStateStatus', + ]), + [], + ); + issuesAssigned = parseJson( + exec('gh', [ + 'issue', 'list', + '--assignee', ghHandle, + '--state', 'open', + '--json', 'number,title,labels,createdAt,updatedAt,author', + ]), + [], + ); +} + +// ── Recent unlabeled issues (last 7 days) ── +const sevenDaysAgo = new Date(); +sevenDaysAgo.setDate(sevenDaysAgo.getDate() - 7); +const sevenDaysAgoStr = sevenDaysAgo.toISOString().slice(0, 10); +const recentUnlabeledIssues = parseJson( + exec('gh', [ + 'issue', 'list', + '--state', 'open', + '--search', `no:label created:>${sevenDaysAgoStr}`, + '--json', 'number,title,createdAt,author', + '--limit', '30', + ]), + [], +); + +// ── Recently closed/merged since last run (or last 7 days as fallback) ── +const sinceDate = lastRunAt ? 
lastRunAt.slice(0, 10) : sevenDaysAgoStr; +const recentlyClosedPrs = parseJson( + exec('gh', [ + 'pr', 'list', + '--state', 'closed', + '--search', `closed:>${sinceDate}`, + '--json', 'number,title,author,closedAt,mergedAt,state', + '--limit', '50', + ]), + [], +); +const recentlyClosedIssues = parseJson( + exec('gh', [ + 'issue', 'list', + '--state', 'closed', + '--search', `closed:>${sinceDate}`, + '--json', 'number,title,author,closedAt,state', + '--limit', '50', + ]), + [], +); + +// ── Maintainer's recent commits on dev (what you shipped) ── +let myRecentCommits = ''; +if (ghHandle) { + const since = lastRunAt || '7 days ago'; + try { + myRecentCommits = execFileSync( + 'git', + ['log', 'origin/dev', `--since=${since}`, `--author=${ghHandle}`, '--no-decorate', '--format=%h %s'], + { stdio: ['ignore', 'pipe', 'pipe'] }, + ).toString(); + } catch { + myRecentCommits = ''; + } +} + +// ── Replies since last run (contributor comments on PRs/issues) ── +// Fetches all conversation + inline review comments since the last run, +// filters out the maintainer's own comments, and groups by PR/issue number. +// Lets the synthesizer surface "@author replied on PR #N" items for the +// maintainer to triage today. +// +// GitHub endpoints: +// - /repos/{o}/{r}/issues/comments conversation comments on PRs and issues +// (same endpoint; issue_url disambiguates) +// - /repos/{o}/{r}/pulls/comments inline code-review comments +// Both accept ?since=ISO8601. 
+type GhComment = { + user?: { login?: string }; + created_at?: string; + body?: string; + html_url?: string; + issue_url?: string; + pull_request_url?: string; +}; + +type GroupedReply = { + number: number; + kind: 'issue' | 'pr_conversation' | 'pr_review'; + comments: { + author: string; + created_at: string; + body_excerpt: string; + url: string; + }[]; +}; + +function ownerRepo(): { owner: string; repo: string } | null { + try { + const url = execFileSync('git', ['remote', 'get-url', 'origin'], { + stdio: ['ignore', 'pipe', 'pipe'], + }) + .toString() + .trim(); + // ssh: git@github.com:owner/repo.git ; https: https://github.com/owner/repo.git + const m = url.match(/[:/]([^:/]+)\/([^/]+?)(?:\.git)?$/); + if (!m) return null; + return { owner: m[1], repo: m[2] }; + } catch { + return null; + } +} + +function extractNumber(url: string | undefined): number | null { + if (!url) return null; + const m = url.match(/\/(?:issues|pulls)\/(\d+)$/); + return m ? Number(m[1]) : null; +} + +const repliesByNumber: Record = {}; +const repoIds = ownerRepo(); + +if (repoIds && lastRunAt) { + const openPrNumbers = new Set( + (allOpenPrs as Array<{ number?: number }>) + .map((p) => p.number) + .filter((n): n is number => typeof n === 'number'), + ); + + const addComment = ( + num: number, + kind: GroupedReply['kind'], + c: GhComment, + fallbackUrl: string, + ): void => { + const author = c.user?.login; + if (!author) return; + if (ghHandle && author.toLowerCase() === ghHandle.toLowerCase()) return; + // Skip GitHub bots — coderabbitai, codex-connector, dependabot, etc. The + // "[bot]" suffix is the canonical GitHub convention for bot accounts and + // is reliable across all bot integrations. Maintainer wants human replies + // worth responding to, not the constant churn of automated review tooling. 
+ if (author.endsWith('[bot]')) return; + if (!repliesByNumber[num]) repliesByNumber[num] = { number: num, kind, comments: [] }; + // Upgrade kind toward pr_review (most actionable) when both arrive on the same PR. + if (kind === 'pr_review') repliesByNumber[num].kind = 'pr_review'; + repliesByNumber[num].comments.push({ + author, + created_at: c.created_at ?? '', + body_excerpt: (c.body ?? '').slice(0, 240).replace(/\s+/g, ' ').trim(), + url: c.html_url ?? fallbackUrl, + }); + }; + + // /issues/comments covers PR + issue conversations under one endpoint. + // Disambiguate by checking whether the parsed number is an open PR. + const issueComments = parseJson( + exec('gh', [ + 'api', + `repos/${repoIds.owner}/${repoIds.repo}/issues/comments?since=${lastRunAt}&per_page=100`, + '--paginate', + ]), + [], + ); + for (const c of issueComments) { + const num = extractNumber(c.issue_url); + if (!num) continue; + const kind: GroupedReply['kind'] = openPrNumbers.has(num) ? 'pr_conversation' : 'issue'; + addComment(num, kind, c, c.issue_url ?? ''); + } + + // /pulls/comments are inline code-review comments — most specific signal, + // usually need a code-level response. + const reviewComments = parseJson( + exec('gh', [ + 'api', + `repos/${repoIds.owner}/${repoIds.repo}/pulls/comments?since=${lastRunAt}&per_page=100`, + '--paginate', + ]), + [], + ); + for (const c of reviewComments) { + const num = extractNumber(c.pull_request_url); + if (!num) continue; + addComment(num, 'pr_review', c, c.pull_request_url ?? ''); + } +} + +const repliesSinceLastRun = Object.values(repliesByNumber).sort((a, b) => { + const aLatest = a.comments[a.comments.length - 1]?.created_at ?? ''; + const bLatest = b.comments[b.comments.length - 1]?.created_at ?? 
''; + return bLatest.localeCompare(aLatest); // newest first +}); + +console.log( + JSON.stringify({ + gh_handle: ghHandle, + since_date: sinceDate, + all_open_prs: allOpenPrs, + review_requested: reviewRequested, + authored_by_me: authoredByMe, + issues_assigned: issuesAssigned, + recent_unlabeled_issues: recentUnlabeledIssues, + recently_closed_prs: recentlyClosedPrs, + recently_closed_issues: recentlyClosedIssues, + my_recent_commits: myRecentCommits, + replies_since_last_run: repliesSinceLastRun, + }), +); diff --git a/.archon/scripts/maintainer-standup-git-status.ts b/.archon/scripts/maintainer-standup-git-status.ts new file mode 100644 index 0000000000..9076c0eb0a --- /dev/null +++ b/.archon/scripts/maintainer-standup-git-status.ts @@ -0,0 +1,80 @@ +#!/usr/bin/env bun +/** + * Fetches origin/dev, optionally fast-forwards local dev, and reports new + * commits + diff stat since the last run's recorded SHA. + * + * Output: JSON to stdout with shape: + * { + * current_dev_sha, prior_dev_sha, current_branch, is_dirty, + * pull_status: 'pulled' | 'fetch_only' | 'pull_failed' | 'not_on_dev' | 'dirty', + * new_commits, diff_stat + * } + */ +import { execFileSync } from 'node:child_process'; +import { existsSync, readFileSync } from 'node:fs'; +import { resolve } from 'node:path'; + +// execFileSync (argv array, no shell) — defense-in-depth for git invocations. +// All args are hardcoded literals or values from `git` output (SHAs); using +// execFileSync removes any need to reason about shell metacharacters. 
+function git(args: string[]): { stdout: string; ok: boolean } { + try { + const out = execFileSync('git', args, { stdio: ['ignore', 'pipe', 'pipe'] }).toString(); + return { stdout: out, ok: true }; + } catch { + return { stdout: '', ok: false }; + } +} + +let priorSha = ''; +const stateFile = resolve(process.cwd(), '.archon/maintainer-standup/state.json'); +if (existsSync(stateFile)) { + try { + const state = JSON.parse(readFileSync(stateFile, 'utf8')) as { last_dev_sha?: string }; + priorSha = state.last_dev_sha ?? ''; + } catch { + // ignore corrupt state — first-run-like behavior + } +} + +git(['fetch', 'origin', 'dev']); + +const currentBranch = git(['rev-parse', '--abbrev-ref', 'HEAD']).stdout.trim(); +const isDirty = git(['status', '--porcelain']).stdout.trim().length > 0; + +let pullStatus: 'pulled' | 'fetch_only' | 'pull_failed' | 'not_on_dev' | 'dirty'; +if (currentBranch !== 'dev') { + pullStatus = 'not_on_dev'; +} else if (isDirty) { + pullStatus = 'dirty'; +} else { + const result = git(['pull', '--ff-only', 'origin', 'dev']); + pullStatus = result.ok ? 
'pulled' : 'pull_failed'; +} + +const currentDevSha = git(['rev-parse', 'origin/dev']).stdout.trim(); + +let newCommits = ''; +let diffStat = ''; +if (priorSha && priorSha !== currentDevSha) { + // %h short SHA, %an author name, %s subject + const log = git(['log', `${priorSha}..origin/dev`, '--no-decorate', '--format=%h %an: %s']); + if (log.ok) { + newCommits = log.stdout; + diffStat = git(['diff', '--stat', `${priorSha}..origin/dev`]).stdout; + } else { + newCommits = '(prior SHA not found locally — full diff unavailable)'; + } +} + +console.log( + JSON.stringify({ + current_dev_sha: currentDevSha, + prior_dev_sha: priorSha, + current_branch: currentBranch, + is_dirty: isDirty, + pull_status: pullStatus, + new_commits: newCommits, + diff_stat: diffStat, + }), +); diff --git a/.archon/scripts/maintainer-standup-read-context.ts b/.archon/scripts/maintainer-standup-read-context.ts new file mode 100644 index 0000000000..1d2173ecee --- /dev/null +++ b/.archon/scripts/maintainer-standup-read-context.ts @@ -0,0 +1,82 @@ +#!/usr/bin/env bun +/** + * Loads local context for the maintainer-standup synthesis: direction.md + * (committed), profile.md (per-maintainer), prior state.json, and the most + * recent N briefs. + * + * Output: JSON to stdout. + */ +import { existsSync, readFileSync, readdirSync } from 'node:fs'; +import { resolve } from 'node:path'; + +const RECENT_BRIEFS_LIMIT = 3; + +const baseDir = resolve(process.cwd(), '.archon/maintainer-standup'); + +const directionPath = resolve(baseDir, 'direction.md'); +const direction = existsSync(directionPath) ? readFileSync(directionPath, 'utf8') : ''; + +const profilePath = resolve(baseDir, 'profile.md'); +const profile = existsSync(profilePath) ? 
readFileSync(profilePath, 'utf8') : ''; + +const statePath = resolve(baseDir, 'state.json'); +let priorState: unknown = null; +if (existsSync(statePath)) { + try { + priorState = JSON.parse(readFileSync(statePath, 'utf8')); + } catch { + priorState = null; + } +} + +const briefsDir = resolve(baseDir, 'briefs'); +const recentBriefs: { date: string; content: string }[] = []; +if (existsSync(briefsDir)) { + const files = readdirSync(briefsDir) + .filter((f) => f.endsWith('.md')) + .sort() + .reverse() + .slice(0, RECENT_BRIEFS_LIMIT); + for (const f of files) { + recentBriefs.push({ + date: f.replace(/\.md$/, ''), + content: readFileSync(resolve(briefsDir, f), 'utf8'), + }); + } +} + +// Deterministic clock — emit today's local date + a precomputed 3-day-out +// deadline so downstream prompts don't have to do calendar arithmetic +// (LLMs are unreliable at it) and don't anchor to stale prior_state.last_run_at +// (which can produce past deadlines on long gaps between runs). +const todayDate = new Date(); +const today = todayDate.toLocaleDateString('sv-SE'); // YYYY-MM-DD local +const deadlineDate = new Date(todayDate); +deadlineDate.setDate(deadlineDate.getDate() + 3); +const deadline_3d = deadlineDate.toLocaleDateString('sv-SE'); + +// Cross-workflow memory: which PRs has maintainer-review-pr already triaged? +// Written by maintainer-review-pr's `record-review` node; surfaced here so +// the standup synthesizer can mark "✓ reviewed Nd ago" next to P1-P4 entries +// and flag staleness when the contributor pushes after a prior review. 
+const reviewedPrsPath = resolve(baseDir, 'reviewed-prs.json'); +let reviewedPrs: unknown = {}; +if (existsSync(reviewedPrsPath)) { + try { + reviewedPrs = JSON.parse(readFileSync(reviewedPrsPath, 'utf8')); + } catch { + reviewedPrs = {}; + } +} + +console.log( + JSON.stringify({ + direction, + profile, + prior_state: priorState, + recent_briefs: recentBriefs, + today, + deadline_3d, + reviewed_prs: reviewedPrs, + }), +); diff --git a/.archon/workflows/defaults/archon-adversarial-dev.yaml b/.archon/workflows/defaults/archon-adversarial-dev.yaml index 68722c8b1a..bea7117f4a 100644 --- a/.archon/workflows/defaults/archon-adversarial-dev.yaml +++ b/.archon/workflows/defaults/archon-adversarial-dev.yaml @@ -117,7 +117,7 @@ nodes: - id: adversarial-sprint depends_on: [init-workspace] idle_timeout: 600000 - model: claude-opus-4-6[1m] + model: opus[1m] loop: prompt: | # Adversarial Development — Sprint Loop diff --git a/.archon/workflows/defaults/archon-feature-development.yaml b/.archon/workflows/defaults/archon-feature-development.yaml index 6d0747700d..a2ab7da87d 100644 --- a/.archon/workflows/defaults/archon-feature-development.yaml +++ b/.archon/workflows/defaults/archon-feature-development.yaml @@ -8,7 +8,7 @@ description: | nodes: - id: implement command: archon-implement - model: claude-opus-4-6[1m] + model: opus[1m] - id: create-pr command: archon-create-pr diff --git a/.archon/workflows/defaults/archon-fix-github-issue.yaml b/.archon/workflows/defaults/archon-fix-github-issue.yaml index 12ad675de9..a6fd0d235c 100644 --- a/.archon/workflows/defaults/archon-fix-github-issue.yaml +++ b/.archon/workflows/defaults/archon-fix-github-issue.yaml @@ -133,7 +133,7 @@ nodes: command: archon-fix-issue depends_on: [bridge-artifacts] context: fresh - model: claude-opus-4-6[1m] + model: opus[1m] # ═══════════════════════════════════════════════════════════════ # PHASE 5: VALIDATE diff --git a/.archon/workflows/defaults/archon-idea-to-pr.yaml 
b/.archon/workflows/defaults/archon-idea-to-pr.yaml index 9329c55021..1c2fe738d3 100644 --- a/.archon/workflows/defaults/archon-idea-to-pr.yaml +++ b/.archon/workflows/defaults/archon-idea-to-pr.yaml @@ -52,7 +52,7 @@ nodes: command: archon-implement-tasks depends_on: [confirm-plan] context: fresh - model: claude-opus-4-6[1m] + model: opus[1m] # ═══════════════════════════════════════════════════════════════════ # PHASE 4: VALIDATE diff --git a/.archon/workflows/defaults/archon-piv-loop.yaml b/.archon/workflows/defaults/archon-piv-loop.yaml index 7227900c2f..b544814e6b 100644 --- a/.archon/workflows/defaults/archon-piv-loop.yaml +++ b/.archon/workflows/defaults/archon-piv-loop.yaml @@ -198,14 +198,10 @@ nodes: 3. **Read example test files** — understand testing patterns 4. **Check for any recent changes** — `git log --oneline -10` - ## Step 2: Determine Plan Location + ## Step 2: Plan File Location - Generate a kebab-case slug from the feature name. - Save to `.claude/archon/plans/{slug}.plan.md`. - - ```bash - mkdir -p .claude/archon/plans - ``` + Save the plan to `$ARTIFACTS_DIR/plan.md`. + The directory already exists (pre-created by the workflow executor). ## Step 3: Write the Plan @@ -282,7 +278,7 @@ nodes: ``` ## Plan Created - **File**: `.claude/archon/plans/{slug}.plan.md` + **File**: `$ARTIFACTS_DIR/plan.md` **Tasks**: {count} **Files to change**: {count} @@ -310,13 +306,9 @@ nodes: --- - ## Step 1: Find and Read the Plan + ## Step 1: Read the Plan - ```bash - ls -t .claude/archon/plans/*.plan.md 2>/dev/null | head -1 - ``` - - Read the entire plan file. Also read CLAUDE.md for conventions. + Read `$ARTIFACTS_DIR/plan.md` and CLAUDE.md for conventions. ## Step 2: Process Feedback @@ -375,10 +367,10 @@ nodes: bash: | set -e - PLAN_FILE=$(ls -t .claude/archon/plans/*.plan.md 2>/dev/null | head -1) + PLAN_FILE="$ARTIFACTS_DIR/plan.md" - if [ -z "$PLAN_FILE" ]; then - echo "ERROR: No plan file found in .claude/archon/plans/" + if [ ! 
-f "$PLAN_FILE" ]; then + echo "ERROR: No plan file found at $ARTIFACTS_DIR/plan.md" exit 1 fi @@ -403,8 +395,12 @@ nodes: echo "" echo "=== PLAN_END ===" - TASK_COUNT=$(grep -c "^### Task [0-9]" "$PLAN_FILE" || true) - echo "TASK_COUNT=${TASK_COUNT:-0}" + TASK_COUNT=$(grep -c "^### Task [0-9]" "$PLAN_FILE" 2>/dev/null || true); TASK_COUNT=${TASK_COUNT:-0} + if [ "$TASK_COUNT" -eq 0 ]; then + echo "ERROR: No '### Task N:' sections found in $PLAN_FILE. Plan may be malformed." + exit 1 + fi + echo "TASK_COUNT=${TASK_COUNT}" # ═══════════════════════════════════════════════════════════════ # PHASE 3b: IMPLEMENT — Task-by-Task Loop (Ralph pattern) # ═══════════════════════════════════════════════════════════════ @@ -447,7 +443,7 @@ nodes: may have changed things. **You MUST re-read from disk:** 1. **Read the plan file** — your implementation guide - 2. **Read progress tracking** — check if `.claude/archon/plans/progress.txt` exists + 2. **Read progress tracking** — check if `$ARTIFACTS_DIR/progress.txt` exists 3. **Read CLAUDE.md** — project conventions and constraints ### 0.3 Check Git State @@ -511,7 +507,7 @@ nodes: )" ``` - Track progress in `.claude/archon/plans/progress.txt`: + Track progress in `$ARTIFACTS_DIR/progress.txt`: ``` ## Task {N}: {title} — COMPLETED Date: {ISO date} @@ -552,11 +548,9 @@ nodes: --- - ## Step 1: Find and Read the Plan + ## Step 1: Read the Plan - ```bash - ls -t .claude/archon/plans/*.plan.md 2>/dev/null | head -1 - ``` + Read `$ARTIFACTS_DIR/plan.md` to understand the intended implementation. ## Step 2: Review All Changes @@ -581,7 +575,7 @@ nodes: Fix type errors, lint warnings, missing imports, formatting. Commit any fixes: ```bash - git add -A && git commit -m "fix: address code review findings" 2>/dev/null || true + git add -A && git commit -m "fix: address code review findings" || true ``` ## Step 6: Present Review @@ -627,11 +621,7 @@ nodes: ## Step 1: Read Context - ```bash - ls -t .claude/archon/plans/*.plan.md 2>/dev/null | head -1 - ``` - - Read the plan file and CLAUDE.md for conventions.
+ Read `$ARTIFACTS_DIR/plan.md` and CLAUDE.md for conventions. ## Step 2: Process Feedback @@ -710,7 +700,7 @@ nodes: ## Step 1: Push Changes ```bash - git push -u origin HEAD 2>&1 || true + git push -u origin HEAD 2>&1 || echo "WARNING: Push failed — verify remote authentication and branch state before creating the PR." ``` ## Step 2: Generate Summary @@ -720,7 +710,7 @@ nodes: git diff --stat $(git merge-base HEAD $BASE_BRANCH)..HEAD ``` - Read the plan file and progress tracking for context. + Read `$ARTIFACTS_DIR/plan.md` and `$ARTIFACTS_DIR/progress.txt` for context. ## Step 3: Create PR (if not already created) diff --git a/.archon/workflows/defaults/archon-plan-to-pr.yaml b/.archon/workflows/defaults/archon-plan-to-pr.yaml index 067c1a818e..83dbbebd88 100644 --- a/.archon/workflows/defaults/archon-plan-to-pr.yaml +++ b/.archon/workflows/defaults/archon-plan-to-pr.yaml @@ -42,7 +42,7 @@ nodes: command: archon-implement-tasks depends_on: [confirm-plan] context: fresh - model: claude-opus-4-6[1m] + model: opus[1m] # ═══════════════════════════════════════════════════════════════════ # PHASE 4: VALIDATE diff --git a/.archon/workflows/defaults/archon-ralph-dag.yaml b/.archon/workflows/defaults/archon-ralph-dag.yaml index 5c0d7c9099..5482fd5a15 100644 --- a/.archon/workflows/defaults/archon-ralph-dag.yaml +++ b/.archon/workflows/defaults/archon-ralph-dag.yaml @@ -189,7 +189,7 @@ nodes: - id: implement depends_on: [validate-prd] idle_timeout: 600000 - model: claude-opus-4-6[1m] + model: opus[1m] loop: prompt: | # Ralph Agent — Autonomous Story Implementation diff --git a/.archon/workflows/defaults/archon-refactor-safely.yaml b/.archon/workflows/defaults/archon-refactor-safely.yaml index 56bc96ac36..81e4cb5f09 100644 --- a/.archon/workflows/defaults/archon-refactor-safely.yaml +++ b/.archon/workflows/defaults/archon-refactor-safely.yaml @@ -207,7 +207,7 @@ nodes: # ═══════════════════════════════════════════════════════════════ - id: execute-refactor - model: 
claude-opus-4-6[1m] + model: opus[1m] prompt: | You are executing a refactoring plan with strict safety guardrails. diff --git a/.archon/workflows/defaults/archon-workflow-builder.yaml b/.archon/workflows/defaults/archon-workflow-builder.yaml index a311b8d970..a12758b0ec 100644 --- a/.archon/workflows/defaults/archon-workflow-builder.yaml +++ b/.archon/workflows/defaults/archon-workflow-builder.yaml @@ -61,7 +61,8 @@ nodes: 5. Whether this should be a simple DAG or include a loop node Be specific and concrete. Each proposed node should have a clear type - (bash, prompt, command, or loop) and a one-line description of what it does. + (bash, prompt, command, script, loop, or approval) and a one-line + description of what it does. model: haiku allowed_tools: [] output_format: @@ -115,7 +116,7 @@ nodes: nodes: - id: node-id-kebab-case - # Choose ONE of: prompt, bash, command, loop + # Choose ONE of: prompt, bash, command, script, loop, approval # --- prompt node (AI-executed) --- prompt: | @@ -131,6 +132,17 @@ nodes: # --- command node (references a .archon/commands/ file) --- command: command-name + # --- script node (TypeScript via bun, or Python via uv — no AI, stdout = $.output) --- + # Use for deterministic data transforms the shell would mangle (JSON parsing, etc.) + script: | + // JSON is valid JS expression syntax — assign directly (String.raw breaks on backticks) + const data = $other-node.output; + console.log(JSON.stringify({ count: data.items.length })); + runtime: bun # required: 'bun' (.ts/.js) or 'uv' (.py) + # deps: [requests] # uv only + # Or reference a named script in .archon/scripts/: + # script: extract-labels # no extension; bun resolves .ts/.js, uv resolves .py + # --- loop node (iterative AI execution) --- loop: prompt: | @@ -139,17 +151,22 @@ nodes: max_iterations: 10 fresh_context: true # optional: reset context each iteration + # --- approval node (human gate — pauses workflow) --- + approval: + message: "Review the plan above. 
Approve to continue." + # capture_response: true # store reviewer comment as $.output + # Common options for all node types: depends_on: [other-node-id] # DAG edges when: "$.output == 'value'" # conditional execution trigger_rule: all_success # all_success | one_success | all_done - timeout: 120000 # ms, for bash nodes + timeout: 120000 # ms, for bash and script nodes ``` ## Variable Reference - `$ARGUMENTS` — user's input text - `$ARTIFACTS_DIR` — pre-created directory for workflow artifacts - - `$.output` — stdout from a bash node or AI response from a prompt node + - `$.output` — stdout from a bash/script node or AI response from a prompt node - `$.output.field` — JSON field from a node with output_format - `$BASE_BRANCH` — base git branch @@ -158,12 +175,20 @@ nodes: 2. The `description:` MUST follow the "Use when / Triggers / Does / NOT for" pattern 3. Every node MUST have a unique kebab-case `id` 4. Use `depends_on` to define execution order - 5. Use `bash` nodes for deterministic operations (file checks, git commands, installs) - 6. Use `prompt` nodes for AI reasoning tasks - 7. Use `output_format` on prompt nodes when downstream nodes need structured data - 8. Use `allowed_tools: []` on classification/analysis nodes that don't need tools - 9. Use `denied_tools: [Edit, Bash]` when a node should only use Write (not edit existing files) - 10. Prefer `model: haiku` for simple classification tasks to save cost + 5. Use `bash` nodes for deterministic shell operations (file checks, git commands, installs) + 6. Use `script` nodes for typed data transforms (TypeScript JSON parsing, Python with deps) + — stdout is captured as output, stderr is forwarded as a warning. + $nodeId.output is NOT shell-quoted in script bodies. 
+ - **TypeScript/bun**: assign directly — `const data = $nodeId.output;` + (JSON is valid JS expression syntax; avoid String.raw — it breaks on backticks) + - **Python/uv**: use json.loads — `import json; data = json.loads("""$nodeId.output""")` + Never interpolate into shell syntax. + 7. Use `prompt` nodes for AI reasoning tasks + 8. Use `approval` nodes to pause for human review at risky gates (plan→execute boundary, destructive actions) + 9. Use `output_format` on prompt nodes when downstream nodes need structured data + 10. Use `allowed_tools: []` on classification/analysis nodes that don't need tools + 11. Use `denied_tools: [Edit, Bash]` when a node should only use Write (not edit existing files) + 12. Prefer `model: haiku` for simple classification tasks to save cost ## Output diff --git a/.archon/workflows/experimental/archon-fix-github-issue-experimental.yaml b/.archon/workflows/experimental/archon-fix-github-issue-experimental.yaml new file mode 100644 index 0000000000..f94d496d46 --- /dev/null +++ b/.archon/workflows/experimental/archon-fix-github-issue-experimental.yaml @@ -0,0 +1,440 @@ +name: archon-fix-github-issue-experimental +description: | + EXPERIMENTAL: Path A variant of archon-fix-github-issue. Same DAG shape — same nodes, + same dependencies, same command files. Additions: + - Two extra classifier fields: `scope` (small/medium/large) and `needs_external_research`. + - A new `smoke-validate` node that checks the issue's concrete claims (file paths, + line numbers, symbols, repro commands) against the current codebase before any + skip gate fires. Every skip gate has a `claims_accurate == 'false'` override so an + inaccurate issue cannot cause a skip. + - `when:` gates on web-research and 4 reviewers so small, claim-verified issues + skip them. For medium/large issues or when the issue claims don't match the code, + behavior is identical to the full workflow. 
+ + Skip gates (all overridden when smoke-validate flags the issue as inaccurate): + - web-research → runs when needs_external_research=='true' OR smoke=='false' + - error-handling → runs when review-classify says yes AND (scope!='small' OR smoke=='false') + - test-coverage → same as error-handling + - comment-quality → same as error-handling + - docs-impact → same as error-handling + + Always runs (same as full): classify, smoke-validate, investigate/plan, bridge-artifacts, + implement, validate, create-pr, review-scope, review-classify, code-review, synthesize, + self-fix, simplify, report. + + Use when: User wants to FIX, RESOLVE, or IMPLEMENT a solution for a GitHub issue. + Triggers: "fix this issue", "implement issue #123", "resolve this bug", "fix it", + "fix issue", "resolve issue", "fix #123". + NOT for: Comprehensive multi-agent reviews (use archon-issue-review-full), + questions about issues, CI failures, PR reviews, general exploration. + + DAG workflow that: + 1. Classifies the issue (bug/feature/enhancement/etc) + 2. Researches context (web research + codebase exploration via investigate/plan) + 3. Routes to investigate (bugs) or plan (features) based on classification + 4. Implements the fix/feature with validation + 5. Creates a draft PR using the repo's PR template + 6. Runs smart review (always code review + CLAUDE.md check, conditional additional agents) + 7. Aggressively self-fixes all findings (tests, docs, error handling) + 8. Simplifies changed code (implements fixes directly, not just reports) + 9. Reports results back to the GitHub issue with follow-up suggestions + +provider: claude +model: sonnet + +nodes: + # ═══════════════════════════════════════════════════════════════ + # PHASE 1: FETCH & CLASSIFY + # ═══════════════════════════════════════════════════════════════ + + - id: extract-issue-number + prompt: | + Find the GitHub issue number for this request. 
+ + Request: $ARGUMENTS + + Rules: + - If the message contains an explicit issue number (e.g., "#709", "issue 709", "709"), extract that number. + - If the message is ambiguous (e.g., "fix the SQLite timestamp bug"), use `gh issue list` to search for matching issues and pick the best match. + + CRITICAL: Your final output must be ONLY the bare number with no quotes, no markdown, no explanation. Example correct output: 709 + + - id: fetch-issue + bash: | + # Strip quotes, whitespace, markdown backticks from AI output + ISSUE_NUM=$(echo "$extract-issue-number.output" | tr -d "'\"\`\n " | grep -oE '[0-9]+' | head -1) + if [ -z "$ISSUE_NUM" ]; then + echo "Failed to extract issue number from: $extract-issue-number.output" >&2 + exit 1 + fi + gh issue view "$ISSUE_NUM" --json title,body,labels,comments,state,url,author + depends_on: [extract-issue-number] + + - id: classify + prompt: | + You are an issue classifier. Analyze the GitHub issue below and determine: + (1) its type, (2) its scope, and (3) whether external web research is needed. + + ## Issue Content + + $fetch-issue.output + + ## Type + + | Type | Indicators | + |------|------------| + | bug | "broken", "error", "crash", "doesn't work", stack traces, regression | + | feature | "add", "new", "support", "would be nice", net-new capability | + | enhancement | "improve", "better", "update existing", "extend", incremental improvement | + | refactor | "clean up", "simplify", "reorganize", "restructure" | + | chore | "update deps", "upgrade", "maintenance", "CI/CD" | + | documentation | "docs", "readme", "clarify", "examples" | + + ## Scope + + Estimate how much code the fix is likely to touch. The issue body is your best + signal — reporter-pointed file paths, length of the reproducer, how specific the + request is. When uncertain, round UP (pick the larger scope). + + | Scope | Indicators | + |-------|------------| + | small | 1-3 files, single subsystem, clear from the body. 
Typos, one-line bugs, isolated refactors, doc fixes, small enhancements pointing at specific code. | + | medium | 3-10 files, one or two subsystems, some investigation needed. Most features, non-trivial bugs, refactors that cross a few files. | + | large | 10+ files, cross-subsystem, vague/exploratory, or requires real codebase discovery before a fix direction is clear. | + + ## External Research + + Does this issue need external (web) research to fix correctly? Say "true" only if + the fix depends on specifics of an external library, API, protocol, or standard + that are NOT already apparent from the codebase. Internal plumbing, refactoring, + obvious bug fixes, and issues where the reporter already cited the relevant docs + → "false". + + Provide reasoning that covers all three decisions. + depends_on: [fetch-issue] + model: haiku + allowed_tools: [] + output_format: + type: object + properties: + issue_type: + type: string + enum: ["bug", "feature", "enhancement", "refactor", "chore", "documentation"] + title: + type: string + scope: + type: string + enum: ["small", "medium", "large"] + needs_external_research: + type: string + enum: ["true", "false"] + reasoning: + type: string + required: [issue_type, title, scope, needs_external_research, reasoning] + + # ═══════════════════════════════════════════════════════════════ + # PHASE 1.5: SMOKE-VALIDATE + # Verifies that the issue's concrete claims (file paths, line numbers, + # symbols, repro commands) match the current codebase. Its `claims_accurate` + # verdict gates every skip decision downstream — if the issue body is + # inaccurate, the workflow falls back to the full pipeline. + # ═══════════════════════════════════════════════════════════════ + + - id: smoke-validate + prompt: | + You are a smoke validator. Your job: verify that the issue's claims about the + code are ACCURATE, so downstream skip decisions rest on a reliable foundation. 
+ + ## Context + + ### Issue content + $fetch-issue.output + + ### Classifier verdict + $classify.output + + ## Your Task + + Extract the concrete, verifiable claims from the issue body and comments: + - File paths mentioned (e.g. "packages/core/src/foo.ts") + - Line numbers or specific code snippets quoted + - Function, class, type, or symbol names referenced + - Reproduction commands (e.g. "run bun test X") + + Then verify each concrete claim against the current codebase — TARGETED checks, + no Explore sub-agent: + - Use the Read tool on cited file paths. Confirm the file exists. + - If a line or region is cited, Read it and check the described code is there. + - If a symbol is cited, `grep -rn "" packages/` to confirm it exists. + - If a repro command is cited, check `package.json` / the referenced file to + confirm the command is plausible. Do NOT execute it. + + ## Budget + + Spend at most ~30 seconds on this. Check the 2-3 most concrete claims — the + ones the fix most likely hinges on. Don't exhaustively verify every mention. + Prefer false-negative safety (flag inaccurate when uncertain) over + false-positive (risking a skip on shaky evidence). + + If the issue has NO concrete claims (purely descriptive — "feature X is broken", + no file paths, no line numbers, no symbols), default to `claims_accurate: "false"`. + Vibes aren't a reliable foundation for skipping work. + + ## Output + + Set `claims_accurate`: + - "true": The concrete claims you checked match the current code. The issue body + is a reliable spec — downstream gates can trust the classifier's skip verdict. + - "false": One or more claims don't match reality — cited file doesn't exist, the + line doesn't contain the described code, the symbol was renamed/removed, the + repro command doesn't fit the project. The issue body is NOT a reliable + foundation for skipping. Downstream gates will fall back to the full pipeline + (research + all review agents). 
+ + In `reasoning`, list exactly what you checked and what you found. + depends_on: [classify] + context: fresh + output_format: + type: object + properties: + claims_accurate: + type: string + enum: ["true", "false"] + reasoning: + type: string + required: [claims_accurate, reasoning] + + # ═══════════════════════════════════════════════════════════════ + # PHASE 2: RESEARCH (parallel with PR template fetch) + # ═══════════════════════════════════════════════════════════════ + + - id: web-research + command: archon-web-research + depends_on: [classify, smoke-validate] + # Runs when research is flagged OR smoke-validate finds the issue unreliable (fallback) + when: "$classify.output.needs_external_research == 'true' || $smoke-validate.output.claims_accurate == 'false'" + context: fresh + + # ═══════════════════════════════════════════════════════════════ + # PHASE 3: INVESTIGATE (bugs) / PLAN (features) + # ═══════════════════════════════════════════════════════════════ + + - id: investigate + command: archon-investigate-issue + depends_on: [classify, web-research] + when: "$classify.output.issue_type == 'bug'" + # Allow web-research to be skipped (needs_external_research == 'false') without blocking + trigger_rule: none_failed_min_one_success + context: fresh + + - id: plan + command: archon-create-plan + depends_on: [classify, web-research] + when: "$classify.output.issue_type != 'bug'" + # Allow web-research to be skipped (needs_external_research == 'false') without blocking + trigger_rule: none_failed_min_one_success + context: fresh + + # Bridge: ensure investigation.md exists for the implement step + # archon-fix-issue reads from $ARTIFACTS_DIR/investigation.md + # archon-create-plan writes to $ARTIFACTS_DIR/plan.md + # This node copies plan.md → investigation.md when the plan path was taken + - id: bridge-artifacts + bash: | + if [ -f "$ARTIFACTS_DIR/plan.md" ] && [ ! 
-f "$ARTIFACTS_DIR/investigation.md" ]; then + cp "$ARTIFACTS_DIR/plan.md" "$ARTIFACTS_DIR/investigation.md" + echo "Bridged plan.md to investigation.md for implement step" + elif [ -f "$ARTIFACTS_DIR/investigation.md" ]; then + echo "investigation.md exists from investigate step" + else + echo "WARNING: No investigation.md or plan.md found — implement may fail" + fi + depends_on: [investigate, plan] + trigger_rule: one_success + + # ═══════════════════════════════════════════════════════════════ + # PHASE 4: IMPLEMENT + # ═══════════════════════════════════════════════════════════════ + + - id: implement + command: archon-fix-issue + depends_on: [bridge-artifacts] + context: fresh + model: opus[1m] + + # ═══════════════════════════════════════════════════════════════ + # PHASE 5: VALIDATE + # ═══════════════════════════════════════════════════════════════ + + - id: validate + command: archon-validate + depends_on: [implement] + context: fresh + + # ═══════════════════════════════════════════════════════════════ + # PHASE 6: CREATE DRAFT PR + # ═══════════════════════════════════════════════════════════════ + + - id: create-pr + prompt: | + Create a draft pull request for the current branch. + + ## Context + + - **Issue**: $ARGUMENTS + - **Classification**: $classify.output + - **Issue title**: $classify.output.title + + ## Instructions + + 1. Check git status — ensure all changes are committed. If uncommitted changes exist, stage and commit them. + 2. Push the branch: `git push -u origin HEAD` + 3. Read implementation artifacts from `$ARTIFACTS_DIR/` for context: + - `$ARTIFACTS_DIR/investigation.md` or `$ARTIFACTS_DIR/plan.md` + - `$ARTIFACTS_DIR/implementation.md` + - `$ARTIFACTS_DIR/validation.md` + 4. Check if a PR already exists for this branch: `gh pr list --head $(git branch --show-current)` + - If PR exists, skip creation and capture its number + 5. 
Look for the project's PR template at `.github/pull_request_template.md`, `.github/PULL_REQUEST_TEMPLATE.md`, or `docs/PULL_REQUEST_TEMPLATE.md`. Read whichever one exists. + 6. Create a DRAFT PR: `gh pr create --draft --base $BASE_BRANCH` + - Title: concise, imperative mood, under 70 chars + - Body: if a PR template was found, fill in **every section** with details from the artifacts. Don't skip sections or leave placeholders. If no template, write a body with summary, changes, validation evidence, and `Fixes #...`. + - Link to issue: include `Fixes #...` or `Closes #...` + 7. Capture PR identifiers: + ```bash + PR_NUMBER=$(gh pr view --json number -q '.number') + echo "$PR_NUMBER" > "$ARTIFACTS_DIR/.pr-number" + PR_URL=$(gh pr view --json url -q '.url') + echo "$PR_URL" > "$ARTIFACTS_DIR/.pr-url" + ``` + depends_on: [validate] + context: fresh + + # ═══════════════════════════════════════════════════════════════ + # PHASE 7: REVIEW + # ═══════════════════════════════════════════════════════════════ + + - id: review-scope + command: archon-pr-review-scope + depends_on: [create-pr] + context: fresh + + - id: review-classify + prompt: | + You are a PR review classifier. Analyze the PR scope and determine + which review agents should run. + + ## PR Scope + + $review-scope.output + + ## Rules + + - **Code review**: ALWAYS run. This is mandatory for every PR. It also checks + the PR against CLAUDE.md rules and project conventions. + - **Error handling**: Run if the diff touches code with try/catch, error handling, + async/await, or adds new failure paths. + - **Test coverage**: Run if the diff touches source code (not just tests, docs, or config). + - **Comment quality**: Run if the diff adds or modifies comments, docstrings, JSDoc, + or significant documentation within code files. + - **Docs impact**: Run if the diff adds/removes/renames public APIs, commands, CLI flags, + environment variables, or user-facing features. + + Provide your reasoning for each decision. 
+ depends_on: [review-scope] + model: haiku + allowed_tools: [] + context: fresh + output_format: + type: object + properties: + run_code_review: + type: string + enum: ["true", "false"] + run_error_handling: + type: string + enum: ["true", "false"] + run_test_coverage: + type: string + enum: ["true", "false"] + run_comment_quality: + type: string + enum: ["true", "false"] + run_docs_impact: + type: string + enum: ["true", "false"] + reasoning: + type: string + required: + - run_code_review + - run_error_handling + - run_test_coverage + - run_comment_quality + - run_docs_impact + - reasoning + + # Code review always runs — mandatory + - id: code-review + command: archon-code-review-agent + depends_on: [review-classify] + context: fresh + + # Reviewer gates: run when review-classify flags them AND the scope is non-small, + # OR when smoke-validate found the issue claims unreliable (fallback to full review). + # Expression form: A && B || A && C (the condition evaluator has no parens; && binds tighter than ||) + - id: error-handling + command: archon-error-handling-agent + depends_on: [review-classify] + when: "$review-classify.output.run_error_handling == 'true' && $classify.output.scope != 'small' || $review-classify.output.run_error_handling == 'true' && $smoke-validate.output.claims_accurate == 'false'" + context: fresh + + - id: test-coverage + command: archon-test-coverage-agent + depends_on: [review-classify] + when: "$review-classify.output.run_test_coverage == 'true' && $classify.output.scope != 'small' || $review-classify.output.run_test_coverage == 'true' && $smoke-validate.output.claims_accurate == 'false'" + context: fresh + + - id: comment-quality + command: archon-comment-quality-agent + depends_on: [review-classify] + when: "$review-classify.output.run_comment_quality == 'true' && $classify.output.scope != 'small' || $review-classify.output.run_comment_quality == 'true' && $smoke-validate.output.claims_accurate == 'false'" + context: fresh + + - id: 
docs-impact + command: archon-docs-impact-agent + depends_on: [review-classify] + when: "$review-classify.output.run_docs_impact == 'true' && $classify.output.scope != 'small' || $review-classify.output.run_docs_impact == 'true' && $smoke-validate.output.claims_accurate == 'false'" + context: fresh + + # ═══════════════════════════════════════════════════════════════ + # PHASE 8: SYNTHESIZE + SELF-FIX + # ═══════════════════════════════════════════════════════════════ + + - id: synthesize + command: archon-synthesize-review + depends_on: [code-review, error-handling, test-coverage, comment-quality, docs-impact] + trigger_rule: one_success + context: fresh + + - id: self-fix + command: archon-self-fix-all + depends_on: [synthesize] + context: fresh + + # ═══════════════════════════════════════════════════════════════ + # PHASE 9: SIMPLIFY + # ═══════════════════════════════════════════════════════════════ + + - id: simplify + command: archon-simplify-changes + depends_on: [self-fix] + context: fresh + + # ═══════════════════════════════════════════════════════════════ + # PHASE 10: REPORT + # ═══════════════════════════════════════════════════════════════ + + - id: report + command: archon-issue-completion-report + depends_on: [simplify] + context: fresh diff --git a/.archon/workflows/experimental/archon-release.yaml b/.archon/workflows/experimental/archon-release.yaml new file mode 100644 index 0000000000..afd8681f79 --- /dev/null +++ b/.archon/workflows/experimental/archon-release.yaml @@ -0,0 +1,946 @@ +name: archon-release +description: | + Use when: User says "/release", "release", "cut a release", "ship it", + "release to main", or asks to release the project. + Triggers: "/release", "/release minor", "/release major", "ship it", + "release patch", "release minor", "release major". + Does: Cuts a release from the dev branch end-to-end. 
Validates state, + smoke-tests the compiled binary, bumps version, drafts a changelog + from commits via AI, gets human approval, then commits, opens a PR, + tags after merge, creates the GitHub release, and updates the + Homebrew formula and tap. + NOT for: Hotfix recovery from a broken release CI run (manual recovery + path), publishing release notes only, retroactive tagging. + + Pass `--dry-run` (or `dry-run`) anywhere in the message to preview every + step without touching git, GitHub, the filesystem, or any remote state. + Bump type defaults to `patch`. Accepts: `patch`, `minor`, `major`. + + Examples: + archon workflow run archon-release "" # patch release + archon workflow run archon-release "minor" # minor release + archon workflow run archon-release "patch --dry-run" # dry-run patch + archon workflow run archon-release "--dry-run" # dry-run patch (implicit) + +provider: claude +model: sonnet +interactive: true # required: has approval gates + +worktree: + enabled: false # operates on the live dev branch — never use a worktree + +nodes: + # ═══════════════════════════════════════════════════════════════════ + # PHASE 1 — Parse args and validate preconditions (always run) + # ═══════════════════════════════════════════════════════════════════ + + - id: parse-args + script: | + const raw = String.raw`$ARGUMENTS`.trim().toLowerCase(); + const tokens = raw.split(/\s+/).filter(Boolean); + const dryRun = tokens.includes("--dry-run") || tokens.includes("dry-run"); + const bumpToken = tokens.find((t) => ["patch", "minor", "major"].includes(t)); + const bump = bumpToken ?? 
"patch"; + // dryRun is stringified as "true"/"false" so `when:` can compare against quoted strings + console.log(JSON.stringify({ bump, dryRun: String(dryRun) })); + runtime: bun + timeout: 5000 + + - id: validate-state + bash: | + set -euo pipefail + + echo "::: Validating release preconditions :::" + echo "Bump: $parse-args.output.bump" + echo "Dry run: $parse-args.output.dryRun" + echo + + git fetch origin --quiet + git checkout dev + git pull origin dev --ff-only --quiet + + # Only check TRACKED files for modifications. Untracked files don't + # affect a release because `git add -u` in commit-and-push won't pick + # them up. Being strict about untracked files would block this very + # workflow on the first run (the workflow YAML is itself untracked). + if ! git diff --quiet || ! git diff --cached --quiet; then + echo "ERROR: tracked files have uncommitted changes. Commit or stash before releasing." + git status --short + exit 1 + fi + + untracked=$(git ls-files --others --exclude-standard) + if [ -n "$untracked" ]; then + echo "WARNING: untracked files present (will NOT be included in release commit):" + echo "$untracked" | sed 's/^/ /' + echo + fi + + echo "OK: on dev, tracked files clean, fast-forwarded to origin/dev" + timeout: 60000 + depends_on: [parse-args] + + # ═══════════════════════════════════════════════════════════════════ + # PHASE 2 — Pre-flight compiled-binary smoke test (always run) + # ─────────────────────────────────────────────────────────────────── + # Mirrors release skill Step 1.5. Catches bundler regressions before + # the tag is pushed. If this fails, abort immediately. 
+ # ═══════════════════════════════════════════════════════════════════ + + - id: preflight-smoke + script: | + import { spawnSync } from "node:child_process"; + import { mkdtempSync, existsSync, rmSync } from "node:fs"; + import { tmpdir } from "node:os"; + import { join } from "node:path"; + + const result = { passed: "true", skipped: "false", reason: "" }; + + if (!existsSync("scripts/build-binaries.sh") || !existsSync("packages/cli/src/cli.ts")) { + result.skipped = "true"; + result.reason = "Not a Bun CLI project — pre-flight smoke skipped."; + console.log(JSON.stringify(result)); + process.exit(0); + } + + const dir = mkdtempSync(join(tmpdir(), "release-smoke-")); + const binaryPath = join(dir, "archon-smoke"); + + try { + const build = spawnSync( + "bun", + ["build", "--compile", "--minify", "--target=bun", `--outfile=${binaryPath}`, "packages/cli/src/cli.ts"], + { encoding: "utf-8", stdio: "pipe" }, + ); + + if (build.status !== 0) { + result.passed = "false"; + result.reason = `bun build --compile failed (exit ${build.status}):\n${build.stderr || build.stdout}`; + console.log(JSON.stringify(result)); + process.exit(0); + } + + // --help instead of `version` because version's compiled-binary branch + // requires BUNDLED_IS_BINARY=true, which scripts/build-binaries.sh sets + // but a bare `bun build --compile` does not. 
+ const run = spawnSync(binaryPath, ["--help"], { encoding: "utf-8", timeout: 30000 }); + const out = `${run.stdout || ""}${run.stderr || ""}`; + + if (run.status !== 0) { + result.passed = "false"; + result.reason = `compiled binary crashed at startup (exit ${run.status}):\n${out}`; + console.log(JSON.stringify(result)); + process.exit(0); + } + + if (/Expected CommonJS module|TypeError:|ReferenceError:|SyntaxError:/.test(out)) { + result.passed = "false"; + result.reason = `compiled binary emitted runtime error despite exit 0:\n${out}`; + console.log(JSON.stringify(result)); + process.exit(0); + } + + result.reason = "Pre-flight binary smoke: PASSED"; + console.log(JSON.stringify(result)); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + runtime: bun + timeout: 180000 + depends_on: [validate-state] + + - id: abort-if-smoke-failed + cancel: | + Pre-flight compiled-binary smoke test FAILED. The release is aborted + before any version bump, commit, tag, or PR is created. + + Common causes: + - Bun --bytecode producing invalid output for the current module graph + - A dependency reading package.json or other files at module top level + - Circular imports that break under minification + - A new package shipping CJS with an unusual wrapper shape + + Fix the underlying issue on a feature branch, merge to dev, then re-run /release. + The smoke test output is in the run log — check the preflight-smoke node. 
+ when: "$preflight-smoke.output.passed == 'false'" + depends_on: [preflight-smoke] + + # ═══════════════════════════════════════════════════════════════════ + # PHASE 3 — Detect stack, compute next version, collect commits + # ═══════════════════════════════════════════════════════════════════ + + - id: detect-stack + script: | + import { readFileSync, existsSync } from "node:fs"; + + const candidates = [ + { file: "package.json", stack: "node", extract: (s) => JSON.parse(s).version }, + { file: "pyproject.toml", stack: "python", extract: (s) => s.match(/^version\s*=\s*"([^"]+)"/m)?.[1] }, + { file: "Cargo.toml", stack: "rust", extract: (s) => s.match(/^version\s*=\s*"([^"]+)"/m)?.[1] }, + ]; + + for (const { file, stack, extract } of candidates) { + if (!existsSync(file)) continue; + const contents = readFileSync(file, "utf-8"); + const version = extract(contents); + if (!version) { + console.error(`Found ${file} but could not parse version field.`); + process.exit(1); + } + console.log(JSON.stringify({ stack, versionFile: file, currentVersion: version })); + process.exit(0); + } + + console.error("No supported version file found (package.json, pyproject.toml, Cargo.toml)."); + process.exit(1); + runtime: bun + timeout: 10000 + depends_on: [preflight-smoke] + + - id: bump-version + script: | + const stack = JSON.parse(String.raw`$detect-stack.output`); + const args = JSON.parse(String.raw`$parse-args.output`); + + const m = stack.currentVersion.match(/^(\d+)\.(\d+)\.(\d+)/); + if (!m) { + console.error(`Cannot parse semver from current version: ${stack.currentVersion}`); + process.exit(1); + } + let [, major, minor, patch] = m.map(Number); + + switch (args.bump) { + case "major": major += 1; minor = 0; patch = 0; break; + case "minor": minor += 1; patch = 0; break; + case "patch": patch += 1; break; + default: + console.error(`Unknown bump type: ${args.bump}`); + process.exit(1); + } + + const newVersion = `${major}.${minor}.${patch}`; + 
console.log(JSON.stringify({ + oldVersion: stack.currentVersion, + newVersion, + bump: args.bump, + stack: stack.stack, + versionFile: stack.versionFile, + })); + runtime: bun + timeout: 5000 + depends_on: [detect-stack, parse-args] + + - id: collect-commits + bash: | + set -euo pipefail + commits=$(git log main..dev --oneline --no-merges) + if [ -z "$commits" ]; then + echo "NO_COMMITS" + exit 0 + fi + echo "$commits" + timeout: 15000 + depends_on: [validate-state] + + - id: abort-if-no-commits + cancel: "Nothing to release — dev has no commits ahead of main." + when: "$collect-commits.output == 'NO_COMMITS'" + depends_on: [collect-commits] + + - id: collect-diff-stat + bash: | + git diff --stat main..dev | tail -60 + timeout: 15000 + depends_on: [validate-state] + + # ═══════════════════════════════════════════════════════════════════ + # PHASE 4 — AI drafts the changelog from commits + diff (always runs) + # ═══════════════════════════════════════════════════════════════════ + + - id: draft-changelog + prompt: | + You are drafting a CHANGELOG entry for the upcoming release. + + Bumping `$bump-version.output.oldVersion` -> `$bump-version.output.newVersion` + (bump type: $bump-version.output.bump). + + Commits being shipped (oneline, no merges): + + ``` + $collect-commits.output + ``` + + Diff stat: + + ``` + $collect-diff-stat.output + ``` + + Categorize commits into Keep a Changelog sections: Added, Changed, Fixed, + Removed. Rules: + + - Rewrite commit subjects into clear user-facing changelog entries. Do NOT + copy commit messages verbatim. + - Group related commits into single entries where it makes sense. + - Each entry starts with a noun or gerund describing WHAT changed. + - Skip internal-only changes (CI tweaks, typo fixes) unless they affect + user-visible behavior. + - Include PR numbers in parentheses when visible: `(#12)`. + - Write a one-line summary that captures the release theme. + - No emoji. No AI attribution. No "Co-Authored-By". 
+ - Empty arrays are fine if a category has no entries. + + Return strictly valid JSON matching the schema. + depends_on: [bump-version, collect-commits, collect-diff-stat] + allowed_tools: [] + output_format: + type: object + properties: + summary: + type: string + description: One-line summary of the release theme + added: + type: array + items: { type: string } + changed: + type: array + items: { type: string } + fixed: + type: array + items: { type: string } + removed: + type: array + items: { type: string } + required: [summary, added, changed, fixed, removed] + + # Bridge: persist draft-changelog's AI output to disk via auto-shell-quoted + # bash, so downstream SCRIPT nodes can read the JSON via fs instead of + # String.raw template substitution. Necessary because AI-generated content + # routinely contains backticks (markdown code spans) that would terminate + # a JS template literal mid-string. + # + # CRITICAL: do NOT wrap $draft-changelog.output in your own quotes. Archon + # already wraps it in single quotes via shellQuote(). Adding your own quotes + # like '$node.output' produces '''' which collapses to bare unquoted + # JSON, and bash brace-expands the {...} into separate words. + - id: save-draft-json + bash: | + mkdir -p "$ARTIFACTS_DIR" + printf '%s' $draft-changelog.output > "$ARTIFACTS_DIR/draft-changelog.json" + echo "wrote $ARTIFACTS_DIR/draft-changelog.json ($(wc -c < $ARTIFACTS_DIR/draft-changelog.json) bytes)" + timeout: 10000 + depends_on: [draft-changelog] + + - id: format-changelog + script: | + import { mkdirSync, writeFileSync, readFileSync } from "node:fs"; + import { join } from "node:path"; + + const artifactsDir = String.raw`$ARTIFACTS_DIR`; + // Read AI output from disk (file bridge) — see save-draft-json + // for why we don't use String.raw on $draft-changelog.output directly. 
+ const cl = JSON.parse(readFileSync(join(artifactsDir, "draft-changelog.json"), "utf-8")); + // bump-version and parse-args produce safe deterministic JSON; substitution OK. + const ver = JSON.parse(String.raw`$bump-version.output`); + const args = JSON.parse(String.raw`$parse-args.output`); + + const today = new Date().toISOString().slice(0, 10); + + const sections = [ + ["Added", cl.added], + ["Changed", cl.changed], + ["Fixed", cl.fixed], + ["Removed", cl.removed], + ]; + + let md = `## [${ver.newVersion}] - ${today}\n\n${cl.summary}\n`; + for (const [name, items] of sections) { + if (!items?.length) continue; + md += `\n### ${name}\n\n`; + for (const it of items) md += `- ${it}\n`; + } + + // Persist a copy to the run's artifacts so the user has a record. + // Reuses `artifactsDir` declared above for reading draft-changelog.json. + try { + mkdirSync(artifactsDir, { recursive: true }); + writeFileSync(join(artifactsDir, "changelog-section.md"), md); + } catch (e) { + console.error(`(non-fatal) could not write artifact: ${e.message}`); + } + + console.log(JSON.stringify({ + rendered: md, + oldVersion: ver.oldVersion, + newVersion: ver.newVersion, + bump: ver.bump, + dryRun: args.dryRun, + stack: ver.stack, + versionFile: ver.versionFile, + })); + runtime: bun + timeout: 10000 + depends_on: [save-draft-json, draft-changelog, bump-version, parse-args] + + # ═══════════════════════════════════════════════════════════════════ + # PHASE 5 — Human approval gate (always runs) + # ─────────────────────────────────────────────────────────────────── + # In dry-run mode the workflow stops here cleanly; in full mode it + # proceeds to write files and create the PR. + # ═══════════════════════════════════════════════════════════════════ + + # ── Pre-approval summary ── + # Approval messages don't get variable substitution today, so we emit + # the dynamic summary as a Haiku prompt-node output (which DOES get + # substituted and streams to chat). 
Cheap pass-through, ~200 tokens. + - id: review-summary + prompt: | + Reply to the user with EXACTLY this text, verbatim, no elaboration, + no markdown-rendering, no commentary, no questions. Just print it. + + ══════════════════════════════════════════════════════════════════ + RELEASE REVIEW + ══════════════════════════════════════════════════════════════════ + + Version : $bump-version.output.oldVersion → $bump-version.output.newVersion + Bump : $bump-version.output.bump + Dry run : $parse-args.output.dryRun + + ── Proposed CHANGELOG ─────────────────────────────────────────── + + $format-changelog.output.rendered + + ── Commits being shipped ──────────────────────────────────────── + + $collect-commits.output + + ══════════════════════════════════════════════════════════════════ + + Reply with `/workflow approve ` to continue, or + `/workflow reject ` to abort. + depends_on: [format-changelog, collect-commits, bump-version, parse-args] + model: haiku + allowed_tools: [] + + - id: review-changelog + approval: + message: | + Approve the release review above. + + - In dry-run mode the workflow ends here without modifying any files. + - In full mode approval triggers: write files, commit + push to dev, + open a PR dev → main, then pause again before tag/release. + depends_on: [review-summary] + + # ═══════════════════════════════════════════════════════════════════ + # PHASE 6 — Apply local file changes (skipped in --dry-run) + # ═══════════════════════════════════════════════════════════════════ + + - id: write-files + script: | + import { readFileSync, writeFileSync, existsSync } from "node:fs"; + import { execSync } from "node:child_process"; + import { join } from "node:path"; + + // bump-version is safe to substitute (deterministic JSON, no backticks). + const ver = JSON.parse(String.raw`$bump-version.output`); + // format-changelog.output.rendered contains AI-authored markdown with + // backticks → unsafe via String.raw. Read the .md file from disk instead. 
+ const artifactsDir = String.raw`$ARTIFACTS_DIR`; + const renderedMd = readFileSync(join(artifactsDir, "changelog-section.md"), "utf-8"); + const fmt = { rendered: renderedMd }; + + const written = []; + + // 1. Bump the version file + switch (ver.stack) { + case "node": { + const pkg = JSON.parse(readFileSync(ver.versionFile, "utf-8")); + pkg.version = ver.newVersion; + writeFileSync(ver.versionFile, JSON.stringify(pkg, null, 2) + "\n"); + break; + } + case "python": + case "rust": { + const original = readFileSync(ver.versionFile, "utf-8"); + const updated = original.replace(/^(version\s*=\s*")[^"]+(")/m, `$1${ver.newVersion}$2`); + if (updated === original) throw new Error(`Failed to update version in ${ver.versionFile}`); + writeFileSync(ver.versionFile, updated); + break; + } + default: + throw new Error(`Unknown stack: ${ver.stack}`); + } + written.push(ver.versionFile); + + // 2. Workspace version sync (monorepo only) + if (existsSync("scripts/sync-versions.sh")) { + execSync("bash scripts/sync-versions.sh", { stdio: "inherit" }); + // Stage workspace package.json files explicitly downstream + written.push("packages/*/package.json"); + } + + // 3. Lockfile refresh + const lockfileCommands = { + node: existsSync("bun.lock") ? ["bun", "install"] : + existsSync("package-lock.json") ? ["npm", "install", "--package-lock-only"] : null, + python: existsSync("uv.lock") ? ["uv", "lock", "--quiet"] : null, + rust: ["cargo", "update", "--workspace"], + }[ver.stack]; + + if (lockfileCommands) { + execSync(lockfileCommands.join(" "), { stdio: "inherit" }); + const lockFile = { + node: existsSync("bun.lock") ? "bun.lock" : "package-lock.json", + python: "uv.lock", + rust: "Cargo.lock", + }[ver.stack]; + if (lockFile && existsSync(lockFile)) written.push(lockFile); + } + + // 4. Update CHANGELOG.md — prepend the new section under [Unreleased] + const changelogPath = "CHANGELOG.md"; + let changelog = existsSync(changelogPath) + ? 
readFileSync(changelogPath, "utf-8") + : "# Changelog\n\nAll notable changes to this project will be documented in this file.\n\n## [Unreleased]\n\n"; + + // Insert the new section right after the [Unreleased] header (and any blank lines beneath it) + const unreleasedMatch = changelog.match(/(## \[Unreleased\]\s*\n+)/); + if (unreleasedMatch) { + const insertAt = unreleasedMatch.index + unreleasedMatch[0].length; + changelog = changelog.slice(0, insertAt) + fmt.rendered + "\n" + changelog.slice(insertAt); + } else { + // No [Unreleased] header — prepend at the top below the title + const titleMatch = changelog.match(/^# .+\n+/); + const insertAt = titleMatch ? titleMatch[0].length : 0; + changelog = changelog.slice(0, insertAt) + "## [Unreleased]\n\n" + fmt.rendered + "\n" + changelog.slice(insertAt); + } + writeFileSync(changelogPath, changelog); + written.push(changelogPath); + + console.log(JSON.stringify({ filesModified: written, newVersion: ver.newVersion })); + runtime: bun + timeout: 120000 + depends_on: [review-changelog, bump-version, format-changelog] + when: "$parse-args.output.dryRun == 'false'" + + - id: commit-and-push + bash: | + set -euo pipefail + + # Working tree was clean at validate-state; only write-files modified it, + # so `git add -A` stages exactly what the release should ship. + git add -A + git status --short + + git commit -m "Release $bump-version.output.newVersion" + git push origin dev + timeout: 60000 + depends_on: [write-files, bump-version] + when: "$parse-args.output.dryRun == 'false'" + + - id: create-pr + bash: | + set -euo pipefail + + ver=$bump-version.output.newVersion + body=$format-changelog.output.rendered + + # If a PR already exists for this branch, just print its URL. 
+ existing=$(gh pr list --head dev --base main --state open --json url --jq '.[0].url' 2>/dev/null || true) + if [ -n "$existing" ]; then + echo "PR already open: $existing" + echo "$existing" + exit 0 + fi + + # Build the PR body in a way that doesn't put a literal "---" at YAML column 1 + pr_body=$(printf '%s\n\n---\n\nMerging this PR releases %s to main.\n' "$body" "$ver") + + url=$(gh pr create --base main --head dev --title "Release $ver" --body "$pr_body") + echo "$url" + timeout: 60000 + depends_on: [commit-and-push, format-changelog, bump-version] + when: "$parse-args.output.dryRun == 'false'" + + # ═══════════════════════════════════════════════════════════════════ + # PHASE 7 — Wait for the PR to merge (skipped in --dry-run) + # ─────────────────────────────────────────────────────────────────── + # The user (or a reviewer) merges the PR however they prefer: + # gh pr merge --squash --delete-branch=false + # then approves here. We don't auto-merge — keeps reviewer in control. + # ═══════════════════════════════════════════════════════════════════ + + # Pre-merge-gate summary (same pass-through pattern as review-summary) + - id: merge-summary + prompt: | + Reply to the user with EXACTLY this text, verbatim, no elaboration: + + ══════════════════════════════════════════════════════════════════ + PR OPENED — waiting for merge + ══════════════════════════════════════════════════════════════════ + + $create-pr.output + + Merge the PR however you prefer: + gh pr merge --squash --delete-branch=false + (or use the GitHub web UI) + + Then approve here to continue with tag, GitHub release, dev sync, + binary wait, and Homebrew formula update. + depends_on: [create-pr] + model: haiku + allowed_tools: [] + when: "$parse-args.output.dryRun == 'false'" + + - id: wait-for-merge + approval: + message: | + Approve once the PR above has been merged into main. + Reject to stop — the PR will remain open and reviewable. 
+ depends_on: [merge-summary] + when: "$parse-args.output.dryRun == 'false'" + + # ═══════════════════════════════════════════════════════════════════ + # PHASE 8 — Tag, GitHub release, sync dev with main + # ═══════════════════════════════════════════════════════════════════ + + - id: tag-and-release + bash: | + set -euo pipefail + + ver=$bump-version.output.newVersion + body=$format-changelog.output.rendered + + git fetch origin main --quiet + + # Tag the merge commit on main, push the tag. + git tag "v$ver" origin/main + git push origin "v$ver" + + # Strip the leading "## [x.y.z] - YYYY-MM-DD" header line for the release body. + notes=$(printf '%s\n' "$body" | sed '1{/^## /d;}; 2{/^$/d;}') + + gh release create "v$ver" --title "v$ver" --notes "$notes" + + echo "Tagged and released v$ver" + timeout: 90000 + depends_on: [wait-for-merge, bump-version, format-changelog] + when: "$parse-args.output.dryRun == 'false'" + + - id: sync-dev-with-main + bash: | + set -euo pipefail + + # Ensure dev contains the merge commit from main so they don't diverge. + git checkout dev + git pull origin main --ff-only --quiet + git push origin dev + + echo "dev fast-forwarded to include main's merge commit" + timeout: 60000 + depends_on: [tag-and-release] + when: "$parse-args.output.dryRun == 'false'" + + # ═══════════════════════════════════════════════════════════════════ + # PHASE 9 — Wait for release CI to finish building binaries + # ─────────────────────────────────────────────────────────────────── + # Poll the release until all 7 expected assets exist (5 binaries + + # archon-web.tar.gz + checksums.txt). Bail out early if the release + # workflow fails — no point waiting if CI is broken. 
+ # ═══════════════════════════════════════════════════════════════════ + + - id: check-homebrew + bash: | + if [ -f homebrew/archon.rb ]; then + echo "true" + else + echo "false" + fi + timeout: 5000 + depends_on: [validate-state] + + - id: wait-for-binaries + bash: | + set -uo pipefail + + ver=$bump-version.output.newVersion + repo=$(gh repo view --json nameWithOwner -q .nameWithOwner) + + echo "Waiting for release workflow to finish uploading binaries to v$ver..." + + for i in $(seq 1 30); do + asset_count=$(gh release view "v$ver" --repo "$repo" --json assets --jq '.assets | length' 2>/dev/null || echo "0") + + if [ "$asset_count" -ge 7 ]; then + echo "All $asset_count assets uploaded" + exit 0 + fi + + # Short-circuit: if the release workflow itself failed, stop waiting. + workflow_status=$(gh run list --workflow release.yml --event push --limit 1 --json conclusion,status --jq '.[0] | "\(.status)|\(.conclusion)"' 2>/dev/null || echo "unknown|unknown") + if [ "$workflow_status" = "completed|failure" ]; then + echo "Release workflow FAILED — see: gh run view --log-failed" + exit 1 + fi + + echo " Assets so far: $asset_count/7 — waiting 30s (attempt $i/30)..." + sleep 30 + done + + echo "Timed out waiting for binaries after 15 minutes" + exit 1 + timeout: 1000000 # 16 minutes — outer bound on the polling loop + depends_on: [sync-dev-with-main, check-homebrew, bump-version] + when: "$parse-args.output.dryRun == 'false' && $check-homebrew.output == 'true'" + + # ═══════════════════════════════════════════════════════════════════ + # PHASE 10 — Update Homebrew formula and sync the tap repo + # ─────────────────────────────────────────────────────────────────── + # Only runs if homebrew/archon.rb exists in the repo. The formula + # version and SHAs MUST move atomically (per the release skill's + # critical warning) — we regenerate the entire file from a template. 
+ # ═══════════════════════════════════════════════════════════════════ + + - id: fetch-and-update-formula + script: | + import { spawnSync } from "node:child_process"; + import { writeFileSync, mkdtempSync, readFileSync, rmSync } from "node:fs"; + import { tmpdir } from "node:os"; + import { join } from "node:path"; + + const ver = JSON.parse(String.raw`$bump-version.output`).newVersion; + + const repoOwnerName = spawnSync("gh", ["repo", "view", "--json", "nameWithOwner", "-q", ".nameWithOwner"], { encoding: "utf-8" }).stdout.trim(); + + const dir = mkdtempSync(join(tmpdir(), "release-shas-")); + try { + const dl = spawnSync( + "gh", + ["release", "download", `v${ver}`, "--repo", repoOwnerName, "--pattern", "checksums.txt", "--dir", dir], + { encoding: "utf-8", stdio: "pipe" }, + ); + if (dl.status !== 0) { + console.error(`Failed to download checksums.txt: ${dl.stderr}`); + process.exit(1); + } + + const checksums = readFileSync(join(dir, "checksums.txt"), "utf-8"); + + const sha = (asset) => { + const m = checksums.match(new RegExp(`^([a-f0-9]{64})\\s+\\*?${asset}$`, "m")); + if (!m) throw new Error(`Missing SHA for ${asset} in checksums.txt:\n${checksums}`); + return m[1]; + }; + + const shas = { + darwinArm64: sha("archon-darwin-arm64"), + darwinX64: sha("archon-darwin-x64"), + linuxArm64: sha("archon-linux-arm64"), + linuxX64: sha("archon-linux-x64"), + }; + + // Regenerate the entire formula from the canonical template. + // Editing in place is forbidden — version + SHAs MUST move atomically. + // Built as a line array so the YAML block scalar's indentation rules don't fight us. 
+ const formula = [ + "# Homebrew formula for Archon CLI", + "# To install: brew install coleam00/archon/archon", + "#", + "# This formula downloads pre-built binaries from GitHub releases.", + "# For development, see: https://github.com/coleam00/Archon", + "", + "class Archon < Formula", + ' desc "Remote agentic coding platform - control AI assistants from anywhere"', + ' homepage "https://github.com/coleam00/Archon"', + ` version "${ver}"`, + ' license "MIT"', + "", + " on_macos do", + " on_arm do", + ' url "https://github.com/coleam00/Archon/releases/download/v#{version}/archon-darwin-arm64"', + ` sha256 "${shas.darwinArm64}"`, + " end", + " on_intel do", + ' url "https://github.com/coleam00/Archon/releases/download/v#{version}/archon-darwin-x64"', + ` sha256 "${shas.darwinX64}"`, + " end", + " end", + "", + " on_linux do", + " on_arm do", + ' url "https://github.com/coleam00/Archon/releases/download/v#{version}/archon-linux-arm64"', + ` sha256 "${shas.linuxArm64}"`, + " end", + " on_intel do", + ' url "https://github.com/coleam00/Archon/releases/download/v#{version}/archon-linux-x64"', + ` sha256 "${shas.linuxX64}"`, + " end", + " end", + "", + " def install", + " binary_name = case", + " when OS.mac? && Hardware::CPU.arm?", + ' "archon-darwin-arm64"', + " when OS.mac? && Hardware::CPU.intel?", + ' "archon-darwin-x64"', + " when OS.linux? && Hardware::CPU.arm?", + ' "archon-linux-arm64"', + " when OS.linux? 
&& Hardware::CPU.intel?", + ' "archon-linux-x64"', + " end", + "", + ' bin.install binary_name => "archon"', + " end", + "", + " test do", + ' assert_match version.to_s, shell_output("#{bin}/archon version")', + " end", + "end", + "", + ].join("\n"); + + writeFileSync("homebrew/archon.rb", formula); + console.log(JSON.stringify({ updatedTo: ver, shas })); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + runtime: bun + timeout: 120000 + depends_on: [wait-for-binaries, bump-version] + when: "$parse-args.output.dryRun == 'false' && $check-homebrew.output == 'true'" + + - id: commit-formula + bash: | + set -euo pipefail + + ver=$bump-version.output.newVersion + + git checkout main + git pull origin main --ff-only --quiet + git add homebrew/archon.rb + git commit -m "chore(homebrew): update formula to v$ver" + git push origin main + + # Sync dev with main so the formula update is on both branches + git checkout dev + git pull origin main --ff-only --quiet + git push origin dev + + echo "Formula committed to main and synced to dev" + timeout: 90000 + depends_on: [fetch-and-update-formula, bump-version] + when: "$parse-args.output.dryRun == 'false' && $check-homebrew.output == 'true'" + + - id: sync-tap + script: | + import { spawnSync } from "node:child_process"; + import { mkdtempSync, copyFileSync, rmSync } from "node:fs"; + import { tmpdir } from "node:os"; + import { join } from "node:path"; + + const ver = JSON.parse(String.raw`$bump-version.output`).newVersion; + const tapRepo = "git@github.com:coleam00/homebrew-archon.git"; + + const dir = mkdtempSync(join(tmpdir(), "tap-sync-")); + try { + const clone = spawnSync("git", ["clone", "--depth=1", tapRepo, dir], { encoding: "utf-8", stdio: "pipe" }); + if (clone.status !== 0) { + console.error("Failed to clone tap repo. 
 You may need push access to coleam00/homebrew-archon."); + console.error("Run this manually after the release:"); + console.error(` git clone ${tapRepo} && cp homebrew/archon.rb homebrew-archon/Formula/archon.rb && git -C homebrew-archon commit -am 'chore: sync formula to v${ver}' && git -C homebrew-archon push`); + process.exit(1); + } + + copyFileSync("homebrew/archon.rb", join(dir, "Formula", "archon.rb")); + + const diff = spawnSync("git", ["-C", dir, "diff", "--quiet"], { encoding: "utf-8" }); + if (diff.status === 0) { + console.log("Tap formula already in sync — no changes needed"); + process.exit(0); + } + + for (const args of [ + ["-C", dir, "add", "Formula/archon.rb"], + ["-C", dir, "commit", "-m", `chore: sync formula to v${ver}`], + ["-C", dir, "push", "origin", "main"], + ]) { + const r = spawnSync("git", args, { encoding: "utf-8", stdio: "inherit" }); + if (r.status !== 0) { + console.error(`git ${args.slice(2).join(" ")} failed`); + process.exit(1); + } + } + + console.log(`Tap synced to v${ver}`); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + runtime: bun + timeout: 120000 + depends_on: [commit-formula, bump-version] + when: "$parse-args.output.dryRun == 'false' && $check-homebrew.output == 'true'" + + # ═══════════════════════════════════════════════════════════════════ + # PHASE 11 — Final summary (always runs in both modes) + # ─────────────────────────────────────────────────────────────────── + # `trigger_rule: all_done` lets this run regardless of which downstream + # nodes were skipped (dry-run path or no-homebrew path). + # ═══════════════════════════════════════════════════════════════════ + + - id: final-summary + script: | + // Defensive: this node runs with trigger_rule: all_done, so any upstream + // node may have been skipped or failed. Empty $node.output substitutions + // resolve to "" and would break JSON.parse if not guarded. 
+ const safeJson = (raw) => { + const s = raw.trim(); + if (!s) return null; + try { return JSON.parse(s); } catch { return null; } + }; + + const args = safeJson(String.raw`$parse-args.output`); + const ver = safeJson(String.raw`$bump-version.output`); + + const lines = []; + lines.push("══════════════════════════════════════════════════════════════════"); + + if (!args || !ver) { + lines.push("WORKFLOW ENDED EARLY — see prior node failures or skips."); + lines.push(""); + lines.push(`parse-args : ${args ? "ok" : "missing/skipped"}`); + lines.push(`bump-version: ${ver ? "ok" : "missing/skipped"}`); + lines.push(""); + lines.push("Check the run log for the first failed node and address it."); + } else if (args.dryRun === "true") { + lines.push(`DRY RUN COMPLETE — would have released v${ver.newVersion} (from v${ver.oldVersion})`); + lines.push(""); + lines.push("No files were written. No commits were made. No PR was created."); + lines.push("Re-run without --dry-run to actually cut the release."); + } else { + lines.push(`RELEASE COMPLETE — v${ver.newVersion} (from v${ver.oldVersion}, ${ver.bump})`); + lines.push(""); + lines.push("Verify the release end-to-end with the test-release skill:"); + lines.push(` /test-release brew ${ver.newVersion}`); + lines.push(` /test-release curl-mac ${ver.newVersion}`); + lines.push(""); + lines.push("If verification fails, file a hotfix and cut the next patch."); + lines.push("DO NOT announce the release until /test-release passes."); + } + lines.push("══════════════════════════════════════════════════════════════════"); + console.log(lines.join("\n")); + runtime: bun + timeout: 5000 + depends_on: + - review-changelog + - write-files + - commit-and-push + - create-pr + - wait-for-merge + - tag-and-release + - sync-dev-with-main + - wait-for-binaries + - fetch-and-update-formula + - commit-formula + - sync-tap + trigger_rule: all_done diff --git a/.archon/workflows/maintainer/maintainer-review-pr.yaml 
b/.archon/workflows/maintainer/maintainer-review-pr.yaml new file mode 100644 index 0000000000..7cacad57b6 --- /dev/null +++ b/.archon/workflows/maintainer/maintainer-review-pr.yaml @@ -0,0 +1,383 @@ +name: maintainer-review-pr +description: | + Use when: Maintainer wants to review a SINGLE PR with direction-and-scope + gating before any deep review. Skips deep review entirely when the PR is + off-direction, too broad, or has multiple concerns; instead drafts a polite- + decline comment for human approval. + Triggers: "maintainer review", "maintainer review pr ", "review and gate", + "should i review this PR", "gate this PR", "review pr as maintainer". + Does: Loads maintainer direction + profile + state -> gates the PR on + direction alignment, scope focus, and PR-template fill -> if review- + worthy, runs comprehensive review (5 parallel review aspects); if + decline-worthy, drafts a polite-decline comment that you approve before + it posts. + Provider: Pi (Minimax M2.7) — runs cheaper than Claude. Each review aspect + is its own Archon node, so Pi handles them as independent calls. + NOT for: Comprehensive review of a PR you've already decided to merge + (use archon-comprehensive-pr-review). Quick triage of all open PRs + (use maintainer-standup). + +provider: pi +model: minimax/MiniMax-M2.7 + +interactive: true # Required for the decline-approval gate + +worktree: + enabled: false # Live checkout — needs to read .archon/maintainer-standup/ +mutates_checkout: false # Read-only + per-run artifact writes; concurrent runs safe + +nodes: + # ═══════════════════════════════════════════════════════════════ + # PHASE 1: EXTRACT PR NUMBER FROM ARGUMENTS + # ═══════════════════════════════════════════════════════════════ + + - id: extract-pr-number + prompt: | + Find the GitHub PR number for this request. + + Request: $ARGUMENTS + + Rules: + - If the message contains an explicit PR number (e.g., "#1428", "PR 1428", "1428"), extract that number. 
+ - If the message contains a PR URL (https://github.com/.../pull/N), extract N. + - If you cannot determine a single PR number, output ERROR. + + CRITICAL: Output ONLY the bare number with no quotes, markdown, or explanation. + Example correct output: 1428 + allowed_tools: [] + idle_timeout: 30000 + + # ═══════════════════════════════════════════════════════════════ + # PHASE 2: GATHER PR DATA + MAINTAINER CONTEXT (parallel) + # ═══════════════════════════════════════════════════════════════ + + - id: fetch-pr + bash: | + PR_NUM=$(echo "$extract-pr-number.output" | tr -d "'\"\`\n " | grep -oE '[0-9]+' | head -1) + if [ -z "$PR_NUM" ]; then + echo "Failed to extract PR number from: $extract-pr-number.output" >&2 + exit 1 + fi + echo "$PR_NUM" > "$ARTIFACTS_DIR/.pr-number" + gh pr view "$PR_NUM" --json number,title,body,labels,comments,reviews,state,mergeable,mergeStateStatus,additions,deletions,changedFiles,files,author,createdAt,updatedAt,baseRefName,headRefName,reviewDecision,reviewRequests,isDraft + depends_on: [extract-pr-number] + timeout: 30000 + + - id: fetch-diff + bash: | + PR_NUM=$(cat "$ARTIFACTS_DIR/.pr-number") + # Don't redirect stderr — let auth / network / deleted-PR failures surface + # as a node failure rather than feeding an empty diff to the gate (which + # would produce a confident verdict on no evidence). + if ! diff_output=$(gh pr diff "$PR_NUM"); then + echo "ERROR: gh pr diff failed for PR #$PR_NUM" >&2 + exit 1 + fi + # Cap at 2500 lines to keep prompt size bounded; gate cares about shape, not every line. + if [ -z "$diff_output" ]; then + echo "(empty diff — PR has no changes)" + else + echo "$diff_output" | head -2500 + fi + depends_on: [fetch-pr] + timeout: 30000 + + - id: read-context + # Reuses the maintainer-standup script — same direction.md / profile.md / + # state.json / recent briefs we want for gate decisions. 
+ script: maintainer-standup-read-context + runtime: bun + timeout: 10000 + depends_on: [extract-pr-number] + + # ═══════════════════════════════════════════════════════════════ + # PHASE 3: GATE — direction + scope + template check + # ═══════════════════════════════════════════════════════════════ + + - id: gate + command: maintainer-review-gate + depends_on: [fetch-pr, fetch-diff, read-context] + context: fresh + output_format: + type: object + properties: + verdict: + type: string + enum: [review, decline, needs_split, unclear] + description: | + 'review' = passes gates, proceed to deep review. + 'decline' = wrong direction; draft polite-decline comment. + 'needs_split' = scope is multiple concerns; draft split-up request. + 'unclear' = gate cannot decide confidently; ask maintainer manually. + direction_alignment: + type: string + enum: [aligned, conflict, unclear] + scope_assessment: + type: string + enum: [focused, multiple_concerns, too_broad] + template_quality: + type: string + enum: [good, partial, empty] + decline_categories: + type: array + items: + type: string + description: e.g. ['direction', 'scope', 'template']. Empty when verdict == 'review'. + cited_direction_clauses: + type: array + items: + type: string + description: | + Specific direction.md clauses cited (e.g., 'direction.md §single-developer-tool'). + Empty when verdict == 'review'. + reasoning: + type: string + description: 1-3 sentences summarizing why this verdict. + required: + - verdict + - direction_alignment + - scope_assessment + - template_quality + - decline_categories + - cited_direction_clauses + - reasoning + + # ═══════════════════════════════════════════════════════════════ + # PHASE 4a: REVIEW BRANCH (verdict == 'review') + # ═══════════════════════════════════════════════════════════════ + + - id: review-classify + prompt: | + Determine which review aspects to run for this PR. 
+ + ## PR Metadata + $fetch-pr.output + + ## Diff (truncated) + $fetch-diff.output + + ## Rules + - **Code review**: ALWAYS run. Mandatory for every PR. + - **Error handling**: Run if diff touches code with try/catch, async/await, or new failure paths. + - **Test coverage**: Run if diff touches source code (not just tests, docs, or config). + - **Comment quality**: Run if diff adds/modifies comments, docstrings, JSDoc, or in-code documentation. + - **Docs impact**: Run if diff adds/removes/renames public APIs, CLI flags, env vars, or user-facing features. + + Provide reasoning for each decision. Output JSON only. + depends_on: [gate] + when: "$gate.output.verdict == 'review'" + allowed_tools: [] + context: fresh + idle_timeout: 60000 + output_format: + type: object + properties: + run_code_review: + type: string + enum: ['true', 'false'] + run_error_handling: + type: string + enum: ['true', 'false'] + run_test_coverage: + type: string + enum: ['true', 'false'] + run_comment_quality: + type: string + enum: ['true', 'false'] + run_docs_impact: + type: string + enum: ['true', 'false'] + reasoning: + type: string + required: + - run_code_review + - run_error_handling + - run_test_coverage + - run_comment_quality + - run_docs_impact + - reasoning + + - id: code-review + command: maintainer-review-code-review + depends_on: [review-classify] + when: "$review-classify.output.run_code_review == 'true'" + context: fresh + + - id: error-handling + command: maintainer-review-error-handling + depends_on: [review-classify] + when: "$review-classify.output.run_error_handling == 'true'" + context: fresh + + - id: test-coverage + command: maintainer-review-test-coverage + depends_on: [review-classify] + when: "$review-classify.output.run_test_coverage == 'true'" + context: fresh + + - id: comment-quality + command: maintainer-review-comment-quality + depends_on: [review-classify] + when: "$review-classify.output.run_comment_quality == 'true'" + context: fresh + + - id: docs-impact 
+ command: maintainer-review-docs-impact + depends_on: [review-classify] + when: "$review-classify.output.run_docs_impact == 'true'" + context: fresh + + - id: synthesize-review + command: maintainer-review-synthesize + depends_on: [code-review, error-handling, test-coverage, comment-quality, docs-impact] + trigger_rule: one_success + context: fresh + + # Auto-post — once the gate said 'review', the deep review is feedback worth + # delivering. No approval required; the maintainer can always edit/delete on + # GitHub. (Approval gates are reserved for the higher-stakes decline branch + # where the comment closes the door on the contribution.) + - id: post-review + bash: | + PR_NUM=$(cat "$ARTIFACTS_DIR/.pr-number") + if [ ! -f "$ARTIFACTS_DIR/review/review-comment.md" ]; then + echo "ERROR: review-comment.md missing — synthesize did not write it" >&2 + exit 1 + fi + gh pr comment "$PR_NUM" --body-file "$ARTIFACTS_DIR/review/review-comment.md" + echo "Posted review comment to PR #$PR_NUM" + depends_on: [synthesize-review] + timeout: 30000 + + # ═══════════════════════════════════════════════════════════════ + # PHASE 4b: DECLINE BRANCH (verdict in ['decline', 'needs_split']) + # ═══════════════════════════════════════════════════════════════ + + - id: approve-decline + approval: + message: | + Gate flagged this PR for polite-decline. Review the gate decision and the + drafted decline comment in the workflow output above (and in + $ARTIFACTS_DIR/gate-decision.md). + + Approve to post the drafted comment to the PR. + Reject with a reason to redraft (max 3 attempts). + capture_response: true + on_reject: + prompt: | + Reviewer feedback on the previous decline draft: + $REJECTION_REASON + + Re-read the gate decision at `$ARTIFACTS_DIR/gate-decision.md` and the + current drafted comment at `$ARTIFACTS_DIR/decline-comment.md`. Revise + the decline comment based on the feedback, then OVERWRITE + `$ARTIFACTS_DIR/decline-comment.md` with the new version. 
+ + Output the revised decline comment as raw markdown — no JSON wrapper. + max_attempts: 3 + depends_on: [gate] + when: "$gate.output.verdict == 'decline' || $gate.output.verdict == 'needs_split'" + + - id: post-decline + bash: | + PR_NUM=$(cat "$ARTIFACTS_DIR/.pr-number") + if [ ! -f "$ARTIFACTS_DIR/decline-comment.md" ]; then + echo "ERROR: decline-comment.md missing — gate command did not write it" >&2 + exit 1 + fi + gh pr comment "$PR_NUM" --body-file "$ARTIFACTS_DIR/decline-comment.md" + + # Tag the PR so the morning brief can surface "awaiting author". + # Failure (label not present in repo, permissions, etc.) is non-fatal, + # but record the actual outcome so the report node doesn't claim the + # label was applied when it wasn't. + if gh pr edit "$PR_NUM" --add-label awaiting-author 2>"$ARTIFACTS_DIR/.label-error"; then + echo "applied" > "$ARTIFACTS_DIR/.label-applied" + rm -f "$ARTIFACTS_DIR/.label-error" + else + echo "skipped" > "$ARTIFACTS_DIR/.label-applied" + echo "WARN: gh pr edit --add-label failed; see $ARTIFACTS_DIR/.label-error" >&2 + fi + + echo "Posted decline comment to PR #$PR_NUM" + depends_on: [approve-decline] + timeout: 30000 + + # ═══════════════════════════════════════════════════════════════ + # PHASE 4c: UNCLEAR BRANCH (verdict == 'unclear') + # ═══════════════════════════════════════════════════════════════ + + - id: approve-unclear + approval: + message: | + Gate could not classify this PR confidently. Read the raw gate output + and any artifacts in $ARTIFACTS_DIR/, then decide manually. + + Approve (with optional comment) = workflow ends here (no comment posted, + no review run). Your comment is captured as $approve-unclear.output and + the report node will include it. + Reject (with reason) = workflow is cancelled; reasoning is recorded in + the run. 
+ capture_response: true + depends_on: [gate] + when: "$gate.output.verdict == 'unclear'" + + # ═══════════════════════════════════════════════════════════════ + # PHASE 5: RECORD REVIEW IN SHARED STATE + # ═══════════════════════════════════════════════════════════════ + + # Append this run's PR number + verdict + timestamp to + # .archon/maintainer-standup/reviewed-prs.json so the morning standup + # brief can mark "✓ reviewed Nd ago" next to PRs that have already + # been triaged. Cross-workflow memory; gitignored, per-maintainer. + # + # Runs deterministically (no AI) after whichever branch fired. Inline + # script for the same reason persist is inline in maintainer-standup: + # JSON is valid JS expression syntax so $gate.output substitutes + # directly without a String.raw template literal. Records the gate + # verdict (review / decline / needs_split / unclear), not the + # synthesis verdict — keeps the contract narrow. + - id: record-review + runtime: bun + timeout: 10000 + depends_on: [post-review, post-decline, approve-unclear] + trigger_rule: one_success + script: | + import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs'; + import { resolve } from 'node:path'; + + const gate = $gate.output; + + const baseDir = resolve(process.cwd(), '.archon/maintainer-standup'); + if (!existsSync(baseDir)) mkdirSync(baseDir, { recursive: true }); + + const prPath = resolve(process.cwd(), '$ARTIFACTS_DIR/.pr-number'); + const prNumber = readFileSync(prPath, 'utf8').trim(); + + const reviewedPath = resolve(baseDir, 'reviewed-prs.json'); + let reviewed = {}; + if (existsSync(reviewedPath)) { + try { + reviewed = JSON.parse(readFileSync(reviewedPath, 'utf8')); + } catch { + reviewed = {}; + } + } + + reviewed[prNumber] = { + reviewed_at: new Date().toISOString(), + gate_verdict: gate.verdict, + run_id: '$WORKFLOW_ID', + }; + + writeFileSync(reviewedPath, JSON.stringify(reviewed, null, 2) + '\n'); + console.log(`Recorded review of PR #${prNumber} (gate: 
${gate.verdict})`); + + # ═══════════════════════════════════════════════════════════════ + # PHASE 6: FINAL REPORT (whichever branch ran) + # ═══════════════════════════════════════════════════════════════ + + - id: report + command: maintainer-review-report + depends_on: [record-review] + context: fresh diff --git a/.archon/workflows/maintainer/maintainer-standup.yaml b/.archon/workflows/maintainer/maintainer-standup.yaml new file mode 100644 index 0000000000..9382ce0887 --- /dev/null +++ b/.archon/workflows/maintainer/maintainer-standup.yaml @@ -0,0 +1,162 @@ +name: maintainer-standup +description: | + Use when: Maintainer wants their morning briefing — what changed on dev, + what's in the review queue, what to focus on today across PRs and issues. + Triggers: "morning standup", "maintainer standup", "what's new today", + "daily brief", "morning brief", "what should i work on today", + "start my day". + Does: Pulls latest dev, fetches all open PRs and assigned issues, cross- + references against direction.md to flag polite-decline candidates, + compares against prior run state to surface progress (merged, closed, + what you shipped), produces a prioritized P1-P4 brief. Saves dated + brief + state for next-run continuity. + NOT for: Fixing issues (use archon-fix-github-issue), reviewing a specific + PR (use archon-comprehensive-pr-review), repo-wide triage automation + (use repo-triage). 
+ +provider: claude +model: sonnet + +worktree: + enabled: false # Live checkout — needs to git pull and read .archon/maintainer-standup/ + +nodes: + # ── Layer 0: gather facts in parallel ── + + - id: git-status + script: maintainer-standup-git-status + runtime: bun + timeout: 60000 + + - id: gh-data + script: maintainer-standup-gh-data + runtime: bun + timeout: 180000 + + - id: read-context + script: maintainer-standup-read-context + runtime: bun + timeout: 10000 + + # ── Layer 1: synthesize the brief ── + + - id: synthesize + command: maintainer-standup + depends_on: [git-status, gh-data, read-context] + output_format: + type: object + properties: + brief_markdown: + type: string + description: Human-readable maintainer brief in markdown, with P1-P4 sections. + next_state: + type: object + description: Carry-over state for tomorrow's run. + properties: + last_run_at: + type: string + description: ISO-8601 timestamp of this run. + last_dev_sha: + type: string + description: origin/dev SHA at the end of this run. + carry_over: + type: array + description: Items still pending from previous runs (or surfaced this run). + items: + type: object + properties: + kind: + type: string + enum: [pr, issue, task, direction_question] + id: + type: string + description: PR/issue number as string, or task identifier. + note: + type: string + description: Why this is being carried over. + first_seen: + type: string + description: ISO-8601 date when this item first appeared in carry_over (preserved across runs). + required: [kind, id, note, first_seen] + observed_prs: + type: array + description: Snapshot of ALL currently-open PRs, used to detect resolved/new PRs next run. + items: + type: object + properties: + number: + type: number + title: + type: string + required: [number, title] + observed_issues: + type: array + description: Snapshot of currently-tracked issues (assigned + recent unlabeled). 
+ items: + type: object + properties: + number: + type: number + title: + type: string + required: [number, title] + direction_questions: + type: array + description: New "we don't have a stance on this" questions surfaced this run. + items: + type: string + required: [last_run_at, last_dev_sha, carry_over, observed_prs, observed_issues, direction_questions] + required: [brief_markdown, next_state] + + # ── Layer 2: persist state and dated brief ── + + - id: persist + depends_on: [synthesize] + runtime: bun + timeout: 15000 + script: | + import { writeFileSync, mkdirSync, existsSync } from 'node:fs'; + import { resolve } from 'node:path'; + + // JSON is valid JS expression syntax — substitute directly without a + // template literal. Wrapping in String.raw breaks if the output contains + // backticks (e.g. markdown code spans inside brief_markdown). + const data = $synthesize.output; + + // Local YYYY-MM-DD (sv-SE locale gives ISO format in local time) so a + // late-night run doesn't write tomorrow's UTC date and confuse next-run + // recent_briefs lookups. + const date = new Date().toLocaleDateString('sv-SE'); + + try { + const baseDir = resolve(process.cwd(), '.archon/maintainer-standup'); + if (!existsSync(baseDir)) mkdirSync(baseDir, { recursive: true }); + + writeFileSync( + resolve(baseDir, 'state.json'), + JSON.stringify(data.next_state, null, 2) + '\n', + ); + + const briefsDir = resolve(baseDir, 'briefs'); + if (!existsSync(briefsDir)) mkdirSync(briefsDir, { recursive: true }); + const briefPath = resolve(briefsDir, `${date}.md`); + writeFileSync(briefPath, data.brief_markdown); + + console.log(JSON.stringify({ + date, + state_path: '.archon/maintainer-standup/state.json', + brief_path: `.archon/maintainer-standup/briefs/${date}.md`, + })); + } catch (err) { + // Synthesis (Sonnet, ~5 min) is the expensive part. 
If persist fails + // (disk full, read-only fs, permission), dump the brief + state to + // stderr so the run isn't a total loss — they're recoverable from logs. + process.stderr.write(`PERSIST FAILED: ${err.message}\n`); + process.stderr.write('--- BEGIN brief_markdown (recoverable from logs) ---\n'); + process.stderr.write(data.brief_markdown + '\n'); + process.stderr.write('--- END brief_markdown ---\n'); + process.stderr.write('--- BEGIN next_state (recoverable from logs) ---\n'); + process.stderr.write(JSON.stringify(data.next_state, null, 2) + '\n'); + process.stderr.write('--- END next_state ---\n'); + process.exit(1); + } diff --git a/.archon/workflows/repo-triage.yaml b/.archon/workflows/maintainer/repo-triage.yaml similarity index 100% rename from .archon/workflows/repo-triage.yaml rename to .archon/workflows/maintainer/repo-triage.yaml diff --git a/.archon/workflows/e2e-claude-smoke.yaml b/.archon/workflows/test-workflows/e2e-claude-smoke.yaml similarity index 100% rename from .archon/workflows/e2e-claude-smoke.yaml rename to .archon/workflows/test-workflows/e2e-claude-smoke.yaml diff --git a/.archon/workflows/e2e-codex-smoke.yaml b/.archon/workflows/test-workflows/e2e-codex-smoke.yaml similarity index 100% rename from .archon/workflows/e2e-codex-smoke.yaml rename to .archon/workflows/test-workflows/e2e-codex-smoke.yaml diff --git a/.archon/workflows/e2e-deterministic.yaml b/.archon/workflows/test-workflows/e2e-deterministic.yaml similarity index 100% rename from .archon/workflows/e2e-deterministic.yaml rename to .archon/workflows/test-workflows/e2e-deterministic.yaml diff --git a/.archon/workflows/test-workflows/e2e-minimax-smoke.yaml b/.archon/workflows/test-workflows/e2e-minimax-smoke.yaml new file mode 100644 index 0000000000..eefae0d35a --- /dev/null +++ b/.archon/workflows/test-workflows/e2e-minimax-smoke.yaml @@ -0,0 +1,126 @@ +# E2E smoke test — Minimax M2.7 via the Pi community provider +# Verifies: Pi can resolve and call Minimax M2.7 using the 
user's local +# `pi /login` credentials (api_key entry in ~/.pi/agent/auth.json). +# Design: mirrors e2e-pi-smoke.yaml structure. Three nodes verify +# (1) the model responds at all, (2) it can self-identify as Minimax, +# (3) it can produce parseable JSON via output_format (best-effort on Pi). +# The final bash node fails fast if any signal is missing. +# Auth: requires a `minimax` entry in ~/.pi/agent/auth.json. No env vars. +name: e2e-minimax-smoke +description: | + Use when: Verifying that Minimax M2.7 loads via the Pi provider with the + user's local Pi auth (api_key in ~/.pi/agent/auth.json). + Triggers: "minimax smoke", "test minimax", "verify minimax", "minimax test". + Does: Sends three tiny prompts to Minimax M2.7 (math, self-identification, + structured JSON), asserts non-empty output and basic plausibility. + NOT for: Production work — connectivity / capability sanity check only. + +provider: pi +model: minimax/MiniMax-M2.7 + +worktree: + enabled: false # Smoke test — no need to isolate + +nodes: + # 1. Connectivity — does Pi resolve the model and stream a response? + - id: hello + prompt: 'What is 2+2? Answer with just the number, nothing else.' + allowed_tools: [] + effort: low + idle_timeout: 60000 + + # 2. Self-identification — INFORMATIONAL ONLY. Do not assert on the result. + # LLMs are unreliable narrators about their own identity, and Pi's system + # prompt mentions OpenAI-codex defaults, which causes Minimax (and likely + # other models) to pattern-match and claim that identity. The real proof + # of routing is in Pi's session jsonl (provider=minimax, real billing). + - id: identify + prompt: 'Without using any tools, on a single short line, tell me which model and provider you are.' + allowed_tools: [] + idle_timeout: 60000 + depends_on: [hello] + + # 3. Structured output — exercises Pi's best-effort output_format path + # (schema appended to prompt + JSON extracted from result text). 
+ # This is the same machinery the maintainer-standup synthesis relies on. + - id: json + prompt: | + Return a JSON object with two fields, no fences and no prose: + - "name": your model name (string) + - "ok": always true (boolean) + allowed_tools: [] + idle_timeout: 60000 + depends_on: [hello] + output_format: + type: object + properties: + name: + type: string + ok: + type: boolean + required: [name, ok] + + # 4. Assertions — fail loudly if any node returned empty / unparseable. + - id: assert + depends_on: [hello, identify, json] + bash: | + math="$hello.output" + ident="$identify.output" + jname="$json.output.name" + jok="$json.output.ok" + + echo "── results ──" + echo "math = $math" + echo "identify = $ident" + echo "json.name = $jname" + echo "json.ok = $jok" + echo "──────────────" + + if [ -z "$math" ] || [ -z "$ident" ]; then + echo "FAIL: empty output from hello or identify node" + exit 1 + fi + if [ -z "$jname" ] || [ -z "$jok" ]; then + echo "FAIL: structured-output fields missing — Pi best-effort JSON parse failed" + exit 1 + fi + + # Real proof of routing: Pi writes a session jsonl per call. Find ALL + # session jsonls modified in the last 10 minutes (generous window — + # smoke's three Pi nodes + assert can collectively take several + # minutes on a slow network; capped at 10 to avoid matching old runs). + # Check each for the minimax routing signal — any one matching is + # sufficient evidence. This avoids: + # - brittle path-encoding assumptions about Pi's per-cwd session dir, + # - non-deterministic `head -1` over `find` output (find doesn't + # guarantee any order), + # - JSON field-order brittleness in a single combined regex + # (`provider` may appear before or after `modelId` in the jsonl). 
+ recent_sessions=$(find "$HOME/.pi/agent/sessions" -name '*.jsonl' -mmin -10 -print 2>/dev/null) + if [ -z "$recent_sessions" ]; then + echo "FAIL: no Pi session jsonl modified in the last 10 minutes" + exit 1 + fi + + matched="" + while IFS= read -r session; do + # Two separate greps for order-independence — JSON field ordering + # isn't part of Pi's contract, so a single regex with `.*` between + # the two fields would silently false-FAIL if Pi ever reorders. + if grep -q '"provider":"minimax"' "$session" \ + && grep -q '"modelId":"MiniMax-M2.7"' "$session"; then + matched="$session" + break + fi + done <<< "$recent_sessions" + + if [ -n "$matched" ]; then + echo "PASS: Pi session log confirms provider=minimax, modelId=MiniMax-M2.7" + echo " session: $matched" + else + echo "FAIL: no recent Pi session log confirmed minimax routing — possible misroute" + echo " checked sessions:" + echo "$recent_sessions" | sed 's/^/ /' + exit 1 + fi + echo "PASS: smoke complete" diff --git a/.archon/workflows/e2e-mixed-providers.yaml b/.archon/workflows/test-workflows/e2e-mixed-providers.yaml similarity index 100% rename from .archon/workflows/e2e-mixed-providers.yaml rename to .archon/workflows/test-workflows/e2e-mixed-providers.yaml diff --git a/.archon/workflows/e2e-pi-all-nodes-smoke.yaml b/.archon/workflows/test-workflows/e2e-pi-all-nodes-smoke.yaml similarity index 100% rename from .archon/workflows/e2e-pi-all-nodes-smoke.yaml rename to .archon/workflows/test-workflows/e2e-pi-all-nodes-smoke.yaml diff --git a/.archon/workflows/e2e-pi-smoke.yaml b/.archon/workflows/test-workflows/e2e-pi-smoke.yaml similarity index 100% rename from .archon/workflows/e2e-pi-smoke.yaml rename to .archon/workflows/test-workflows/e2e-pi-smoke.yaml diff --git a/.archon/workflows/e2e-worktree-disabled.yaml b/.archon/workflows/test-workflows/e2e-worktree-disabled.yaml similarity index 100% rename from .archon/workflows/e2e-worktree-disabled.yaml rename to 
.archon/workflows/test-workflows/e2e-worktree-disabled.yaml diff --git a/.claude/skills/archon/SKILL.md b/.claude/skills/archon/SKILL.md index f36e7391b8..c844ad0eb9 100644 --- a/.claude/skills/archon/SKILL.md +++ b/.claude/skills/archon/SKILL.md @@ -37,17 +37,60 @@ Determine the user's intent and dispatch to the appropriate guide: | **Config / settings** | Read `guides/config.md` — interactive config editor | | **Initialize .archon/ in a repo** | Read `references/repo-init.md` | | **Create a workflow** | Read `references/workflow-dag.md` — the complete workflow authoring guide | +| **Quick parameter lookup — which field works on which node type** | Read `references/parameter-matrix.md` — master matrix, intent-based lookup, silent-failure catalog | | **Advanced features (hooks/MCP/skills)** | Read `references/dag-advanced.md` | | **Create a command file** | Read `references/authoring-commands.md` | | **Variable substitution reference** | Read `references/variables.md` | | **CLI command reference** | Read `references/cli-commands.md` | | **Run an interactive workflow** | Read `references/interactive-workflows.md` — transparent relay protocol | +| **Workflow good practices / anti-patterns** | Read `references/good-practices.md` — read before designing a non-trivial workflow | +| **Troubleshoot a failing / stuck workflow** | Read `references/troubleshooting.md` — log locations, common failure modes | | **Run a workflow (default)** | Continue with "Running Workflows" below | If the intent is ambiguous, ask the user to clarify. --- +## Richer Context: [archon.diy](https://archon.diy) + +The references in this skill are a distilled subset. The full, canonical docs live at **[archon.diy](https://archon.diy)** (Starlight site from `packages/docs-web/`). If the skill's reference pages don't cover what you need — an edge case, a worked example, a diagram, a deeper section on a feature — fetch the matching page from archon.diy. 
+ +### When to reach for the live docs + +- You need an end-to-end example that's longer than what the skill shows (e.g. full patterns for hooks, MCP config, sandbox schema, approval flows) +- You're explaining a concept to the user and want the most readable framing (the `book/` series is written as a tutorial, not a reference) +- You hit a feature the skill only mentions in passing (e.g. `agents:` inline sub-agents, advanced Codex options, the full SyncHookJSONOutput schema) +- The user asks "where is this documented?" — point them at the archon.diy URL, not a skill file path + +### URL map + +| Topic | URL | +|-------|-----| +| Landing + install | [archon.diy](https://archon.diy) | +| Getting started (installation, quick start, concepts) | [archon.diy/getting-started/](https://archon.diy/getting-started/overview/) | +| The book (tutorial-style walkthrough) | [archon.diy/book/](https://archon.diy/book/) | +| Workflow authoring guide | [archon.diy/guides/authoring-workflows/](https://archon.diy/guides/authoring-workflows/) | +| Command authoring guide | [archon.diy/guides/authoring-commands/](https://archon.diy/guides/authoring-commands/) | +| Node type guides | [archon.diy/guides/loop-nodes/](https://archon.diy/guides/loop-nodes/), [/approval-nodes/](https://archon.diy/guides/approval-nodes/), [/script-nodes/](https://archon.diy/guides/script-nodes/) | +| Per-node features (Claude only) | [/hooks/](https://archon.diy/guides/hooks/), [/mcp-servers/](https://archon.diy/guides/mcp-servers/), [/skills/](https://archon.diy/guides/skills/) | +| Global workflows/commands/scripts | [archon.diy/guides/global-workflows/](https://archon.diy/guides/global-workflows/) | +| Variables reference | [archon.diy/reference/variables/](https://archon.diy/reference/variables/) | +| CLI reference | [archon.diy/reference/cli/](https://archon.diy/reference/cli/) | +| Security model (env, sandbox, target-repo `.env` stripping) | 
[archon.diy/reference/security/](https://archon.diy/reference/security/) | +| Architecture | [archon.diy/reference/architecture/](https://archon.diy/reference/architecture/) | +| Configuration (`.archon/config.yaml` full schema) | [archon.diy/reference/configuration/](https://archon.diy/reference/configuration/) | +| Troubleshooting | [archon.diy/reference/troubleshooting/](https://archon.diy/reference/troubleshooting/) | +| Adapter setup (Slack/Telegram/GitHub/Web/Discord/Gitea/GitLab) | [archon.diy/adapters/](https://archon.diy/adapters/) | +| Deployment (Docker, cloud, Windows) | [archon.diy/deployment/](https://archon.diy/deployment/) | + +URL shape is `archon.diy/
//` — the paths mirror the filenames under `packages/docs-web/src/content/docs/`. + +### Precedence + +This skill's reference pages are the primary source for routine workflow authoring, CLI use, and setup. Reach for archon.diy when the skill is incomplete for your case — don't go to the live docs first by default (skill refs load into context faster and are tuned for agents). + +--- + ## Running Workflows ### Core Command @@ -152,9 +195,9 @@ nodes: depends_on: [first-node] ``` -### Four Node Types +### Node Types -Each node has exactly ONE of: `command`, `prompt`, `bash`, or `loop`. +Each node has exactly ONE of: `command`, `prompt`, `bash`, `script`, `loop`, `approval`, or `cancel`. **Command node** — runs a `.archon/commands/*.md` file: ```yaml @@ -177,6 +220,22 @@ Each node has exactly ONE of: `command`, `prompt`, `bash`, or `loop`. timeout: 15000 ``` +**Script node** — TypeScript/JavaScript (via `bun`) or Python (via `uv`), no AI, stdout captured as output: +```yaml +- id: transform + script: | + const raw = process.argv.slice(2).join(' ') || '{}'; + console.log(JSON.stringify({ parsed: JSON.parse(raw) })); + runtime: bun # 'bun' (.ts/.js) or 'uv' (.py) — REQUIRED + timeout: 30000 # Optional, ms, default 120000 + +# Or reference a named script from .archon/scripts/ or ~/.archon/scripts/ +- id: analyze + script: analyze-metrics # loads .archon/scripts/analyze-metrics.py + runtime: uv + deps: ["pandas>=2.0"] # Optional, uv only — 'uv run --with ' +``` + **Loop node** — iterates AI prompt until completion: ```yaml - id: implement @@ -188,6 +247,29 @@ Each node has exactly ONE of: `command`, `prompt`, `bash`, or `loop`. until_bash: "bun run test" # Optional: exit 0 = done ``` +**Approval node** — pauses the workflow for human review. Requires `interactive: true` at the workflow level for Web UI delivery: +```yaml +interactive: true # workflow level — required for web UI + +nodes: + - id: review-gate + approval: + message: "Review the plan above before proceeding." 
+ capture_response: true # Optional: user's comment → $review-gate.output + on_reject: # Optional: AI rework on rejection instead of cancel + prompt: "Revise based on feedback: $REJECTION_REASON" + max_attempts: 3 # Range 1-10, default 3 + depends_on: [plan] +``` + +**Cancel node** — terminates the workflow with a reason. Typically gated with `when:`: +```yaml +- id: stop-if-unsafe + cancel: "Refusing to proceed: input flagged UNSAFE." + depends_on: [classify] + when: "$classify.output != 'SAFE'" +``` + For the full authoring guide with all fields, conditions, trigger rules, and patterns: Read `references/workflow-dag.md` ### Creating a Command File @@ -230,7 +312,7 @@ For details: Read `references/dag-advanced.md` ### Example Files -- `examples/dag-workflow.yaml` — workflow with conditions, bash nodes, structured output +- `examples/dag-workflow.yaml` — workflow with conditions, bash + script + loop nodes, structured output - `examples/command-template.md` — Command file skeleton with all variables --- diff --git a/.claude/skills/archon/examples/dag-workflow.yaml b/.claude/skills/archon/examples/dag-workflow.yaml index 5e15f4c77c..50fcbdada1 100644 --- a/.claude/skills/archon/examples/dag-workflow.yaml +++ b/.claude/skills/archon/examples/dag-workflow.yaml @@ -1,7 +1,8 @@ -# Example: Workflow with all four node types +# Example: Workflow demonstrating multiple node types # -# Demonstrates: bash nodes, structured output, when: conditions, -# trigger_rule, per-node model, context: fresh, loop nodes, and output substitution. +# Demonstrates: bash nodes, script nodes (TypeScript via bun), structured output, +# when: conditions, trigger_rule, per-node model, context: fresh, loop nodes, +# and output substitution. # # IMPORTANT: This is a reference example. 
Design your actual workflow # around the user's specific needs — the number of nodes, their types, @@ -42,6 +43,26 @@ nodes: fi timeout: 5000 + # ── SCRIPT NODE: TypeScript (bun runtime), no AI, stdout captured as output ── + # Deterministic parsing the shell would mangle — extracts labels cleanly as JSON. + # + # NOTE: `$fetch-issue.output` is substituted *raw* into the script body (no shell + # quoting — see reference/variables.md). JSON is valid JS expression syntax — + # assign directly without String.raw or JSON.parse. String.raw breaks if the + # output contains backticks (e.g. markdown code spans in AI-generated content). + - id: extract-labels + script: | + try { + const issue = $fetch-issue.output; + const labels = (issue.labels ?? []).map((l) => l.name); + console.log(JSON.stringify({ labels, count: labels.length })); + } catch { + console.log(JSON.stringify({ labels: [], count: 0 })); + } + runtime: bun + depends_on: [fetch-issue] + timeout: 10000 + # ── PROMPT NODE: Inline AI prompt with structured output ── - id: classify prompt: | diff --git a/.claude/skills/archon/references/authoring-commands.md b/.claude/skills/archon/references/authoring-commands.md index 0b1240da6b..603dd3e4a3 100644 --- a/.claude/skills/archon/references/authoring-commands.md +++ b/.claude/skills/archon/references/authoring-commands.md @@ -4,14 +4,29 @@ Commands are plain Markdown files containing AI prompt templates. They are the a ## File Location +Commands are discovered from three scopes, highest-precedence first: + ``` -.archon/commands/ -├── my-command.md # Custom command -├── review-code.md # Another custom command -└── defaults/ # Optional: override bundled defaults - └── archon-assist.md # Overrides the bundled archon-assist +/.archon/commands/ # 1. 
Repo-scoped (wins) +├── my-command.md # Custom command for this repo +├── archon-assist.md # Overrides the bundled archon-assist +└── triage/ # Subfolders allowed, 1 level deep + └── review.md # Resolves as 'review', not 'triage/review' + +~/.archon/commands/ # 2. Home-scoped (user-level, shared across all repos) +├── review-checklist.md # Personal helper available in every repo +└── pr-style-guide.md + + # 3. Shipped with Archon (archon-assist, etc.) ``` +**Resolution rules:** + +- Filename-without-extension is the command name (e.g. `my-command.md` → `my-command`). +- 1-level subfolders are supported for grouping; resolution is still by filename (`triage/review.md` → `review`). +- Repo scope overrides home scope overrides bundled, by name. +- Duplicate basenames **within a scope** (e.g. two different `review.md` files in `triage/` and `security/`) are a user error — keep names unique within each scope. + Commands are referenced by name (without `.md`) in workflow YAML files. ## File Format @@ -78,11 +93,14 @@ Command names must: ## Discovery and Priority When a workflow references `command: my-command`, Archon searches in this order: -1. `.archon/commands/my-command.md` (repo custom) -2. `.archon/commands/defaults/my-command.md` (repo default overrides) + +1. `/.archon/commands/my-command.md` (repo scope) +2. `~/.archon/commands/my-command.md` (home scope — shared across every repo on the machine) 3. Bundled defaults (shipped with Archon) -First match wins. To override a bundled command, create a file with the same name in your repo. +First match wins. To override a bundled command, drop a file with the same name at either scope. To override a home-scoped command for a specific repo, drop a file with the same name in that repo's `.archon/commands/`. + +> **Web UI note**: Home-scoped commands appear in the workflow builder's node palette under a dedicated "Global (~/.archon/commands/)" section, distinct from project and bundled entries. 
## Referencing Commands from Workflows diff --git a/.claude/skills/archon/references/cli-commands.md b/.claude/skills/archon/references/cli-commands.md index 157eacb713..0cc1a0ee06 100644 --- a/.claude/skills/archon/references/cli-commands.md +++ b/.claude/skills/archon/references/cli-commands.md @@ -32,7 +32,7 @@ archon workflow run archon-fix-github-issue --resume | `--branch ` / `-b` | Branch name for worktree. Reuses existing worktree if healthy | | `--from ` / `--from-branch ` | Start-point branch for new worktree (default: repo default branch) | | `--no-worktree` | Skip isolation — run in the live checkout | -| `--resume` | Resume the last failed run of this workflow (skips completed steps/nodes) | +| `--resume` | Resume the last failed run of this workflow at this cwd (skips completed nodes) | | `--cwd ` | Working directory override | **Flag conflicts** (errors): @@ -42,6 +42,87 @@ archon workflow run archon-fix-github-issue --resume **Default behavior** (no flags): Auto-creates a worktree with branch name `{workflow-name}-{timestamp}`. +**Auto-resume without `--resume`**: If a prior invocation of the same workflow at the same cwd failed, the next invocation automatically skips completed nodes. `--resume` is only needed when you want to force resume a specific failed run or to reuse the worktree from that run. + +### `archon workflow status` + +Show the currently running workflow (if any) with its run ID, state, and last activity. + +```bash +archon workflow status +archon workflow status --json # Machine-readable output +``` + +### `archon workflow approve [comment]` + +Approve a paused approval-node workflow. Auto-resumes the workflow. + +```bash +archon workflow approve abc123 +archon workflow approve abc123 --comment "Plan looks good" +archon workflow approve abc123 "Plan looks good" # positional form +``` + +For interactive loop nodes, the comment becomes `$LOOP_USER_INPUT` on the next iteration. 
For approval nodes with `capture_response: true`, the comment becomes `$.output` for downstream nodes. + +### `archon workflow reject [reason]` + +Reject a paused approval gate. Without `on_reject` on the node, cancels the workflow. With `on_reject`, runs the rework prompt with `$REJECTION_REASON` substituted and re-pauses. + +```bash +archon workflow reject abc123 +archon workflow reject abc123 --reason "Plan misses test coverage" +archon workflow reject abc123 "Plan misses test coverage" +``` + +### `archon workflow abandon ` + +Mark a non-terminal workflow run as cancelled. Use when a `running` row is stuck after a server crash or when you want to discard a paused run without rejecting. This does NOT kill an in-flight subprocess — it only transitions the DB row. + +```bash +archon workflow abandon abc123 +``` + +> **There is no `archon workflow cancel` CLI subcommand.** To actively cancel a running workflow (terminate its subprocess), use the chat slash command `/workflow cancel ` on the platform that started it (Web UI, Slack, Telegram, etc.), or the Cancel button on the Web UI dashboard. The CLI only offers `abandon`, which is the right tool for orphan cleanup but does not interrupt a live subprocess. + +### `archon workflow resume [message]` + +Explicitly re-run a failed run. Most workflows auto-resume without this — use it when you want to force a specific run ID. + +```bash +archon workflow resume abc123 +archon workflow resume abc123 "continue with the plan" +``` + +### `archon workflow cleanup [days]` + +**Deletes** old terminal workflow runs (`completed`/`failed`/`cancelled`) from the database for disk hygiene. Does NOT transition `running` rows — use `abandon`/`cancel` for those. + +```bash +archon workflow cleanup # Default: 7 days +archon workflow cleanup 30 # Custom: 30 days +``` + +### `archon workflow event emit --run-id --type [--data ]` + +Emit a workflow event to a running workflow. Used inside loop prompts to signal state (e.g. 
"checkpoint written") for observability. Rarely invoked from the shell directly. + +```bash +archon workflow event emit --run-id abc123 --type checkpoint --data '{"step":"plan"}' +``` + +### `archon continue [flags] [message]` + +Continue work on a branch with prior context. Defaults to `archon-assist`; use `--workflow` to pick a different workflow. Useful for iterative sessions on the same worktree without typing the full `workflow run` incantation. + +```bash +archon continue feat/auth "Add password reset" +archon continue feat/auth --workflow archon-feature-development "Continue from step 3" +archon continue feat/auth --no-context "Start fresh without loading prior artifacts" +``` + +Flags: `--workflow `, `--no-context`. + ## Isolation Commands ### `archon isolation list` @@ -59,11 +140,20 @@ Outputs: branch name, path, workflow type, platform, last activity age. Ghost en Remove stale worktree environments. ```bash -archon isolation cleanup # Default: 7 days -archon isolation cleanup 14 # Custom: 14 days -archon isolation cleanup --merged # Remove branches merged into main (+ remote branches) +archon isolation cleanup # Default: 7 days +archon isolation cleanup 14 # Custom: 14 days +archon isolation cleanup --merged # Also remove worktrees whose branches merged into main (deletes remote branches too) +archon isolation cleanup --merged --include-closed # Also remove worktrees whose PRs were closed without merging ``` +**Flags:** + +| Flag | Description | +|------|-------------| +| `[days]` | Positional — age threshold in days. Environments untouched for longer than this are removed. 
Default: 7 | +| `--merged` | Union of three signals — ancestry (`git branch --merged`), patch equivalence (`git cherry`), and PR state (`gh`) — safely catches squash-merges | +| `--include-closed` | With `--merged`, also remove worktrees whose PRs were closed (abandoned, not merged) | + ## Validate Commands ### `archon validate workflows [name]` diff --git a/.claude/skills/archon/references/dag-advanced.md b/.claude/skills/archon/references/dag-advanced.md index 4add35d8f7..63a83e9101 100644 --- a/.claude/skills/archon/references/dag-advanced.md +++ b/.claude/skills/archon/references/dag-advanced.md @@ -1,6 +1,6 @@ # Advanced Features: Hooks, MCP, Skills, Retry -These features are available on **command and prompt nodes** (hooks, MCP, skills, tool restrictions) and **command, prompt, and bash nodes** (retry, output_format). Loop nodes do not support these features (`retry` on loop nodes is a hard error; others are silently ignored). +These features are available on **command and prompt nodes** (hooks, MCP, skills, tool restrictions, `output_format`, `agents`, Claude SDK options) and **command, prompt, bash, and script nodes** (retry). Loop nodes do not support these features (`retry` on loop nodes is a hard error; others are silently ignored). Bash and script nodes silently ignore AI-specific fields (a loader warning lists the ignored fields). ## Provider Compatibility diff --git a/.claude/skills/archon/references/good-practices.md b/.claude/skills/archon/references/good-practices.md new file mode 100644 index 0000000000..e731a2583d --- /dev/null +++ b/.claude/skills/archon/references/good-practices.md @@ -0,0 +1,241 @@ +# Workflow Good Practices and Anti-Patterns + +Guidance for authoring workflows that survive first contact with a real codebase. Written for an agent or human writing their first non-trivial workflow. + +## Good Practices + +### 1. Use deterministic nodes for deterministic work + +AI nodes are expensive, non-reproducible, and can hallucinate. 
Use `bash:` or `script:` for anything that has a right answer a computer can produce. + +- **Run tests** with `bash: "bun run test"`, not `prompt: "run the tests and tell me if they passed"`. +- **Parse JSON** with `script:` (bun/uv), not a `prompt:` that re-derives structure from free text. +- **Read files with known paths** via `bash: "cat path/to/file"` or `Read` in an AI node where the agent actually needs to reason about the content. +- **Git state checks** (current branch, uncommitted changes, merge-base) → `bash:`. + +### 2. Use `output_format` for every node whose output downstream `when:` reads + +`when:` conditions do best-effort JSON parsing on `$nodeId.output` for `.field` access. If the upstream node doesn't enforce a shape, you're pattern-matching free-form AI text — fragile. + +```yaml +# GOOD +- id: classify + prompt: "Classify as BUG or FEATURE" + output_format: # enforces the JSON shape + type: object + properties: + type: { type: string, enum: [BUG, FEATURE] } + required: [type] + +- id: investigate + command: investigate-bug + depends_on: [classify] + when: "$classify.output.type == 'BUG'" # safe field access + +# BAD +- id: classify + prompt: "Is this a bug or a feature?" + # no output_format; AI might reply "it looks like a bug", "BUG", or "This is a bug.\n\n..." + +- id: investigate + command: investigate-bug + depends_on: [classify] + when: "$classify.output == 'BUG'" # fragile string match +``` + +### 3. `trigger_rule: none_failed_min_one_success` after conditional branches + +After `when:`-gated branches, the downstream merge node will see one or more **skipped** dependencies. Skipped ≠ success. Default `all_success` fails. 
+ +```yaml +- id: investigate + command: investigate-bug + depends_on: [classify] + when: "$classify.output.type == 'BUG'" + +- id: plan + command: plan-feature + depends_on: [classify] + when: "$classify.output.type == 'FEATURE'" + +- id: implement + command: implement + depends_on: [investigate, plan] + trigger_rule: none_failed_min_one_success # CORRECT — exactly one ran + # trigger_rule: all_success ← would fail here (one dep skipped) +``` + +Use `one_success` when any dep succeeding is enough; `none_failed_min_one_success` when no dep should have failed AND at least one must have succeeded; `all_done` for "run cleanup regardless" patterns with `cancel:` or notification nodes. + +### 4. `context: fresh` requires artifacts for state passing + +A node with `context: fresh` starts with no memory of prior nodes in the same workflow. The only way state moves is via files. Default is `fresh` for parallel layers and `shared` for sequential — explicit `context: fresh` is common when you want cost isolation. + +```yaml +- id: investigate + command: investigate-bug + # Investigator WRITES to $ARTIFACTS_DIR/investigation.md + +- id: implement + command: implement-fix + depends_on: [investigate] + context: fresh + # Implementer MUST read $ARTIFACTS_DIR/investigation.md — it has no memory + # of what the investigator found. +``` + +Command files should lead with "read artifacts from `$ARTIFACTS_DIR/...`" when they're downstream of a fresh node. This is the single biggest quality lever on multi-node workflows. + +### 5. Cheap models for glue, strong models for substance + +Classification, routing, formatting, and short summaries don't need Opus. Use `model: haiku` for these and reserve `sonnet`/`opus` for the nodes that actually produce code or long-form analysis. Combined with `allowed_tools: []` on pure-text nodes, this cuts cost dramatically. 
+ +```yaml +- id: classify + prompt: "Classify this issue" + model: haiku # fast + cheap + allowed_tools: [] # no tool overhead + output_format: { ... } + +- id: implement + command: implement-fix + model: sonnet # where the thinking happens +``` + +### 6. Write the workflow description for routing + +Archon's orchestrator routes user intent to workflows by description. Write descriptions that make routing obvious. + +- Start with the imperative action: "Fix a GitHub issue end-to-end", "Generate a Remotion video composition". +- Mention triggers: "Use when the user asks to review a PR", "Use when there's a failing test run". +- Mention what it does NOT do: "Does not create a PR — use `archon-plan-to-pr` for that". + +### 7. Validate before shipping + +Never declare a workflow "done" without: + +```bash +archon validate workflows # YAML + DAG structure + resource refs +``` + +This checks: YAML syntax, node ID uniqueness, no cycles, all `depends_on` exist, all `$nodeId.output` refs point to known nodes, all `command:` files exist, all `mcp:` configs parse, all `skills:` directories exist, provider/model compatibility, named script existence, runtime availability. Fix everything it reports before first run. + +For brand-new workflows, also: +1. Run once against a trivial input (`archon workflow run my-workflow --branch test/sanity "hello"`) +2. Check the run log at `~/.archon/workspaces///logs/.jsonl` +3. Check artifacts at `~/.archon/workspaces///artifacts/runs//` + +See `references/troubleshooting.md` for how to read those. + +### 8. Design the artifact chain before writing command files + +In a multi-node workflow, each node's artifact IS the specification for the next node. 
Before writing any command body, map out: + +| Node | Reads | Writes | +|------|-------|--------| +| `investigate-issue` | GitHub issue via `gh` | `$ARTIFACTS_DIR/issues/issue-{n}.md` | +| `implement-issue` | Artifact from `investigate-issue` | Code files, tests | +| `create-pr` | Git diff | GitHub PR, `$ARTIFACTS_DIR/pr-body.md` | + +If a downstream agent can't execute from just its artifact, the artifact is incomplete. This is the single most common failure mode in multi-node workflows. + +### 9. Keep workflows reversible + +Use `worktree.enabled: true` at the workflow level for anything that modifies the codebase. The CLI `--no-worktree` flag will hard-error, forcing users into isolation. The cost is a one-time cp of the worktree; the benefit is never having a failed workflow corrupt a live checkout. + +For read-only workflows (triage, reporting, code analysis), pin `worktree.enabled: false` instead — saves the worktree setup cost. + +--- + +## Anti-Patterns + +### ❌ Asking AI to run deterministic checks + +```yaml +# BAD +- id: test + prompt: "Run bun run test and tell me if it passed" + +# GOOD +- id: test + bash: "bun run test 2>&1" + +- id: react-to-tests + prompt: "Fix any failures: $test.output" + depends_on: [test] + trigger_rule: all_done # run even if tests failed +``` + +### ❌ Pattern-matching free-form AI output in `when:` + +```yaml +# BAD — brittle +- id: decide + prompt: "Should we proceed? Answer yes or no." +- id: do-thing + depends_on: [decide] + when: "$decide.output == 'yes'" # AI says "Yes!" or "Yes, because..." — no match + +# GOOD +- id: decide + prompt: "Should we proceed?" + output_format: + type: object + properties: { proceed: { type: boolean } } + required: [proceed] +- id: do-thing + depends_on: [decide] + when: "$decide.output.proceed == 'true'" +``` + +### ❌ Commands that assume prior-node memory in a `context: fresh` chain + +```markdown + +Fix the bug we discussed in the investigation phase. 
+ + +Read the investigation at `$ARTIFACTS_DIR/issues/issue-{n}.md`. +Extract the root cause, affected files, and implementation plan. +Implement the changes exactly as specified in the plan. +``` + +### ❌ Long flat layers of AI nodes + +Ten sibling `prompt:` nodes in one layer all depending on one upstream is a $N/run cost bomb and a latency trap. If the work is parallel and similar, use the `agents:` inline sub-agent map-reduce pattern with a cheap model per item and a single stronger reducer. See `references/dag-advanced.md` and the [Inline sub-agents section on archon.diy](https://archon.diy/guides/authoring-workflows/#inline-sub-agents) for a worked example. + +### ❌ Hardcoding secrets in YAML or MCP configs + +Use `$ENV_VAR` expansion in MCP configs and the `env:` block in `.archon/config.yaml` (or Web UI Settings → Projects → Env Vars). See `references/repo-init.md` §Per-Project Env Injection. + +### ❌ `retry` on a loop node + +Loop nodes manage their own iteration via `max_iterations`. Setting `retry:` on a loop is a **hard parse error** — the workflow fails to load. If a loop iteration is flaky, handle it inside the loop prompt (the AI can retry tool calls) or use `until_bash` to gate completion on a deterministic check. + +### ❌ Tiny `max_iterations` on open-ended loops + +A loop with `max_iterations: 3` that's supposed to implement N stories from a PRD will silently stop after 3 iterations and leave the work half-done. Think about the worst case — multi-story PRDs need 10–20, fix-iterate cycles need 5–8, refinement loops need 3–5. + +### ❌ Missing `interactive: true` at workflow level for approval/loop gates on web + +Web UI dispatches non-interactive workflows to a background worker that cannot deliver chat messages. Approval-gate messages and loop `gate_message` prompts will never reach the user. If the workflow has `approval:` nodes OR `loop.interactive: true`, set workflow-level `interactive: true`. 
+ +### ❌ Tool-restricted nodes without the MCP wildcard + +```yaml +# BAD — no tools available, including MCP +- id: analyze + prompt: "Use the Postgres MCP to query users" + mcp: .archon/mcp/postgres.json + allowed_tools: [] # OOPS — disables EVERYTHING, including MCP tools + +# FIXED — Archon auto-adds mcp____* wildcards when mcp: is set, +# so this actually works out of the box. The anti-pattern is forgetting +# and manually adding Read/Write/Bash/etc. when you only want MCP. +- id: analyze + prompt: "Use Postgres MCP to query users" + mcp: .archon/mcp/postgres.json + allowed_tools: [] # correct — MCP tools auto-attached +``` + +Caveat: this only helps Claude. Codex gets MCP config from `~/.codex/config.toml` globally, not per-node. diff --git a/.claude/skills/archon/references/interactive-workflows.md b/.claude/skills/archon/references/interactive-workflows.md index 243cfdb7b0..856d50afd1 100644 --- a/.claude/skills/archon/references/interactive-workflows.md +++ b/.claude/skills/archon/references/interactive-workflows.md @@ -103,4 +103,4 @@ archon workflow reject "reason for rejection" - **Workflow shows `running` for a long time**: The AI is doing research/implementation. Be patient — check again in a few minutes. - **Log file not found**: The log is at `~/.archon/workspaces///logs/.jsonl` -- **User wants to cancel**: Run `archon workflow reject ` or `archon workflow cancel ` +- **User wants to cancel**: Run `archon workflow reject ` to stop at an approval gate, or `archon workflow abandon ` to mark the run cancelled without killing any subprocess. 
To actively terminate a still-live subprocess, use the chat slash command `/workflow cancel ` on the platform that started it — there is no `archon workflow cancel` CLI subcommand diff --git a/.claude/skills/archon/references/parameter-matrix.md b/.claude/skills/archon/references/parameter-matrix.md new file mode 100644 index 0000000000..2d7fec80ce --- /dev/null +++ b/.claude/skills/archon/references/parameter-matrix.md @@ -0,0 +1,193 @@ +# Parameter Matrix (Quick Reference) + +One-page lookup for Archon workflow parameters: which field works on which node type, how to pick the right parameter for a given intent, and the gotchas that don't fail loudly. + +This is a **lookup reference**. For the full explanation of any field, follow the cross-references at the bottom to the detailed guides. + +## Master Matrix: Parameters × Node Types + +There are seven node types. Exactly one of `command`, `prompt`, `bash`, `script`, `loop`, `approval`, or `cancel` must appear per node. + +| Parameter | command | prompt | bash | script | loop | approval | cancel | +| -------------------------------------------- | :-----: | :-----: | :-----: | :-----: | :--------------------------: | :------------: | :-----: | +| `id` | yes | yes | yes | yes | yes | yes | yes | +| `depends_on` | yes | yes | yes | yes | yes | yes | yes | +| `when` | yes | yes | yes | yes | yes | yes | yes | +| `trigger_rule` | yes | yes | yes | yes | yes | yes | yes | +| `idle_timeout` | yes | yes | ignored (use `timeout`) | ignored (use `timeout`) | yes (per-iter) | yes | yes | +| `timeout` (total, not idle) | — | — | yes | yes | — | — | — | +| `model` / `provider` | yes | yes | ignored | ignored | **ignored at runtime** | ignored | ignored | +| `context: fresh` \| `shared` | yes | yes | ignored | ignored | ignored (use `loop.fresh_context`) | ignored | ignored | +| `output_format` | yes | yes | ignored | ignored | ignored | ignored | ignored | +| `allowed_tools` / `denied_tools` | yes | yes | ignored | ignored | 
ignored | ignored | ignored | +| `hooks` | yes | yes | ignored | ignored | ignored | ignored | ignored | +| `mcp` | yes | yes | ignored | ignored | ignored | ignored | ignored | +| `skills` | yes | yes | ignored | ignored | ignored | ignored | ignored | +| `agents` | yes | yes | ignored | ignored | ignored | ignored | ignored | +| `retry` | yes | yes | yes | yes | **hard error** | yes (`on_reject`) | yes | +| `effort` / `thinking` / `fallbackModel` / `betas` / `sandbox` / `maxBudgetUsd` / `systemPrompt` | yes | yes | ignored | ignored | ignored | ignored | ignored | +| `bash` / `script` / `runtime` / `deps` | — | — | `bash` required | `script` + `runtime` required | — | — | — | +| `loop` (nested config) | — | — | — | — | **required** | — | — | +| `approval` (nested config) | — | — | — | — | — | **required** | — | +| `cancel` (reason string) | — | — | — | — | — | — | **required** | + +**Reading the matrix:** +- **yes** — field works as expected on this node type. +- **ignored** — field is accepted by the parser but has no effect at runtime. Loader emits a warning (`_node_ai_fields_ignored`). +- **hard error** — workflow fails to load. Only `retry` on a loop node does this. + +Most AI features work on `command` and `prompt` nodes. Loop nodes are thin controllers — the AI fields inside `loop.prompt` are what actually run. `bash` and `script` nodes silently ignore AI fields. `approval` and `cancel` nodes don't invoke AI at all. + +## Parameter Selection by Intent + +Organized by what you're trying to do, not by field name. Useful when you know the outcome you want but aren't sure which parameter gets you there. + +| You want to... 
| Use | +| ------------------------------------------------ | ------------------------------------------------------------ | +| Control cost per node | `model: haiku`, `maxBudgetUsd: 0.50`, `effort: low` | +| Force pure reasoning (no tools) | `allowed_tools: []` | +| Read-only analysis phase | `denied_tools: [Write, Edit, Bash]` | +| Route based on upstream output | Upstream `output_format: {...}` + downstream `when:` | +| Join after mutually-exclusive routes | `trigger_rule: none_failed_min_one_success` or `one_success` | +| Run two independent branches in parallel | Two nodes with no shared `depends_on` | +| Iterate until tests pass | `loop: {until_bash: "bun run test", max_iterations: N}` | +| Iterate through a backlog without memory bleed | `loop: {fresh_context: true}`, state written to `$ARTIFACTS_DIR` | +| Iterate with human feedback between iterations | `loop: {interactive: true, gate_message: "..."}` + workflow `interactive: true` | +| Single human approval gate | `approval:` node with `on_reject: {prompt, max_attempts}` | +| Fail fast if upstream output is wrong | `cancel:` node with `when:` | +| Enforce a rule on every file edit | `hooks.PostToolUse` with `matcher: "Write\|Edit"` | +| Deny dangerous commands | `hooks.PreToolUse` with `permissionDecision: deny` | +| Give a node domain knowledge | `skills: [skill-name]` | +| Give a node external tools | `mcp: .archon/mcp/server.json` | +| Retry flaky API calls | `retry: {max_attempts: 3, delay_ms: 2000}` | +| Run Python in a node | `script:` node with `runtime: uv`, `deps: [...]` | +| Run TypeScript in a node | `script:` node with `runtime: bun` | +| Mix providers in one workflow | Workflow-level `provider: claude`, per-node `provider: codex` | +| Use a non-default model for one node | Node-level `model:` override | +| Run on a 1M context window | `model: opus[1m]` + `betas: ['context-1m-2025-08-07']` | +| Increase per-iteration timeout on a long loop | `idle_timeout: 600000` on the loop node | +| Pass 
large artifacts between nodes | Write to `$ARTIFACTS_DIR/...`, read in downstream node | +| Pass small structured data | `output_format` + `$nodeId.output.field` access | +| Block workflow on an external condition | `bash:` polling loop or `approval:` node | +| Spawn parallel sub-tasks inside one node | Inline `agents:` map (see below) | +| Force isolation regardless of CLI flags | Workflow-level `worktree: {enabled: true}` | +| Force live checkout for read-only workflows | Workflow-level `worktree: {enabled: false}` | + +## Silent Failures (what gets ignored without erroring) + +Things that don't fail parsing but don't do what you'd expect: + +1. **`model` / `provider` on a loop node** → silently ignored. Logged as `loop_node_ai_fields_ignored`. The loop is a controller; set model at workflow level or inside the loop prompt body. +2. **`hooks` / `mcp` / `skills` / `output_format` / `allowed_tools` / `denied_tools` on a loop, bash, script, approval, or cancel node** → silently ignored. +3. **`context: fresh` on a loop** → ignored. Use `loop.fresh_context: true` instead. +4. **`output_format` on a bash or script node** → schema is accepted but bash/script output is whatever stdout says; no JSON coercion. +5. **Unknown `$nodeId.output` reference** → resolves to empty string + warning; does not fail the workflow. +6. **Invalid `when:` expression** → node silently skipped (fail-closed). +7. **`allowed_tools` / `denied_tools` on Codex nodes** → ignored. Use Codex CLI config (`~/.codex/config.toml`). +8. **`hooks` on Codex nodes** → ignored + warning logged. +9. **`mcp` or `skills` per-node on Codex** → ignored. Configure globally in `~/.codex/config.toml` or `~/.agents/skills/`. +10. **`trigger_rule: all_success` after `when:`-gated fan-out** → branches that didn't run count as "not succeeded"; the join node will never fire. Use `none_failed_min_one_success` or `one_success`. +11. 
**Node-level `interactive: true` on an approval node or loop, without workflow-level `interactive: true`** → on the Web UI, gate messages never reach the user. The workflow dispatches to a background worker that can't deliver chat messages. +12. **Missing env var in MCP config** → warning logged, node continues with empty string substitution. +13. **`retry` on a loop node** → this one is a **hard parse error** (not silent). Use the loop's own `max_iterations` and `until_bash` for finish-line detection. +14. **`String.raw\`$nodeId.output\`` in a `script:` body** → silently corrupts when the substituted value contains a backtick (e.g. markdown code spans in AI output or `output_format` payloads). The template literal terminates early, producing a cryptic `Expected ";"` parse error. Use direct assignment instead: `const data = $nodeId.output;` — JSON is valid JS expression syntax and needs no wrapper. + +The pattern across these: if you set an AI feature on a non-AI node, it's silently ignored. Watch loader logs for `_ignored` warnings when debugging. + +## Inline `agents:` (Task-tool sub-agents) + +A node can define named sub-agents that Claude invokes via the `Task` tool. Useful for map-reduce patterns: one node spawns N parallel sub-tasks with a cheap model, then a reducer summarizes. + +```yaml +- id: analysis + prompt: | + For each area of the codebase, delegate to the appropriate sub-agent + via the Task tool. Summarize all findings into a single report. 
+ agents: + security-scanner: # kebab-case id + description: "Scan for common web vulnerabilities" + prompt: "Run OWASP top-10 style checks on the given files" + model: haiku + tools: [Read, Grep, Glob] # tool whitelist for this sub-agent + disallowedTools: [Write, Edit, Bash] + maxTurns: 5 + test-coverage-auditor: + description: "Report untested or weakly-tested surfaces" + prompt: "Identify code paths without corresponding tests" + model: haiku + tools: [Read, Grep, Glob] + skills: [test-coverage-patterns] # skill injection per sub-agent + maxTurns: 5 +``` + +**Fields per agent:** + +| Field | Required | Description | +| ------------------ | :------: | --------------------------------------------------------- | +| `description` | yes | Shown when Claude decides which agent to delegate to | +| `prompt` | yes | System prompt the sub-agent runs under | +| `model` | no | Per-agent model override | +| `tools` | no | Tool whitelist for the sub-agent | +| `disallowedTools` | no | Tool blacklist | +| `skills` | no | Skills to inject into the sub-agent | +| `maxTurns` | no | Max conversation turns for the sub-agent | + +**Naming rule:** lowercase kebab-case. No leading or trailing hyphens, no double hyphens, no digits-only ids. + +**When to use `agents:` vs fan-out at the workflow level:** +- Use `agents:` when the number of sub-tasks is dynamic or decided by the orchestrator node at runtime. +- Use workflow-level fan-out (parallel nodes with `depends_on: [setup]`) when the sub-tasks are known ahead of time and each needs its own artifact. + +See [archon.diy/guides/authoring-workflows/#inline-sub-agents](https://archon.diy/guides/authoring-workflows/#inline-sub-agents) for a worked end-to-end example. + +## Cross-References to Detailed Guides + +Use this matrix to find the right parameter. Use these references for the full explanation of how it works. 
+ +| Topic | Detailed reference | +| ------------------------------------------------ | ----------------------------------------------------------------------- | +| Workflow authoring overview, node base fields | `workflow-dag.md` | +| Loop nodes in depth (completion, session patterns) | `workflow-dag.md` § Loop Nodes | +| Approval / cancel nodes | `workflow-dag.md` § Approval Nodes, § Cancel Nodes | +| Hooks (events, matchers, response shapes) | `dag-advanced.md` § Hooks | +| MCP (transports, env expansion, wildcards) | `dag-advanced.md` § MCP | +| Skills (injection, discovery, combining with MCP) | `dag-advanced.md` § Skills | +| Retry classification (FATAL / TRANSIENT / UNKNOWN) | `dag-advanced.md` § Retry Configuration | +| Variable reference (`$ARGUMENTS`, `$ARTIFACTS_DIR`, etc) | `variables.md` | +| CLI flags and commands | `cli-commands.md` | +| Command file authoring | `authoring-commands.md` | +| Repo initialization, `.archon/config.yaml` schema | `repo-init.md` | +| Good practices and anti-patterns | `good-practices.md` | +| Interactive workflow relay protocol | `interactive-workflows.md` | +| Debugging and log locations | `troubleshooting.md` | +| Full schema reference | [archon.diy/reference/configuration/](https://archon.diy/reference/configuration/) | + +## Providers at a Glance + +| Feature | Claude | Codex | Pi (community) | +| ------------------------------- | :-----------: | :-------------------------------------: | :----------------------------------: | +| `command` / `prompt` / `loop` | yes | yes | yes | +| `bash` / `script` | yes | yes | yes | +| `output_format` | reliable | reliable | best-effort | +| `allowed_tools` / `denied_tools` | yes | ignored (use Codex CLI config) | ignored | +| `hooks` | yes | **ignored + warn** | not available | +| `mcp` (per-node) | yes | global `~/.codex/config.toml` only | not available | +| `skills` (per-node) | yes | global `~/.agents/skills/` only | not available | +| Model naming | `haiku`, `sonnet`, `opus`, 
`opus[1m]` | Codex model ID (e.g. `gpt-5.2`) | `/` (e.g. `anthropic/claude-opus-4-5`, `openai/gpt-4o`, `groq/llama-3-70b`) | +| `effort` / `thinking` | yes | use `modelReasoningEffort` for reasoning models | via `effort:` (maps to thinking level) | +| Session resume / `--resume` | yes | yes | yes | + +Mixing providers in one workflow: set workflow-level `provider: claude`, then override per-node with `provider: codex` or `provider: pi`. Cross-provider `$nodeId.output` substitution works as expected. + +## Ten Principles for Safe Workflow Design + +1. Always use `--branch ` (or `worktree: {enabled: true}`) for workflows that modify the codebase. +2. Validate before running: `archon validate workflows `. +3. Tier your models. Haiku for routing and glue; Sonnet for reasoning and review; Opus only where the context is deep. +4. Use `output_format` for every node whose output downstream `when:` reads. Never pattern-match free-form AI text. +5. On Ralph-style loops, use `loop.fresh_context: true` and treat `$ARTIFACTS_DIR` as the source of truth. Command bodies should re-read state at the top of every iteration. +6. Use interactive loops for iterative refinement with the human. Use `approval:` nodes for single-point checkpoints. +7. Read-only analysis phases use `denied_tools: [Write, Edit, Bash]`. Separation of concerns. +8. Use `hooks.PostToolUse` to enforce post-change validation (type-check, lint). Tighter feedback loop than end-of-workflow review. +9. Large artifacts go through `$ARTIFACTS_DIR`. Small structured data goes through `$nodeId.output.field`. +10. AI can scaffold a workflow. Only a human can verify it. Read the YAML before running. 
diff --git a/.claude/skills/archon/references/repo-init.md b/.claude/skills/archon/references/repo-init.md index 66be6375f5..e44907fd2e 100644 --- a/.claude/skills/archon/references/repo-init.md +++ b/.claude/skills/archon/references/repo-init.md @@ -10,14 +10,27 @@ Create the following in your repository root: .archon/ ├── commands/ # Custom command files (.md) ├── workflows/ # Workflow definitions (.yaml) +├── scripts/ # Named scripts for script: nodes (.ts/.js for bun, .py for uv) — optional ├── mcp/ # MCP server config files (.json) — optional -└── config.yaml # Repo-specific configuration — optional +├── state/ # Cross-run workflow state — gitignored, never committed +├── config.yaml # Repo-specific configuration — optional +└── .env # Repo-scoped Archon env (optional; do NOT commit) ``` ```bash -mkdir -p .archon/commands .archon/workflows +mkdir -p .archon/commands .archon/workflows .archon/scripts ``` +**What each directory is for:** + +- `commands/` — Reusable prompt templates used by `command:` workflow nodes. Committed to git. +- `workflows/` — YAML workflow definitions. Committed to git. +- `scripts/` — Named TypeScript/JavaScript (bun) or Python (uv) scripts referenced by `script:` nodes. Extension determines runtime: `.ts`/`.js` → bun, `.py` → uv. Committed to git. +- `mcp/` — MCP server JSON configs. Usually checked in with `$ENV_VAR` references; avoid hardcoding secrets. Some teams gitignore this and rely entirely on env expansion. +- `state/` — Workflow-written cross-run state (e.g. the `repo-triage` dedup log). **Always gitignore** — these are runtime artifacts, not source. +- `config.yaml` — Repo-specific defaults (assistant, worktree settings, etc.). Committed to git. +- `.env` — Repo-scoped Archon env (loaded with `override: true` at boot). 
**Do NOT commit.** This is different from the target repo's top-level `.env` — that file belongs to the target project, and Archon strips its auto-loaded keys from subprocess env before spawning AI to prevent leakage. See **Three-Path Env Model** below. + ## Minimal config.yaml Create `.archon/config.yaml` only if you need to override defaults: @@ -52,11 +65,59 @@ Archon ships with built-in commands and workflows (like `archon-assist`, `archon Add to your `.gitignore`: ```gitignore -# Archon runtime artifacts (never commit) -.archon/mcp/ # May contain env var references +# Archon runtime artifacts — NEVER commit +.archon/state/ # Cross-run workflow state, runtime-only +.archon/.env # Repo-scoped Archon env (secrets) + +# Optional — gitignore if your MCP configs hardcode secrets +.archon/mcp/ +``` + +`.archon/commands/`, `.archon/workflows/`, and `.archon/scripts/` **should be committed** — they are part of your project's workflow definitions. `.archon/config.yaml` should be committed unless it contains secrets (use `.archon/.env` for those instead). + +## Three-Path Env Model + +Archon loads env from three distinct paths at boot, with different trust levels and precedence: + +| Path | Scope | Trust | Loaded? 
| +|------|-------|-------|---------| +| `~/.archon/.env` | User (home) | Trusted — user owns it | Yes, with `override: true` | +| `/.archon/.env` | Repo (per-project, Archon-owned) | Trusted — user owns it | Yes, with `override: true` (overrides home) | +| `/.env` | Target repo | **Untrusted** — belongs to the project being worked on | **Stripped from `process.env`** before subprocess spawn to prevent secret leakage (see [archon.diy/reference/security/](https://archon.diy/reference/security/#target-repo-env-isolation) for the full trust model) | + +Boot behavior emits observable log lines: + +``` +[archon] loaded N keys from ~/.archon/.env +[archon] loaded M keys from /path/to/repo/.archon/.env +[archon] stripped K keys from /path/to/repo (ANTHROPIC_API_KEY, OPENAI_API_KEY, ...) ``` -The `.archon/commands/` and `.archon/workflows/` directories should be committed — they are part of your project's workflow definitions. +**Where should you put what?** + +- **API keys for Archon itself** (`ANTHROPIC_API_KEY`, `CLAUDE_CODE_OAUTH_TOKEN`, `DATABASE_URL`, `SLACK_BOT_TOKEN`, etc.) → `~/.archon/.env` (shared across all repos) or `/.archon/.env` (per-repo override). +- **Target-project env that a workflow needs** (`GH_TOKEN`, `DOTENV_PRIVATE_KEY`, etc.) → see [Per-Project Env Injection](#per-project-env-injection) below. +- **Target-project env that Archon should NOT touch** → leave it in `/.env` where the project already expects it. Archon strips it from subprocess env but doesn't delete the file. + +The `archon setup --scope home|project [--force]` wizard writes to the right file for you and produces a timestamped backup on every rewrite. 
+ +## Per-Project Env Injection + +For env vars a workflow's `bash:` and `script:` subprocesses need (`GH_TOKEN` for `gh` calls, `DATABASE_URL` for a migration script, etc.), use one of the two **managed injection** surfaces — both inject into subprocess env at workflow execution time, after the target-repo `.env` strip: + +**Option 1: `.archon/config.yaml` `env:` block** (checked into git; values can be `$REF_NAME` expansions from Archon env): + +```yaml +env: + GH_TOKEN: $GH_TOKEN # expanded from ~/.archon/.env at runtime + BUILD_TARGET: production # literal value +``` + +**Option 2: Web UI Settings → Projects → Env Vars** — per-codebase, stored in the Archon DB, values never returned over the API (only keys are listed). Use this for values that should NOT appear in git. + +Both surfaces inject into: Claude/Codex/Pi subprocess env, `bash:` node subprocess env, `script:` node subprocess env, and direct chat messages that run against the codebase. The worktree isolation layer propagates them as well. + +> **About keys in the target repo's `/.env`**: Archon unconditionally strips the keys auto-loaded from `/.env` out of `process.env` at boot (see the Three-Path Env Model above) and the Bun subprocess is invoked with `--no-env-file`, so those values do NOT reach AI / bash / script subprocesses. If a workflow needs a value that currently lives in the target repo's `.env`, surface it through one of the two managed injection options above — don't expect the target `.env` to leak through. ## Global Configuration diff --git a/.claude/skills/archon/references/troubleshooting.md b/.claude/skills/archon/references/troubleshooting.md new file mode 100644 index 0000000000..099cccd928 --- /dev/null +++ b/.claude/skills/archon/references/troubleshooting.md @@ -0,0 +1,162 @@ +# Troubleshooting Workflows + +Where to look when a workflow fails, hangs, or does the wrong thing. 
+
+## Log Locations
+
+Workflow run logs are written as JSONL per run:
+
+```
+~/.archon/workspaces/<project>/<workflow>/logs/<run-id>.jsonl
+```
+
+Each line is a structured event. The discriminator is the `type` field. Values (see `packages/workflows/src/logger.ts` for the canonical list):
+
+| `type` | Meaning |
+|--------|---------|
+| `workflow_start` / `workflow_complete` / `workflow_error` | Run lifecycle |
+| `node_start` / `node_complete` / `node_error` / `node_skipped` | Node lifecycle |
+| `assistant` | AI assistant message — has `content` field with the full AI output |
+| `tool` | SDK tool invocation — has `tool_name`, `tool_input`, `duration_ms`, and optionally `tokens` |
+| `validation` | Workflow-level validation event — has `check` and `result` (`pass` / `fail` / `warn` / `unknown`) |
+
+> **Loop iterations and per-attempt retry events are NOT in the JSONL file.** They go through the workflow event emitter (WebSocket / `workflow_events` DB table) under `loop_iteration_started` / `loop_iteration_completed` etc. To see them, query the DB or the Web UI dashboard — not the JSONL log.
+
+Find the run ID from `archon workflow status` (most recent run). Then:
+
+```bash
+# Last assistant message (what the AI said before failure)
+jq 'select(.type == "assistant") | .content' <log-file> | tail -1
+
+# All error events (node failures + workflow-level failures)
+jq 'select(.type == "node_error" or .type == "workflow_error")' <log-file>
+
+# Full event stream
+cat <log-file> | jq .
+```
+
+Adapter logs (Slack / Telegram / Web / GitHub) are emitted to stderr when `LOG_LEVEL=debug` is set on the server.
+
+## Artifact Locations
+
+```
+~/.archon/workspaces/<project>/<workflow>/artifacts/runs/<run-id>/
+```
+
+Inspect artifacts when a multi-node workflow produces wrong output. The failing node's upstream artifact is usually where the problem originated.
+
+```bash
+ls ~/.archon/workspaces/<project>/<workflow>/artifacts/runs/<run-id>/
+cat ~/.archon/workspaces/<project>/<workflow>/artifacts/runs/<run-id>/issues/issue-42.md
+```
+
+Artifacts are **external** to the repo on purpose — they don't pollute git.
+
+## Common Failure Modes
+
+### "No base branch could be resolved"
+
+A node references `$BASE_BRANCH` in its prompt, but neither git auto-detection nor `worktree.baseBranch` in `.archon/config.yaml` produced a branch.
+
+**Fix:**
+1. Set `worktree.baseBranch: main` (or `dev`, or whatever) in `.archon/config.yaml`.
+2. Or pass `--from <branch>` on `archon workflow run`.
+3. Or remove the `$BASE_BRANCH` reference if the node doesn't actually need it.
+
+### "Claude Code not found" / "Codex CLI binary not found"
+
+Compiled-binary builds of Archon no longer embed Claude Code / Codex — you install them separately and Archon resolves the binary via env var or config.
+
+**Fix (Claude):**
+- Install: `curl -fsSL https://claude.ai/install.sh | bash` (or `npm install -g @anthropic-ai/claude-code`)
+- Set `CLAUDE_BIN_PATH=/path/to/claude` in `~/.archon/.env`, OR
+- Set `assistants.claude.claudeBinaryPath: /absolute/path` in `.archon/config.yaml`
+- Autodetect covers `$HOME/.local/bin/claude` (native installer) — no config needed if you used that path
+
+**Fix (Codex):**
+- Install: `npm install -g @openai/codex` (or platform-specific instructions)
+- Set `CODEX_CLI_PATH=/path/to/codex` or `assistants.codex.codexBinaryPath` in config
+- Autodetect covers the standard npm / Homebrew locations per platform
+
+See [archon.diy/getting-started/installation/](https://archon.diy/getting-started/installation/) for full platform-specific install paths.
+
+### Workflow shows `running` for a long time but nothing happens
+
+Three possibilities:
+
+1. **The AI is actually working.** Check `~/.archon/workspaces/<project>/<workflow>/logs/<run-id>.jsonl` — if you see recent `tool` or `assistant` events in the tail, it's fine. Wait.
+2. 
**The server crashed and left an orphan row.** Server startup no longer auto-fails orphaned `running` rows (per the "No Autonomous Lifecycle Mutation" rule — `CLAUDE.md`). Transition it manually:
+   - Web UI: Dashboard → Abandon or Cancel button on the run card
+   - CLI: `archon workflow abandon <run-id>` — marks the DB row cancelled without killing any subprocess. Right tool for orphans since the subprocess is already gone
+   - Chat (Slack / Telegram / Web): `/workflow cancel <run-id>` — actively terminates the subprocess. Use for a still-live run that needs to be interrupted (there is no `archon workflow cancel` CLI subcommand)
+3. **A node is past its `idle_timeout`.** The default is 5 minutes. Override with per-node `idle_timeout: 600000` (10 min) for long-running nodes.
+
+### Workflow fails mid-way; how do I resume?
+
+Auto-resume is default — just re-invoke the same workflow at the same cwd:
+
+```bash
+archon workflow run my-workflow "original message"
+# → "Resuming workflow — skipping N already-completed node(s)"
+```
+
+Use `--resume` only when you want to force-reuse the same worktree from a specific failed run. Use `archon workflow resume <run-id>` to force a specific run ID.
+
+**Caveat:** AI session context from prior nodes is NOT restored on resume. If a `context: shared` node depended on in-session memory, re-running it will have fresh context. Artifact-based handoff survives; in-context memory does not.
+
+### Approval gate not appearing on web UI
+
+You set `interactive: true` on the approval node but the workflow still runs in the background and no chat message appears.
+
+**Fix:** Set `interactive: true` at the **workflow level** too. Node-level `interactive` is ignored on web without workflow-level `interactive`. See `references/workflow-dag.md` §Approval Nodes and §Interactive Loops.
+
+### `MCP server connection failed: <server-name>` noise in chat
+
+User-level Claude plugin MCPs (e.g. `telegram`, `notion`) inherited from `~/.claude/` fail to connect in the headless subprocess. 
This is normal — they're not configured for Archon's worktree context. Archon filters these to debug logs (`dag.mcp_plugin_connection_suppressed`) and surfaces only workflow-configured MCP failures.
+
+If you see a failure for an MCP you DID configure via `mcp:` in the workflow: check the config JSON path, the MCP server's `command`/`args`, and any referenced env vars.
+
+### Node output is empty / `$nodeId.output.field` resolves to empty string
+
+Common causes:
+
+1. Upstream node is an AI node without `output_format` — the output is free-form text, JSON parsing fails, field access returns empty.
+2. Upstream node was **skipped** (its `when:` evaluated false). Downstream `when:` with `==` comparisons against a specific value will fail-closed.
+3. Bash/script node printed to stderr, not stdout. Only stdout is captured.
+4. For script nodes, non-zero exit on a non-existent file / missing import silently drops the output. Check the run log for `node_error` entries.
+
+## Useful Diagnostic Commands
+
+```bash
+# All active runs as JSON (running / paused / recently finished, depending on retention)
+archon workflow status --json | jq '.runs[]'
+
+# Human-readable status of any active runs
+archon workflow status
+
+# Active worktrees and their last activity
+archon isolation list
+
+# Validate a specific workflow before running
+archon validate workflows my-workflow
+
+# Validate a specific command
+archon validate commands my-command
+
+# Dump the last 50 lines of a workflow's log
+tail -n 50 ~/.archon/workspaces/<project>/<workflow>/logs/<run-id>.jsonl | jq .
+
+# Increase log verbosity (workflow run)
+archon workflow run my-workflow --verbose "..."
+
+# Increase server log verbosity
+LOG_LEVEL=debug bun run start
+```
+
+## Escalation: when nothing makes sense
+
+1. Run `archon version` and note the version.
+2. Run `archon validate workflows <workflow-name>` and capture the output.
+3. Grab the last ~50 lines of the run's JSONL log.
+4. 
Check the `CHANGELOG.md` for known issues / recent changes to the subsystem you're hitting. +5. File an issue at https://github.com/coleam00/Archon/issues with version, validate output, log tail, and the YAML. diff --git a/.claude/skills/archon/references/variables.md b/.claude/skills/archon/references/variables.md index 8f3d2dc57f..a02b546b3a 100644 --- a/.claude/skills/archon/references/variables.md +++ b/.claude/skills/archon/references/variables.md @@ -26,6 +26,7 @@ All variables are available in all workflows. The only exception is `$nodeId.out - **Command files** (`.archon/commands/*.md`) — all variables except `$nodeId.output` - **Inline `prompt:` fields** — in DAG prompt nodes and loop node prompts - **`bash:` scripts in DAG nodes** — `$nodeId.output` references are automatically shell-quoted (single-quoted with `'` escaped) +- **`script:` bodies in DAG nodes** — same substitution as bash, but `$nodeId.output` values are **NOT** shell-quoted. For TypeScript/bun scripts, assign directly (`const data = $nodeId.output;`) — JSON is valid JS expression syntax. **Avoid `String.raw\`$nodeId.output\``** — it silently breaks when the output contains a backtick (common in AI-generated markdown and `output_format` payloads). ## Substitution Order diff --git a/.claude/skills/archon/references/workflow-dag.md b/.claude/skills/archon/references/workflow-dag.md index eefb380646..93d2d0b2d0 100644 --- a/.claude/skills/archon/references/workflow-dag.md +++ b/.claude/skills/archon/references/workflow-dag.md @@ -20,9 +20,91 @@ nodes: depends_on: [other-node] # Node IDs that must complete first ``` -## Four Node Types (Mutually Exclusive) +## Workflow-Level Fields -Each node must have exactly ONE of these fields: +Top-level YAML fields on a workflow object. Per-node overrides (same name under a node) win over workflow-level defaults. 
+ +### Core + +| Field | Type | Description | +|-------|------|-------------| +| `name` | string (required) | Workflow identifier (used in `archon workflow run `) | +| `description` | string (required) | Human-readable summary. Used for routing; see [Workflow Description Best Practices](https://archon.diy/guides/authoring-workflows/#workflow-description-best-practices) | +| `provider` | string | AI provider (e.g. `claude`, `codex`, `pi`). Default: from `.archon/config.yaml` | +| `model` | string | Model override. Claude: `sonnet` \| `opus` \| `haiku` \| `claude-*` \| `inherit`. Codex: any non-Claude model ID | +| `interactive` | boolean | **Required for web UI** when the workflow has approval gates or `loop.interactive` nodes. Forces foreground execution so gate messages reach the user's chat. Default: `false` (background on web) | + +### Isolation + +| Field | Type | Description | +|-------|------|-------------| +| `worktree.enabled` | boolean | Pin isolation regardless of caller. `false` = always live checkout (CLI `--branch`/`--from` hard-error). `true` = always worktree (CLI `--no-worktree` hard-errors). Omit = caller decides. Use `false` for read-only workflows (triage, reporting) | + +Other worktree config (`baseBranch`, `copyFiles`, `initSubmodules`, `path`) lives in `.archon/config.yaml`, not the workflow YAML — see `references/repo-init.md`. + +### Claude SDK Advanced Options + +These fields apply to Claude nodes workflow-wide; each can be overridden per-node. Codex nodes ignore them with a warning. + +| Field | Type | Description | +|-------|------|-------------| +| `effort` | `'low'` \| `'medium'` \| `'high'` \| `'max'` | Claude Agent SDK reasoning depth. Different from Codex `modelReasoningEffort` below | +| `thinking` | string \| object | Extended thinking. String shorthand: `'adaptive'` \| `'enabled'` \| `'disabled'`. Object form: `{ type: 'enabled', budgetTokens: 8000 }` | +| `fallbackModel` | string | Model to use if the primary model fails (e.g. 
`claude-haiku-4-5-20251001`) | +| `betas` | string[] | SDK beta feature flags (non-empty array). Example: `['context-1m-2025-08-07']` for 1M-context Claude | +| `sandbox` | object | OS-level filesystem/network restrictions. Nested `network` / `filesystem` sub-objects — see [archon.diy/guides/authoring-workflows/#claude-sdk-advanced-options](https://archon.diy/guides/authoring-workflows/#claude-sdk-advanced-options) for the full schema. Layers on top of worktree isolation | + +Per-node-only (NOT valid at workflow level): `maxBudgetUsd`, `systemPrompt`. + +### Codex-Specific Options + +| Field | Type | Description | +|-------|------|-------------| +| `modelReasoningEffort` | `'minimal'` \| `'low'` \| `'medium'` \| `'high'` \| `'xhigh'` | Codex reasoning depth. Separate field from Claude's `effort` | +| `webSearchMode` | `'disabled'` \| `'cached'` \| `'live'` | Codex web search behavior. Default: `disabled` | +| `additionalDirectories` | string[] | Absolute paths Codex can read outside the codebase (shared libraries, docs repos) | + +### Complete workflow-level example + +```yaml +name: careful-migration +description: | + Plan a migration, get explicit approval, then implement under strict + sandbox and cost limits. Used by the ops team before destructive work. +provider: claude +model: sonnet +interactive: true # required — this workflow has an approval gate + +worktree: + enabled: true # always isolate; reject --no-worktree + +effort: high +thinking: adaptive +fallbackModel: claude-haiku-4-5-20251001 +betas: ['context-1m-2025-08-07'] +sandbox: + enabled: true + network: + allowedDomains: ['api.github.com'] + allowManagedDomainsOnly: true + filesystem: + denyWrite: ['/etc', '/usr'] + +nodes: + - id: plan + command: plan-migration + - id: review + approval: + message: "Review the migration plan above." 
+ depends_on: [plan] + - id: implement + command: implement-migration + depends_on: [review] +``` + +## Node Types (Mutually Exclusive) + +Each node must have exactly ONE of these fields: `command`, `prompt`, `bash`, `script`, `loop`, `approval`, or `cancel`. ### Command Node Runs a command file from `.archon/commands/`: @@ -54,6 +136,55 @@ Runs a shell script without AI: - **stderr** forwarded as warning, does not fail the node - No AI invoked — AI-specific fields are ignored - Use `timeout:` (milliseconds) for execution time limit +- `$nodeId.output` substitutions are **auto shell-quoted** (safe to embed) + +### Script Node +Runs TypeScript/JavaScript (via `bun`) or Python (via `uv`) without AI. Same stdout/stderr contract as bash nodes. + +**Inline script (TypeScript):** +```yaml +- id: parse + script: | + const raw = process.argv.slice(2).join(' ') || '{}'; + const data = JSON.parse(raw); + console.log(JSON.stringify({ items: data.items?.length ?? 0 })); + runtime: bun # REQUIRED: 'bun' or 'uv' + timeout: 30000 # ms, default: 120000 +``` + +**Inline script (Python) with uv dependencies:** +```yaml +- id: fetch + script: | + import httpx, json + r = httpx.get("https://api.github.com/repos/anthropics/anthropic-cookbook") + print(json.dumps({ "stars": r.json()["stargazers_count"] })) + runtime: uv + deps: ["httpx>=0.27"] # Optional — 'uv run --with <dep>'. Ignored for bun. +``` + +**Named script from `.archon/scripts/`:** +```yaml +- id: analyze + script: analyze-metrics # Resolves .archon/scripts/analyze-metrics.py + runtime: uv # Must match file extension (.ts/.js → bun, .py → uv) + deps: ["pandas>=2.0"] +``` + +- **Inline vs named**: a `script` value is treated as inline code if it contains a newline or any shell metacharacter (space, or any of: `;` `(` `)` `{` `}` `&` `|` `<` `>` `$` `` ` `` `"` `'`). Otherwise it's a named-script lookup (bare identifier). +- **Named script resolution**: `<repo>/.archon/scripts/` (wins) → `~/.archon/scripts/`. 
1-level subfolder grouping allowed. Extension determines runtime (`.ts`/`.js` → `bun`, `.py` → `uv`) and MUST match the declared `runtime:` +- **Dispatch**: + - `bun` + inline → `bun --no-env-file -e '<code>'` + - `bun` + named → `bun --no-env-file run <path>` + - `uv` + inline → `uv run [--with dep ...] python -c '<code>'` + - `uv` + named → `uv run [--with dep ...] <path>` +- **`deps`** is uv-only. Bun auto-installs on import; `deps` with `runtime: bun` emits a validator warning +- **stdout** captured as `$nodeId.output` (trailing newline trimmed) +- **stderr** forwarded as warning, does NOT fail the node. Non-zero exit DOES fail it. +- **`bun --no-env-file`** prevents target repo `.env` from leaking into the subprocess +- `$nodeId.output` substitutions are **NOT shell-quoted** in script bodies — assign directly (`const data = $nodeId.output;`) or parse with `JSON.parse` / `json.loads`; don't interpolate into shell syntax +- **CAUTION — `String.raw\`$nodeId.output\`` is fragile**: if the substituted value contains a backtick (common in AI-generated markdown, `output_format` payloads, or any content with code spans), the template literal terminates early and produces a cryptic `Expected ";"` parse error. Use direct assignment instead — JSON is valid JS expression syntax and needs no wrapper. +- AI-specific fields (`model`, `provider`, `hooks`, `mcp`, `skills`, `output_format`, `allowed_tools`, `denied_tools`, `agents`, `effort`, `thinking`, `maxBudgetUsd`, `systemPrompt`, `fallbackModel`, `betas`, `sandbox`) emit a loader warning and are ignored ### Loop Node Iterates an AI prompt until a completion signal or max iterations: @@ -83,7 +214,7 @@ All node types share these fields: | `depends_on` | string[] | `[]` | Node IDs that must settle before this node runs | | `when` | string | — | Condition expression. Node **skipped** when false | | `trigger_rule` | string | `all_success` | Join semantics for multiple dependencies | -| `idle_timeout` | number (ms) | 300000 | Per-node idle timeout. 
On loop nodes, applies per-iteration | +| `idle_timeout` | number (ms) | 300000 | Idle timeout for AI streaming (`command`, `prompt`) and per-iteration idle for `loop`. Accepted but ignored on `bash` and `script` — use `timeout` there | **Command, prompt, and bash nodes** (silently ignored on loop nodes, except `retry` which is a hard error): @@ -129,14 +260,53 @@ nodes: ## Conditions (`when:`) +Gate whether a node runs based on upstream output. A condition that evaluates to `false` skips the node (fail-closed — skipped nodes propagate their skipped state to dependants). + +### Operators + +**String comparison** (literal string equality): ```yaml -- id: investigate - command: investigate-bug - depends_on: [classify] - when: "$classify.output.issue_type == 'bug'" +when: "$nodeId.output == 'VALUE'" +when: "$nodeId.output != 'VALUE'" +when: "$nodeId.output.field == 'VALUE'" # JSON dot notation (requires output_format) ``` -**Syntax**: `$nodeId.output OPERATOR 'value'` — operators: `==`, `!=` only. Values single-quoted. Invalid expressions skip the node (fail-closed). +**Numeric comparison** (both sides auto-parsed as numbers; fail-closed if either side is not finite): +```yaml +when: "$score.output > '80'" +when: "$score.output >= '0.9'" +when: "$score.output < '100'" +when: "$score.output <= '5'" +when: "$score.output.confidence >= '0.9'" +``` + +All six operators — `==`, `!=`, `<`, `>`, `<=`, `>=` — are supported. Values are single-quoted strings (even for numeric comparisons). + +### Compound Expressions + +Combine conditions with `&&` (AND) and `||` (OR). **`&&` binds tighter than `||`.** No parentheses supported — structure expressions with that precedence in mind. 
+ +```yaml +when: "$a.output == 'X' && $b.output != 'Y'" +when: "$a.output == 'X' || $b.output == 'Y'" +when: "$score.output > '80' && $flag.output == 'true'" + +# Precedence: (A && B) || C +when: "$a.output == 'X' && $b.output == 'Y' || $c.output == 'Z'" +``` + +Short-circuit evaluation: `&&` stops at the first false, `||` stops at the first true. + +### Dot Notation (JSON Field Access) + +`$nodeId.output.field` parses the upstream output as JSON and extracts the named field. Returns empty string if parsing fails or the field is absent — which then fails-closed against any literal value. Requires the upstream node to have `output_format` set (for AI nodes) or to print valid JSON (for bash/script nodes). + +### Fail-Closed Rules + +- Invalid or unparseable expression → node skipped, warning logged +- Numeric operator with a non-numeric side → node skipped +- `$nodeId.output.field` on non-JSON output → field is empty → comparison fails +- Referenced node did not run (skipped upstream) → substitution is empty → comparison fails ## Node Output Substitution @@ -211,15 +381,53 @@ Loop nodes iterate an AI prompt until a completion condition is met. Use them fo max_iterations: 10 # Required. Integer >= 1. Fails if exceeded fresh_context: true # Optional. Default: false until_bash: "..." # Optional. Exit 0 = complete + interactive: true # Optional. Pauses between iterations for user input + gate_message: "..." # Required when interactive: true ``` | Field | Type | Required | Description | |-------|------|----------|-------------| -| `prompt` | string | Yes | Prompt template. Supports all variable substitution (`$ARGUMENTS`, `$nodeId.output`, etc.) | +| `prompt` | string | Yes | Prompt template. Supports all variable substitution (`$ARGUMENTS`, `$nodeId.output`, `$LOOP_USER_INPUT`, etc.) | | `until` | string | Yes | Completion signal to detect in AI output | | `max_iterations` | number | Yes | Hard limit. 
Node **fails** if exceeded | | `fresh_context` | boolean | No | Default `false`. `true` = fresh AI session each iteration | -| `until_bash` | string | No | Shell script run after each iteration. Exit 0 = complete | +| `until_bash` | string | No | Shell script run after each iteration. Exit 0 = complete. Variable substitution applies; `$nodeId.output` IS shell-quoted here | +| `interactive` | boolean | No | Default `false`. `true` = pause after each non-completing iteration for user feedback via `/workflow approve ` | +| `gate_message` | string | **Required when `interactive: true`** | Message shown to the user at each pause. Validated at parse time — a loop with `interactive: true` and no `gate_message` fails to load | + +### Interactive Loops + +Interactive loops pause between iterations so a human can provide feedback that feeds the next iteration. Use them for guided writing/refinement (e.g. PRD co-authoring, iterative design). + +```yaml +name: guided-refine +description: Refine an output with human feedback between iterations +interactive: true # REQUIRED at the workflow level for web UI + +nodes: + - id: refine + loop: + prompt: | + Review the current draft and improve it based on this feedback: + $LOOP_USER_INPUT + + When the output is satisfactory, output: DONE + until: DONE + max_iterations: 5 + interactive: true # node level — enables the pause + gate_message: | + Review the output above. Reply with feedback, or type DONE to finish. +``` + +The flow: +1. Iteration N runs. AI produces output. +2. If AI signalled completion (`DONE`) or `until_bash` exited 0, loop ends. +3. Otherwise: `gate_message` is sent to the user, workflow pauses (status = `paused`). +4. User runs `archon workflow approve ""` (or replies naturally in chat platforms). +5. Iteration N+1 runs with `$LOOP_USER_INPUT` substituted to the user's feedback — but **only on that first resumed iteration**. Subsequent iterations in the same resumed session see `$LOOP_USER_INPUT` as empty string. 
+6. Repeat. + +**Workflow-level `interactive: true` is required** for the gate message to reach the user on the web UI (otherwise the workflow dispatches to a background worker that can't deliver chat messages). The loader emits a warning if a node has `interactive: true` without workflow-level `interactive: true`. ### Completion Detection @@ -279,6 +487,148 @@ First iteration is always fresh regardless. --- +## Approval Nodes + +Approval nodes **pause the workflow** until a human approves or rejects the gate. Use them to insert review steps between AI-driven nodes — for example, reviewing a generated plan before committing to expensive implementation work. + +### Configuration + +```yaml +- id: review-gate + approval: + message: "Review the plan above before proceeding with implementation." + capture_response: false # Optional. true = user's comment stored as $review-gate.output + on_reject: # Optional. AI rework on rejection instead of cancel + prompt: "Revise based on feedback: $REJECTION_REASON" + max_attempts: 3 # Range 1–10, default 3. After max, workflow is cancelled. + depends_on: [plan] +``` + +### Fields + +| Field | Required | Description | +|-------|----------|-------------| +| `approval.message` | **Yes** | The message shown to the user when the workflow pauses | +| `approval.capture_response` | No | `true` = user's approval comment stored as `$.output` for downstream nodes. Default: `false` (downstream `$.output` is empty string) | +| `approval.on_reject.prompt` | No | Prompt run via AI when the user rejects. `$REJECTION_REASON` is substituted with the reject reason. After running, the workflow re-pauses at the same gate | +| `approval.on_reject.max_attempts` | No | Max times the on_reject prompt runs before the workflow is cancelled. Range: 1–10. 
Default: 3 | + +### Web UI Requirement + +Approval gates delivered on the Web UI require `interactive: true` at the **workflow level** — otherwise the workflow dispatches to a background worker and the gate message never reaches the user's chat window. + +```yaml +name: plan-approve-implement +interactive: true # REQUIRED for approval gates on web UI +nodes: + - id: plan + command: plan-feature + - id: review-gate + approval: + message: "Approve the plan to proceed." + depends_on: [plan] + - id: implement + command: implement + depends_on: [review-gate] +``` + +### Approve and Reject Commands + +```bash +# From the CLI +archon workflow approve +archon workflow approve --comment "looks good" +archon workflow reject +archon workflow reject --reason "plan needs more test coverage" + +# Cross-platform (Slack / Telegram / Web / GitHub chat) +/workflow approve +/workflow reject + +# Natural language (all platforms except CLI — auto-detects paused workflow) +User: "Looks good, proceed" +# → auto-approves. With capture_response: true, the message becomes $review-gate.output +``` + +### What Does NOT Work on Approval Nodes + +AI-specific fields (`model`, `provider`, `hooks`, `mcp`, `skills`, `output_format`, `allowed_tools`, `denied_tools`, `context`, `effort`, `thinking`, etc.) are accepted by the parser but emit a loader warning and are ignored — no AI runs during the pause. (Note: `on_reject.prompt` DOES run AI, using the workflow's default provider/model.) + +`retry`, `when`, `trigger_rule`, `depends_on`, `idle_timeout` all work. + +--- + +## Cancel Nodes + +Cancel nodes **terminate the workflow run** with a reason string. Useful for guarded exits — a `cancel:` node with a `when:` condition stops the workflow cleanly when preconditions aren't met. + +### Configuration + +```yaml +- id: gate-branch + cancel: "Refusing to run on main — this workflow modifies files." 
+ when: "$check-branch.output == 'main'" + depends_on: [check-branch] +``` + +When a cancel node runs, Archon: +- Marks the workflow run as `cancelled` (not `failed`) +- Stops in-flight parallel nodes via the existing cancellation plumbing +- Records the reason string in the run's metadata +- Emits a `node_completed` event for the cancel node itself + +### Fields + +| Field | Required | Description | +|-------|----------|-------------| +| `cancel` | **Yes** | Non-empty reason string shown to the user and recorded in metadata | + +Standard DAG fields (`id`, `depends_on`, `when`, `trigger_rule`, `idle_timeout`) all work. AI-specific fields emit a loader warning and are ignored — cancel nodes don't invoke AI. + +### When to use `cancel` vs failing a `bash:` check + +- **Use `cancel:`** when the precondition failure is **expected** (e.g., wrong branch, required file missing, feature flag disabled). The run shows as `cancelled`, which doesn't trigger the DAG auto-resume path. +- **Use a `bash:` node that exits non-zero** when the check itself fails (e.g., network error, tool missing). The run shows as `failed`, which auto-resumes on the next invocation. + +### Typical Patterns + +**Gate on upstream classification:** +```yaml +- id: classify + prompt: "Is the input safe to proceed? Output 'SAFE' or 'UNSAFE'." + allowed_tools: [] + +- id: stop-if-unsafe + cancel: "Refusing to proceed: input flagged UNSAFE by classifier." + depends_on: [classify] + when: "$classify.output != 'SAFE'" + +- id: do-work + command: the-work + depends_on: [classify] + when: "$classify.output == 'SAFE'" +``` + +**Stop before expensive step unless precondition met:** +```yaml +- id: check-budget + bash: | + spent=$(gh api /meta --jq '.rate.used // 0') + echo "$spent" + +- id: abort-if-over + cancel: "Aborting — GH API quota exhausted." 
+ depends_on: [check-budget] + when: "$check-budget.output > '4500'" + +- id: run-api-heavy-work + command: heavy-work + depends_on: [check-budget] + when: "$check-budget.output <= '4500'" +``` + +--- + ## Validate Before Finishing Before declaring a workflow complete, validate it: @@ -302,8 +652,13 @@ Use `--json` for machine-readable output. Use `archon validate commands ` - All `depends_on` reference existing IDs - No cycles - `$nodeId.output` refs in `when:`, `prompt:`, `loop.prompt:` must point to known IDs -- Exactly one of `command`, `prompt`, `bash`, `loop` per node +- Exactly one of `command`, `prompt`, `bash`, `script`, `loop`, `approval`, `cancel` per node +- Script nodes require `runtime: bun` or `runtime: uv` +- Named scripts must exist in `.archon/scripts/` or `~/.archon/scripts/` with extension matching declared runtime - `retry` on loop node = hard error +- `approval.message` required and non-empty +- `cancel` reason required and non-empty +- Approval `on_reject.max_attempts` must be 1–10 if set - `steps:` format rejected (deprecated — use `nodes:` only) ## Complete Example diff --git a/.claude/skills/release/SKILL.md b/.claude/skills/release/SKILL.md index 4f90f70978..1844336f2f 100644 --- a/.claude/skills/release/SKILL.md +++ b/.claude/skills/release/SKILL.md @@ -64,9 +64,15 @@ if [ -f scripts/build-binaries.sh ] && [ -f packages/cli/src/cli.ts ]; then packages/cli/src/cli.ts # Smoke test: the binary must start and exit 0 on a safe, non-interactive command. - # `version` or `--help` are both acceptable — pick one that does NOT touch the - # network, database, or require env vars. - if ! "$TMP_BINARY" version > /tmp/archon-preflight.log 2>&1; then + # Use `--help` (NOT `version`). The `version` command's compiled-binary code + # path depends on BUNDLED_IS_BINARY=true, which is set by scripts/build-binaries.sh + # — but we're doing a bare `bun build --compile` here to keep the smoke fast, + # so BUNDLED_IS_BINARY is still `false`. 
That sends `version` down the dev + # branch of version.ts which tries to read package.json from a path that only + # exists in node_modules, producing a false-positive ENOENT. `--help` has no + # such dev/binary branch and exercises the same module-init graph we're + # actually testing. Must NOT touch network, database, or require env vars. + if ! "$TMP_BINARY" --help > /tmp/archon-preflight.log 2>&1; then echo "ERROR: compiled binary crashed at startup" cat /tmp/archon-preflight.log echo "" diff --git a/.claude/skills/test-release/SKILL.md b/.claude/skills/test-release/SKILL.md index 31029014ea..c93d0c5bee 100644 --- a/.claude/skills/test-release/SKILL.md +++ b/.claude/skills/test-release/SKILL.md @@ -79,6 +79,8 @@ About to test: Path: brew (Homebrew tap on macOS) Version: 0.3.1 (expected) Cleanup: will uninstall after tests (brew uninstall + untap) + If `archon-stable` symlink is detected in Phase 2, it will be + restored at the end of Phase 5 by reinstalling the tap formula. Proceed? (y/N) ``` @@ -112,6 +114,18 @@ gh release view v --repo coleam00/Archon --json tagName,assets --jq '{t If the release does not exist or has no assets, abort with a clear message. Do not proceed to install a non-existent release. +4. **Detect persistent `archon-stable` install (brew path only).** If the user has renamed a prior brew install to `archon-stable` (the dual-homebrew pattern — see `~/.config/fish/functions/brew-upgrade-archon.fish`), Phase 5's `brew uninstall` will wipe it. Capture the state so Phase 5b can restore it: + +```bash +ARCHON_STABLE_WAS_INSTALLED="" +if [ -L /opt/homebrew/bin/archon-stable ] || [ -L /usr/local/bin/archon-stable ]; then + ARCHON_STABLE_WAS_INSTALLED="yes" + echo "Detected persistent archon-stable — will restore after Phase 5 uninstall." +fi +``` + +Export `ARCHON_STABLE_WAS_INSTALLED` into the environment used by Phase 5b. Only applies to the `brew` path — `curl-mac` and `curl-vps` don't go through brew and don't disturb `archon-stable`. 
+ ## Phase 3 — Install ### Path: brew @@ -352,6 +366,25 @@ archon version | head -1 # should match the dev version captured in Phase 2 ``` +**Restore `archon-stable` if it existed before the test** (dual-homebrew pattern — see Phase 2 item 4): + +```bash +if [ -n "$ARCHON_STABLE_WAS_INSTALLED" ]; then + echo "Restoring archon-stable (detected before test)..." + brew tap coleam00/archon + brew install coleam00/archon/archon + BREW_BIN="$(brew --prefix)/bin" + if [ -e "$BREW_BIN/archon" ]; then + mv "$BREW_BIN/archon" "$BREW_BIN/archon-stable" + echo "archon-stable restored: $(archon-stable version 2>/dev/null | head -1)" + else + echo "WARNING: brew install succeeded but $BREW_BIN/archon missing — check formula" + fi +fi +``` + +> **Note on the restored version**: this reinstalls from whatever the tap currently ships, which is typically the release you just tested (so `archon-stable` ends up at the newly-tested version). That's usually what the operator wants — you just verified the new release works, and you want `archon-stable` pointed at it. If you were testing an older version for back-version QA, the restored `archon-stable` will be the *current* tap formula, not the pre-test version. For that rare case, the operator should re-run `brew-upgrade-archon` manually after the test. + ### Path: curl-mac ```bash diff --git a/.gitignore b/.gitignore index 4b225843ea..133ca539b7 100644 --- a/.gitignore +++ b/.gitignore @@ -48,6 +48,12 @@ e2e-screenshots/ # Cross-run workflow state (e.g. 
issue-triage memory) .archon/state/ +# Maintainer standup — per-maintainer state and briefs (direction.md is committed) +.archon/maintainer-standup/profile.md +.archon/maintainer-standup/state.json +.archon/maintainer-standup/briefs/ +.archon/maintainer-standup/reviewed-prs.json + # Agent artifacts (generated, local only) .agents/ .agents/rca-reports/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 63d98f8264..76259ddb8f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- **`$LOOP_PREV_OUTPUT` workflow variable (loop nodes only)** — exposes the previous iteration's cleaned output (after `<thinking>` tag stripping) to the current iteration's prompt. Empty on the first iteration and on the first iteration after resuming from an interactive approval gate. Enables `fresh_context: true` loops to reference what the prior pass said or did without carrying full session history. (#1367) + +### Changed + +- **Provider/model resolution: trust the SDK, drop allow-lists.** Removed `inferProviderFromModel` and `isModelCompatible` entirely. Provider is now resolved via a flat explicit chain — `node.provider ?? workflow.provider ?? config.assistant` — and never inferred from the model string. Model strings pass through to the SDK unchanged; the SDK validates them at request time. Codex's stream loop now matches Claude's contract (every terminal close emits exactly one `result` chunk; `error` events without a recovering `turn.completed` synthesize `result.isError` with subtype `codex_stream_incomplete`; `turn.failed` becomes `codex_turn_failed`). AI nodes that exit the streaming loop with empty assistant text and no structured output now fail loudly with `dag.node_empty_output` instead of completing as silent zero-output successes. Provider-id typos (workflow-level and per-node) are caught at YAML load time. 
**Migration**: workflows that previously relied on cross-provider model inference (e.g. `model: gpt-5.2-codex` with no `provider:`, expecting Archon to pick `codex` because Claude's allow-list rejected the string) must now set `provider:` explicitly. Workflows that already set both `provider:` and `model:` — and workflows that set only `model:` matching `config.assistant` — keep working unchanged. (#1463) + +### Fixed + +- **Claude provider crashed in dev mode with `error: unknown option '--no-env-file'`.** The Claude Agent SDK switched from shipping `cli.js` to per-platform native binaries (via optional deps) in the 0.2.x series. Archon's `shouldPassNoEnvFile` predicate kept emitting the Bun-only `--no-env-file` flag in dev mode (when the SDK resolves its bundled binary), which the native binary rejects. Tightened the predicate to only emit the flag for explicitly-configured Bun-runnable JS entry points (`.js`/`.mjs`/`.cjs`). Target-repo `.env` isolation is unchanged — `stripCwdEnv()` at process boot remains the primary guard, and the native Claude binary does not auto-load `.env` from its cwd. (#1461) + ## [0.3.9] - 2026-04-22 First release with working compiled binaries since v0.3.6. Both v0.3.7 and v0.3.8 were tagged but neither shipped release assets — v0.3.7 was blocked by two genuine binary-runtime bugs (Pi SDK's module-init crash + Bun `--bytecode` producing broken output), and v0.3.8 was blocked by an unrelated CI smoke-test regression where `release.yml`'s Claude resolver test required an `origin` remote that the fresh `git init` test repo didn't have. Both superseded tags remain for history; their GitHub Releases were deleted at the time of tagging so `releases/latest` fell back to v0.3.6 throughout, keeping `install.sh` and Homebrew safe. v0.3.9 is what users actually install. 
diff --git a/CLAUDE.md b/CLAUDE.md index f2afd41e9c..de588e5987 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -32,6 +32,8 @@ **Git Workflow and Releases** - `main` is the release branch. Never commit directly to `main`. - `dev` is the working branch. All feature work branches off `dev` and merges back into `dev`. +- All PRs must use the template at `.github/PULL_REQUEST_TEMPLATE.md` — fill in every section. When opening a PR via `gh pr create`, copy the template into the body explicitly; GitHub only auto-applies it through the web UI. +- Link the issue with `Closes #` (or `Fixes` / `Resolves`) in the PR description so it auto-closes on merge. - To release, use the `/release` skill. It compares `dev` to `main`, generates changelog entries, bumps the version, and creates a PR to merge `dev` into `main`. - Releases follow Semantic Versioning: `/release` (patch), `/release minor`, `/release major`. - Changelog lives in `CHANGELOG.md` and follows Keep a Changelog format. @@ -499,10 +501,9 @@ assistants: 3. SDK defaults **Model Validation:** -- Workflows are validated at load time for provider/model compatibility -- Claude models: `sonnet`, `opus`, `haiku`, `claude-*`, `inherit` -- Codex models: Any model except Claude-specific aliases -- Invalid combinations fail workflow loading with clear error messages +- Workflows are validated at load time for provider _identity_ only — `provider:` (workflow-level and per-node) must be a registered provider id, otherwise the YAML is rejected with `Unknown provider ''. Registered: claude, codex, pi`. +- Model strings are NOT validated by Archon. Whatever the user writes in `model:` is forwarded verbatim to the resolved SDK. Vendor SDKs ship new models faster than Archon can update; the SDK and the upstream API are the source of truth for what names exist. +- Provider is resolved via an explicit chain: `node.provider ?? workflow.provider ?? config.assistant`. Model never influences provider selection. 
### Running the App in Worktrees @@ -689,6 +690,7 @@ async function createSession(conversationId: string, codebaseId: string) { - `$DOCS_DIR` - Documentation directory path; configured via `docs.path` in `.archon/config.yaml`. Defaults to `docs/`. Never throws. - `$LOOP_USER_INPUT` - User feedback provided via `/workflow approve ` at an interactive loop gate. Only populated on the first iteration of a resumed interactive loop; empty string on all other iterations. - `$REJECTION_REASON` - Reviewer feedback provided via `/workflow reject ` at an approval gate. Only populated in `on_reject` prompts; empty string elsewhere. +- `$LOOP_PREV_OUTPUT` - Cleaned output of the previous loop iteration (loop nodes only). Empty string on the first iteration (no prior output exists). Useful for `fresh_context: true` loops that need to reference what the previous pass produced or why it failed without carrying full session history. **Command Types:** diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c0120a16bd..314ab1e5f7 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -44,7 +44,8 @@ bun run validate 1. Create a feature branch from `dev` 2. Make your changes 3. Ensure all checks pass -4. Submit a PR with a clear description +4. Submit a PR using the template at [`.github/PULL_REQUEST_TEMPLATE.md`](./.github/PULL_REQUEST_TEMPLATE.md). GitHub fills it in automatically when you open a PR through the web UI. If you use `gh pr create`, copy the template into the body — leaving it empty or partially filled slows review. +5. Link the issue your PR addresses with `Closes #` (or `Fixes #` / `Resolves #`) in the description so it auto-closes on merge. 
## Code Style diff --git a/bun.lock b/bun.lock index d06d5ccac0..1944301e01 100644 --- a/bun.lock +++ b/bun.lock @@ -5,7 +5,7 @@ "": { "name": "archon", "dependencies": { - "@anthropic-ai/claude-agent-sdk": "^0.2.74", + "@anthropic-ai/claude-agent-sdk": "^0.2.121", }, "devDependencies": { "@eslint/js": "^9.39.1", @@ -23,7 +23,7 @@ }, "packages/adapters": { "name": "@archon/adapters", - "version": "0.3.6", + "version": "0.3.9", "dependencies": { "@archon/core": "workspace:*", "@archon/git": "workspace:*", @@ -41,7 +41,7 @@ }, "packages/cli": { "name": "@archon/cli", - "version": "0.3.6", + "version": "0.3.9", "bin": { "archon": "./src/cli.ts", }, @@ -63,7 +63,7 @@ }, "packages/core": { "name": "@archon/core", - "version": "0.3.6", + "version": "0.3.9", "dependencies": { "@archon/git": "workspace:*", "@archon/isolation": "workspace:*", @@ -83,7 +83,7 @@ }, "packages/docs-web": { "name": "@archon/docs-web", - "version": "0.3.6", + "version": "0.3.9", "dependencies": { "@astrojs/starlight": "^0.38.0", "astro": "^6.1.0", @@ -92,7 +92,7 @@ }, "packages/git": { "name": "@archon/git", - "version": "0.3.6", + "version": "0.3.9", "dependencies": { "@archon/paths": "workspace:*", }, @@ -102,7 +102,7 @@ }, "packages/isolation": { "name": "@archon/isolation", - "version": "0.3.6", + "version": "0.3.9", "dependencies": { "@archon/git": "workspace:*", "@archon/paths": "workspace:*", @@ -113,7 +113,7 @@ }, "packages/paths": { "name": "@archon/paths", - "version": "0.3.6", + "version": "0.3.9", "dependencies": { "dotenv": "^17", "pino": "^9", @@ -126,13 +126,13 @@ }, "packages/providers": { "name": "@archon/providers", - "version": "0.3.6", + "version": "0.3.9", "dependencies": { - "@anthropic-ai/claude-agent-sdk": "^0.2.89", + "@anthropic-ai/claude-agent-sdk": "^0.2.121", "@archon/paths": "workspace:*", "@mariozechner/pi-ai": "^0.67.5", "@mariozechner/pi-coding-agent": "^0.67.5", - "@openai/codex-sdk": "^0.116.0", + "@openai/codex-sdk": "^0.125.0", "@sinclair/typebox": "^0.34.41", 
}, "devDependencies": { @@ -144,7 +144,7 @@ }, "packages/server": { "name": "@archon/server", - "version": "0.3.6", + "version": "0.3.9", "dependencies": { "@archon/adapters": "workspace:*", "@archon/core": "workspace:*", @@ -163,7 +163,7 @@ }, "packages/web": { "name": "@archon/web", - "version": "0.3.6", + "version": "0.3.9", "dependencies": { "@dagrejs/dagre": "^2.0.4", "@radix-ui/react-alert-dialog": "^1.1.15", @@ -215,7 +215,7 @@ }, "packages/workflows": { "name": "@archon/workflows", - "version": "0.3.6", + "version": "0.3.9", "dependencies": { "@archon/git": "workspace:*", "@archon/paths": "workspace:*", @@ -235,9 +235,25 @@ "packages": { "@antfu/ni": ["@antfu/ni@25.0.0", "", { "dependencies": { "ansis": "^4.0.0", "fzf": "^0.5.2", "package-manager-detector": "^1.3.0", "tinyexec": "^1.0.1" }, "bin": { "na": "bin/na.mjs", "ni": "bin/ni.mjs", "nr": "bin/nr.mjs", "nci": "bin/nci.mjs", "nlx": "bin/nlx.mjs", "nun": "bin/nun.mjs", "nup": "bin/nup.mjs" } }, "sha512-9q/yCljni37pkMr4sPrI3G4jqdIk074+iukc5aFJl7kmDCCsiJrbZ6zKxnES1Gwg+i9RcDZwvktl23puGslmvA=="], - "@anthropic-ai/claude-agent-sdk": ["@anthropic-ai/claude-agent-sdk@0.2.74", "", { "optionalDependencies": { "@img/sharp-darwin-arm64": "^0.34.2", "@img/sharp-darwin-x64": "^0.34.2", "@img/sharp-linux-arm": "^0.34.2", "@img/sharp-linux-arm64": "^0.34.2", "@img/sharp-linux-x64": "^0.34.2", "@img/sharp-linuxmusl-arm64": "^0.34.2", "@img/sharp-linuxmusl-x64": "^0.34.2", "@img/sharp-win32-arm64": "^0.34.2", "@img/sharp-win32-x64": "^0.34.2" }, "peerDependencies": { "zod": "^4.0.0" } }, "sha512-S/SFSSbZHPL1HiQxAqCCxU3iHuE5nM+ir0OK1n0bZ+9hlVUH7OOn88AsV9s54E0c1kvH9YF4/foWH8J9kICsBw=="], + "@anthropic-ai/claude-agent-sdk": ["@anthropic-ai/claude-agent-sdk@0.2.121", "", { "dependencies": { "@anthropic-ai/sdk": "^0.81.0", "@modelcontextprotocol/sdk": "^1.29.0" }, "optionalDependencies": { "@anthropic-ai/claude-agent-sdk-darwin-arm64": "0.2.121", "@anthropic-ai/claude-agent-sdk-darwin-x64": "0.2.121", 
"@anthropic-ai/claude-agent-sdk-linux-arm64": "0.2.121", "@anthropic-ai/claude-agent-sdk-linux-arm64-musl": "0.2.121", "@anthropic-ai/claude-agent-sdk-linux-x64": "0.2.121", "@anthropic-ai/claude-agent-sdk-linux-x64-musl": "0.2.121", "@anthropic-ai/claude-agent-sdk-win32-arm64": "0.2.121", "@anthropic-ai/claude-agent-sdk-win32-x64": "0.2.121" }, "peerDependencies": { "zod": "^4.0.0" } }, "sha512-hwZNYTkGLKVixd/V/OCJwfH/SdfxZXGV0m6wvy5EBq6qfB+lvJTRz/MSOSa7dHqo4/F7zJY68crEEca68Wrxpw=="], - "@anthropic-ai/sdk": ["@anthropic-ai/sdk@0.74.0", "", { "dependencies": { "json-schema-to-ts": "^3.1.1" }, "peerDependencies": { "zod": "^3.25.0 || ^4.0.0" }, "optionalPeers": ["zod"], "bin": { "anthropic-ai-sdk": "bin/cli" } }, "sha512-srbJV7JKsc5cQ6eVuFzjZO7UR3xEPJqPamHFIe29bs38Ij2IripoAhC0S5NslNbaFUYqBKypmmpzMTpqfHEUDw=="], + "@anthropic-ai/claude-agent-sdk-darwin-arm64": ["@anthropic-ai/claude-agent-sdk-darwin-arm64@0.2.121", "", { "os": "darwin", "cpu": "arm64" }, "sha512-zVHcXvx6Hl/glDcOCH+EyNx4KPE9cMGLk42eEBSZe014tAN5W8bwM/By08iM6dxijnpH0NQRNNEAW+BryWzuDg=="], + + "@anthropic-ai/claude-agent-sdk-darwin-x64": ["@anthropic-ai/claude-agent-sdk-darwin-x64@0.2.121", "", { "os": "darwin", "cpu": "x64" }, "sha512-lIXdqKj+bpfDxCk/eU1F1TXNqsIsLTRrkUG/wx19WIGZ8gLUmmVSveUKGlNegTs7S6evMvuezprJzDJT4TcvPA=="], + + "@anthropic-ai/claude-agent-sdk-linux-arm64": ["@anthropic-ai/claude-agent-sdk-linux-arm64@0.2.121", "", { "os": "linux", "cpu": "arm64" }, "sha512-AQSnJzaiFvQpUPfO1tWLvsHgb6KNar4QYEQ/5/sk1itfgr3Fx9gxTreq43wX7AXSvkBX1QlDaP1aR1sfM/g/lQ=="], + + "@anthropic-ai/claude-agent-sdk-linux-arm64-musl": ["@anthropic-ai/claude-agent-sdk-linux-arm64-musl@0.2.121", "", { "os": "linux", "cpu": "arm64" }, "sha512-4XaGK+dRBYy7krln7BrDG0WsdE6ejUSgHjWHlUGXoubFfZUvls4GSahLcYjJBArLi4dLnxKw8zEuiQguPAIbrw=="], + + "@anthropic-ai/claude-agent-sdk-linux-x64": ["@anthropic-ai/claude-agent-sdk-linux-x64@0.2.121", "", { "os": "linux", "cpu": "x64" }, 
"sha512-DJUgpm7au086WaQV/S7BGOt2M8D90spGZRizT3twYsacf1BxzK1qsXqB/Pw1lUjPy6pI107pml/TaPzWuS/Vzg=="], + + "@anthropic-ai/claude-agent-sdk-linux-x64-musl": ["@anthropic-ai/claude-agent-sdk-linux-x64-musl@0.2.121", "", { "os": "linux", "cpu": "x64" }, "sha512-sQoGIgzLlBRrwizxsCV/lbaEuxXom/cfOwlDtQ2HnS1IzDDSjSf5d5pugpWItkOyXBWcHzMUu731WTTutvd/BQ=="], + + "@anthropic-ai/claude-agent-sdk-win32-arm64": ["@anthropic-ai/claude-agent-sdk-win32-arm64@0.2.121", "", { "os": "win32", "cpu": "arm64" }, "sha512-6n/NHkHxs0/lCJX3XPADjo1EFzXBf0IwYz/nyzJGBCDJjGKmgTe0i8eYBr/hviwt1/OPeK7dmVzVSVl6EL9Azg=="], + + "@anthropic-ai/claude-agent-sdk-win32-x64": ["@anthropic-ai/claude-agent-sdk-win32-x64@0.2.121", "", { "os": "win32", "cpu": "x64" }, "sha512-v2/R918/t94cCwc6rmbxk+UYeQPtF2oBLtQAk+cT0M60hvqmCZO2noyZx5uTp8TQncOlG4MkINIeNY2yfmWSoQ=="], + + "@anthropic-ai/sdk": ["@anthropic-ai/sdk@0.81.0", "", { "dependencies": { "json-schema-to-ts": "^3.1.1" }, "peerDependencies": { "zod": "^3.25.0 || ^4.0.0" }, "optionalPeers": ["zod"], "bin": { "anthropic-ai-sdk": "bin/cli" } }, "sha512-D4K5PvEV6wPiRtVlVsJHIUhHAmOZ6IT/I9rKlTf84gR7GyyAurPJK7z9BOf/AZqC5d1DhYQGJNKRmV+q8dGhgw=="], "@archon/adapters": ["@archon/adapters@workspace:packages/adapters"], @@ -545,9 +561,9 @@ "@img/colour": ["@img/colour@1.1.0", "", {}, "sha512-Td76q7j57o/tLVdgS746cYARfSyxk8iEfRxewL9h4OMzYhbW4TAcppl0mT4eyqXddh6L/jwoM75mo7ixa/pCeQ=="], - "@img/sharp-darwin-arm64": ["@img/sharp-darwin-arm64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-darwin-arm64": "1.2.4" }, "os": "darwin", "cpu": "arm64" }, "sha512-imtQ3WMJXbMY4fxb/Ndp6HBTNVtWCUI0WdobyheGf5+ad6xX8VIDO8u2xE4qc/fr08CKG/7dDseFtn6M6g/r3w=="], + "@img/sharp-darwin-arm64": ["@img/sharp-darwin-arm64@0.33.5", "", { "optionalDependencies": { "@img/sharp-libvips-darwin-arm64": "1.0.4" }, "os": "darwin", "cpu": "arm64" }, "sha512-UT4p+iz/2H4twwAoLCqfA9UH5pI6DggwKEGuaPy7nCVQ8ZsiY5PIcrRvD1DzuY3qYL07NtIQcWnBSY/heikIFQ=="], - "@img/sharp-darwin-x64": 
["@img/sharp-darwin-x64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-darwin-x64": "1.2.4" }, "os": "darwin", "cpu": "x64" }, "sha512-YNEFAF/4KQ/PeW0N+r+aVVsoIY0/qxxikF2SWdp+NRkmMB7y9LBZAVqQ4yhGCm/H3H270OSykqmQMKLBhBJDEw=="], + "@img/sharp-darwin-x64": ["@img/sharp-darwin-x64@0.33.5", "", { "optionalDependencies": { "@img/sharp-libvips-darwin-x64": "1.0.4" }, "os": "darwin", "cpu": "x64" }, "sha512-fyHac4jIc1ANYGRDxtiqelIbdWkIuQaI84Mv45KvGRRxSAa7o7d1ZKAOBaYbnepLC1WqxfpimdeWfvqqSGwR2Q=="], "@img/sharp-libvips-darwin-arm64": ["@img/sharp-libvips-darwin-arm64@1.0.4", "", { "os": "darwin", "cpu": "arm64" }, "sha512-XblONe153h0O2zuFfTAbQYAX2JhYmDHeWikp1LM9Hul9gVPjFY427k6dFEcOL72O01QxQsWi761svJ/ev9xEDg=="], @@ -569,9 +585,9 @@ "@img/sharp-libvips-linuxmusl-x64": ["@img/sharp-libvips-linuxmusl-x64@1.0.4", "", { "os": "linux", "cpu": "x64" }, "sha512-viYN1KX9m+/hGkJtvYYp+CCLgnJXwiQB39damAO7WMdKWlIhmYTfHjwSbQeUK/20vY154mwezd9HflVFM1wVSw=="], - "@img/sharp-linux-arm": ["@img/sharp-linux-arm@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-arm": "1.2.4" }, "os": "linux", "cpu": "arm" }, "sha512-9dLqsvwtg1uuXBGZKsxem9595+ujv0sJ6Vi8wcTANSFpwV/GONat5eCkzQo/1O6zRIkh0m/8+5BjrRr7jDUSZw=="], + "@img/sharp-linux-arm": ["@img/sharp-linux-arm@0.33.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-arm": "1.0.5" }, "os": "linux", "cpu": "arm" }, "sha512-JTS1eldqZbJxjvKaAkxhZmBqPRGmxgu+qFKSInv8moZ2AmT5Yib3EQ1c6gp493HvrvV8QgdOXdyaIBrhvFhBMQ=="], - "@img/sharp-linux-arm64": ["@img/sharp-linux-arm64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-arm64": "1.2.4" }, "os": "linux", "cpu": "arm64" }, "sha512-bKQzaJRY/bkPOXyKx5EVup7qkaojECG6NLYswgktOZjaXecSAeCWiZwwiFf3/Y+O1HrauiE3FVsGxFg8c24rZg=="], + "@img/sharp-linux-arm64": ["@img/sharp-linux-arm64@0.33.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-arm64": "1.0.4" }, "os": "linux", "cpu": "arm64" }, 
"sha512-JMVv+AMRyGOHtO1RFBiJy/MBsgz0x4AWrT6QoEVVTyh1E39TrCUpTRI7mx9VksGX4awWASxqCYLCV4wBZHAYxA=="], "@img/sharp-linux-ppc64": ["@img/sharp-linux-ppc64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-ppc64": "1.2.4" }, "os": "linux", "cpu": "ppc64" }, "sha512-7zznwNaqW6YtsfrGGDA6BRkISKAAE1Jo0QdpNYXNMHu2+0dTrPflTLNkpc8l7MUP5M16ZJcUvysVWWrMefZquA=="], @@ -579,11 +595,11 @@ "@img/sharp-linux-s390x": ["@img/sharp-linux-s390x@0.33.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-s390x": "1.0.4" }, "os": "linux", "cpu": "s390x" }, "sha512-y/5PCd+mP4CA/sPDKl2961b+C9d+vPAveS33s6Z3zfASk2j5upL6fXVPZi7ztePZ5CuH+1kW8JtvxgbuXHRa4Q=="], - "@img/sharp-linux-x64": ["@img/sharp-linux-x64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-x64": "1.2.4" }, "os": "linux", "cpu": "x64" }, "sha512-MEzd8HPKxVxVenwAa+JRPwEC7QFjoPWuS5NZnBt6B3pu7EG2Ge0id1oLHZpPJdn3OQK+BQDiw9zStiHBTJQQQQ=="], + "@img/sharp-linux-x64": ["@img/sharp-linux-x64@0.33.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-x64": "1.0.4" }, "os": "linux", "cpu": "x64" }, "sha512-opC+Ok5pRNAzuvq1AG0ar+1owsu842/Ab+4qvU879ippJBHvyY5n2mxF1izXqkPYlGuP/M556uh53jRLJmzTWA=="], - "@img/sharp-linuxmusl-arm64": ["@img/sharp-linuxmusl-arm64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linuxmusl-arm64": "1.2.4" }, "os": "linux", "cpu": "arm64" }, "sha512-fprJR6GtRsMt6Kyfq44IsChVZeGN97gTD331weR1ex1c1rypDEABN6Tm2xa1wE6lYb5DdEnk03NZPqA7Id21yg=="], + "@img/sharp-linuxmusl-arm64": ["@img/sharp-linuxmusl-arm64@0.33.5", "", { "optionalDependencies": { "@img/sharp-libvips-linuxmusl-arm64": "1.0.4" }, "os": "linux", "cpu": "arm64" }, "sha512-XrHMZwGQGvJg2V/oRSUfSAfjfPxO+4DkiRh6p2AFjLQztWUuY/o8Mq0eMQVIY7HJ1CDQUJlxGGZRw1a5bqmd1g=="], - "@img/sharp-linuxmusl-x64": ["@img/sharp-linuxmusl-x64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linuxmusl-x64": "1.2.4" }, "os": "linux", "cpu": "x64" }, 
"sha512-Jg8wNT1MUzIvhBFxViqrEhWDGzqymo3sV7z7ZsaWbZNDLXRJZoRGrjulp60YYtV4wfY8VIKcWidjojlLcWrd8Q=="], + "@img/sharp-linuxmusl-x64": ["@img/sharp-linuxmusl-x64@0.33.5", "", { "optionalDependencies": { "@img/sharp-libvips-linuxmusl-x64": "1.0.4" }, "os": "linux", "cpu": "x64" }, "sha512-WT+d/cgqKkkKySYmqoZ8y3pxx7lx9vVejxW/W4DOFMYVSkErR+w7mf2u8m/y4+xHe7yY9DAXQMWQhpnMuFfScw=="], "@img/sharp-wasm32": ["@img/sharp-wasm32@0.33.5", "", { "dependencies": { "@emnapi/runtime": "^1.2.0" }, "cpu": "none" }, "sha512-ykUW4LVGaMcU9lu9thv85CbRMAwfeadCJHRsg2GmeRa/cJxsVY9Rbd57JcMxBkKHag5U/x7TSBpScF4U8ElVzg=="], @@ -591,7 +607,7 @@ "@img/sharp-win32-ia32": ["@img/sharp-win32-ia32@0.33.5", "", { "os": "win32", "cpu": "ia32" }, "sha512-T36PblLaTwuVJ/zw/LaH0PdZkRz5rd3SmMHX8GSmR7vtNSP5Z6bQkExdSK7xGWyxLw4sUknBuugTelgw2faBbQ=="], - "@img/sharp-win32-x64": ["@img/sharp-win32-x64@0.34.5", "", { "os": "win32", "cpu": "x64" }, "sha512-+29YMsqY2/9eFEiW93eqWnuLcWcufowXewwSNIT6UwZdUUCrM3oFjMWH/Z6/TMmb4hlFenmfAVbpWeup2jryCw=="], + "@img/sharp-win32-x64": ["@img/sharp-win32-x64@0.33.5", "", { "os": "win32", "cpu": "x64" }, "sha512-MpY/o8/8kj+EcnxwvrP4aTJSWw/aZ7JIGR4aBeZkZw5B7/Jn+tY9/VNwtcoGmdT7GfggGIU4kygOMSbYnOrAbg=="], "@inquirer/ansi": ["@inquirer/ansi@1.0.2", "", {}, "sha512-S8qNSZiYzFd0wAcyG5AXCvUHC5Sr7xpZ9wZ2py9XR88jUz8wooStVx5M6dRzczbBWjic9NP7+rY0Xi7qqK/aMQ=="], @@ -649,7 +665,7 @@ "@mistralai/mistralai": ["@mistralai/mistralai@1.14.1", "", { "dependencies": { "ws": "^8.18.0", "zod": "^3.25.0 || ^4.0.0", "zod-to-json-schema": "^3.24.1" } }, "sha512-IiLmmZFCCTReQgPAT33r7KQ1nYo5JPdvGkrkZqA8qQ2qB1GHgs5LoP5K2ICyrjnpw2n8oSxMM/VP+liiKcGNlQ=="], - "@modelcontextprotocol/sdk": ["@modelcontextprotocol/sdk@1.27.1", "", { "dependencies": { "@hono/node-server": "^1.19.9", "ajv": "^8.17.1", "ajv-formats": "^3.0.1", "content-type": "^1.0.5", "cors": "^2.8.5", "cross-spawn": "^7.0.5", "eventsource": "^3.0.2", "eventsource-parser": "^3.0.0", "express": "^5.2.1", "express-rate-limit": "^8.2.1", "hono": 
"^4.11.4", "jose": "^6.1.3", "json-schema-typed": "^8.0.2", "pkce-challenge": "^5.0.0", "raw-body": "^3.0.0", "zod": "^3.25 || ^4.0", "zod-to-json-schema": "^3.25.1" }, "peerDependencies": { "@cfworker/json-schema": "^4.1.1" }, "optionalPeers": ["@cfworker/json-schema"] }, "sha512-sr6GbP+4edBwFndLbM60gf07z0FQ79gaExpnsjMGePXqFcSSb7t6iscpjk9DhFhwd+mTEQrzNafGP8/iGGFYaA=="], + "@modelcontextprotocol/sdk": ["@modelcontextprotocol/sdk@1.29.0", "", { "dependencies": { "@hono/node-server": "^1.19.9", "ajv": "^8.17.1", "ajv-formats": "^3.0.1", "content-type": "^1.0.5", "cors": "^2.8.5", "cross-spawn": "^7.0.5", "eventsource": "^3.0.2", "eventsource-parser": "^3.0.0", "express": "^5.2.1", "express-rate-limit": "^8.2.1", "hono": "^4.11.4", "jose": "^6.1.3", "json-schema-typed": "^8.0.2", "pkce-challenge": "^5.0.0", "raw-body": "^3.0.0", "zod": "^3.25 || ^4.0", "zod-to-json-schema": "^3.25.1" }, "peerDependencies": { "@cfworker/json-schema": "^4.1.1" }, "optionalPeers": ["@cfworker/json-schema"] }, "sha512-zo37mZA9hJWpULgkRpowewez1y6ML5GsXJPY8FI0tBBCd77HEvza4jDqRKOXgHNn867PVGCyTdzqpz0izu5ZjQ=="], "@mswjs/interceptors": ["@mswjs/interceptors@0.41.3", "", { "dependencies": { "@open-draft/deferred-promise": "^2.2.0", "@open-draft/logger": "^0.3.0", "@open-draft/until": "^2.0.0", "is-node-process": "^1.2.0", "outvariant": "^1.4.3", "strict-event-emitter": "^0.5.1" } }, "sha512-cXu86tF4VQVfwz8W1SPbhoRyHJkti6mjH/XJIxp40jhO4j2k1m4KYrEykxqWPkFF3vrK4rgQppBh//AwyGSXPA=="], @@ -695,21 +711,21 @@ "@open-draft/until": ["@open-draft/until@2.1.0", "", {}, "sha512-U69T3ItWHvLwGg5eJ0n3I62nWuE6ilHlmz7zM0npLBRvPRd7e6NYmg54vvRtP5mZG7kZqZCFVdsTWo7BPtBujg=="], - "@openai/codex": ["@openai/codex@0.116.0", "", { "optionalDependencies": { "@openai/codex-darwin-arm64": "npm:@openai/codex@0.116.0-darwin-arm64", "@openai/codex-darwin-x64": "npm:@openai/codex@0.116.0-darwin-x64", "@openai/codex-linux-arm64": "npm:@openai/codex@0.116.0-linux-arm64", "@openai/codex-linux-x64": 
"npm:@openai/codex@0.116.0-linux-x64", "@openai/codex-win32-arm64": "npm:@openai/codex@0.116.0-win32-arm64", "@openai/codex-win32-x64": "npm:@openai/codex@0.116.0-win32-x64" }, "bin": { "codex": "bin/codex.js" } }, "sha512-K6q9P2ZmpnzGmpS6Ybjvsdtvu8AbJx3f/Z4KmjH1u85StSS9TWMSQB8z0PPObKMejbtiIkHwhGyEIHi4iBYjig=="], + "@openai/codex": ["@openai/codex@0.125.0", "", { "optionalDependencies": { "@openai/codex-darwin-arm64": "npm:@openai/codex@0.125.0-darwin-arm64", "@openai/codex-darwin-x64": "npm:@openai/codex@0.125.0-darwin-x64", "@openai/codex-linux-arm64": "npm:@openai/codex@0.125.0-linux-arm64", "@openai/codex-linux-x64": "npm:@openai/codex@0.125.0-linux-x64", "@openai/codex-win32-arm64": "npm:@openai/codex@0.125.0-win32-arm64", "@openai/codex-win32-x64": "npm:@openai/codex@0.125.0-win32-x64" }, "bin": { "codex": "bin/codex.js" } }, "sha512-GiE9wlgL95u/5BRirY5d3EaRLU1tu7Y1R09R8lCHHVmcQdSmhS809FdPDWH3gIYHS7ZriAPqXwJ3aLA0WKl40Q=="], - "@openai/codex-darwin-arm64": ["@openai/codex@0.116.0-darwin-arm64", "", { "os": "darwin", "cpu": "arm64" }, "sha512-WkdL083p8uMeASpg8bwV0DPGgzkm48LjN3MyU2m/YukujbiLnknAmG29O2q2rFCLm0oLSDIGUK8EnXA4ZcAF9Q=="], + "@openai/codex-darwin-arm64": ["@openai/codex@0.125.0-darwin-arm64", "", { "os": "darwin", "cpu": "arm64" }, "sha512-Gn2fHiSO0XgyHp1OSd5DWUTm66Bv9UEuipW5pVEj1E+hWZCOrdqnYttllKFWtRGj5yiKefNX3JIxONgh/ZwlOQ=="], - "@openai/codex-darwin-x64": ["@openai/codex@0.116.0-darwin-x64", "", { "os": "darwin", "cpu": "x64" }, "sha512-Ax8uTwYSNIwGrzcNRcn0jJQhZzNcKGDbbn00Emde7gGOemjSLhRALjUaKjckAaW5xWnNqHTGdtzzPB4phNlDYg=="], + "@openai/codex-darwin-x64": ["@openai/codex@0.125.0-darwin-x64", "", { "os": "darwin", "cpu": "x64" }, "sha512-TZ5Lek2X/UXTI9LXFxzarvQaJeuTrqVh4POc7soO/8RclVnCxADnCf15sivxLd5eiFW4t0myGoeVoM4lciRiRg=="], - "@openai/codex-linux-arm64": ["@openai/codex@0.116.0-linux-arm64", "", { "os": "linux", "cpu": "arm64" }, "sha512-X7cL8rBSGDB+RSZc2FoKiqcMVeLPMmo06bkss/en4lLQsV1XG2DZI56WuXg92IOX3SjYl6Av/eOWgsb1t3UeLQ=="], + 
"@openai/codex-linux-arm64": ["@openai/codex@0.125.0-linux-arm64", "", { "os": "linux", "cpu": "arm64" }, "sha512-pPnJoJD6rZ2Iin0zNt/up36bO2/EOp2B+1/rPHu/lSq3PJbT3Fmnfut2kJy5LylXb7bGA2XQbtqOogZzIbnlkA=="], - "@openai/codex-linux-x64": ["@openai/codex@0.116.0-linux-x64", "", { "os": "linux", "cpu": "x64" }, "sha512-S9InOgJT3tj6uQp55NqrCA1k5tklwFaH00JdC2ElbRmxchm7ard4WxHSJZX9TiY8enj4cQoLIC04NFTUCO+/PQ=="], + "@openai/codex-linux-x64": ["@openai/codex@0.125.0-linux-x64", "", { "os": "linux", "cpu": "x64" }, "sha512-K2NTTEeBpz/G+N2x17UGWfauRt3So+ir4f+U/60l5PPnYEJB/w3YZrlXo2G9og8Dm9BqtoBAjoPV74sRv9tWWQ=="], - "@openai/codex-sdk": ["@openai/codex-sdk@0.116.0", "", { "dependencies": { "@openai/codex": "0.116.0" } }, "sha512-qrn1Pu5G1GJ9w4m/Lk3L3466ulMGG9SfyR0LPAaXdisuQI1rqgoUOuoZ4byX7cCzn0x1g2+WPc0apZgjMEK04Q=="], + "@openai/codex-sdk": ["@openai/codex-sdk@0.125.0", "", { "dependencies": { "@openai/codex": "0.125.0" } }, "sha512-1xCIHdSbQVF880nJ2aVWdPIsWZbSpKODwuP9y/gvtChDYhYfYEW0DKp2H8ZlctkzIjlzS/WzYmP6ZZPHIvs2Dg=="], - "@openai/codex-win32-arm64": ["@openai/codex@0.116.0-win32-arm64", "", { "os": "win32", "cpu": "arm64" }, "sha512-kX2oAUzkgZX9OsYpd4omv9IGf+9VWj4Vy3UtIAnQKBu1DTSzmTJmXDuDn87mkyUciSZadm2QbeqQQzm2NC0NYw=="], + "@openai/codex-win32-arm64": ["@openai/codex@0.125.0-win32-arm64", "", { "os": "win32", "cpu": "arm64" }, "sha512-zxoUakw9oIHIFrAyk400XkkLBJFA6nOym0NDq6sQ/jhdcYraKqNSRCII2nsBwZHk+/4zgUvuk52iuutgysY/rQ=="], - "@openai/codex-win32-x64": ["@openai/codex@0.116.0-win32-x64", "", { "os": "win32", "cpu": "x64" }, "sha512-6sBIMOoA9FNuxQvCCnK0P548Wqrlk3I9SMdtOCUg2zYzYU7jOF2mWS1VpRQ6R+Jvo2x50dxeJZ+W37dBmXfprw=="], + "@openai/codex-win32-x64": ["@openai/codex@0.125.0-win32-x64", "", { "os": "win32", "cpu": "x64" }, "sha512-ofpOK+OWH5QFuUZ9pTM0d/PcXUXiIP5z5DpRcE9MlucJoyOl4Zy4Nu3NcuHF4YzCkZMQb6x3j0tjDEPHKqNQzw=="], "@oslojs/encoding": ["@oslojs/encoding@1.1.0", "", {}, 
"sha512-70wQhgYmndg4GCPxPPxPGevRKqTIJ2Nh4OkiMWmDAVYsTQ+Ta7Sq+rPevXyXGdzr30/qZBnyOalCszoMxlyldQ=="], @@ -2785,7 +2801,7 @@ "@antfu/ni/tinyexec": ["tinyexec@1.0.2", "", {}, "sha512-W/KYk+NFhkmsYpuHq5JykngiOCnxeVL8v8dFnqxSD8qEEdRfXk1SDM6JzNqcERbcGYj9tMrDQBYV9cjgnunFIg=="], - "@archon/providers/@anthropic-ai/claude-agent-sdk": ["@anthropic-ai/claude-agent-sdk@0.2.89", "", { "dependencies": { "@anthropic-ai/sdk": "^0.74.0", "@modelcontextprotocol/sdk": "^1.27.1" }, "optionalDependencies": { "@img/sharp-darwin-arm64": "^0.34.2", "@img/sharp-darwin-x64": "^0.34.2", "@img/sharp-linux-arm": "^0.34.2", "@img/sharp-linux-arm64": "^0.34.2", "@img/sharp-linux-x64": "^0.34.2", "@img/sharp-linuxmusl-arm64": "^0.34.2", "@img/sharp-linuxmusl-x64": "^0.34.2", "@img/sharp-win32-arm64": "^0.34.2", "@img/sharp-win32-x64": "^0.34.2" }, "peerDependencies": { "zod": "^4.0.0" } }, "sha512-/9W0lyBGuGHw1uu7pQafsp6BLpxfqCv1QYE0Z/eZTX6lGHht4j4Q+O3UImzjsiyEE9cGkOAwZBGAEHDEqt+QUA=="], + "@anthropic-ai/claude-agent-sdk/zod": ["zod@4.3.6", "", {}, "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg=="], "@astrojs/markdown-remark/remark-parse": ["remark-parse@11.0.0", "", { "dependencies": { "@types/mdast": "^4.0.0", "mdast-util-from-markdown": "^2.0.0", "micromark-util-types": "^2.0.0", "unified": "^11.0.0" } }, "sha512-FCxlKLNGknS5ba/1lmpYijMUzX2esxW5xQqjWxw2eHFfS2MSdaHVINFmhjo+qN1WhZhNimq0dZATN9pH0IDrpA=="], @@ -2831,20 +2847,6 @@ "@expressive-code/plugin-shiki/shiki": ["shiki@3.23.0", "", { "dependencies": { "@shikijs/core": "3.23.0", "@shikijs/engine-javascript": "3.23.0", "@shikijs/engine-oniguruma": "3.23.0", "@shikijs/langs": "3.23.0", "@shikijs/themes": "3.23.0", "@shikijs/types": "3.23.0", "@shikijs/vscode-textmate": "^10.0.2", "@types/hast": "^3.0.4" } }, "sha512-55Dj73uq9ZXL5zyeRPzHQsK7Nbyt6Y10k5s7OjuFZGMhpp4r/rsLBH0o/0fstIzX1Lep9VxefWljK/SKCzygIA=="], - "@img/sharp-darwin-arm64/@img/sharp-libvips-darwin-arm64": 
["@img/sharp-libvips-darwin-arm64@1.2.4", "", { "os": "darwin", "cpu": "arm64" }, "sha512-zqjjo7RatFfFoP0MkQ51jfuFZBnVE2pRiaydKJ1G/rHZvnsrHAOcQALIi9sA5co5xenQdTugCvtb1cuf78Vf4g=="], - - "@img/sharp-darwin-x64/@img/sharp-libvips-darwin-x64": ["@img/sharp-libvips-darwin-x64@1.2.4", "", { "os": "darwin", "cpu": "x64" }, "sha512-1IOd5xfVhlGwX+zXv2N93k0yMONvUlANylbJw1eTah8K/Jtpi15KC+WSiaX/nBmbm2HxRM1gZ0nSdjSsrZbGKg=="], - - "@img/sharp-linux-arm/@img/sharp-libvips-linux-arm": ["@img/sharp-libvips-linux-arm@1.2.4", "", { "os": "linux", "cpu": "arm" }, "sha512-bFI7xcKFELdiNCVov8e44Ia4u2byA+l3XtsAj+Q8tfCwO6BQ8iDojYdvoPMqsKDkuoOo+X6HZA0s0q11ANMQ8A=="], - - "@img/sharp-linux-arm64/@img/sharp-libvips-linux-arm64": ["@img/sharp-libvips-linux-arm64@1.2.4", "", { "os": "linux", "cpu": "arm64" }, "sha512-excjX8DfsIcJ10x1Kzr4RcWe1edC9PquDRRPx3YVCvQv+U5p7Yin2s32ftzikXojb1PIFc/9Mt28/y+iRklkrw=="], - - "@img/sharp-linux-x64/@img/sharp-libvips-linux-x64": ["@img/sharp-libvips-linux-x64@1.2.4", "", { "os": "linux", "cpu": "x64" }, "sha512-tJxiiLsmHc9Ax1bz3oaOYBURTXGIRDODBqhveVHonrHJ9/+k89qbLl0bcJns+e4t4rvaNBxaEZsFtSfAdquPrw=="], - - "@img/sharp-linuxmusl-arm64/@img/sharp-libvips-linuxmusl-arm64": ["@img/sharp-libvips-linuxmusl-arm64@1.2.4", "", { "os": "linux", "cpu": "arm64" }, "sha512-FVQHuwx1IIuNow9QAbYUzJ+En8KcVm9Lk5+uGUQJHaZmMECZmOlix9HnH7n1TRkXMS0pGxIJokIVB9SuqZGGXw=="], - - "@img/sharp-linuxmusl-x64/@img/sharp-libvips-linuxmusl-x64": ["@img/sharp-libvips-linuxmusl-x64@1.2.4", "", { "os": "linux", "cpu": "x64" }, "sha512-+LpyBk7L44ZIXwz/VYfglaX/okxezESc6UxDSoyo2Ks6Jxc4Y7sGjpgU9s4PMgqgjj1gZCylTieNamqA1MF7Dg=="], - "@inquirer/core/wrap-ansi": ["wrap-ansi@6.2.0", "", { "dependencies": { "ansi-styles": "^4.0.0", "string-width": "^4.1.0", "strip-ansi": "^6.0.0" } }, "sha512-r6lPcBGxZXlIcymEu7InxDMhdW0KDxpLgoFLcguasxCaJ/SOIZwINatK9KY/tf+ZrlywOKU0UDj3ATXUBfxJXA=="], "@mariozechner/pi-ai/@anthropic-ai/sdk": ["@anthropic-ai/sdk@0.90.0", "", { "dependencies": { "json-schema-to-ts": 
"^3.1.1" }, "peerDependencies": { "zod": "^3.25.0 || ^4.0.0" }, "optionalPeers": ["zod"], "bin": { "anthropic-ai-sdk": "bin/cli" } }, "sha512-MzZtPabJF1b0FTDl6Z6H5ljphPwACLGP13lu8MTiB8jXaW/YXlpOp+Po2cVou3MPM5+f5toyLnul9whKCy7fBg=="], @@ -2879,6 +2881,8 @@ "@modelcontextprotocol/sdk/ajv": ["ajv@8.18.0", "", { "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", "json-schema-traverse": "^1.0.0", "require-from-string": "^2.0.2" } }, "sha512-PlXPeEWMXMZ7sPYOHqmDyCJzcfNrUr3fGNKtezX14ykXOEIvyK81d+qydx89KY5O71FKMPaQ2vBfBFI5NHR63A=="], + "@modelcontextprotocol/sdk/zod": ["zod@4.3.6", "", {}, "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg=="], + "@redocly/ajv/json-schema-traverse": ["json-schema-traverse@1.0.0", "", {}, "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug=="], "@redocly/openapi-core/colorette": ["colorette@1.4.0", "", {}, "sha512-Y2oEozpomLn7Q3HFP7dpww7AtMJplbM9lGZP6RDfHqmbeRjiwRg4n6VM6j4KLmRke85uWEI7JqF17f3pqdRA0g=="], @@ -3085,28 +3089,14 @@ "retext-stringify/unified": ["unified@11.0.5", "", { "dependencies": { "@types/unist": "^3.0.0", "bail": "^2.0.0", "devlop": "^1.0.0", "extend": "^3.0.0", "is-plain-obj": "^4.0.0", "trough": "^2.0.0", "vfile": "^6.0.0" } }, "sha512-xKvGhPWw3k84Qjh8bI3ZeJjqnyadK+GEFtazSfZv/rKeTkTjOJho6mFqh2SM96iIcZokxiOpg78GazTSg8+KHA=="], + "shadcn/@modelcontextprotocol/sdk": ["@modelcontextprotocol/sdk@1.27.1", "", { "dependencies": { "@hono/node-server": "^1.19.9", "ajv": "^8.17.1", "ajv-formats": "^3.0.1", "content-type": "^1.0.5", "cors": "^2.8.5", "cross-spawn": "^7.0.5", "eventsource": "^3.0.2", "eventsource-parser": "^3.0.0", "express": "^5.2.1", "express-rate-limit": "^8.2.1", "hono": "^4.11.4", "jose": "^6.1.3", "json-schema-typed": "^8.0.2", "pkce-challenge": "^5.0.0", "raw-body": "^3.0.0", "zod": "^3.25 || ^4.0", "zod-to-json-schema": "^3.25.1" }, "peerDependencies": { "@cfworker/json-schema": "^4.1.1" }, 
"optionalPeers": ["@cfworker/json-schema"] }, "sha512-sr6GbP+4edBwFndLbM60gf07z0FQ79gaExpnsjMGePXqFcSSb7t6iscpjk9DhFhwd+mTEQrzNafGP8/iGGFYaA=="], + "shadcn/commander": ["commander@14.0.3", "", {}, "sha512-H+y0Jo/T1RZ9qPP4Eh1pkcQcLRglraJaSLoyOtHxu6AapkjWVCy2Sit1QQ4x3Dng8qDlSsZEet7g5Pq06MvTgw=="], "shadcn/execa": ["execa@9.6.1", "", { "dependencies": { "@sindresorhus/merge-streams": "^4.0.0", "cross-spawn": "^7.0.6", "figures": "^6.1.0", "get-stream": "^9.0.0", "human-signals": "^8.0.1", "is-plain-obj": "^4.1.0", "is-stream": "^4.0.1", "npm-run-path": "^6.0.0", "pretty-ms": "^9.2.0", "signal-exit": "^4.1.0", "strip-final-newline": "^4.0.0", "yoctocolors": "^2.1.1" } }, "sha512-9Be3ZoN4LmYR90tUoVu2te2BsbzHfhJyfEiAVfz7N5/zv+jduIfLrV2xdQXOHbaD6KgpGdO9PRPM1Y4Q9QkPkA=="], "shadcn/node-fetch": ["node-fetch@3.3.2", "", { "dependencies": { "data-uri-to-buffer": "^4.0.0", "fetch-blob": "^3.1.4", "formdata-polyfill": "^4.0.10" } }, "sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA=="], - "sharp/@img/sharp-darwin-arm64": ["@img/sharp-darwin-arm64@0.33.5", "", { "optionalDependencies": { "@img/sharp-libvips-darwin-arm64": "1.0.4" }, "os": "darwin", "cpu": "arm64" }, "sha512-UT4p+iz/2H4twwAoLCqfA9UH5pI6DggwKEGuaPy7nCVQ8ZsiY5PIcrRvD1DzuY3qYL07NtIQcWnBSY/heikIFQ=="], - - "sharp/@img/sharp-darwin-x64": ["@img/sharp-darwin-x64@0.33.5", "", { "optionalDependencies": { "@img/sharp-libvips-darwin-x64": "1.0.4" }, "os": "darwin", "cpu": "x64" }, "sha512-fyHac4jIc1ANYGRDxtiqelIbdWkIuQaI84Mv45KvGRRxSAa7o7d1ZKAOBaYbnepLC1WqxfpimdeWfvqqSGwR2Q=="], - - "sharp/@img/sharp-linux-arm": ["@img/sharp-linux-arm@0.33.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-arm": "1.0.5" }, "os": "linux", "cpu": "arm" }, "sha512-JTS1eldqZbJxjvKaAkxhZmBqPRGmxgu+qFKSInv8moZ2AmT5Yib3EQ1c6gp493HvrvV8QgdOXdyaIBrhvFhBMQ=="], - - "sharp/@img/sharp-linux-arm64": ["@img/sharp-linux-arm64@0.33.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-arm64": 
"1.0.4" }, "os": "linux", "cpu": "arm64" }, "sha512-JMVv+AMRyGOHtO1RFBiJy/MBsgz0x4AWrT6QoEVVTyh1E39TrCUpTRI7mx9VksGX4awWASxqCYLCV4wBZHAYxA=="], - - "sharp/@img/sharp-linux-x64": ["@img/sharp-linux-x64@0.33.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-x64": "1.0.4" }, "os": "linux", "cpu": "x64" }, "sha512-opC+Ok5pRNAzuvq1AG0ar+1owsu842/Ab+4qvU879ippJBHvyY5n2mxF1izXqkPYlGuP/M556uh53jRLJmzTWA=="], - - "sharp/@img/sharp-linuxmusl-arm64": ["@img/sharp-linuxmusl-arm64@0.33.5", "", { "optionalDependencies": { "@img/sharp-libvips-linuxmusl-arm64": "1.0.4" }, "os": "linux", "cpu": "arm64" }, "sha512-XrHMZwGQGvJg2V/oRSUfSAfjfPxO+4DkiRh6p2AFjLQztWUuY/o8Mq0eMQVIY7HJ1CDQUJlxGGZRw1a5bqmd1g=="], - - "sharp/@img/sharp-linuxmusl-x64": ["@img/sharp-linuxmusl-x64@0.33.5", "", { "optionalDependencies": { "@img/sharp-libvips-linuxmusl-x64": "1.0.4" }, "os": "linux", "cpu": "x64" }, "sha512-WT+d/cgqKkkKySYmqoZ8y3pxx7lx9vVejxW/W4DOFMYVSkErR+w7mf2u8m/y4+xHe7yY9DAXQMWQhpnMuFfScw=="], - - "sharp/@img/sharp-win32-x64": ["@img/sharp-win32-x64@0.33.5", "", { "os": "win32", "cpu": "x64" }, "sha512-MpY/o8/8kj+EcnxwvrP4aTJSWw/aZ7JIGR4aBeZkZw5B7/Jn+tY9/VNwtcoGmdT7GfggGIU4kygOMSbYnOrAbg=="], - "sitemap/@types/node": ["@types/node@24.12.2", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-A1sre26ke7HDIuY/M23nd9gfB+nrmhtYyMINbjI1zHJxYteKR6qSMX56FsmjMcDb3SMcjJg5BiRRgOCC/yBD0g=="], "slice-ansi/ansi-styles": ["ansi-styles@6.2.3", "", {}, "sha512-4Dj6M28JB+oAH8kFkTLUo+a2jwOFkuqb3yucU0CANcRRUbxS0cP0nZYCGjcc3BNXwRIsUVmDGgzawme7zvJHvg=="], @@ -3219,6 +3209,10 @@ "ajv-formats/ajv/json-schema-traverse": ["json-schema-traverse@1.0.0", "", {}, "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug=="], + "astro/sharp/@img/sharp-darwin-arm64": ["@img/sharp-darwin-arm64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-darwin-arm64": "1.2.4" }, "os": "darwin", "cpu": "arm64" }, 
"sha512-imtQ3WMJXbMY4fxb/Ndp6HBTNVtWCUI0WdobyheGf5+ad6xX8VIDO8u2xE4qc/fr08CKG/7dDseFtn6M6g/r3w=="], + + "astro/sharp/@img/sharp-darwin-x64": ["@img/sharp-darwin-x64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-darwin-x64": "1.2.4" }, "os": "darwin", "cpu": "x64" }, "sha512-YNEFAF/4KQ/PeW0N+r+aVVsoIY0/qxxikF2SWdp+NRkmMB7y9LBZAVqQ4yhGCm/H3H270OSykqmQMKLBhBJDEw=="], + "astro/sharp/@img/sharp-libvips-darwin-arm64": ["@img/sharp-libvips-darwin-arm64@1.2.4", "", { "os": "darwin", "cpu": "arm64" }, "sha512-zqjjo7RatFfFoP0MkQ51jfuFZBnVE2pRiaydKJ1G/rHZvnsrHAOcQALIi9sA5co5xenQdTugCvtb1cuf78Vf4g=="], "astro/sharp/@img/sharp-libvips-darwin-x64": ["@img/sharp-libvips-darwin-x64@1.2.4", "", { "os": "darwin", "cpu": "x64" }, "sha512-1IOd5xfVhlGwX+zXv2N93k0yMONvUlANylbJw1eTah8K/Jtpi15KC+WSiaX/nBmbm2HxRM1gZ0nSdjSsrZbGKg=="], @@ -3235,12 +3229,24 @@ "astro/sharp/@img/sharp-libvips-linuxmusl-x64": ["@img/sharp-libvips-linuxmusl-x64@1.2.4", "", { "os": "linux", "cpu": "x64" }, "sha512-+LpyBk7L44ZIXwz/VYfglaX/okxezESc6UxDSoyo2Ks6Jxc4Y7sGjpgU9s4PMgqgjj1gZCylTieNamqA1MF7Dg=="], + "astro/sharp/@img/sharp-linux-arm": ["@img/sharp-linux-arm@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-arm": "1.2.4" }, "os": "linux", "cpu": "arm" }, "sha512-9dLqsvwtg1uuXBGZKsxem9595+ujv0sJ6Vi8wcTANSFpwV/GONat5eCkzQo/1O6zRIkh0m/8+5BjrRr7jDUSZw=="], + + "astro/sharp/@img/sharp-linux-arm64": ["@img/sharp-linux-arm64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-arm64": "1.2.4" }, "os": "linux", "cpu": "arm64" }, "sha512-bKQzaJRY/bkPOXyKx5EVup7qkaojECG6NLYswgktOZjaXecSAeCWiZwwiFf3/Y+O1HrauiE3FVsGxFg8c24rZg=="], + "astro/sharp/@img/sharp-linux-s390x": ["@img/sharp-linux-s390x@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-s390x": "1.2.4" }, "os": "linux", "cpu": "s390x" }, "sha512-nQtCk0PdKfho3eC5MrbQoigJ2gd1CgddUMkabUj+rBevs8tZ2cULOx46E7oyX+04WGfABgIwmMC0VqieTiR4jg=="], + "astro/sharp/@img/sharp-linux-x64": 
["@img/sharp-linux-x64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-x64": "1.2.4" }, "os": "linux", "cpu": "x64" }, "sha512-MEzd8HPKxVxVenwAa+JRPwEC7QFjoPWuS5NZnBt6B3pu7EG2Ge0id1oLHZpPJdn3OQK+BQDiw9zStiHBTJQQQQ=="], + + "astro/sharp/@img/sharp-linuxmusl-arm64": ["@img/sharp-linuxmusl-arm64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linuxmusl-arm64": "1.2.4" }, "os": "linux", "cpu": "arm64" }, "sha512-fprJR6GtRsMt6Kyfq44IsChVZeGN97gTD331weR1ex1c1rypDEABN6Tm2xa1wE6lYb5DdEnk03NZPqA7Id21yg=="], + + "astro/sharp/@img/sharp-linuxmusl-x64": ["@img/sharp-linuxmusl-x64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linuxmusl-x64": "1.2.4" }, "os": "linux", "cpu": "x64" }, "sha512-Jg8wNT1MUzIvhBFxViqrEhWDGzqymo3sV7z7ZsaWbZNDLXRJZoRGrjulp60YYtV4wfY8VIKcWidjojlLcWrd8Q=="], + "astro/sharp/@img/sharp-wasm32": ["@img/sharp-wasm32@0.34.5", "", { "dependencies": { "@emnapi/runtime": "^1.7.0" }, "cpu": "none" }, "sha512-OdWTEiVkY2PHwqkbBI8frFxQQFekHaSSkUIJkwzclWZe64O1X4UlUjqqqLaPbUpMOQk6FBu/HtlGXNblIs0huw=="], "astro/sharp/@img/sharp-win32-ia32": ["@img/sharp-win32-ia32@0.34.5", "", { "os": "win32", "cpu": "ia32" }, "sha512-FV9m/7NmeCmSHDD5j4+4pNI8Cp3aW+JvLoXcTUo0IqyjSfAZJ8dIUmijx1qaJsIiU+Hosw6xM5KijAWRJCSgNg=="], + "astro/sharp/@img/sharp-win32-x64": ["@img/sharp-win32-x64@0.34.5", "", { "os": "win32", "cpu": "x64" }, "sha512-+29YMsqY2/9eFEiW93eqWnuLcWcufowXewwSNIT6UwZdUUCrM3oFjMWH/Z6/TMmb4hlFenmfAVbpWeup2jryCw=="], + "cliui/string-width/emoji-regex": ["emoji-regex@8.0.0", "", {}, "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A=="], "cliui/string-width/is-fullwidth-code-point": ["is-fullwidth-code-point@3.0.0", "", {}, "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg=="], @@ -3399,6 +3405,8 @@ "retext/unified/trough": ["trough@2.2.0", "", {}, 
"sha512-tmMpK00BjZiUyVyvrBK7knerNgmgvcV/KLVyuma/SC+TQN167GrMRciANTz09+k3zW8L8t60jWO1GpfkZdjTaw=="], + "shadcn/@modelcontextprotocol/sdk/ajv": ["ajv@8.18.0", "", { "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", "json-schema-traverse": "^1.0.0", "require-from-string": "^2.0.2" } }, "sha512-PlXPeEWMXMZ7sPYOHqmDyCJzcfNrUr3fGNKtezX14ykXOEIvyK81d+qydx89KY5O71FKMPaQ2vBfBFI5NHR63A=="], + "shadcn/execa/get-stream": ["get-stream@9.0.1", "", { "dependencies": { "@sec-ant/readable-stream": "^0.4.1", "is-stream": "^4.0.1" } }, "sha512-kVCxPF3vQM/N0B1PmoqVUqgHP+EeVjmZSQn+1oCRPxd2P21P2F19lIgbR3HBosbB1PUhOAoctJnfEn2GbN2eZA=="], "shadcn/execa/human-signals": ["human-signals@8.0.1", "", {}, "sha512-eKCa6bwnJhvxj14kZk5NCPc6Hb6BdsU9DZcOnmQKSnO1VKrfV0zCvtttPZUsBvjmNDn8rpcJfpwSYnHBjc95MQ=="], @@ -3525,6 +3533,8 @@ "remark-parse/mdast-util-from-markdown/unist-util-stringify-position/@types/unist": ["@types/unist@2.0.11", "", {}, "sha512-CmBKiL6NNo/OqgmMn95Fk9Whlp2mtvIv+KNpQKN2F4SjvrEesubTRWGYSg+BnWZOnlCaSTU1sMpsBOzgbYhnsA=="], + "shadcn/@modelcontextprotocol/sdk/ajv/json-schema-traverse": ["json-schema-traverse@1.0.0", "", {}, "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug=="], + "shadcn/execa/npm-run-path/path-key": ["path-key@4.0.0", "", {}, "sha512-haREypq7xkM7ErfgIyA0z+Bj4AGKlMSdlQE2jvJo6huWD1EdkKYV+G/T4nq0YEF2vgTT8kqMFKo1uHn950r4SQ=="], "telegramify-markdown/remark-gfm/mdast-util-gfm/mdast-util-gfm-autolink-literal": ["mdast-util-gfm-autolink-literal@0.1.3", "", { "dependencies": { "ccount": "^1.0.0", "mdast-util-find-and-replace": "^1.1.0", "micromark": "^2.11.3" } }, "sha512-GjmLjWrXg1wqMIO9+ZsRik/s7PLwTaeCHVB7vRxUwLntZc8mzmTsLVr6HW1yLokcnhfURsn5zmSVdi3/xWWu1A=="], diff --git a/eslint.config.mjs b/eslint.config.mjs index 152c4245dd..6e926f7bc0 100644 --- a/eslint.config.mjs +++ b/eslint.config.mjs @@ -17,6 +17,7 @@ export default tseslint.config( 'worktrees/**', '.claude/worktrees/**', '.claude/skills/**', + 
'.archon/**', // User workflow/script/command content — not in any tsconfig project '**/*.generated.ts', // Auto-generated source files (content inlined via JSON.stringify) '**/*.js', '*.mjs', diff --git a/homebrew/archon.rb b/homebrew/archon.rb index 0bac58a339..d8f4c45c18 100644 --- a/homebrew/archon.rb +++ b/homebrew/archon.rb @@ -7,28 +7,28 @@ class Archon < Formula desc "Remote agentic coding platform - control AI assistants from anywhere" homepage "https://github.com/coleam00/Archon" - version "0.3.6" + version "0.3.9" license "MIT" on_macos do on_arm do url "https://github.com/coleam00/Archon/releases/download/v#{version}/archon-darwin-arm64" - sha256 "96b6dac50b046eece9eddbb988a0c39b4f9a0e2faac66e49b977ba6360069e86" + sha256 "b617f85a2181938b793b25ad816a9f6b3149d184f64b2e9e2ea2430f27778d64" end on_intel do url "https://github.com/coleam00/Archon/releases/download/v#{version}/archon-darwin-x64" - sha256 "09f1dbe12417b4300b7b07b531eb7391a286305f8d4eafc11e7f61f5d26eb8eb" + sha256 "5a928af5e0e67ffe084159161a9ea3994a9304cc39bd06132719cd89cc715e86" end end on_linux do on_arm do url "https://github.com/coleam00/Archon/releases/download/v#{version}/archon-linux-arm64" - sha256 "80b06a6ff699ec57cd4a3e49cfe7b899a3e8212688d70285f5a887bf10086731" + sha256 "567bfca9175e10d9b4fd748e3862bbd34141a234766a7ecf0a714d9c27b8c92e" end on_intel do url "https://github.com/coleam00/Archon/releases/download/v#{version}/archon-linux-x64" - sha256 "09f5dac6db8037ed6f3e5b7e9c5eb8e37f19822a4ed2bf4cd7e654780f9d00de" + sha256 "c918218df2f0f853d107e6b1727dcd9accc034b183ffbccea93a331d8d376ed8" end end diff --git a/package.json b/package.json index 536c8ff7b2..6f4b82da7b 100644 --- a/package.json +++ b/package.json @@ -52,6 +52,6 @@ "axios": "^1.15.0" }, "dependencies": { - "@anthropic-ai/claude-agent-sdk": "^0.2.74" + "@anthropic-ai/claude-agent-sdk": "^0.2.121" } } diff --git a/packages/cli/src/commands/setup.test.ts b/packages/cli/src/commands/setup.test.ts index a0fa7373b5..03a6b32d60 
100644 --- a/packages/cli/src/commands/setup.test.ts +++ b/packages/cli/src/commands/setup.test.ts @@ -407,11 +407,11 @@ CODEX_ACCOUNT_ID=account1 }); describe('copyArchonSkill', () => { - it('should create skill files in target directory', () => { + it('should create skill files in target directory', async () => { const target = join(TEST_DIR, 'skill-target'); mkdirSync(target, { recursive: true }); - copyArchonSkill(target); + await copyArchonSkill(target); expect(existsSync(join(target, '.claude', 'skills', 'archon', 'SKILL.md'))).toBe(true); expect(existsSync(join(target, '.claude', 'skills', 'archon', 'guides', 'setup.md'))).toBe( @@ -425,11 +425,11 @@ CODEX_ACCOUNT_ID=account1 ).toBe(true); }); - it('should write non-empty content to skill files', () => { + it('should write non-empty content to skill files', async () => { const target = join(TEST_DIR, 'skill-target-content'); mkdirSync(target, { recursive: true }); - copyArchonSkill(target); + await copyArchonSkill(target); const content = readFileSync( join(target, '.claude', 'skills', 'archon', 'SKILL.md'), @@ -439,23 +439,23 @@ CODEX_ACCOUNT_ID=account1 expect(content).toContain('archon'); }); - it('should overwrite existing skill files', () => { + it('should overwrite existing skill files', async () => { const target = join(TEST_DIR, 'skill-target-overwrite'); const skillDir = join(target, '.claude', 'skills', 'archon'); mkdirSync(skillDir, { recursive: true }); writeFileSync(join(skillDir, 'SKILL.md'), 'old content'); - copyArchonSkill(target); + await copyArchonSkill(target); const content = readFileSync(join(skillDir, 'SKILL.md'), 'utf-8'); expect(content).not.toBe('old content'); }); - it('should create skill files even when target directory does not exist', () => { + it('should create skill files even when target directory does not exist', async () => { const target = join(TEST_DIR, 'non-existent-parent', 'skill-target-new'); // Do NOT pre-create target — copyArchonSkill must handle it - 
copyArchonSkill(target); + await copyArchonSkill(target); expect(existsSync(join(target, '.claude', 'skills', 'archon', 'SKILL.md'))).toBe(true); }); diff --git a/packages/cli/src/commands/setup.ts b/packages/cli/src/commands/setup.ts index 2160a99d8a..b1405d6298 100644 --- a/packages/cli/src/commands/setup.ts +++ b/packages/cli/src/commands/setup.ts @@ -35,7 +35,6 @@ import { import { existsSync, readFileSync, writeFileSync, mkdirSync, copyFileSync, chmodSync } from 'fs'; import { parse as parseDotenv } from 'dotenv'; import { join, dirname } from 'path'; -import { BUNDLED_SKILL_FILES } from '../bundled-skill'; import { homedir } from 'os'; import { randomBytes } from 'crypto'; import { spawn, execSync, type ChildProcess } from 'child_process'; @@ -1448,8 +1447,18 @@ export function writeScopedEnv( * Copy the bundled Archon skill files to /.claude/skills/archon/ * * Always overwrites existing files to ensure the latest skill version is installed. + * + * The `bundled-skill` module is dynamically imported here so that its 18 top-level + * `import … with { type: 'text' }` statements only execute when this function is + * actually called. Compiled binaries (`bun build --compile`) still statically + * analyze the literal-string `import()` and embed the chunk; linked-source + * installs (`bun link`) don't touch the source skill files unless the user runs + * `archon setup`. Without this indirection, every `archon` invocation — + * including `archon --help` — fails at module load when the source skill files + * are missing from disk. 
*/ -export function copyArchonSkill(targetPath: string): void { +export async function copyArchonSkill(targetPath: string): Promise<void> { + const { BUNDLED_SKILL_FILES } = await import('../bundled-skill'); const skillRoot = join(targetPath, '.claude', 'skills', 'archon'); for (const [relativePath, content] of Object.entries(BUNDLED_SKILL_FILES)) { const dest = join(skillRoot, relativePath); @@ -1841,7 +1850,7 @@ export async function setupCommand(options: SetupOptions): Promise<void> { const skillTarget = skillTargetRaw; s.start('Installing Archon skill...'); try { - copyArchonSkill(skillTarget); + await copyArchonSkill(skillTarget); } catch (err) { s.stop('Archon skill installation failed'); cancel(`Could not install skill: ${(err as NodeJS.ErrnoException).message}`); diff --git a/packages/docs-web/src/content/docs/adapters/community/discord.md b/packages/docs-web/src/content/docs/adapters/community/discord.md index 0f3e59082c..b719d719ce 100644 --- a/packages/docs-web/src/content/docs/adapters/community/discord.md +++ b/packages/docs-web/src/content/docs/adapters/community/discord.md @@ -40,6 +40,14 @@ Connect Archon to Discord so you can interact with your AI coding assistant from 2. Enable **"Message Content Intent"** (required for the bot to read messages) 3. Save changes +:::caution +Skipping this step causes Discord to reject the bot's connection with +`Used disallowed intents`. Archon will log +`discord.start_failed_continuing_without_adapter` and keep the rest of +the server running, but the Discord adapter will be unavailable until +the intent is enabled and the server is restarted. +::: + ## Invite Bot to Your Server 1. 
Go to "OAuth2" > "URL Generator" in the left sidebar diff --git a/packages/docs-web/src/content/docs/adapters/web.md b/packages/docs-web/src/content/docs/adapters/web.md index 0025ca0219..bb5e43ba91 100644 --- a/packages/docs-web/src/content/docs/adapters/web.md +++ b/packages/docs-web/src/content/docs/adapters/web.md @@ -166,7 +166,7 @@ Click on a workflow run (from the dashboard or progress card) to open the execut The Workflow Builder at `/workflows/builder` provides a visual editor for creating and modifying workflow YAML files. Features include: - **DAG canvas** -- Drag-and-drop nodes to build your workflow graph visually -- **Node palette** -- Add command, prompt, bash, and loop nodes from a sidebar library +- **Node palette** -- Drag command, prompt, and bash nodes from a sidebar library. Additional node types (`script`, `loop`, `approval`, `cancel`) are editable via the Code / Split view - **Node inspector** -- Click a node to configure its properties (command, prompt text, dependencies, model overrides, hooks, MCP servers, etc.) in a tabbed panel - **View modes** -- Toggle between Visual, Split, and Code views. Split mode shows the canvas and YAML side by side. - **Command picker** -- Browse available commands when configuring command nodes diff --git a/packages/docs-web/src/content/docs/book/dag-workflows.md b/packages/docs-web/src/content/docs/book/dag-workflows.md index 2a66702584..558df2590f 100644 --- a/packages/docs-web/src/content/docs/book/dag-workflows.md +++ b/packages/docs-web/src/content/docs/book/dag-workflows.md @@ -230,20 +230,23 @@ The classify-and-route example uses `none_failed_min_one_success` on `implement` ## Node Types -Archon supports four node types: +Archon supports seven node types. Exactly one mode field is required per node: | Type | Syntax | When to use | |------|--------|-------------| | **Command** | `command: my-command` | Load a command from `.archon/commands/my-command.md`. The standard choice. 
| | **Prompt** | `prompt: "inline instructions..."` | Quick, one-off instructions that don't need a reusable command file. | | **Bash** | `bash: "shell command"` | Run a shell script without AI. Stdout is captured as `$nodeId.output`. Deterministic operations only. | +| **Script** | `script: "..." ` + `runtime: bun \| uv` | Run TypeScript/JavaScript (bun) or Python (uv) without AI. Inline code or named reference to `.archon/scripts/`. Stdout captured as `$nodeId.output`. See [Script Nodes](/guides/script-nodes/). | | **Loop** | `loop: { prompt: "...", until: SIGNAL }` | Repeat an AI prompt until a completion signal appears in the output. See [Loop Nodes](/guides/loop-nodes/). | +| **Approval** | `approval: { message: "..." }` | Pause the workflow for a human approve/reject decision. See [Approval Nodes](/guides/approval-nodes/). | +| **Cancel** | `cancel: "reason string"` | Terminate the workflow run (status: cancelled, not failed). Usually gated with `when:`. | **Command** is the most common. Use it for anything you'll reuse across workflows. **Prompt** is convenient for glue nodes — summarizing outputs, formatting data — where the logic is simple and workflow-specific. -**Bash** is powerful for deterministic operations: running tests, checking git status, reading a file, fetching an API. The AI doesn't run the bash command; your shell does. The output becomes a variable for downstream nodes: +**Bash** is powerful for deterministic shell operations: running tests, checking git status, reading a file, fetching an API. The AI doesn't run the bash command; your shell does. The output becomes a variable for downstream nodes: ```yaml - id: check-tests @@ -255,6 +258,22 @@ Archon supports four node types: prompt: "Test output: $check-tests.output\n\nFix any failures." ``` +**Script** is for deterministic work that needs a real programming language — parsing JSON, transforming data between AI nodes, calling typed HTTP clients. 
Use `runtime: bun` for TypeScript/JavaScript and `runtime: uv` for Python: + +```yaml +- id: transform + script: | + const raw = process.env.UPSTREAM ?? '{}'; + const items = JSON.parse(raw).items ?? []; + console.log(JSON.stringify({ count: items.length })); + runtime: bun + +- id: analyze + script: analyze-metrics # Named script: .archon/scripts/analyze-metrics.py + runtime: uv + deps: ["pandas>=2.0"] # uv-only; bun auto-installs imports +``` + **Loop** is for iterative tasks where you don't know how many steps it will take. The AI runs until it emits a completion signal: ```yaml @@ -269,6 +288,32 @@ Archon supports four node types: fresh_context: true ``` +**Approval** pauses the workflow for human review. The downstream nodes don't run until the user approves in chat, CLI, or web UI: + +```yaml +interactive: true # required at workflow level for web UI delivery + +nodes: + - id: plan + command: plan-feature + - id: review-gate + approval: + message: "Review the plan above." + depends_on: [plan] + - id: implement + command: implement + depends_on: [review-gate] +``` + +**Cancel** terminates the workflow with a reason string. Pair with `when:` for guarded exits — the run shows as `cancelled` rather than `failed`: + +```yaml +- id: gate-branch + cancel: "Refusing to run on main — this workflow modifies files." 
+ when: "$check-branch.output == 'main'" + depends_on: [check-branch] +``` + --- ## Best Practices diff --git a/packages/docs-web/src/content/docs/book/quick-reference.md b/packages/docs-web/src/content/docs/book/quick-reference.md index ae37659f7a..6275f5487d 100644 --- a/packages/docs-web/src/content/docs/book/quick-reference.md +++ b/packages/docs-web/src/content/docs/book/quick-reference.md @@ -124,7 +124,10 @@ All nodes share these base fields: | `command` | One of | string | Name of a command file in `.archon/commands/` | | `prompt` | One of | string | Inline AI instructions | | `bash` | One of | string | Shell script (runs without AI; stdout captured as `$nodeId.output`) | +| `script` | One of | string | TypeScript/JavaScript (bun) or Python (uv) — inline or named ref to `.archon/scripts/`. Requires `runtime`. See [Script Nodes](/guides/script-nodes/) | | `loop` | One of | object | Loop configuration (see Loop Options below) | +| `approval` | One of | object | Pause for human review; see [Approval Nodes](/guides/approval-nodes/) | +| `cancel` | One of | string | Reason string; terminates the run with `cancelled` status (not `failed`). Usually gated with `when:` | | `depends_on` | No | string[] | Node IDs that must complete before this node runs | | `when` | No | string | Condition expression; node is skipped if false | | `trigger_rule` | No | string | Join semantics when multiple upstreams exist (see Trigger Rules) | @@ -135,12 +138,30 @@ All nodes share these base fields: | `allowed_tools` | No | string[] | Restrict available tools to this list (Claude only) | | `denied_tools` | No | string[] | Remove specific tools from this node's context (Claude only) | | `idle_timeout` | No | number | Per-node idle timeout in milliseconds (default: 5 minutes) | -| `retry` | No | object | Retry configuration for transient failures (see Retry Options) | +| `retry` | No | object | Retry configuration for transient failures (see Retry Options). 
**Hard error on loop nodes** | | `hooks` | No | object | SDK hook callbacks (Claude only; see Hook Schema) | | `mcp` | No | string | Path to MCP server config JSON file (Claude only) | | `skills` | No | string[] | Skill names to preload into this node's context (Claude only) | +| `agents` | No | object | Inline sub-agent definitions keyed by kebab-case ID. Claude only | -> **bash node timeout**: The `timeout` field on bash nodes is in **milliseconds** (default: 120000). This differs from hook `timeout`, which is in seconds. +**Script-specific fields** (required when `script:` is set): + +| Field | Required | Type | Description | +|-------|----------|------|-------------| +| `runtime` | Yes | `'bun'` \| `'uv'` | Which runtime executes the script. Must match file extension for named scripts (`.ts`/`.js` → bun, `.py` → uv) | +| `deps` | No | string[] | Python dependencies for `uv run --with`. Ignored for bun (bun auto-installs) | +| `timeout` | No | number | Hard kill in ms. Default: 120000 (2 min). Same semantics as `bash` timeout | + +**Approval-specific fields** (required when `approval:` is set): + +| Field | Required | Type | Description | +|-------|----------|------|-------------| +| `approval.message` | Yes | string | The message shown to the user when the workflow pauses | +| `approval.capture_response` | No | boolean | `true` = user's comment becomes `$nodeId.output`. Default: `false` | +| `approval.on_reject.prompt` | No | string | AI rework prompt when the user rejects. `$REJECTION_REASON` substituted | +| `approval.on_reject.max_attempts` | No | number | Max rework iterations before cancel. Range 1-10, default 3 | + +> **bash and script node timeout**: The `timeout` field is in **milliseconds** (default: 120000). This differs from hook `timeout`, which is in seconds. 
### Trigger Rules @@ -272,7 +293,7 @@ defaults: | `Routing unclear — falling back to archon-assist` | No workflow matched the input | Use an explicit workflow name: `archon workflow run my-workflow "..."` | | `Worktree already exists for branch X` | Prior run left a worktree | Run `archon complete X` or `archon isolation cleanup` | | `Not a git repository` | Running outside a repo | `cd` into a git repo first — workflow and isolation commands require one | -| `Model X is not valid for provider Y` | Provider/model mismatch | Each provider accepts specific models — check the provider's `isModelCompatible` rules. Claude accepts `sonnet`, `opus`, `haiku`, `claude-*`; Codex accepts other models. | +| `Unknown provider 'X'. Registered: claude, codex, pi` | Typo in `provider:` (workflow root or node-level) | Set `provider:` to one of the registered ids. Model strings themselves are not validated at load time — the SDK rejects unknown models at request time. | | `$BASE_BRANCH referenced but could not be detected` | No base branch set and auto-detection failed | Set `worktree.baseBranch` in `.archon/config.yaml` or ensure `main`/`master` exists | | Workflow hangs with no output | Node idle timeout hit | Increase `idle_timeout` on the node (milliseconds) | diff --git a/packages/docs-web/src/content/docs/contributing/adding-a-community-provider.md b/packages/docs-web/src/content/docs/contributing/adding-a-community-provider.md index 4a521a4a8d..ef23e8cd56 100644 --- a/packages/docs-web/src/content/docs/contributing/adding-a-community-provider.md +++ b/packages/docs-web/src/content/docs/contributing/adding-a-community-provider.md @@ -124,7 +124,6 @@ export function registerYourProvider(): void { displayName: 'Your Provider (community)', factory: () => new YourProvider(), capabilities: YOUR_CAPABILITIES, - isModelCompatible: (model) => /* pattern check */, builtIn: false, // ← important: community providers are NOT built-in }); } @@ -147,7 +146,7 @@ Co-locate tests next to your 
code. The Pi tests use this isolation pattern: - Mock the SDK (`mock.module` at the top of the file, before importing your provider). - Tests that touch `mock.module` are split into separate `bun test` invocations in `packages/providers/package.json` (see existing entries for the Pi files). Bun's `mock.module` is process-global and irreversible — splitting prevents cross-file pollution. -- Registry test (`packages/providers/src/registry.test.ts`): add a `describe` block asserting `builtIn: false`, idempotent registration, and `isModelCompatible` behavior. +- Registry test (`packages/providers/src/registry.test.ts`): add a `describe` block asserting `builtIn: false` and idempotent registration. ### 5. Capability discipline diff --git a/packages/docs-web/src/content/docs/getting-started/ai-assistants.md b/packages/docs-web/src/content/docs/getting-started/ai-assistants.md index ff4f8e6533..7a65b97adf 100644 --- a/packages/docs-web/src/content/docs/getting-started/ai-assistants.md +++ b/packages/docs-web/src/content/docs/getting-started/ai-assistants.md @@ -229,7 +229,7 @@ DEFAULT_AI_ASSISTANT=codex ## Pi (Community Provider) -**One adapter, ~20 LLM backends.** Pi (`@mariozechner/pi-coding-agent`) is a community-maintained coding-agent harness that Archon integrates as the first community provider. It unlocks Anthropic, OpenAI, Google (Gemini + Vertex), Groq, Mistral, Cerebras, xAI, OpenRouter, Hugging Face, and more under a single `provider: pi` entry. +**One adapter, ~20 LLM backends.** Pi (`@mariozechner/pi-coding-agent`) is a community-maintained coding-agent harness that Archon integrates as the first community provider. It unlocks Anthropic, OpenAI, Google (Gemini + Vertex), Groq, Mistral, Cerebras, xAI, OpenRouter, Hugging Face, and local inference (LM Studio, ollama, llamacpp, custom OpenAI-compatible endpoints registered in `~/.pi/agent/models.json`) under a single `provider: pi` entry. 
Pi is registered as `builtIn: false` — it validates the community-provider seam rather than being a core-team-maintained option. If it proves stable and valuable it may be promoted to `builtIn: true` later. @@ -262,7 +262,20 @@ Pi supports both OAuth subscriptions and API keys. Archon's adapter reads your e | `openrouter` | `OPENROUTER_API_KEY` | | `huggingface` | `HUGGINGFACE_API_KEY` | -Additional Pi backends exist (Azure, Bedrock, Vertex, etc.) — file an issue if you need them wired. +Additional cloud backends exist (Azure, Bedrock, Vertex, etc.) — file an issue if you need an env-var shortcut wired for them. + +**Local / custom providers (no credentials needed):** + +Providers that aren't in the env-var table above (LM Studio, ollama, llamacpp, custom OpenAI-compatible endpoints) work without any Archon-side configuration. Register them in `~/.pi/agent/models.json` per Pi's own docs and reference them as `<provider>/<model-id>`: + +```yaml +# .archon/config.yaml +assistants: + pi: + model: lm-studio/qwen2.5-coder-14b # whatever ID you registered with Pi +``` + +Archon logs an info-level `pi.auth_missing` event when no credentials are found and continues — Pi's SDK then connects directly to the local endpoint defined in `models.json`. If the provider does require auth (a less-common cloud backend not in the env-var table) the SDK call fails downstream; the `pi.auth_missing` breadcrumb in the log lets you trace it back to a missing env-var mapping. ### Extensions (on by default) diff --git a/packages/docs-web/src/content/docs/guides/authoring-workflows.md b/packages/docs-web/src/content/docs/guides/authoring-workflows.md index 0fbc282640..408fdb8e90 100644 --- a/packages/docs-web/src/content/docs/guides/authoring-workflows.md +++ b/packages/docs-web/src/content/docs/guides/authoring-workflows.md @@ -126,6 +126,10 @@ worktree: # Optional: pin isolation behavior regardless o # like triage/reporting. true = must use a worktree; # CLI --no-worktree hard-errors. 
Omit to let the # caller decide (current default = worktree). +tags: [GitLab, Review] # Optional: explicit Web UI filter tags. Overrides the + # keyword-based tag inference. An empty list (`tags: []`) + # suppresses inference and shows no tags. Omit to fall + # back to inferred tags (the default). # Required for DAG-based nodes: @@ -174,6 +178,7 @@ nodes: | `command` | string | Command name to load from `.archon/commands/` | | `prompt` | string | Inline prompt string | | `bash` | string | Shell script (no AI). Stdout captured as `$nodeId.output`. Optional `timeout` (ms, default 120000) | +| `script` | string | TypeScript/JavaScript (via `bun`) or Python (via `uv`) — inline code or named reference to `.archon/scripts/`. Stdout captured as `$nodeId.output`. Requires `runtime: bun` or `runtime: uv`. Optional `deps` (uv only) and `timeout` (ms, default 120000). See [Script Nodes](/guides/script-nodes/) | | `loop` | object | Iterative AI prompt until completion signal. See [Loop Nodes](/guides/loop-nodes/) | | `approval` | object | Pauses workflow for human review. See [Approval Nodes](/guides/approval-nodes/) | | `cancel` | string | Terminates the workflow run with a reason string. Uses existing cancellation plumbing — in-flight parallel nodes are stopped | @@ -597,16 +602,15 @@ provider: claude # Any registered provider (default: from config) model: sonnet # Model override (default: from config assistants.claude.model) ``` -**Claude models:** -- `sonnet` - Fast, balanced (recommended) -- `opus` - Powerful, expensive -- `haiku` - Fast, lightweight -- `claude-*` - Full model IDs (e.g., `claude-3-5-sonnet-20241022`) -- `inherit` - Use model from previous session +**Model strings:** Whatever you write in `model:` is forwarded verbatim to the resolved provider's SDK. Archon doesn't keep an internal allow-list, because vendor SDKs ship new models faster than this doc can. The provider's API decides whether the string is valid at request time. 
-**Codex models:** -- Any OpenAI model ID (e.g., `gpt-5.3-codex`, `o5-pro`) -- Cannot use Claude model aliases +Common shapes you'll see in practice: + +- **Claude (Anthropic):** family aliases (`sonnet`, `opus`, `haiku`), full model IDs (`claude-opus-4-7`, `claude-3-5-sonnet-20241022`), context-window suffixed forms (`opus[1m]`, `claude-opus-4-7[1m]`), or `inherit` to reuse the previous session's model. +- **Codex (OpenAI):** any OpenAI model ID — `gpt-5.3-codex`, `gpt-5.2`, `o5-pro`, etc. +- **Pi (community):** `<provider>/<model>` refs — e.g. `google/gemini-2.5-pro`, `openrouter/qwen/qwen3-coder`. + +If the SDK rejects the string at request time, the node fails loudly with the SDK's error message — Archon never silently re-routes a model from one provider to another based on the string. ### Codex-Specific Options @@ -671,18 +675,19 @@ nodes: **Platforms:** `interactive` only affects the web platform. CLI, Slack, Telegram, and GitHub always run workflows in foreground mode regardless of this setting. -### Model Validation +### Provider Validation -Workflows are validated at load time: -- Provider/model compatibility checked -- Invalid combinations fail with clear error messages -- Validation errors shown in `/workflow list` +Workflows are validated at load time for **provider identity only**: +- Both the workflow-level `provider:` and any per-node `provider:` overrides must name a registered provider (`claude`, `codex`, `pi`). +- Validation errors are shown in `/workflow list`. Example validation error: ``` -Model "sonnet" is not compatible with provider "codex" +Unknown provider 'claud'. Registered: claude, codex, pi ``` +Model strings are not validated at load time — they're forwarded to the SDK as-is and validated by the upstream API at request time. 
+ ### Resource Validation (CLI) To validate that all referenced command files, MCP config files, and skill directories exist on disk, run: diff --git a/packages/docs-web/src/content/docs/guides/global-workflows.md b/packages/docs-web/src/content/docs/guides/global-workflows.md index 282881e312..a4651ba0ec 100644 --- a/packages/docs-web/src/content/docs/guides/global-workflows.md +++ b/packages/docs-web/src/content/docs/guides/global-workflows.md @@ -6,7 +6,7 @@ area: workflows audience: [user] status: current sidebar: - order: 8 + order: 9 --- Workflows placed in `~/.archon/workflows/`, commands in `~/.archon/commands/`, and scripts in `~/.archon/scripts/` are loaded globally -- they appear in every project and can be invoked from any repository. Workflows and commands carry the `source: 'global'` label in the Web UI node palette; scripts resolve under the same repo-wins-over-home precedence. diff --git a/packages/docs-web/src/content/docs/guides/hooks.md b/packages/docs-web/src/content/docs/guides/hooks.md index 3e6928ae21..201e60c3cb 100644 --- a/packages/docs-web/src/content/docs/guides/hooks.md +++ b/packages/docs-web/src/content/docs/guides/hooks.md @@ -6,7 +6,7 @@ area: workflows audience: [user] status: current sidebar: - order: 5 + order: 6 --- DAG workflow nodes support a `hooks` field that attaches Claude Agent SDK hooks diff --git a/packages/docs-web/src/content/docs/guides/index.md b/packages/docs-web/src/content/docs/guides/index.md index 0d53209fb6..f3cce0d69e 100644 --- a/packages/docs-web/src/content/docs/guides/index.md +++ b/packages/docs-web/src/content/docs/guides/index.md @@ -20,6 +20,7 @@ How-to guides for building and running AI coding workflows with Archon. 
- [Loop Nodes](/guides/loop-nodes/) — Iterative AI execution with completion conditions and deterministic exit checks - [Approval Nodes](/guides/approval-nodes/) — Human review gates with optional AI rework on rejection +- [Script Nodes](/guides/script-nodes/) — TypeScript/JavaScript (bun) or Python (uv) as a deterministic DAG node, without AI ## Node Features (Claude only) diff --git a/packages/docs-web/src/content/docs/guides/loop-nodes.md b/packages/docs-web/src/content/docs/guides/loop-nodes.md index 0e9e3eebc3..1420c9670a 100644 --- a/packages/docs-web/src/content/docs/guides/loop-nodes.md +++ b/packages/docs-web/src/content/docs/guides/loop-nodes.md @@ -90,10 +90,13 @@ substitution: | `$WORKFLOW_ID` | Current workflow run ID | | `$nodeId.output` | Output from upstream nodes | | `$LOOP_USER_INPUT` | User feedback provided via `/workflow approve ` at an interactive loop gate. Only populated on the first iteration of a resumed interactive loop; empty string on all other iterations. | +| `$LOOP_PREV_OUTPUT` | Cleaned output of the previous loop iteration. Empty string on the first iteration. Useful for `fresh_context: true` loops that need to reference what the previous pass produced or why it failed. | `$USER_MESSAGE` is particularly important for `fresh_context: true` loops — the agent has no memory of prior iterations, so the prompt must include all -context needed to continue the work. +context needed to continue the work. `$LOOP_PREV_OUTPUT` complements this by +exposing the previous iteration's own output without forcing the engine to +thread the session. ### `until` @@ -177,6 +180,39 @@ The prompt tells the agent it has no memory and must bootstrap from files. window exhaustion is a risk. The agent reads `.archon/ralph/*/prd.json` or similar tracking files to know what's done and what's next. 
+### Retry-on-failure with `$LOOP_PREV_OUTPUT` + +When `fresh_context: true` is needed (to keep each iteration's context window +small) but the agent still benefits from knowing what the previous pass said — +typical of implement→validate or generate→review loops — inject the previous +iteration's output via `$LOOP_PREV_OUTPUT`: + +```yaml +- id: implement-and-qa + loop: + prompt: | + Implement the plan, then run `bun run validate`. + If checks fail, fix the failures. + + Previous iteration output (empty on first pass): + $LOOP_PREV_OUTPUT + + Use the above to focus your fixes. When all checks pass output: + QA_PASS + until: QA_PASS + fresh_context: true + max_iterations: 3 +``` + +In a continuous run, the first iteration sees `$LOOP_PREV_OUTPUT` substituted +to an empty string; iterations 2+ see the previous iteration's cleaned output +(after `` tags are stripped). + +When a loop resumes from an interactive approval gate, the first executed +iteration after the resume also receives an empty `$LOOP_PREV_OUTPUT` even if +its numeric iteration is 2+ — the prior output lived in a different run and is +not carried across the gate. + ### Accumulating context The agent builds on its own prior work across iterations. 
Good for iterative diff --git a/packages/docs-web/src/content/docs/guides/mcp-servers.md b/packages/docs-web/src/content/docs/guides/mcp-servers.md index 46474477e2..c777964d75 100644 --- a/packages/docs-web/src/content/docs/guides/mcp-servers.md +++ b/packages/docs-web/src/content/docs/guides/mcp-servers.md @@ -6,7 +6,7 @@ area: workflows audience: [user] status: current sidebar: - order: 6 + order: 7 --- DAG workflow nodes support a `mcp` field that attaches MCP (Model Context Protocol) diff --git a/packages/docs-web/src/content/docs/guides/remotion-workflow.md b/packages/docs-web/src/content/docs/guides/remotion-workflow.md index d68831be91..666b1ad916 100644 --- a/packages/docs-web/src/content/docs/guides/remotion-workflow.md +++ b/packages/docs-web/src/content/docs/guides/remotion-workflow.md @@ -6,7 +6,7 @@ area: workflows audience: [user] status: current sidebar: - order: 9 + order: 10 --- The `archon-remotion-generate` workflow uses AI to create Remotion video compositions. diff --git a/packages/docs-web/src/content/docs/guides/script-nodes.md b/packages/docs-web/src/content/docs/guides/script-nodes.md new file mode 100644 index 0000000000..dcf2b985f6 --- /dev/null +++ b/packages/docs-web/src/content/docs/guides/script-nodes.md @@ -0,0 +1,352 @@ +--- +title: Script Nodes +description: Run TypeScript, JavaScript, or Python code as a DAG node without invoking an AI agent. +category: guides +area: workflows +audience: [user] +status: current +sidebar: + order: 5 +--- + +DAG workflow nodes support a `script` field that runs a TypeScript, JavaScript, +or Python snippet as part of the workflow. No AI agent is invoked — the script +runs via the `bun` or `uv` runtime, `stdout` is captured as the node's output, +and the result is available downstream as `$nodeId.output`. 
+ +Use script nodes for deterministic work that needs a real programming language: +parsing JSON, transforming data between upstream AI nodes, calling HTTP APIs +with typed clients, or computing values that a shell one-liner would mangle. +If a plain shell command is enough, use a [`bash:` node](/guides/authoring-workflows/#node-fields) +instead. + +## Quick Start + +### Inline TypeScript (bun) + +```yaml +nodes: + - id: parse + script: | + const data = { count: 42, label: "ok" }; + console.log(JSON.stringify(data)); + runtime: bun +``` + +### Inline Python (uv) + +```yaml +nodes: + - id: compute + script: | + import json, statistics + values = [1, 2, 3, 4, 5] + print(json.dumps({ "mean": statistics.mean(values) })) + runtime: uv +``` + +### Named script from `.archon/scripts/` + +```yaml +nodes: + - id: fetch-pages + script: fetch-github-pages # resolves .archon/scripts/fetch-github-pages.ts + runtime: bun + timeout: 60000 +``` + +The file `.archon/scripts/fetch-github-pages.ts` is loaded and executed with +`bun --no-env-file run `. + +## How It Works + +1. **Substitute variables.** `$ARGUMENTS`, `$WORKFLOW_ID`, `$ARTIFACTS_DIR`, + `$BASE_BRANCH`, `$DOCS_DIR`, and upstream `$nodeId.output` references are + substituted into the `script` text before execution. +2. **Detect inline vs named.** If the `script` value contains a newline or any + shell metacharacter (see [Inline vs Named Scripts](#inline-vs-named-scripts) + below), it's treated as inline code. Otherwise it's treated as a named-script + reference. +3. **Dispatch.** + - `runtime: bun` + inline → `bun --no-env-file -e ''` + - `runtime: bun` + named → `bun --no-env-file run ` + - `runtime: uv` + inline → `uv run [--with dep ...] python -c ''` + - `runtime: uv` + named → `uv run [--with dep ...] ` +4. **Capture.** `stdout` (with the trailing newline stripped) becomes + `$nodeId.output`. `stderr` is logged as a warning and posted to the + conversation but does **not** fail the node. 
A non-zero exit code fails it. + +## YAML Schema + +```yaml +- id: node-name + script: # required, non-empty + runtime: bun | uv # required + deps: ["httpx", "pydantic>=2"] # optional, uv-only (see below) + timeout: 60000 # optional ms, default 120000 + depends_on: [upstream] # optional + when: "$upstream.output != ''" # optional + trigger_rule: all_success # optional (default) + retry: # optional; same shape as bash/AI nodes + max_attempts: 3 + on_error: transient +``` + +### Fields + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `script` | string | Yes | Inline code, or the basename (no extension) of a file in `.archon/scripts/` or `~/.archon/scripts/` | +| `runtime` | `'bun'` \| `'uv'` | Yes | Which runtime executes the script. Must match the file extension for named scripts | +| `deps` | string[] | No | Python dependencies to install for this run. **uv only** — ignored with a warning for `bun` | +| `timeout` | number (ms) | No | Hard kill after this many milliseconds. Default: `120000` (2 min) | + +Standard DAG fields (`id`, `depends_on`, `when`, `trigger_rule`, `retry`) all +work. AI-specific fields (`model`, `provider`, `context`, `output_format`, +`allowed_tools`, `denied_tools`, `hooks`, `mcp`, `skills`, `agents`, `effort`, +`thinking`, `maxBudgetUsd`, `systemPrompt`, `fallbackModel`, `betas`, `sandbox`) +are accepted by the parser but emit a loader warning and are ignored at runtime +— no AI is invoked. `idle_timeout` is also accepted but ignored: script nodes +run as one-shot subprocesses, so use `timeout` (hard kill after N ms) instead. + +## Inline vs Named Scripts + +The executor decides mode from the `script` string itself. A value is treated +as **inline code** if it contains a newline or any shell metacharacter; otherwise +it's a **named script** lookup. 
+ +- **Metacharacters that trigger inline mode:** space, `;` `(` `)` `{` `}` `&` + `|` `<` `>` `$` `` ` `` `"` `'` +- **Inline examples:** `"const x = 1; console.log(x)"`, multi-line blocks, any + snippet with a space +- **Named examples:** `fetch-pages`, `analyze_metrics`, `triage-fmt` — bare + identifiers with no whitespace or shell syntax + +If you want an inline snippet that happens to be syntactically a single +identifier, add a trailing comment or newline to force inline mode. + +### Named Script Resolution + +Named scripts are discovered from, in precedence order: + +1. `/.archon/scripts/` — repo-local +2. `~/.archon/scripts/` — home-scoped (shared across every repo) + +Each directory is walked one subfolder deep (e.g. `.archon/scripts/triage/foo.ts` +resolves as `foo`). Deeper nesting is ignored. On a same-name collision the +repo-local entry wins silently — see [Global Workflows](/guides/global-workflows/) +for the shared precedence rules. + +### Extension ↔ Runtime Mapping + +Named scripts derive their runtime from the file extension: + +| Extension | Runtime | +|-----------|---------| +| `.ts`, `.js` | `bun` | +| `.py` | `uv` | + +The `runtime:` declared on the node **must match the file's extension** — the +validator rejects `runtime: uv` pointing at a `.ts` file, and vice versa. For +inline scripts, you can use any language that the chosen runtime supports. + +## Dependencies (uv only) + +`deps` is a pass-through to `uv run --with `, which installs packages into +a per-run ephemeral environment: + +```yaml +- id: scrape + script: | + import httpx + r = httpx.get("https://api.github.com/repos/anthropics/anthropic-cookbook") + print(r.text) + runtime: uv + deps: ["httpx>=0.27"] +``` + +- **Version pinning** — any PEP 508 specifier works (`pkg==1.2.3`, `pkg>=2,<3`). +- **Bun ignores `deps`** — Bun auto-installs imported packages on first run, so + the validator emits a warning if you set `deps` with `runtime: bun`. 
Remove + the field, or switch to `uv` if you need explicit dependency management. +- **No persistent environment** — each run is isolated; there is no `requirements.txt` + or lockfile to maintain. + +## Output and Data Flow + +`stdout` (trimmed of its trailing newline) becomes `$nodeId.output`. Print JSON +if you want downstream nodes to access structured fields with +`$nodeId.output.field` — the workflow engine tries to parse the output as JSON +for field access in `when:` conditions and prompt substitution. + +```yaml +- id: classify + script: | + const input = process.argv.slice(2).join(' '); + const severity = input.includes('crash') ? 'high' : 'low'; + console.log(JSON.stringify({ severity, length: input.length })); + runtime: bun + +- id: investigate + command: investigate-bug + depends_on: [classify] + when: "$classify.output.severity == 'high'" +``` + +### Variable Substitution in Scripts + +Variables are substituted into the `script` text **as raw strings, without +shell quoting** — unlike `bash:` nodes, where `$nodeId.output` values are +auto-quoted. Treat substituted values as untrusted input and parse them with +language features, not by interpolating into shell syntax. + +:::caution[Avoid String.raw with `$nodeId.output`] +The pattern `` String.raw`$nodeId.output` `` looks safe but fails silently when +the substituted value contains a backtick — common in AI-generated markdown, +`output_format` payloads, or any output with inline code spans. The backtick +terminates the template literal early, producing a cryptic `Expected ";"` parse +error at runtime. 
+ +**Use direct assignment instead.** JSON is a strict subset of JavaScript +expression syntax, so the substituted value is always a valid JS literal: + +```typescript +// Safe — works for any valid JSON, including content with backticks +const data = $fetch-issue.output; + +// Fragile — breaks if output contains a backtick +const data = JSON.parse(String.raw`$fetch-issue.output`); // DON'T +``` +::: + +For **named scripts**, variables are not passed automatically. Read them from +the environment (`process.env.USER_MESSAGE`, `os.environ['USER_MESSAGE']`) +or accept them via stdin. For **inline scripts**, substituted variables are +literally embedded into the code string at execution time. + +## Environment and Isolation + +Script subprocesses receive `process.env` merged with any codebase-scoped env +vars you've configured via the Web UI (Settings → Projects → Env Vars) or the +`env:` block in `.archon/config.yaml`. This is the same injection surface used +by Claude, Codex, and bash nodes. + +**Target repo `.env` isolation:** the Bun subprocess is invoked with +`--no-env-file`, so variables in the target repo's `.env` do **not** leak into +the script. Archon-managed env (from `~/.archon/.env` and `/.archon/.env`) +passes through normally. `uv`-launched Python subprocesses do not auto-load +`.env` at all. See [Security Model](/reference/security/#target-repo-env-isolation) +for the full story. + +## Validation + +`archon validate workflows ` checks script nodes for: + +- **Script file exists** — for named scripts, the basename must exist in + `.archon/scripts/` or `~/.archon/scripts/` with a matching extension for + the declared runtime. Missing files fail validation with a hint showing + the expected path. +- **Runtime available on PATH** — `bun` or `uv` must be installed. 
Missing + runtimes emit a warning with the official install command: + - `curl -fsSL https://bun.sh/install | bash` + - `curl -LsSf https://astral.sh/uv/install.sh | sh` +- **`deps` with `runtime: bun`** — warns that `deps` is a no-op under Bun. + +Runtime availability is cached per-process — the check spawns `which bun` / +`which uv` once and memoizes the result. + +## Patterns + +### Transform AI output before the next node + +Use a script node as a deterministic adapter between two AI nodes. The script +parses the upstream classifier's JSON, filters, and forwards a clean payload: + +```yaml +- id: classify + prompt: "Classify: $ARGUMENTS" + allowed_tools: [] + output_format: + type: object + properties: + items: + type: array + items: { type: object } + +- id: filter + script: | + const upstream = JSON.parse(process.env.UPSTREAM ?? '{}'); + const high = (upstream.items ?? []).filter(i => i.severity === 'high'); + console.log(JSON.stringify(high)); + runtime: bun + depends_on: [classify] + +- id: triage + command: triage-high-severity + depends_on: [filter] + when: "$filter.output != '[]'" +``` + +*(Note: to actually populate `UPSTREAM` you'd inline-substitute +`$classify.output` into the script body. The example above illustrates the +shape.)* + +### Reusable helper in `~/.archon/scripts/` + +A helper you want available in every repo — say, a triage summary formatter — +lives at `~/.archon/scripts/triage-fmt.ts`: + +```typescript +// ~/.archon/scripts/triage-fmt.ts +const raw = process.argv.slice(2).join(' ') || '{}'; +const data = JSON.parse(raw); +const lines = data.issues?.map((i: { id: string; title: string }) => + `- [${i.id}] ${i.title}` +).join('\n') ?? 
''; +console.log(lines || 'no issues'); +``` + +Then reference it by name from any repo's workflow: + +```yaml +- id: format + script: triage-fmt + runtime: bun + depends_on: [gather] +``` + +### Python with scientific dependencies + +```yaml +- id: analyze + script: | + import json, sys + import pandas as pd + data = json.loads(sys.argv[1]) if len(sys.argv) > 1 else [] + df = pd.DataFrame(data) + print(df.describe().to_json()) + runtime: uv + deps: ["pandas>=2.0"] + depends_on: [collect] +``` + +## What Does NOT Work + +- **AI-only features** — `hooks`, `mcp`, `skills`, `allowed_tools`, + `denied_tools`, `agents`, `model`, `provider`, `output_format`, `effort`, + `thinking`, `maxBudgetUsd`, `systemPrompt`, `fallbackModel`, `betas`, and + `sandbox` are all ignored at runtime. The loader emits a warning listing + the ignored fields. +- **Interactive prompts** — the script runs headlessly; any `stdin` read will + see EOF immediately. +- **Runtimes other than `bun` and `uv`** — rejected at parse time. +- **Cancelling mid-execution** — script subprocesses are killed on workflow + cancel, but there's no cooperative cancellation signal. Design scripts to + complete quickly or fail fast. 
+ +## See Also + +- [Authoring Workflows](/guides/authoring-workflows/) — full workflow reference +- [Global Workflows, Commands, and Scripts](/guides/global-workflows/) — home-scoped `~/.archon/scripts/` +- [Security Model](/reference/security/#target-repo-env-isolation) — env isolation details +- [Variables Reference](/reference/variables/) — substitution rules diff --git a/packages/docs-web/src/content/docs/guides/skills.md b/packages/docs-web/src/content/docs/guides/skills.md index d27262ffac..f64b6def3d 100644 --- a/packages/docs-web/src/content/docs/guides/skills.md +++ b/packages/docs-web/src/content/docs/guides/skills.md @@ -6,7 +6,7 @@ area: workflows audience: [user] status: current sidebar: - order: 7 + order: 8 --- DAG workflow nodes support a `skills` field that preloads named skills into the diff --git a/packages/docs-web/src/content/docs/reference/architecture.md b/packages/docs-web/src/content/docs/reference/architecture.md index be3dd7639e..1b92153f4f 100644 --- a/packages/docs-web/src/content/docs/reference/architecture.md +++ b/packages/docs-web/src/content/docs/reference/architecture.md @@ -414,7 +414,6 @@ export function registerBuiltinProviders(): void { displayName: 'Your Assistant', factory: () => new YourAssistantProvider(), capabilities: YOUR_ASSISTANT_CAPABILITIES, - isModelCompatible: (model) => /* pattern check */, builtIn: true, }, // ...existing entries diff --git a/packages/docs-web/src/content/docs/reference/security.md b/packages/docs-web/src/content/docs/reference/security.md index 0515c6d5e4..5d4067259f 100644 --- a/packages/docs-web/src/content/docs/reference/security.md +++ b/packages/docs-web/src/content/docs/reference/security.md @@ -128,8 +128,8 @@ The GitHub and Gitea adapters verify webhook signatures to ensure payloads origi Archon prevents target repo `.env` from leaking into subprocesses through structural protection: -1. 
**Boot cleanup:** `stripCwdEnv()` removes Bun-auto-loaded CWD `.env` keys from `process.env` before any application code runs. -2. **Claude Code subprocess:** `executableArgs: ['--no-env-file']` prevents Bun from auto-loading `.env` in the Claude Code subprocess CWD. +1. **Boot cleanup:** `stripCwdEnv()` removes Bun-auto-loaded CWD `.env` keys from `process.env` before any application code runs. **This is the primary guard** — every subprocess Archon spawns inherits from the already-cleaned `process.env`. +2. **Claude Code subprocess:** when the SDK is configured to spawn a Bun-runnable JS entry point (legacy npm-installed `cli.js`/`cli.mjs`/`cli.cjs`), Archon also passes `executableArgs: ['--no-env-file']` so Bun skips its env autoload inside the spawned process. SDK 0.2.x ships per-platform native binaries instead — those don't auto-load `.env` from cwd, so the flag is unnecessary and is omitted. 3. **Bun script nodes:** `bun --no-env-file` prevents script node subprocesses from loading target repo `.env`. 4. **Bash nodes:** Not affected — bash does not auto-load `.env` files. diff --git a/packages/docs-web/src/content/docs/reference/variables.md b/packages/docs-web/src/content/docs/reference/variables.md index f32779cb6c..ecbc626d6c 100644 --- a/packages/docs-web/src/content/docs/reference/variables.md +++ b/packages/docs-web/src/content/docs/reference/variables.md @@ -8,11 +8,11 @@ sidebar: order: 5 --- -Archon substitutes variables in command files, inline prompts, and bash scripts before execution. There are three categories of variables: workflow variables (substituted by the workflow engine), positional arguments (substituted by the command handler), and node output references (DAG workflows only). +Archon substitutes variables in command files, inline prompts, bash scripts, and `script:` node bodies before execution. 
There are three categories of variables: workflow variables (substituted by the workflow engine), positional arguments (substituted by the command handler), and node output references (DAG workflows only). ## Workflow Variables -These variables are substituted by the workflow executor in all node types (`command:`, `prompt:`, `bash:`, `loop:`). +These variables are substituted by the workflow executor in all node types (`command:`, `prompt:`, `bash:`, `script:`, `loop:`). | Variable | Resolves to | Notes | |----------|-------------|-------| @@ -27,6 +27,7 @@ These variables are substituted by the workflow executor in all node types (`com | `$ISSUE_CONTEXT` | Same as `$CONTEXT` | Alias | | `$LOOP_USER_INPUT` | User feedback from an interactive loop approval gate | Only populated on the first iteration of a resumed interactive loop. Empty string on all other iterations | | `$REJECTION_REASON` | Reviewer feedback from an approval node rejection | Only available in `on_reject` prompts. Empty string elsewhere | +| `$LOOP_PREV_OUTPUT` | Cleaned output of the previous loop iteration (loop nodes only) | Empty string on the first iteration. Useful for `fresh_context: true` loops that need to reference the prior pass without carrying the full session history | ### Context Variable Behavior @@ -64,6 +65,10 @@ In DAG workflows, nodes can reference the output of any completed upstream node. | `$nodeId.output` | Full output string of the referenced node | The node must be a declared dependency (in `depends_on`) | | `$nodeId.output.field` | A specific JSON field from the node's output | Requires the upstream node to use `output_format` for structured JSON | +### Shell Quoting in `bash:` vs `script:` + +`$nodeId.output` values are **auto shell-quoted** (single-quoted, with embedded `'` escaped) when substituted into `bash:` scripts, so the value is always safe to embed in a shell command. 
They are **not** shell-quoted when substituted into `script:` bodies — the raw value is embedded as-is. For script nodes, treat substituted values as untrusted input and parse them with language features (e.g. `JSON.parse`), not by interpolating into shell syntax. + ### Example ```yaml @@ -88,7 +93,7 @@ nodes: Variables are substituted in a defined order: -1. **Workflow variables** -- `$WORKFLOW_ID`, `$USER_MESSAGE`, `$ARGUMENTS`, `$ARTIFACTS_DIR`, `$BASE_BRANCH`, `$DOCS_DIR`, `$LOOP_USER_INPUT`, `$REJECTION_REASON` +1. **Workflow variables** -- `$WORKFLOW_ID`, `$USER_MESSAGE`, `$ARGUMENTS`, `$ARTIFACTS_DIR`, `$BASE_BRANCH`, `$DOCS_DIR`, `$LOOP_USER_INPUT`, `$REJECTION_REASON`, `$LOOP_PREV_OUTPUT` 2. **Context variables** -- `$CONTEXT`, `$EXTERNAL_CONTEXT`, `$ISSUE_CONTEXT` 3. **Node output references** -- `$nodeId.output`, `$nodeId.output.field` @@ -107,4 +112,5 @@ Positional arguments (`$1` through `$9`) are substituted separately by the comma | `$CONTEXT` / aliases | Yes | No | No | | `$LOOP_USER_INPUT` | Yes (loop nodes) | No | No | | `$REJECTION_REASON` | Yes (`on_reject` only) | No | No | +| `$LOOP_PREV_OUTPUT` | Yes (loop nodes) | No | No | | `$nodeId.output` | Yes (DAG nodes) | No | Yes | diff --git a/packages/git/src/exec.ts b/packages/git/src/exec.ts index a085ef9375..1d005c5d35 100644 --- a/packages/git/src/exec.ts +++ b/packages/git/src/exec.ts @@ -4,6 +4,29 @@ import { promisify } from 'util'; const promisifiedExecFile = promisify(execFile); +/** + * Resolve the bash binary path in a platform-aware way. + * + * On Windows, CreateProcess searches the System32 directory BEFORE the PATH + * env var. Bare `spawn('bash', ...)` therefore resolves to + * `C:\Windows\System32\bash.exe` (the WSL launcher), whose bash has broken + * `${VAR}` expansion when invoked in `-c` mode and uses `/mnt/c/` path + * convention instead of `/c/`. Both break workflow bash nodes. + * + * Fix: on Windows, default to the Git Bash absolute path. 
Overridable via + * ARCHON_BASH_PATH for non-standard Git installs (e.g. user-scope installer + * at %LOCALAPPDATA%\Programs\Git\bin\bash.exe). + * + * See: coleam00/Archon#1326 + */ +export function resolveBashPath(): string { + if (process.env.ARCHON_BASH_PATH) return process.env.ARCHON_BASH_PATH; + if (process.platform === 'win32') { + return 'C:\\Program Files\\Git\\bin\\bash.exe'; + } + return 'bash'; +} + /** Wrapper around child_process.execFile for test mockability */ export async function execFileAsync( cmd: string, diff --git a/packages/git/src/index.ts b/packages/git/src/index.ts index 39252ce4d3..537163b5f7 100644 --- a/packages/git/src/index.ts +++ b/packages/git/src/index.ts @@ -11,7 +11,7 @@ export type { export { toRepoPath, toBranchName, toWorktreePath } from './types'; // Process and filesystem wrappers -export { execFileAsync, mkdirAsync } from './exec'; +export { execFileAsync, mkdirAsync, resolveBashPath } from './exec'; // Worktree operations export { diff --git a/packages/providers/package.json b/packages/providers/package.json index b1e523d2ab..61a9ced635 100644 --- a/packages/providers/package.json +++ b/packages/providers/package.json @@ -22,11 +22,11 @@ "type-check": "bun x tsc --noEmit" }, "dependencies": { - "@anthropic-ai/claude-agent-sdk": "^0.2.89", + "@anthropic-ai/claude-agent-sdk": "^0.2.121", "@archon/paths": "workspace:*", "@mariozechner/pi-ai": "^0.67.5", "@mariozechner/pi-coding-agent": "^0.67.5", - "@openai/codex-sdk": "^0.116.0", + "@openai/codex-sdk": "^0.125.0", "@sinclair/typebox": "^0.34.41" }, "devDependencies": { diff --git a/packages/providers/src/claude/binary-resolver.test.ts b/packages/providers/src/claude/binary-resolver.test.ts index f87e78f36d..c5c407a531 100644 --- a/packages/providers/src/claude/binary-resolver.test.ts +++ b/packages/providers/src/claude/binary-resolver.test.ts @@ -5,6 +5,8 @@ * with BUNDLED_IS_BINARY=true, which conflicts with other test files. 
*/ import { describe, test, expect, mock, beforeEach, afterAll, spyOn } from 'bun:test'; +import { homedir } from 'node:os'; +import { join } from 'node:path'; import { createMockLogger } from '../test/mocks/logger'; const mockLogger = createMockLogger(); @@ -76,7 +78,55 @@ describe('resolveClaudeBinaryPath (binary mode)', () => { expect(result).toBe('/env/cli.js'); }); - test('throws with install instructions when nothing configured', async () => { + test('autodetects native installer path when env and config are unset', async () => { + // Mirror the implementation: use os.homedir() + node:path.join so the + // expected path matches the platform's actual home dir and separator. + const expected = join( + homedir(), + '.local', + 'bin', + process.platform === 'win32' ? 'claude.exe' : 'claude' + ); + // File exists only at the native-installer path. + fileExistsSpy = spyOn(resolver, 'fileExists').mockImplementation( + (path: string) => path === expected + ); + + const result = await resolver.resolveClaudeBinaryPath(); + expect(result).toBe(expected); + // Log must mark this as autodetect, not 'env' or 'config' — the source + // string is load-bearing for debug triage. 
+ expect(mockLogger.info).toHaveBeenCalledWith( + { binaryPath: expected, source: 'autodetect' }, + 'claude.binary_resolved' + ); + }); + + test('env var takes precedence over autodetect when both would match', async () => { + process.env.CLAUDE_BIN_PATH = '/custom/env/claude'; + fileExistsSpy = spyOn(resolver, 'fileExists').mockReturnValue(true); + + const result = await resolver.resolveClaudeBinaryPath(); + expect(result).toBe('/custom/env/claude'); + expect(mockLogger.info).toHaveBeenCalledWith( + { binaryPath: '/custom/env/claude', source: 'env' }, + 'claude.binary_resolved' + ); + }); + + test('config takes precedence over autodetect when both would match', async () => { + fileExistsSpy = spyOn(resolver, 'fileExists').mockReturnValue(true); + + const result = await resolver.resolveClaudeBinaryPath('/custom/config/claude'); + expect(result).toBe('/custom/config/claude'); + expect(mockLogger.info).toHaveBeenCalledWith( + { binaryPath: '/custom/config/claude', source: 'config' }, + 'claude.binary_resolved' + ); + }); + + test('throws with install instructions when nothing is configured and autodetect misses', async () => { + // Every probe returns false — env unset, config unset, native path absent. fileExistsSpy = spyOn(resolver, 'fileExists').mockReturnValue(false); const promise = resolver.resolveClaudeBinaryPath(); diff --git a/packages/providers/src/claude/binary-resolver.ts b/packages/providers/src/claude/binary-resolver.ts index f236acb277..6b918d44a5 100644 --- a/packages/providers/src/claude/binary-resolver.ts +++ b/packages/providers/src/claude/binary-resolver.ts @@ -9,13 +9,16 @@ * Resolution order (binary mode only): * 1. `CLAUDE_BIN_PATH` environment variable * 2. `assistants.claude.claudeBinaryPath` in config - * 3. Throw with install instructions + * 3. Autodetect canonical install path (native installer default) + * 4. 
Throw with install instructions * * In dev mode (BUNDLED_IS_BINARY=false), returns undefined so the caller * omits `pathToClaudeCodeExecutable` entirely and the SDK resolves via its * normal node_modules lookup. */ import { existsSync as _existsSync } from 'node:fs'; +import { homedir } from 'node:os'; +import { join } from 'node:path'; import { BUNDLED_IS_BINARY, createLogger } from '@archon/paths'; /** Wrapper for existsSync — enables spyOn in tests (direct imports can't be spied on). */ @@ -50,9 +53,12 @@ const INSTALL_INSTRUCTIONS = 'See: https://archon.diy/docs/reference/configuration#claude'; /** - * Resolve the path to the Claude Code SDK's cli.js. + * Resolve the path to the Claude Code executable (native binary in SDK 0.2.x; + * legacy `cli.js` is still accepted for operators pinned to npm-installed + * SDKs that ship a JS entry point). * - * In dev mode: returns undefined (let SDK resolve via node_modules). + * In dev mode: returns undefined (let SDK resolve from its bundled per-platform + * native binary in `@anthropic-ai/claude-agent-sdk-`). * In binary mode: resolves from env/config, or throws with install instructions. */ export async function resolveClaudeBinaryPath( @@ -89,6 +95,25 @@ export async function resolveClaudeBinaryPath( return configClaudeBinaryPath; } - // 3. Not found — throw with install instructions + // 3. Autodetect — the Anthropic native installer + // (`curl -fsSL https://claude.ai/install.sh | bash` on macOS/Linux, + // `irm https://claude.ai/install.ps1 | iex` on Windows) writes the + // executable to a fixed location relative to $HOME. Users who follow + // the recommended install path don't need any env var or config entry; + // users who deviate (npm global, custom path, etc.) still set one of + // the higher-priority sources above. + const nativeInstallerPath = + process.platform === 'win32' + ? 
join(homedir(), '.local', 'bin', 'claude.exe') + : join(homedir(), '.local', 'bin', 'claude'); + if (fileExists(nativeInstallerPath)) { + getLog().info( + { binaryPath: nativeInstallerPath, source: 'autodetect' }, + 'claude.binary_resolved' + ); + return nativeInstallerPath; + } + + // 4. Not found — throw with install instructions throw new Error(INSTALL_INSTRUCTIONS); } diff --git a/packages/providers/src/claude/provider.test.ts b/packages/providers/src/claude/provider.test.ts index 77880128da..123d687989 100644 --- a/packages/providers/src/claude/provider.test.ts +++ b/packages/providers/src/claude/provider.test.ts @@ -18,18 +18,37 @@ mock.module('@anthropic-ai/claude-agent-sdk', () => ({ import { ClaudeProvider, shouldPassNoEnvFile } from './provider'; import * as claudeModule from './provider'; +import * as binaryResolver from './binary-resolver'; describe('shouldPassNoEnvFile', () => { - test('returns true when cliPath is undefined (dev mode — SDK spawns cli.js via Bun)', () => { - expect(shouldPassNoEnvFile(undefined)).toBe(true); + test('returns false when cliPath is undefined (dev mode — SDK 0.2.x resolves a native binary)', () => { + // Pre-0.2.x the SDK shipped cli.js and dev mode = JS. Since 0.2.x the + // SDK ships per-platform native binaries via optional deps. The flag + // (a Bun runtime option) is meaningless to native binaries and gets + // rejected as `error: unknown option '--no-env-file'`. CWD .env leak + // protection comes from stripCwdEnv() at entry, not from this flag. 
+ expect(shouldPassNoEnvFile(undefined)).toBe(false); }); - test('returns true for an explicit cli.js path (npm-installed, SDK spawns via Bun/Node)', () => { + test('returns true for an explicit cli.js path (legacy npm-installed cli.js, SDK spawns via Bun)', () => { expect( shouldPassNoEnvFile('/usr/local/lib/node_modules/@anthropic-ai/claude-code/cli.js') ).toBe(true); }); + test('returns true for .mjs and .cjs paths (also Bun-runnable JS entry points)', () => { + expect(shouldPassNoEnvFile('/path/to/cli.mjs')).toBe(true); + expect(shouldPassNoEnvFile('/path/to/cli.cjs')).toBe(true); + }); + + test('returns false for non-Bun-runnable JS-adjacent extensions', () => { + // `.ts`/`.tsx`/`.jsx` are deliberately excluded — the SDK never shipped + // those as entry points, so accepting them would only widen misconfiguration. + expect(shouldPassNoEnvFile('/path/to/cli.ts')).toBe(false); + expect(shouldPassNoEnvFile('/path/to/cli.tsx')).toBe(false); + expect(shouldPassNoEnvFile('/path/to/cli.jsx')).toBe(false); + }); + test('returns false for a native binary path (curl installer, SDK execs directly)', () => { expect(shouldPassNoEnvFile('/Users/test/.local/bin/claude')).toBe(false); }); @@ -505,8 +524,10 @@ describe('ClaudeProvider', () => { const callArgs = mockQuery.mock.calls[0][0] as { options: { env: NodeJS.ProcessEnv; executableArgs?: string[] }; }; - // --no-env-file prevents Bun from auto-loading .env in subprocess CWD - expect(callArgs.options.executableArgs).toEqual(['--no-env-file']); + // executableArgs is omitted when cliPath is undefined (dev mode, SDK + // 0.2.x resolves a native binary). CWD .env leak protection comes + // from stripCwdEnv() at entry, not from the --no-env-file flag. + expect(callArgs.options.executableArgs).toBeUndefined(); expect(callArgs.options.env.CUSTOM_USER_KEY).toBe('user-trusted-value'); // Windows uses "Path" casing in spread objects and USERPROFILE instead of HOME const envPath = callArgs.options.env.PATH ?? 
callArgs.options.env.Path; @@ -521,6 +542,37 @@ describe('ClaudeProvider', () => { else delete process.env.CUSTOM_USER_KEY; }); + test('passes executableArgs: [--no-env-file] when cliPath ends in a Bun-runnable JS extension', async () => { + // Belt-and-suspenders integration check: the dev-mode path is exercised + // in the test above (executableArgs: undefined). This test exercises the + // legacy explicit-cli.js path through the real buildBaseClaudeOptions + // codepath, so a regression in the conditional spread would be caught. + const spy = spyOn(binaryResolver, 'resolveClaudeBinaryPath').mockResolvedValue( + '/usr/local/lib/node_modules/@anthropic-ai/claude-code/cli.js' + ); + + mockQuery.mockImplementation(async function* () { + // empty + }); + + for await (const _ of client.sendQuery('test', '/workspace')) { + // consume + } + + const callArgs = mockQuery.mock.calls[0][0] as { + options: { + executableArgs?: string[]; + pathToClaudeCodeExecutable?: string; + }; + }; + expect(callArgs.options.executableArgs).toEqual(['--no-env-file']); + expect(callArgs.options.pathToClaudeCodeExecutable).toBe( + '/usr/local/lib/node_modules/@anthropic-ai/claude-code/cli.js' + ); + + spy.mockRestore(); + }); + test('classifies exit code errors as crash and retries up to 3 times', async () => { const error = new Error('process exited with code 1'); mockQuery.mockImplementation(async function* () { diff --git a/packages/providers/src/claude/provider.ts b/packages/providers/src/claude/provider.ts index 5cbef54079..1e55c00b93 100644 --- a/packages/providers/src/claude/provider.ts +++ b/packages/providers/src/claude/provider.ts @@ -15,8 +15,12 @@ * Binary resolution: * - In compiled binaries, `pathToClaudeCodeExecutable` is resolved from * `CLAUDE_BIN_PATH` env or `assistants.claude.claudeBinaryPath` config; - * see ./binary-resolver.ts. In dev mode the SDK resolves cli.js itself - * from node_modules. + * see ./binary-resolver.ts. 
In dev mode the resolver returns undefined + * and the SDK picks its bundled per-platform native binary (Mach-O/ELF/PE + * from `@anthropic-ai/claude-agent-sdk-` optional dep). Pre-0.2.x + * SDKs shipped `cli.js` in the package and dev mode resolved that JS file; + * the SDK switched to native binaries in the 0.2.x series. See + * `shouldPassNoEnvFile` for the implications on the `--no-env-file` flag. */ import { query, @@ -535,31 +539,43 @@ interface ToolResultEntry { toolCallId?: string; } +/** Bun-runnable JS extensions. `.ts`/`.tsx`/`.jsx` are excluded — the SDK has + * never shipped those as entry points, so accepting them would only widen the + * surface for misconfiguration. */ +const BUN_JS_EXTENSIONS = ['.js', '.mjs', '.cjs'] as const; + /** * Decide whether the Claude subprocess should be spawned with `--no-env-file`. * - * `--no-env-file` is a Bun flag that prevents auto-loading `.env` from the - * target repo cwd into the spawned process. It only applies when the SDK - * spawns the executable via Bun/Node — i.e. when the executable is a `.js` - * file (dev mode resolves cli.js, npm-installed resolves cli.js). For a - * native Claude Code binary (curl/PowerShell installer at - * `~/.local/bin/claude`), the SDK execs the binary directly and the flag - * gets passed to the native binary, which rejects unknown options and - * exits code 1. + * `--no-env-file` is a Bun flag (consumed by the Bun runtime, not by Claude + * Code itself) that prevents auto-loading `.env` from the target repo cwd + * into the spawned process. It only does anything when the SDK spawns a + * Bun-runnable JS file via `bun cli.js …` — Bun parses the flag and skips + * its env autoload. For native Claude Code binaries the flag is meaningless + * and, worse, gets handed to the binary which rejects unknown options. + * + * The dev-mode `cliPath === undefined` path used to imply "JS executable" + * because the SDK shipped `cli.js` inside its package. 
SDK 0.2.x switched + * to per-platform native binaries (e.g. `@anthropic-ai/claude-agent-sdk-darwin-arm64/claude`), + * so dev mode now resolves to a native executable and the historical + * `undefined → true` heuristic is unsafe. Only return `true` when we have + * an explicit Bun-runnable JS path (`.js`/`.mjs`/`.cjs`) — i.e. when the + * operator pointed Archon at a legacy Bun/Node-runnable cli script. + * Otherwise return `false`. * - * Returning `false` for native binaries is verified safe — the native - * binary does not auto-load `.env` from CWD (probed end-to-end with - * sentinel `.env` and `.env.local` in the workflow CWD; both arrived - * UNSET in the spawned bash tool). The first-layer protection — - * `stripCwdEnv()` in `@archon/paths` (#1067) — removes CWD env keys from - * the parent process before spawn, so the subprocess inherits a clean - * env regardless of executable type. + * Safety: target-repo `.env` leaks are prevented by `stripCwdEnv()` in + * `@archon/paths` (#1067), which deletes CWD `.env` keys from + * `process.env` at every Archon entry point before any subprocess is + * spawned. The native Claude binary does not auto-load `.env` from its + * cwd either (verified end-to-end with sentinel keys). `--no-env-file` + * was belt-and-suspenders for the JS-via-Bun case only. * * Exported so the decision can be unit-tested without needing to mock * `BUNDLED_IS_BINARY` or run the full provider sendQuery pathway. */ export function shouldPassNoEnvFile(cliPath: string | undefined): boolean { - return cliPath === undefined || cliPath.endsWith('.js'); + if (cliPath === undefined) return false; + return BUN_JS_EXTENSIONS.some(ext => cliPath.endsWith(ext)); } /** @@ -577,10 +593,7 @@ function buildBaseClaudeOptions( cliPath: string | undefined ): Options { const isJsExecutable = shouldPassNoEnvFile(cliPath); - getLog().debug( - { cliPath: cliPath ?? 
null, isJsExecutable, passesNoEnvFile: isJsExecutable }, - 'claude.subprocess_env_file_flag' - ); + getLog().debug({ cliPath: cliPath ?? null, isJsExecutable }, 'claude.subprocess_env_file_flag'); return { cwd, diff --git a/packages/providers/src/codex/binary-resolver.test.ts b/packages/providers/src/codex/binary-resolver.test.ts index 1df4e7c6f6..a121e4c204 100644 --- a/packages/providers/src/codex/binary-resolver.test.ts +++ b/packages/providers/src/codex/binary-resolver.test.ts @@ -87,7 +87,70 @@ describe('resolveCodexBinaryPath (binary mode)', () => { expect(normalized).toContain('/tmp/test-archon-home/vendor/codex/'); }); + test('autodetects npm global install at ~/.npm-global/bin/codex (POSIX)', async () => { + if (process.platform === 'win32') return; // POSIX-only probe + const home = process.env.HOME ?? '/Users/test'; + const expected = `${home}/.npm-global/bin/codex`; + fileExistsSpy = spyOn(resolver, 'fileExists').mockImplementation( + (path: string) => path === expected + ); + + const result = await resolver.resolveCodexBinaryPath(); + expect(result).toBe(expected); + expect(mockLogger.info).toHaveBeenCalledWith( + { binaryPath: expected, source: 'autodetect' }, + 'codex.binary_resolved' + ); + }); + + test('autodetects homebrew install on Apple Silicon', async () => { + if (process.platform !== 'darwin' || process.arch !== 'arm64') { + // `/opt/homebrew/bin/codex` is only probed on darwin-arm64; on other + // hosts this test has nothing to assert (the probe list excludes it). 
+ return; + } + fileExistsSpy = spyOn(resolver, 'fileExists').mockImplementation( + (path: string) => path === '/opt/homebrew/bin/codex' + ); + + const result = await resolver.resolveCodexBinaryPath(); + expect(result).toBe('/opt/homebrew/bin/codex'); + expect(mockLogger.info).toHaveBeenCalledWith( + { binaryPath: '/opt/homebrew/bin/codex', source: 'autodetect' }, + 'codex.binary_resolved' + ); + }); + + test('autodetects system install at /usr/local/bin/codex', async () => { + if (process.platform === 'win32') { + // /usr/local/bin is not probed on Windows. + return; + } + fileExistsSpy = spyOn(resolver, 'fileExists').mockImplementation( + (path: string) => path === '/usr/local/bin/codex' + ); + + const result = await resolver.resolveCodexBinaryPath(); + expect(result).toBe('/usr/local/bin/codex'); + }); + + test('vendor directory takes precedence over autodetect', async () => { + // Both vendor and npm-global would match; vendor must win (lower tier #). + fileExistsSpy = spyOn(resolver, 'fileExists').mockImplementation((path: string) => { + const normalized = path.replace(/\\/g, '/'); + return normalized.includes('vendor/codex') || normalized.includes('.npm-global'); + }); + + const result = await resolver.resolveCodexBinaryPath(); + expect(result!.replace(/\\/g, '/')).toContain('/vendor/codex/'); + expect(mockLogger.info).toHaveBeenCalledWith( + expect.objectContaining({ source: 'vendor' }), + 'codex.binary_resolved' + ); + }); + test('throws with install instructions when binary not found anywhere', async () => { + // Env unset, config unset, vendor dir empty, every autodetect path missing. 
fileExistsSpy = spyOn(resolver, 'fileExists').mockReturnValue(false); await expect(resolver.resolveCodexBinaryPath()).rejects.toThrow('Codex CLI binary not found'); diff --git a/packages/providers/src/codex/binary-resolver.ts b/packages/providers/src/codex/binary-resolver.ts index a1e0f01a5b..1ac8e57cfb 100644 --- a/packages/providers/src/codex/binary-resolver.ts +++ b/packages/providers/src/codex/binary-resolver.ts @@ -9,12 +9,14 @@ * 1. `CODEX_BIN_PATH` environment variable * 2. `assistants.codex.codexBinaryPath` in config * 3. `~/.archon/vendor/codex/` (user-placed) - * 4. Throw with install instructions + * 4. Autodetect canonical install paths (npm prefix defaults per platform) + * 5. Throw with install instructions * * In dev mode (BUNDLED_IS_BINARY=false), returns undefined so the SDK * uses its normal node_modules-based resolution. */ import { existsSync as _existsSync } from 'node:fs'; +import { homedir } from 'node:os'; import { join } from 'node:path'; import { BUNDLED_IS_BINARY, getArchonHome, createLogger } from '@archon/paths'; @@ -89,7 +91,19 @@ export async function resolveCodexBinaryPath( } } - // 4. Not found — throw with install instructions + // 4. Autodetect — probe the handful of paths Codex typically lands at + // when installed via the documented package managers. Users who install + // somewhere else (custom npm prefix, etc.) still set one of the higher- + // priority sources above. Order: most specific → least specific. + const autodetectPaths = getAutodetectPaths(); + for (const probePath of autodetectPaths) { + if (fileExists(probePath)) { + getLog().info({ binaryPath: probePath, source: 'autodetect' }, 'codex.binary_resolved'); + return probePath; + } + } + + // 5. Not found — throw with install instructions const vendorPath = `~/.archon/${CODEX_VENDOR_DIR}/`; throw new Error( 'Codex CLI binary not found. 
The Codex provider requires a native binary\n' + @@ -105,3 +119,47 @@ export async function resolveCodexBinaryPath( ' codexBinaryPath: /path/to/codex\n' ); } + +/** + * Canonical install locations probed by tier 4 autodetect. Grounded in + * the official @openai/codex README and the npm global-install contract + * (npm writes the binary to `{npm_prefix}/bin/` on POSIX and + * `{npm_prefix}\.cmd` on Windows). The probes cover the npm prefix + * a default install lands at on each platform: + * + * - `$HOME/.npm-global/bin/codex` — common when the user ran + * `npm config set prefix ~/.npm-global` to avoid root writes + * - `/opt/homebrew/bin/codex` — mac Apple Silicon with homebrew-node + * (homebrew sets npm prefix to /opt/homebrew) + * - `/usr/local/bin/codex` — mac Intel with homebrew-node, or linux + * with system-installed node (npm prefix defaults to /usr/local) + * - `%AppData%\npm\codex.cmd` — Windows npm global default + * + * Not covered (explicit override required via CODEX_BIN_PATH or config): + * - users with other custom npm prefixes — `npm root -g` would spawn + * a subprocess per resolve, too heavy for a probe helper + * - Homebrew cask install (`brew install --cask codex`) — cask layout + * isn't a PATH binary; users should symlink or set the path + * - manual GitHub Releases extract — placement is user-determined + */ +function getAutodetectPaths(): string[] { + const paths: string[] = []; + + if (process.platform === 'win32') { + const appData = process.env.APPDATA; + if (appData) paths.push(join(appData, 'npm', 'codex.cmd')); + paths.push(join(homedir(), '.npm-global', 'codex.cmd')); + return paths; + } + + // POSIX (macOS + Linux) + paths.push(join(homedir(), '.npm-global', 'bin', 'codex')); + + if (process.platform === 'darwin' && process.arch === 'arm64') { + paths.push('/opt/homebrew/bin/codex'); + } + + paths.push('/usr/local/bin/codex'); + + return paths; +} diff --git a/packages/providers/src/codex/provider.test.ts 
b/packages/providers/src/codex/provider.test.ts index 669826ebc3..ffc0dbc119 100644 --- a/packages/providers/src/codex/provider.test.ts +++ b/packages/providers/src/codex/provider.test.ts @@ -870,10 +870,13 @@ describe('CodexProvider', () => { ); }); - test('handles error events', async () => { + test('error events followed by turn.completed yield a clean result (recoverable)', async () => { + // SDK error events that are followed by turn.completed indicate the SDK + // recovered internally. The dropped error message is logged but not + // surfaced \u2014 only one terminal result chunk is yielded. mockRunStreamed.mockResolvedValue({ events: (async function* () { - yield { type: 'error', message: 'Something went wrong' }; + yield { type: 'error', message: 'Transient blip' }; yield { type: 'turn.completed', usage: defaultUsage }; })(), }); @@ -883,14 +886,44 @@ describe('CodexProvider', () => { chunks.push(chunk); } - expect(chunks[0]).toEqual({ type: 'system', content: '\u26A0\uFE0F Something went wrong' }); - expect(mockLogger.error).toHaveBeenCalledWith( - { message: 'Something went wrong' }, - 'stream_error' - ); + expect(chunks).toHaveLength(1); + expect(chunks[0]).toEqual({ + type: 'result', + sessionId: 'new-thread-id', + tokens: { input: 10, output: 5 }, + }); + expect(mockLogger.error).toHaveBeenCalledWith({ message: 'Transient blip' }, 'stream_error'); + }); + + test('error event followed by stream close yields fail-stop result.isError', async () => { + // The SDK sends an error event (e.g. "model not supported") and the + // iterator closes without turn.completed or turn.failed. The provider + // synthesizes a fail-stop result so the dag-executor's msg.isError + // branch catches the failure \u2014 same chunk shape as Claude. 
+ mockRunStreamed.mockResolvedValue({ + events: (async function* () { + yield { type: 'error', message: "'opus[1m]' model is not supported" }; + })(), + }); + + const chunks = []; + for await (const chunk of client.sendQuery('test', '/workspace')) { + chunks.push(chunk); + } + + expect(chunks).toHaveLength(1); + expect(chunks[0]).toEqual({ + type: 'result', + sessionId: 'new-thread-id', + isError: true, + errorSubtype: 'codex_stream_incomplete', + errors: ["'opus[1m]' model is not supported"], + }); }); - test('suppresses MCP timeout errors', async () => { + test('MCP client errors followed by turn.completed yield clean result', async () => { + // MCP client errors are non-fatal \u2014 Codex retries internally. + // Only after turn.completed do we know the SDK recovered. mockRunStreamed.mockResolvedValue({ events: (async function* () { yield { type: 'error', message: 'MCP client connection timeout' }; @@ -903,22 +936,46 @@ describe('CodexProvider', () => { chunks.push(chunk); } - // Should only have the result, not the MCP error expect(chunks).toHaveLength(1); expect(chunks[0]).toEqual({ type: 'result', sessionId: 'new-thread-id', tokens: { input: 10, output: 5 }, }); - - // Error is still logged even though not sent to user + // Logged but not surfaced as failure expect(mockLogger.error).toHaveBeenCalledWith( { message: 'MCP client connection timeout' }, 'stream_error' ); }); - test('handles turn.failed events', async () => { + test('MCP-only error followed by stream close still fails (no terminal = failure)', async () => { + // The stream-incomplete fail-stop fires whenever the iterator closes + // without a terminal event \u2014 that's an SDK contract violation + // regardless of cause. But the captured error message does NOT carry + // the MCP-client text, since MCP errors are filtered from capture. 
+ mockRunStreamed.mockResolvedValue({ + events: (async function* () { + yield { type: 'error', message: 'MCP client transport closed' }; + })(), + }); + + const chunks = []; + for await (const chunk of client.sendQuery('test', '/workspace')) { + chunks.push(chunk); + } + + expect(chunks).toHaveLength(1); + expect(chunks[0]).toMatchObject({ + type: 'result', + isError: true, + errorSubtype: 'codex_stream_incomplete', + }); + const errors = (chunks[0] as { errors?: string[] }).errors; + expect(errors?.[0]).not.toContain('MCP client'); + }); + + test('turn.failed yields result.isError with codex_turn_failed subtype', async () => { mockRunStreamed.mockResolvedValue({ events: (async function* () { yield { type: 'turn.failed', error: { message: 'Rate limit exceeded' } }; @@ -930,9 +987,13 @@ describe('CodexProvider', () => { chunks.push(chunk); } + expect(chunks).toHaveLength(1); expect(chunks[0]).toEqual({ - type: 'system', - content: '\u274C Turn failed: Rate limit exceeded', + type: 'result', + sessionId: 'new-thread-id', + isError: true, + errorSubtype: 'codex_turn_failed', + errors: ['Rate limit exceeded'], }); expect(mockLogger.error).toHaveBeenCalledWith( { errorMessage: 'Rate limit exceeded' }, @@ -940,7 +1001,7 @@ describe('CodexProvider', () => { ); }); - test('handles turn.failed without error message', async () => { + test('turn.failed without error message yields fail-stop with Unknown error', async () => { mockRunStreamed.mockResolvedValue({ events: (async function* () { yield { type: 'turn.failed', error: null }; @@ -952,9 +1013,13 @@ describe('CodexProvider', () => { chunks.push(chunk); } + expect(chunks).toHaveLength(1); expect(chunks[0]).toEqual({ - type: 'system', - content: '\u274C Turn failed: Unknown error', + type: 'result', + sessionId: 'new-thread-id', + isError: true, + errorSubtype: 'codex_turn_failed', + errors: ['Unknown error'], }); expect(mockLogger.error).toHaveBeenCalledWith( { errorMessage: 'Unknown error' }, @@ -962,6 +1027,31 @@ 
describe('CodexProvider', () => { ); }); + test('iterator that closes with zero events yields codex_stream_incomplete with default message', async () => { + // Bare-stream-close fallback: no error event, no terminal event, + // iterator just ends. Locks in the default message used when there is + // no captured non-MCP error to attribute the failure to. + mockRunStreamed.mockResolvedValue({ + events: (async function* () { + // no events + })(), + }); + + const chunks = []; + for await (const chunk of client.sendQuery('test', '/workspace')) { + chunks.push(chunk); + } + + expect(chunks).toHaveLength(1); + expect(chunks[0]).toEqual({ + type: 'result', + sessionId: 'new-thread-id', + isError: true, + errorSubtype: 'codex_stream_incomplete', + errors: ['Codex stream closed without turn.completed or turn.failed'], + }); + }); + test('throws on runStreamed error', async () => { const networkError = new Error('Network failure'); mockRunStreamed.mockRejectedValue(networkError); diff --git a/packages/providers/src/codex/provider.ts b/packages/providers/src/codex/provider.ts index b9e1d493e9..89a0796b94 100644 --- a/packages/providers/src/codex/provider.ts +++ b/packages/providers/src/codex/provider.ts @@ -196,6 +196,13 @@ async function* streamCodexEvents( const state: CodexStreamState = {}; let accumulatedText = ''; + // If the iterator closes without a terminal event (e.g. the model was + // rejected before the turn even started), we synthesize a fail-stop result + // after the loop so the dag-executor's `msg.isError` branch catches it + // — matching Claude's contract. Both terminal branches below `return`, + // so reaching the post-loop block can only mean no terminal fired. 
+ let lastNonMcpError: string | undefined; + for await (const event of events) { if (abortSignal?.aborted) { getLog().info('query_aborted_between_events'); @@ -213,8 +220,14 @@ async function* streamCodexEvents( if (event.type === 'error') { const errorEvent = event as { message: string }; getLog().error({ message: errorEvent.message }, 'stream_error'); + // MCP client errors are non-fatal — Codex retries internally and may + // still reach turn.completed. Other errors are captured; whether they + // are fatal is decided when the stream terminates: turn.completed + // means the SDK recovered, so the captured error is dropped; loop + // closure without a terminal means the captured error caused the + // stream to abort and is surfaced as the failure cause. if (!errorEvent.message.includes('MCP client')) { - yield { type: 'system', content: `⚠️ ${errorEvent.message}` }; + lastNonMcpError = errorEvent.message; } continue; } @@ -223,8 +236,14 @@ async function* streamCodexEvents( const errorObj = (event as { error?: { message?: string } }).error; const errorMessage = errorObj?.message ?? 'Unknown error'; getLog().error({ errorMessage }, 'turn_failed'); - yield { type: 'system', content: `❌ Turn failed: ${errorMessage}` }; - break; + yield { + type: 'result', + sessionId: threadId ?? undefined, + isError: true, + errorSubtype: 'codex_turn_failed', + errors: [errorMessage], + }; + return; } if (event.type === 'item.completed') { @@ -419,9 +438,27 @@ async function* streamCodexEvents( tokens: usage, ...(structuredOutput !== undefined ? { structuredOutput } : {}), }; - break; + return; } } + + // Reaching here means the iterator closed without yielding turn.completed + // or turn.failed (both branches `return` immediately). Common cause: model + // rejected by the API (model not supported, auth refused) before the turn + // started. Surface as a fail-stop. 
The dag-executor's `msg.isError` branch + // (dag-executor.ts: throws `Node '<name>' failed: SDK returned <errors>` — placeholder names per dag-executor; confirm exact format string) + // turns this into a thrown node failure — distinct from the empty-output + // guard further down, which returns `{ state: 'failed' }` for AI nodes + // that streamed nothing but never raised an isError. + const message = lastNonMcpError ?? 'Codex stream closed without turn.completed or turn.failed'; + getLog().error({ message }, 'stream_incomplete'); + yield { + type: 'result', + sessionId: threadId ?? undefined, + isError: true, + errorSubtype: 'codex_stream_incomplete', + errors: [message], + }; } // ─── Error Classification & Retry ──────────────────────────────────────── diff --git a/packages/providers/src/community/pi/index.ts b/packages/providers/src/community/pi/index.ts index 5f06e9edaa..ce0a286eda 100644 --- a/packages/providers/src/community/pi/index.ts +++ b/packages/providers/src/community/pi/index.ts @@ -1,5 +1,4 @@ export { PI_CAPABILITIES } from './capabilities'; export { parsePiConfig, type PiProviderDefaults } from './config'; -export { isPiModelCompatible, parsePiModelRef, type PiModelRef } from './model-ref'; export { PiProvider } from './provider'; export { registerPiProvider } from './registration'; diff --git a/packages/providers/src/community/pi/model-ref.test.ts b/packages/providers/src/community/pi/model-ref.test.ts index d0001186e2..2bd093973d 100644 --- a/packages/providers/src/community/pi/model-ref.test.ts +++ b/packages/providers/src/community/pi/model-ref.test.ts @@ -1,6 +1,6 @@ import { describe, expect, test } from 'bun:test'; -import { isPiModelCompatible, parsePiModelRef } from './model-ref'; +import { parsePiModelRef } from './model-ref'; describe('parsePiModelRef', () => { test('parses simple provider/model', () => { @@ -48,21 +48,3 @@ expect(parsePiModelRef('')).toBeUndefined(); }); }); - -describe('isPiModelCompatible', () => { - test('accepts valid provider/model refs', () => { - 
expect(isPiModelCompatible('google/gemini-2.5-pro')).toBe(true); - expect(isPiModelCompatible('anthropic/claude-opus-4-5')).toBe(true); - expect(isPiModelCompatible('openrouter/qwen/qwen3-coder')).toBe(true); - }); - - test('rejects Claude aliases', () => { - expect(isPiModelCompatible('sonnet')).toBe(false); - expect(isPiModelCompatible('opus')).toBe(false); - expect(isPiModelCompatible('haiku')).toBe(false); - }); - - test('rejects claude-prefixed models without provider', () => { - expect(isPiModelCompatible('claude-sonnet-4')).toBe(false); - }); -}); diff --git a/packages/providers/src/community/pi/model-ref.ts b/packages/providers/src/community/pi/model-ref.ts index 2d67c05fec..3b7fbd66fe 100644 --- a/packages/providers/src/community/pi/model-ref.ts +++ b/packages/providers/src/community/pi/model-ref.ts @@ -30,13 +30,3 @@ export function parsePiModelRef(raw: string): PiModelRef | undefined { return { provider, modelId }; } - -/** - * Registry-level `isModelCompatible` check. - * Syntactic only — Pi's actual model catalog is validated at `sendQuery` time - * via `getModel(provider, modelId)`, which is more trustworthy than keeping - * an Archon-side allowlist in sync. 
- */ -export function isPiModelCompatible(model: string): boolean { - return parsePiModelRef(model) !== undefined; -} diff --git a/packages/providers/src/community/pi/provider.test.ts b/packages/providers/src/community/pi/provider.test.ts index 17e6de417d..4de4314147 100644 --- a/packages/providers/src/community/pi/provider.test.ts +++ b/packages/providers/src/community/pi/provider.test.ts @@ -81,7 +81,14 @@ const mockAuthCreate = mock(() => ({ setRuntimeApiKey: mockSetRuntimeApiKey, getApiKey: mockGetApiKey, })); -const mockModelRegistryInMemory = mock(() => ({})); + +const mockModelRegistryFind = mock((provider: string, modelId: string) => { + if (provider === 'nonexistent') return undefined; + return { id: modelId, provider, name: `${provider}/${modelId}` }; +}); +const mockModelRegistryCreate = mock(() => ({ + find: mockModelRegistryFind, +})); // SessionManager mocks. Each returns a tagged session-manager stub so tests // can assert whether resume resolved to an existing session or fell through @@ -115,7 +122,7 @@ const mockCreateLsTool = mock((_cwd: string) => ({ __piTool: 'ls' })); mock.module('@mariozechner/pi-coding-agent', () => ({ createAgentSession: mockCreateAgentSession, AuthStorage: { create: mockAuthCreate }, - ModelRegistry: { inMemory: mockModelRegistryInMemory }, + ModelRegistry: { create: mockModelRegistryCreate }, SessionManager: { create: mockSessionCreate, open: mockSessionOpen, @@ -132,16 +139,6 @@ mock.module('@mariozechner/pi-coding-agent', () => ({ createLsTool: mockCreateLsTool, })); -// getModel is imported from pi-ai. Return a fake model for known refs and -// undefined for unknown refs so the provider's not-found branch is testable. 
-const mockGetModel = mock((provider: string, modelId: string) => { - if (provider === 'nonexistent') return undefined; - return { id: modelId, provider, name: `${provider}/${modelId}` }; -}); -mock.module('@mariozechner/pi-ai', () => ({ - getModel: mockGetModel, -})); - // Import AFTER mocks are set — module resolution freezes the mocks. import { PiProvider } from './provider'; import { PI_CAPABILITIES } from './capabilities'; @@ -169,6 +166,12 @@ function resetScript(events: FakeEvent[]): void { describe('PiProvider', () => { beforeEach(() => { + mockLogger.fatal.mockClear(); + mockLogger.error.mockClear(); + mockLogger.warn.mockClear(); + mockLogger.info.mockClear(); + mockLogger.debug.mockClear(); + mockLogger.trace.mockClear(); mockPrompt.mockClear(); mockAbort.mockClear(); mockDispose.mockClear(); @@ -177,8 +180,9 @@ describe('PiProvider', () => { mockSetFlagValue.mockClear(); mockResourceLoaderReload.mockClear(); mockCreateAgentSession.mockClear(); - mockGetModel.mockClear(); mockAuthCreate.mockClear(); + mockModelRegistryCreate.mockClear(); + mockModelRegistryFind.mockClear(); mockSetRuntimeApiKey.mockClear(); mockGetApiKey.mockClear(); MockDefaultResourceLoader.mockClear(); @@ -209,6 +213,21 @@ describe('PiProvider', () => { expect(new PiProvider().getCapabilities()).toEqual(PI_CAPABILITIES); }); + test('sendQuery installs PI_PACKAGE_DIR shim before Pi SDK loads', async () => { + // Runtime-safety regression: Pi's config.js reads `getPackageJsonPath()` at + // its module init, which resolves to a non-existent path inside compiled + // archon binaries. The shim writes a stub package.json to tmpdir and sets + // PI_PACKAGE_DIR so Pi's short-circuit kicks in. Must run BEFORE the + // dynamic imports in sendQuery — we verify by calling the fast-fail "no + // model" path (which returns before any Pi SDK logic executes) and + // asserting the env var was set regardless. 
+ delete process.env.PI_PACKAGE_DIR; + expect(process.env.PI_PACKAGE_DIR).toBeUndefined(); + await consume(new PiProvider().sendQuery('hi', '/tmp')); + expect(process.env.PI_PACKAGE_DIR).toBeDefined(); + expect(process.env.PI_PACKAGE_DIR).toContain('archon-pi-shim'); + }); + test('throws when no model is configured', async () => { const { error } = await consume(new PiProvider().sendQuery('hi', '/tmp')); expect(error?.message).toContain('Pi provider requires a model'); @@ -221,15 +240,102 @@ describe('PiProvider', () => { expect(error?.message).toContain('Invalid Pi model ref'); }); - test('throws when Pi provider id is unknown AND no creds available', async () => { - // No env var, no auth.json entry → fail-fast with hint about env-var table + test('logs credential hint when Pi provider id is unknown AND no creds available', async () => { + // No env var, no auth.json entry → log hint, but continue, to support custom providers that don't use credentials or that use non-Pi means of providing credentials. 
+ resetScript(scriptedAgentEnd()); const { error } = await consume( new PiProvider().sendQuery('hi', '/tmp', undefined, { model: 'unknownprovider/some-model', }) ); - expect(error?.message).toContain("no credentials for provider 'unknownprovider'"); - expect(error?.message).toContain("not in the Archon adapter's env-var table"); + + expect(error).toBeUndefined(); + expect(mockLogger.info).toHaveBeenCalledWith( + { + piProvider: 'unknownprovider', + envHint: expect.stringContaining("not in the Archon adapter's env-var table"), + loginHint: expect.stringContaining('/login'), + }, + 'pi.auth_missing' + ); + expect(mockCreateAgentSession).toHaveBeenCalledTimes(1); + }); + + test('ModelRegistry.create receives the AuthStorage instance', async () => { + // Headline-fix wiring: ModelRegistry.create must receive the same + // AuthStorage instance returned by AuthStorage.create(), so registry + // lookups can resolve user-configured custom models from + // ~/.pi/agent/models.json (LM Studio, ollama, llamacpp, etc.). Without + // this wiring the registry only sees the static built-in catalog. + process.env.GEMINI_API_KEY = 'sk-test'; + resetScript(scriptedAgentEnd()); + + await consume( + new PiProvider().sendQuery('hi', '/tmp', undefined, { + model: 'google/gemini-2.5-pro', + }) + ); + + expect(mockAuthCreate).toHaveBeenCalledTimes(1); + expect(mockModelRegistryCreate).toHaveBeenCalledTimes(1); + const authInstance = mockAuthCreate.mock.results[0]?.value; + expect(mockModelRegistryCreate).toHaveBeenCalledWith(authInstance); + }); + + test('AuthStorage.create() throwing surfaces a contextualized error', async () => { + // Both AuthStorage.create() and ModelRegistry.create() read from disk + // and can throw on malformed JSON or filesystem errors. Wrap with + // try/catch and surface a Pi-framed error so operators see the cause + // rather than a raw SDK stack trace. 
+ mockAuthCreate.mockImplementationOnce(() => { + throw new Error('Unexpected token } in JSON at position 42'); + }); + + const { error } = await consume( + new PiProvider().sendQuery('hi', '/tmp', undefined, { + model: 'google/gemini-2.5-pro', + }) + ); + + expect(error).toBeDefined(); + expect(error?.message).toContain('Pi auth storage init failed'); + expect(error?.message).toContain('Unexpected token'); + expect(error?.message).toContain('~/.pi/agent/auth.json'); + expect(mockLogger.error).toHaveBeenCalledWith( + expect.objectContaining({ piProvider: 'google' }), + 'pi.auth_storage_init_failed' + ); + }); + + test('Pi model not found includes models.json load error when registry reports one', async () => { + // ModelRegistry swallows models.json parse/validation errors into an + // internal loadError. When find() returns undefined we surface that + // error in both the structured log and the throw message so users + // debugging a custom-provider config see the actual reason. + process.env.GEMINI_API_KEY = 'sk-test'; + mockModelRegistryFind.mockImplementationOnce(() => undefined); + mockModelRegistryCreate.mockImplementationOnce(() => ({ + find: mockModelRegistryFind, + getError: () => 'Provider lm-studio: "baseUrl" is required when defining custom models.', + })); + + const { error } = await consume( + new PiProvider().sendQuery('hi', '/tmp', undefined, { + model: 'lm-studio/some-model', + }) + ); + + expect(error?.message).toContain('Pi model not found'); + expect(error?.message).toContain('models.json failed to load'); + expect(error?.message).toContain('"baseUrl" is required'); + expect(mockLogger.error).toHaveBeenCalledWith( + expect.objectContaining({ + piProvider: 'lm-studio', + modelId: 'some-model', + loadError: expect.stringContaining('"baseUrl" is required'), + }), + 'pi.model_not_found' + ); }); test('throws when env var missing AND auth.json has no entry', async () => { @@ -280,13 +386,13 @@ describe('PiProvider', () => { 
expect(mockGetApiKey).toHaveBeenCalledWith('anthropic'); }); - test('throws when getModel returns undefined', async () => { + test('throws when ModelRegistry.find returns undefined', async () => { process.env.GEMINI_API_KEY = 'sk-test'; - // 'nonexistent' is handled in mockGetModel to return undefined, but - // the adapter rejects unknown providers before getModel. To exercise + // 'nonexistent' is handled in mockModelRegistryFind to return undefined, but + // the adapter rejects unknown providers. To exercise // the not-found branch, use a known provider but unknown modelId by - // temporarily swapping mockGetModel to always return undefined. - mockGetModel.mockImplementationOnce(() => undefined); + // temporarily swapping mockModelRegistryFind to always return undefined. + mockModelRegistryFind.mockImplementationOnce(() => undefined); const { error } = await consume( new PiProvider().sendQuery('hi', '/tmp', undefined, { model: 'google/unknown-model-id', diff --git a/packages/providers/src/community/pi/provider.ts b/packages/providers/src/community/pi/provider.ts index e4b6804762..5a14ed6166 100644 --- a/packages/providers/src/community/pi/provider.ts +++ b/packages/providers/src/community/pi/provider.ts @@ -1,5 +1,8 @@ +import { existsSync, mkdirSync, writeFileSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; + import { createLogger } from '@archon/paths'; -import type { Api, Model } from '@mariozechner/pi-ai'; import type { IAgentProvider, @@ -24,6 +27,44 @@ import { parsePiModelRef } from './model-ref'; // All Pi SDK value bindings and Pi-dependent helper modules are dynamically // imported inside `sendQuery()` below, which runs only when a Pi workflow is // actually invoked. Type-only imports above are fine — TS erases them. +// +// Lazy-loading defers the crash from boot-time to sendQuery-time — but the +// crash still happens when Pi is actually used. 
`ensurePiPackageDirShim()` +// (see below) fixes the *runtime* half: before any dynamic Pi import in +// sendQuery, write a stub package.json to tmpdir and point Pi at it via +// its own documented `PI_PACKAGE_DIR` escape hatch. + +/** + * Write a minimal package.json to a stable tmpdir and set `PI_PACKAGE_DIR` + * so Pi's `config.js` short-circuits its `dirname(process.execPath)` walk + * (which fails inside a compiled archon binary). Pi only reads three + * optional fields from that package.json — `piConfig.name`, `piConfig.configDir`, + * and `version` — so the stub is genuinely minimal. Idempotent: the file is + * only written once per host (existsSync check), and the env var is set on + * every call so multiple PiProvider instances stay consistent. + * + * Done on each sendQuery rather than at module load so (a) the file write + * is paid only when Pi is actually used, and (b) the env var can't get + * clobbered between registration and invocation. + */ +function ensurePiPackageDirShim(): void { + const shimDir = join(tmpdir(), 'archon-pi-shim'); + const shimPkgJson = join(shimDir, 'package.json'); + if (!existsSync(shimPkgJson)) { + mkdirSync(shimDir, { recursive: true }); + // `piConfig: {}` is explicit so Pi's defaults (`name: 'pi'`, + // `configDir: '.pi'`) kick in — matches Pi's standalone behavior. + writeFileSync( + shimPkgJson, + JSON.stringify({ + name: 'archon-pi-shim', + version: '0.0.0', + piConfig: {}, + }) + ); + } + process.env.PI_PACKAGE_DIR = shimDir; +} /** * Map Pi provider id → env var name used by pi-ai's getEnvApiKey(). @@ -53,24 +94,6 @@ function getLog(): ReturnType { return cachedLog; } -/** - * Typed wrapper around Pi's `getModel` for a runtime-string provider/model - * pair. Pi's getModel signature constrains `TModelId` to - * `keyof MODELS[TProvider]`, which isn't knowable from a runtime string — - * the local `GetModelFn` alias is the narrowest shape that still lets us - * bypass that constraint. 
Isolating the escape hatch behind one searchable - * name keeps it auditable. Takes `getModel` as a parameter because the Pi - * SDK is loaded dynamically (see the header comment on this file for why). - */ -type GetModelFn = (provider: string, modelId: string) => Model | undefined; -function lookupPiModel( - getModel: GetModelFn, - provider: string, - modelId: string -): Model | undefined { - return getModel(provider, modelId); -} - /** * Append a "respond with JSON matching this schema" instruction to the user * prompt so Pi-backed models produce parseable structured output. Pi's SDK @@ -98,15 +121,7 @@ ${JSON.stringify(schema, null, 2)}`; /** * Pi community provider — wraps `@mariozechner/pi-coding-agent`'s full * coding-agent harness. Each `sendQuery()` call creates a fresh session - * (no reuse) with in-memory auth/session/settings, so the server never - * touches `~/.pi/` and concurrent calls don't collide. - * - * Capabilities (see `capabilities.ts` for the canonical list): Pi declares - * `sessionResume`, `skills`, `toolRestrictions`, `structuredOutput`, - * `envInjection`, `effortControl`, and `thinkingControl`. Features Pi does - * not currently support through Archon (`mcp`, `hooks`, `agents`, - * `costControl`, `fallbackModel`, `sandbox`) stay off; the dag-executor - * surfaces a warning for any unsupported nodeConfig field. + * (no reuse) so concurrent calls don't collide. */ export class PiProvider implements IAgentProvider { async *sendQuery( @@ -115,6 +130,13 @@ export class PiProvider implements IAgentProvider { resumeSessionId?: string, requestOptions?: SendQueryOptions ): AsyncGenerator { + // Install the PI_PACKAGE_DIR shim BEFORE the dynamic imports below: Pi's + // config.js runs `readFileSync(getPackageJsonPath())` at its own module + // init, and getPackageJsonPath() checks process.env.PI_PACKAGE_DIR first. 
+ // Without this, the dynamic import below would crash with ENOENT on + // `dirname(process.execPath)/package.json` inside a compiled binary. + ensurePiPackageDirShim(); + // Lazy-load Pi SDK and all Pi-dependent helper modules here. Must not move // these imports to module scope — see the header comment for the failure // mode (archon compiled binary crashes at startup when Pi's config.js @@ -125,7 +147,6 @@ export class PiProvider implements IAgentProvider { // destructured PascalCase bindings trip eslint's naming-convention rule. const [ piCodingAgent, - piAi, { bridgeSession }, { resolvePiSkills, resolvePiThinkingLevel, resolvePiTools }, { createNoopResourceLoader }, @@ -133,7 +154,6 @@ export class PiProvider implements IAgentProvider { { createArchonUIBridge, createArchonUIContext }, ] = await Promise.all([ import('@mariozechner/pi-coding-agent'), - import('@mariozechner/pi-ai'), import('./event-bridge'), import('./options-translator'), import('./resource-loader'), @@ -178,39 +198,74 @@ export class PiProvider implements IAgentProvider { ); } - // 2. Look up the Model via Pi's static catalog. `lookupPiModel` returns - // undefined when not found; we guard explicitly below. - // Cast to the runtime-string-friendly shape — see `lookupPiModel`'s docblock. - const model = lookupPiModel(piAi.getModel as GetModelFn, parsed.provider, parsed.modelId); + // 2. Build AuthStorage + ModelRegistry. Both `create()` calls read from + // disk: AuthStorage reads ~/.pi/agent/auth.json (or + // $PI_CODING_AGENT_DIR/auth.json), and ModelRegistry reads + // ~/.pi/agent/models.json — the user's per-host config including + // custom models for local providers (LM Studio, ollama, llamacpp, + // custom OpenAI-compatible endpoints). Reads are synchronous and + // happen on every sendQuery; we don't cache because the user can + // edit either file between calls and expects pickup without restart + // (Pi's `/login` flow rewrites auth.json under a file lock). 
+ // ModelRegistry captures any models.json load/parse error in its + // internal loadError rather than throwing — surfaced below if the + // requested model is then not found. + let authStorage: ReturnType; + let modelRegistry: ReturnType; + try { + authStorage = piCodingAgent.AuthStorage.create(); + modelRegistry = piCodingAgent.ModelRegistry.create(authStorage); + } catch (err) { + const e = err as Error; + getLog().error({ err: e, piProvider: parsed.provider }, 'pi.auth_storage_init_failed'); + throw new Error( + `Pi auth storage init failed: ${e.message}. Check that ~/.pi/agent/auth.json ` + + '(or $PI_CODING_AGENT_DIR/auth.json) is valid JSON and readable.' + ); + } + + // 3. Look up the model. find() returns undefined when not found; if + // models.json itself failed to load (e.g. a custom provider entry + // missing baseUrl/apiKey), surface the load error so users debugging + // custom-provider configs see the actual reason. + const model = modelRegistry.find(parsed.provider, parsed.modelId); if (!model) { + const loadError = modelRegistry.getError?.(); + const loadErrorHint = loadError + ? ` ~/.pi/agent/models.json failed to load: ${loadError}` + : ''; + getLog().error( + { + piProvider: parsed.provider, + modelId: parsed.modelId, + loadError: loadError ?? null, + }, + 'pi.model_not_found' + ); throw new Error( - `Pi model not found: provider='${parsed.provider}' model='${parsed.modelId}'. ` + + `Pi model not found: provider='${parsed.provider}' model='${parsed.modelId}'.${loadErrorHint} ` + 'See https://github.com/badlogic/pi-mono/blob/main/packages/ai/src/models.generated.ts for the Pi model catalog.' ); } - // 3. Build AuthStorage. 
`AuthStorage.create()` reads ~/.pi/agent/auth.json - // (or $PI_CODING_AGENT_DIR/auth.json), so any credential the user has - // populated via `pi` → `/login` (OAuth subscriptions: Claude Pro/Max, - // ChatGPT Plus, GitHub Copilot, Gemini CLI, Antigravity) or by editing - // the file directly (api_key entries) is picked up transparently. - // - // Per-request env vars override the file via setRuntimeApiKey — this - // mirrors Claude's process-env + request-env merge pattern and - // ensures codebase-scoped env vars (from .archon/config.yaml `env:`) - // win over the user's global Pi login. + // 4. Resolve credentials. authStorage already loaded ~/.pi/agent/auth.json + // so any creds populated via `pi` → `/login` (OAuth subscriptions: + // Claude Pro/Max, ChatGPT Plus, GitHub Copilot, Gemini CLI, + // Antigravity) or by hand-edited api_key entries are picked up + // transparently. Per-request env vars override via setRuntimeApiKey — + // mirrors Claude's process-env + request-env merge so codebase-scoped + // env vars (.archon/config.yaml `env:`) win over the user's global + // Pi login. // // Pi's internal resolution order: // 1. runtime override (our setRuntimeApiKey below) // 2. auth.json api_key entry // 3. auth.json oauth entry (auto-refreshes expired tokens) - // 4. env var fallback (Pi's getEnvApiKey, e.g. ANTHROPIC_API_KEY) + // 4. env var fallback (Pi's getEnvApiKey, e.g. ANTHROPIC_API_KEY) // // OAuth refresh note: Pi refreshes expired access tokens against the // provider's OAuth server and rewrites ~/.pi/agent/auth.json under a // file lock (same mechanism pi CLI uses — safe for concurrent access). - const authStorage = piCodingAgent.AuthStorage.create(); - const envVarName = PI_PROVIDER_ENV_VARS[parsed.provider]; const envOverride = envVarName ? (requestOptions?.env?.[envVarName] ?? 
process.env[envVarName]) @@ -219,16 +274,28 @@ export class PiProvider implements IAgentProvider { authStorage.setRuntimeApiKey(parsed.provider, envOverride); } - // Fail-fast: resolve creds synchronously before spinning up a session. - // Matches Claude's auth-error fast-fail pattern (no retry on auth failures). const resolvedKey = await authStorage.getApiKey(parsed.provider); if (!resolvedKey) { - const envHint = envVarName - ? `Set ${envVarName} in the environment or codebase env vars (.archon/config.yaml env: section).` - : `Provider '${parsed.provider}' is not in the Archon adapter's env-var table — file an issue if you want a shortcut env var for it.`; - const loginHint = `Or run \`pi\` and type \`/login\` locally to authenticate '${parsed.provider}' via OAuth; credentials land in ~/.pi/agent/auth.json and are picked up automatically.`; - throw new Error( - `Pi auth: no credentials for provider '${parsed.provider}'. ${envHint} ${loginHint}` + if (envVarName) { + const envHint = `Set ${envVarName} in the environment or codebase env vars (.archon/config.yaml env: section).`; + const loginHint = `Or run \`pi\` and type \`/login\` locally to authenticate '${parsed.provider}' via OAuth; credentials land in ~/.pi/agent/auth.json and are picked up automatically.`; + throw new Error( + `Pi auth: no credentials for provider '${parsed.provider}'. ${envHint} ${loginHint}` + ); + } + + // Unmapped providers (LM Studio, ollama, llamacpp, custom + // OpenAI-compatible endpoints) often don't need credentials at all — + // log + continue rather than failing fast so local models work without + // ceremony. If the SDK call later fails for a provider that *does* + // need creds, the auth_missing breadcrumb is searchable in the log. 
+ getLog().info( + { + piProvider: parsed.provider, + envHint: `Provider '${parsed.provider}' is not in the Archon adapter's env-var table — file an issue if you want a shortcut env var for it.`, + loginHint: `Or run \`pi\` and type \`/login\` locally to authenticate '${parsed.provider}' via OAuth; credentials land in ~/.pi/agent/auth.json and are picked up automatically.`, + }, + 'pi.auth_missing' ); } @@ -294,13 +361,11 @@ export class PiProvider implements IAgentProvider { }; } - // ModelRegistry + settings stay in-memory — only sessions persist, to - // match Claude/Codex. Resource loader still suppresses filesystem - // discovery by default, except for explicitly-passed skill paths and — - // when piConfig.enableExtensions is true — Pi's community extension - // ecosystem (tools + lifecycle hooks from ~/.pi/agent/extensions/ and - // packages installed via `pi install npm:`). - const modelRegistry = piCodingAgent.ModelRegistry.inMemory(authStorage); + // Settings stay in-memory — only sessions persist, to match Claude/Codex. + // Resource loader still suppresses filesystem except for explicitly-passed + // skill paths and — when piConfig.enableExtensions is true — Pi's community + // extension ecosystem (tools + lifecycle hooks from ~/.pi/agent/extensions/ + // and packages installed via `pi install npm:`). const settingsManager = piCodingAgent.SettingsManager.inMemory(); // Default ON: extensions (community packages like @plannotator/pi-extension // or your own local ones) are a core reason users run Pi. 
Opt out with diff --git a/packages/providers/src/community/pi/registration.ts b/packages/providers/src/community/pi/registration.ts index 01c9e5ea0f..dd8447fd5d 100644 --- a/packages/providers/src/community/pi/registration.ts +++ b/packages/providers/src/community/pi/registration.ts @@ -1,7 +1,6 @@ import { isRegisteredProvider, registerProvider } from '../../registry'; import { PI_CAPABILITIES } from './capabilities'; -import { isPiModelCompatible } from './model-ref'; import { PiProvider } from './provider'; /** @@ -20,7 +19,6 @@ export function registerPiProvider(): void { displayName: 'Pi (community)', factory: () => new PiProvider(), capabilities: PI_CAPABILITIES, - isModelCompatible: isPiModelCompatible, builtIn: false, }); } diff --git a/packages/providers/src/registry.test.ts b/packages/providers/src/registry.test.ts index 64b879a91c..ee3e04ee04 100644 --- a/packages/providers/src/registry.test.ts +++ b/packages/providers/src/registry.test.ts @@ -49,7 +49,6 @@ function makeMockRegistration( displayName: `Mock ${id}`, factory: () => makeMockProvider(id), capabilities: makeMockProvider(id).getCapabilities(), - isModelCompatible: () => true, builtIn: false, ...overrides, }; @@ -183,7 +182,6 @@ describe('registry', () => { expect(reg.displayName).toBe('Claude (Anthropic)'); expect(reg.builtIn).toBe(true); expect(typeof reg.factory).toBe('function'); - expect(typeof reg.isModelCompatible).toBe('function'); }); test('throws for unknown provider', () => { @@ -251,27 +249,6 @@ describe('registry', () => { }); }); - describe('built-in model compatibility', () => { - test('Claude registration matches Claude model patterns', () => { - const reg = getRegistration('claude'); - expect(reg.isModelCompatible('sonnet')).toBe(true); - expect(reg.isModelCompatible('opus')).toBe(true); - expect(reg.isModelCompatible('haiku')).toBe(true); - expect(reg.isModelCompatible('inherit')).toBe(true); - expect(reg.isModelCompatible('claude-3.5-sonnet')).toBe(true); - 
expect(reg.isModelCompatible('gpt-4')).toBe(false); - }); - - test('Codex registration rejects Claude model patterns', () => { - const reg = getRegistration('codex'); - expect(reg.isModelCompatible('sonnet')).toBe(false); - expect(reg.isModelCompatible('claude-3.5-sonnet')).toBe(false); - expect(reg.isModelCompatible('inherit')).toBe(false); - expect(reg.isModelCompatible('gpt-4')).toBe(true); - expect(reg.isModelCompatible('o3-mini')).toBe(true); - }); - }); - describe('registerCommunityProviders (aggregator)', () => { test('registers all bundled community providers', () => { registerCommunityProviders(); @@ -325,17 +302,6 @@ describe('registry', () => { expect(caps.sandbox).toBe(false); }); - test('isModelCompatible accepts provider/model refs, rejects aliases', () => { - registerPiProvider(); - const reg = getRegistration('pi'); - expect(reg.isModelCompatible('google/gemini-2.5-pro')).toBe(true); - expect(reg.isModelCompatible('anthropic/claude-opus-4-5')).toBe(true); - expect(reg.isModelCompatible('openrouter/qwen/qwen3-coder')).toBe(true); - expect(reg.isModelCompatible('sonnet')).toBe(false); - expect(reg.isModelCompatible('claude-3.5-sonnet')).toBe(false); - expect(reg.isModelCompatible('')).toBe(false); - }); - test('appears in getProviderInfoList with builtIn: false', () => { registerPiProvider(); const info = getProviderInfoList().find(p => p.id === 'pi'); diff --git a/packages/providers/src/registry.ts b/packages/providers/src/registry.ts index 1ae16759dc..7006ab4961 100644 --- a/packages/providers/src/registry.ts +++ b/packages/providers/src/registry.ts @@ -83,7 +83,7 @@ export function getRegisteredProviders(): ProviderRegistration[] { } /** - * Get API-safe provider info (excludes factory and isModelCompatible). + * Get API-safe provider info (excludes the factory). 
*/ export function getProviderInfoList(): ProviderInfo[] { return getRegisteredProviders().map(({ id, displayName, capabilities, builtIn }) => ({ @@ -112,10 +112,6 @@ export function registerBuiltinProviders(): void { displayName: 'Claude (Anthropic)', factory: () => new ClaudeProvider(), capabilities: CLAUDE_CAPABILITIES, - isModelCompatible: (model: string): boolean => { - const aliases = ['sonnet', 'opus', 'haiku']; - return aliases.includes(model) || model.startsWith('claude-') || model === 'inherit'; - }, builtIn: true, }, { @@ -123,12 +119,6 @@ export function registerBuiltinProviders(): void { displayName: 'Codex (OpenAI)', factory: () => new CodexProvider(), capabilities: CODEX_CAPABILITIES, - isModelCompatible: (model: string): boolean => { - const claudeAliases = ['sonnet', 'opus', 'haiku']; - return ( - !claudeAliases.includes(model) && !model.startsWith('claude-') && model !== 'inherit' - ); - }, builtIn: true, }, ]; diff --git a/packages/providers/src/types.ts b/packages/providers/src/types.ts index d6cb8b4a87..fe47eff6c4 100644 --- a/packages/providers/src/types.ts +++ b/packages/providers/src/types.ts @@ -265,13 +265,6 @@ export interface ProviderRegistration { /** Static capability declaration — used for dag-executor warnings */ capabilities: ProviderCapabilities; - /** - * Model compatibility check. Returns true if the model string - * is valid for this provider. Used by workflow validation and - * provider inference from model names. 
- */ - isModelCompatible: (model: string) => boolean; - /** Whether this is a built-in (maintained by core team) or community provider */ builtIn: boolean; } diff --git a/packages/server/src/index.ts b/packages/server/src/index.ts index c1c76cf549..ee14cfef5b 100644 --- a/packages/server/src/index.ts +++ b/packages/server/src/index.ts @@ -385,8 +385,24 @@ export async function startServer(opts: ServerOptions = {}): Promise { .catch(createMessageErrorHandler('Discord', discordAdapter, conversationId)); }); - await discord.start(); - activePlatforms.push('Discord'); + // Don't let a Discord login failure (bad token, missing privileged + // intents, etc.) bring down the whole server — users running + // `archon serve` for the web UI shouldn't lose it because of an + // unrelated bot misconfiguration. See #1365. + try { + await discord.start(); + activePlatforms.push('Discord'); + } catch (error) { + const err = error as Error; + const isPrivilegedIntentError = err.message?.includes('disallowed intents'); + const hint = isPrivilegedIntentError + ? 'Enable "Message Content Intent" in the Discord Developer Portal ' + + '(your application > Bot > Privileged Gateway Intents) and restart, ' + + 'or unset DISCORD_BOT_TOKEN if you do not want the Discord adapter.' + : 'Verify DISCORD_BOT_TOKEN is valid, or unset it to disable the Discord adapter.'; + getLog().error({ err, hint }, 'discord.start_failed_continuing_without_adapter'); + discord = null; + } } else { getLog().info('discord_adapter_skipped'); } diff --git a/packages/web/src/components/workflows/WorkflowCard.tsx b/packages/web/src/components/workflows/WorkflowCard.tsx index 10ed0cd23e..b2a6fc8218 100644 --- a/packages/web/src/components/workflows/WorkflowCard.tsx +++ b/packages/web/src/components/workflows/WorkflowCard.tsx @@ -55,7 +55,7 @@ export function WorkflowCard({ const parsed = parseWorkflowDescription(workflow.description ?? 
''); const displayName = getWorkflowDisplayName(workflow.name); const category = getWorkflowCategory(workflow.name, workflow.description ?? ''); - const tags = getWorkflowTags(workflow.name, parsed); + const tags = getWorkflowTags(workflow.name, parsed, workflow.tags); const iconName = getWorkflowIconName(workflow.name, category); const CARD_ICON = ICON_MAP[iconName]; diff --git a/packages/web/src/lib/api.generated.d.ts b/packages/web/src/lib/api.generated.d.ts index 68b4d0a02f..2abcd56361 100644 --- a/packages/web/src/lib/api.generated.d.ts +++ b/packages/web/src/lib/api.generated.d.ts @@ -2345,6 +2345,10 @@ export interface components { args?: string[]; }; }; + worktree?: { + enabled?: boolean; + }; + tags?: string[]; nodes: components['schemas']['DagNode'][]; }; /** @enum {string} */ @@ -2561,6 +2565,7 @@ export interface components { runningWorkflows: number; version?: string; is_docker: boolean; + activePlatforms?: string[]; }; UpdateCheckResponse: { updateAvailable: boolean; diff --git a/packages/web/src/lib/workflow-metadata.test.ts b/packages/web/src/lib/workflow-metadata.test.ts index 18af743267..87fd8bb2c9 100644 --- a/packages/web/src/lib/workflow-metadata.test.ts +++ b/packages/web/src/lib/workflow-metadata.test.ts @@ -200,6 +200,31 @@ describe('getWorkflowTags', () => { const githubCount = tags.filter(t => t === 'GitHub').length; expect(githubCount).toBeLessThanOrEqual(1); }); + + test('uses explicit tags when provided', () => { + const parsed = parseWorkflowDescription('A GitLab workflow'); + const tags = getWorkflowTags('review-gitlab-mr', parsed, ['GitLab', 'Review']); + expect(tags).toEqual(['GitLab', 'Review']); + }); + + test('falls back to inference when no explicit tags', () => { + const parsed = parseWorkflowDescription('Does: review PR on GitHub'); + const tags = getWorkflowTags('archon-pr-review', parsed, undefined); + expect(tags).toContain('GitHub'); + expect(tags).toContain('Review'); + }); + + test('deduplicates explicit tags', () => { + 
const parsed = parseWorkflowDescription('anything'); + const tags = getWorkflowTags('test', parsed, ['GitLab', 'GitLab', 'Review']); + expect(tags).toEqual(['GitLab', 'Review']); + }); + + test('explicit empty array suppresses inference', () => { + const parsed = parseWorkflowDescription('Does: review PR on GitHub'); + const tags = getWorkflowTags('archon-pr-review', parsed, []); + expect(tags).toEqual([]); + }); }); describe('getWorkflowIconName', () => { diff --git a/packages/web/src/lib/workflow-metadata.ts b/packages/web/src/lib/workflow-metadata.ts index e3ab01191d..14ccb43e3e 100644 --- a/packages/web/src/lib/workflow-metadata.ts +++ b/packages/web/src/lib/workflow-metadata.ts @@ -163,8 +163,18 @@ export function getWorkflowCategory(name: string, description: string): Workflow /** * Derive tags from the workflow name and parsed description. + * If `explicitTags` is provided (including an empty array), those are used + * verbatim (deduplicated) and inference is skipped. */ -export function getWorkflowTags(name: string, parsed: ParsedDescription): string[] { +export function getWorkflowTags( + name: string, + parsed: ParsedDescription, + explicitTags?: string[] +): string[] { + if (explicitTags !== undefined) { + return [...new Set(explicitTags)]; + } + const tags: string[] = []; const text = `${name} ${parsed.raw}`.toLowerCase(); diff --git a/packages/workflows/src/dag-executor.test.ts b/packages/workflows/src/dag-executor.test.ts index b4717e9565..5d1210aa82 100644 --- a/packages/workflows/src/dag-executor.test.ts +++ b/packages/workflows/src/dag-executor.test.ts @@ -1233,14 +1233,21 @@ describe('executeDagWorkflow -- bash nodes', () => { { ...minimalConfig, envVars: { MY_SECRET: 'abc123' } } ); - expect(execSpy).toHaveBeenCalledWith( - 'bash', - ['-c', 'echo ok'], - expect.objectContaining({ - env: expect.objectContaining({ MY_SECRET: 'abc123' }), - }) - ); - execSpy.mockRestore(); + // Expected bash command is platform-aware: `bash` on Linux/macOS, absolute 
+ // Git Bash path on Windows (per resolveBashPath() — coleam00/Archon#1326). + // Wrap the assertion + mockRestore in try/finally so the spy doesn't leak + // into subsequent tests if the assertion fails. + try { + expect(execSpy).toHaveBeenCalledWith( + git.resolveBashPath(), + ['-c', 'echo ok'], + expect.objectContaining({ + env: expect.objectContaining({ MY_SECRET: 'abc123' }), + }) + ); + } finally { + execSpy.mockRestore(); + } }); it('bash node output with shell metacharacters does not inject into downstream bash script', async () => { @@ -3140,6 +3147,266 @@ describe('executeDagWorkflow -- resume with priorCompletedNodes', () => { expect(mockSendQueryDag.mock.calls.length).toBe(3); }); + it('substitutes $LOOP_PREV_OUTPUT with previous iteration output (empty on iter 1)', async () => { + // Iteration 1 emits a distinctive output, iteration 2 emits the completion signal. + // We then assert the prompt sent to the AI: iteration 1 strips $LOOP_PREV_OUTPUT + // to empty, iteration 2 receives iteration 1's cleaned output. + let callCount = 0; + mockSendQueryDag.mockImplementation(function* () { + callCount++; + if (callCount === 1) { + yield { type: 'assistant', content: 'Iter1 output: 2 type errors in users.ts' }; + yield { type: 'result', sessionId: 'loop-session-1' }; + } else { + yield { type: 'assistant', content: 'All fixed. COMPLETE' }; + yield { type: 'result', sessionId: 'loop-session-2' }; + } + }); + + const mockDeps = createMockDeps(); + const platform = createMockPlatform(); + const workflowRun = makeWorkflowRun(); + + await executeDagWorkflow( + mockDeps, + platform, + 'conv-dag', + testDir, + { + name: 'dag-loop-prev-output', + nodes: [ + { + id: 'fix-loop', + loop: { + prompt: 'Previous output: <<$LOOP_PREV_OUTPUT>>. 
Fix and emit COMPLETE.', + until: 'COMPLETE', + max_iterations: 5, + fresh_context: true, + }, + }, + ], + }, + workflowRun, + 'claude', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + minimalConfig + ); + + expect(mockSendQueryDag.mock.calls.length).toBe(2); + const promptIter1 = mockSendQueryDag.mock.calls[0][0] as string; + const promptIter2 = mockSendQueryDag.mock.calls[1][0] as string; + // Iteration 1: $LOOP_PREV_OUTPUT substitutes to empty string. + expect(promptIter1).toContain('Previous output: <<>>.'); + // Iteration 2: receives iteration 1's cleaned output. + expect(promptIter2).toContain( + 'Previous output: <>.' + ); + }); + + it('strips tags from $LOOP_PREV_OUTPUT (uses cleaned output)', async () => { + let callCount = 0; + mockSendQueryDag.mockImplementation(function* () { + callCount++; + if (callCount === 1) { + // Iteration 1 includes a non-completion XML tag in its output. The cleaned + // output (after stripCompletionTags) drops ... blocks. + // We use a non-matching signal here so iteration 1 does NOT complete. + yield { + type: 'assistant', + content: 'Real work output. NOT_DONE_YET', + }; + yield { type: 'result', sessionId: 'loop-session-1' }; + } else { + yield { type: 'assistant', content: 'Done. 
COMPLETE' }; + yield { type: 'result', sessionId: 'loop-session-2' }; + } + }); + + const mockDeps = createMockDeps(); + const platform = createMockPlatform(); + const workflowRun = makeWorkflowRun(); + + await executeDagWorkflow( + mockDeps, + platform, + 'conv-dag', + testDir, + { + name: 'dag-loop-prev-clean', + nodes: [ + { + id: 'fix-loop', + loop: { + prompt: 'PREV=[$LOOP_PREV_OUTPUT]', + until: 'COMPLETE', + max_iterations: 5, + fresh_context: true, + }, + }, + ], + }, + workflowRun, + 'claude', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + minimalConfig + ); + + expect(mockSendQueryDag.mock.calls.length).toBe(2); + const promptIter2 = mockSendQueryDag.mock.calls[1][0] as string; + // The previous-output payload must be the *cleaned* output — no tags. + expect(promptIter2).toContain('PREV=[Real work output.'); + expect(promptIter2).not.toContain(''); + }); + + it('$LOOP_PREV_OUTPUT is empty on the first iteration after interactive resume', async () => { + // Regression guard for the resume-from-approval path: when an interactive + // loop pauses at the approval gate, the prior `lastIterationOutput` lives + // in a separate process and is not persisted. On resume, the executor must + // substitute $LOOP_PREV_OUTPUT to '' on the first resumed iteration — + // never to whatever the paused run produced. + // + // Wirasm-suggested shape (PR #1367 review): two executeDagWorkflow calls. + // The first call pauses at the gate after iteration 1; the second call + // resumes with metadata.approval populated and runs iteration 2. 
+ + // ---- Call 1: fresh run, iteration 1 emits no completion → pauses at gate + mockSendQueryDag.mockImplementationOnce(function* () { + yield { type: 'assistant', content: 'Iter1 output: 2 type errors in users.ts' }; + yield { type: 'result', sessionId: 'loop-session-1' }; + }); + const mockDeps1 = createMockDeps(); + const platform1 = createMockPlatform(); + const freshRun = makeWorkflowRun('resume-prev-fresh-run'); + + await executeDagWorkflow( + mockDeps1, + platform1, + 'conv-dag', + testDir, + { + name: 'interactive-loop-resume-prev-output', + nodes: [ + { + id: 'refine', + loop: { + prompt: + 'User: $LOOP_USER_INPUT. PREV=<<$LOOP_PREV_OUTPUT>>. Continue or emit COMPLETE.', + until: 'COMPLETE', + max_iterations: 10, + interactive: true, + gate_message: 'Review and provide feedback.', + }, + }, + ], + }, + freshRun, + 'claude', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + minimalConfig + ); + + // First iteration of a fresh interactive loop: $LOOP_PREV_OUTPUT empty; + // $LOOP_USER_INPUT empty (no user has spoken yet). + expect(mockSendQueryDag.mock.calls.length).toBe(1); + const promptIter1 = mockSendQueryDag.mock.calls[0][0] as string; + expect(promptIter1).toContain('PREV=<<>>.'); + expect(promptIter1).toContain('User: .'); + // Fresh interactive loop must pause at the gate, not return early. + const pauseCalls1 = ( + mockDeps1.store.pauseWorkflowRun as Mock< + (id: string, ctx: Record) => Promise + > + ).mock.calls; + expect(pauseCalls1.length).toBe(1); + expect(pauseCalls1[0][1]).toMatchObject({ + type: 'interactive_loop', + nodeId: 'refine', + iteration: 1, + }); + + // ---- Call 2: resumed run — metadata carries iter 1 + user input. + // iter 2 emits the completion signal so the loop exits cleanly. + mockSendQueryDag.mockImplementationOnce(function* () { + yield { type: 'assistant', content: 'All clear. 
COMPLETE' }; + yield { type: 'result', sessionId: 'loop-session-2' }; + }); + const mockDeps2 = createMockDeps(); + const platform2 = createMockPlatform(); + const resumedRun = makeWorkflowRun('resume-prev-resume-run', { + metadata: { + approval: { + type: 'interactive_loop', + nodeId: 'refine', + iteration: 1, + sessionId: 'loop-session-1', + message: 'Review and provide feedback.', + }, + loop_user_input: 'looks good, ship it', + }, + }); + + await executeDagWorkflow( + mockDeps2, + platform2, + 'conv-dag', + testDir, + { + name: 'interactive-loop-resume-prev-output', + nodes: [ + { + id: 'refine', + loop: { + prompt: + 'User: $LOOP_USER_INPUT. PREV=<<$LOOP_PREV_OUTPUT>>. Continue or emit COMPLETE.', + until: 'COMPLETE', + max_iterations: 10, + interactive: true, + gate_message: 'Review and provide feedback.', + }, + }, + ], + }, + resumedRun, + 'claude', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + minimalConfig + ); + + // Second executeDagWorkflow call started a fresh sendQuery generator (mock + // call index 1 across the two runs). The resumed iteration must NOT carry + // the prior process's iter-1 output through $LOOP_PREV_OUTPUT — it must + // substitute to ''. + expect(mockSendQueryDag.mock.calls.length).toBe(2); + const promptResumeIter = mockSendQueryDag.mock.calls[1][0] as string; + expect(promptResumeIter).toContain('PREV=<<>>.'); + expect(promptResumeIter).not.toContain('Iter1 output: 2 type errors'); + // The resume's user input flows through on the first resumed iteration. + expect(promptResumeIter).toContain('User: looks good, ship it.'); + // Resume call exits via completion, not via a second pause at the gate. 
+ const pauseCalls2 = ( + mockDeps2.store.pauseWorkflowRun as Mock< + (id: string, ctx: Record) => Promise + > + ).mock.calls; + expect(pauseCalls2.length).toBe(0); + }); + it('fails when max_iterations exceeded', async () => { mockSendQueryDag.mockImplementation(function* () { yield { type: 'assistant', content: 'Still working...' }; @@ -4223,17 +4490,21 @@ describe('executeDagWorkflow -- terminal node output selection', () => { expect(result).toBe('Final summary text'); }); - it('returns undefined when the single terminal node produces no output', async () => { + it('fails node when the AI stream closes with no assistant output', async () => { + // Empty assistant output on AI nodes (`command:`/`prompt:`) typically + // indicates a silent provider rejection or stream interruption that + // didn't yield a result.isError chunk. Treat it as a node failure + // rather than a successful empty completion. mockSendQueryDag.mockImplementation(async function* () { - // No assistant content — empty output yield { type: 'result', sessionId: 'sess-empty' }; }); - const mockDeps = createMockDeps(); + const store = createMockStore(); + const mockDeps = createMockDeps(store); const platform = createMockPlatform(); const workflowRun = makeWorkflowRun(); - const result = await executeDagWorkflow( + await executeDagWorkflow( mockDeps, platform, 'conv-dag', @@ -4249,7 +4520,120 @@ describe('executeDagWorkflow -- terminal node output selection', () => { minimalConfig ); - expect(result).toBeUndefined(); + const eventCalls = (store.createWorkflowEvent as ReturnType).mock.calls; + const nodeFailedEvents = eventCalls.filter( + (call: unknown[]) => (call[0] as Record).event_type === 'node_failed' + ); + expect(nodeFailedEvents.length).toBeGreaterThan(0); + const failedData = (nodeFailedEvents[0][0] as Record).data as Record< + string, + unknown + >; + expect(failedData.error).toContain('produced no assistant output'); + // Workflow-level failure must propagate, not just the node event. 
+ expect(store.failWorkflowRun).toHaveBeenCalled(); + }); + + it('does NOT fail node when stream yields no assistant text but a structuredOutput is present', async () => { + // Output-format nodes legitimately produce zero free-form text — the + // useful payload is the structuredOutput field. The empty-output guard + // must spare them. + mockSendQueryDag.mockImplementation(async function* () { + yield { + type: 'result', + sessionId: 'sess-structured', + structuredOutput: { category: 'math' }, + }; + }); + + const store = createMockStore(); + const mockDeps = createMockDeps(store); + const platform = createMockPlatform(); + const workflowRun = makeWorkflowRun(); + + await executeDagWorkflow( + mockDeps, + platform, + 'conv-dag', + testDir, + { + name: 'structured-only-dag', + nodes: [ + { + id: 'classify', + prompt: 'Classify this', + output_format: { type: 'object', properties: {} }, + }, + ], + }, + workflowRun, + 'claude', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + minimalConfig + ); + + const eventCalls = (store.createWorkflowEvent as ReturnType).mock.calls; + const nodeFailedEvents = eventCalls.filter( + (call: unknown[]) => (call[0] as Record).event_type === 'node_failed' + ); + expect(nodeFailedEvents.length).toBe(0); + const nodeCompletedEvents = eventCalls.filter( + (call: unknown[]) => (call[0] as Record).event_type === 'node_completed' + ); + expect(nodeCompletedEvents.length).toBeGreaterThan(0); + }); + + it('fails the run when a node specifies an unknown provider (defense-in-depth at execution time)', async () => { + // Loader-time validation also catches this (loader.ts iterates dagNodes + // after parsing), but the dag-executor's resolveNodeProviderAndModel + // throws as defense-in-depth in case a code path bypasses the loader. 
+ const store = createMockStore(); + const mockDeps = createMockDeps(store); + const platform = createMockPlatform(); + const workflowRun = makeWorkflowRun(); + + await executeDagWorkflow( + mockDeps, + platform, + 'conv-dag', + testDir, + { + name: 'unknown-provider-dag', + nodes: [ + { + id: 'bad', + command: 'my-cmd', + provider: 'claud', // typo + }, + ], + }, + workflowRun, + 'claude', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + minimalConfig + ); + + expect(store.failWorkflowRun).toHaveBeenCalled(); + // The "unknown provider" detail surfaces on the node_failed event; the + // workflow-level fail message is a generic "no successful nodes" summary. + const eventCalls = (store.createWorkflowEvent as ReturnType).mock.calls; + const nodeFailedEvents = eventCalls.filter( + (call: unknown[]) => (call[0] as Record).event_type === 'node_failed' + ); + expect(nodeFailedEvents.length).toBeGreaterThan(0); + const nodeFailedData = (nodeFailedEvents[0][0] as Record).data as Record< + string, + unknown + >; + expect(nodeFailedData.error).toContain("unknown provider 'claud'"); }); it('excludes intermediate nodes with dependents from terminal set (fan-in DAG)', async () => { @@ -4674,6 +5058,76 @@ describe('executeDagWorkflow -- approval node', () => { expect(pauseCalls.length).toBe(1); }); + it('on_reject does not write node_completed for the approval gate node ID', async () => { + mockSendQueryDag.mockImplementation(function* () { + yield { type: 'assistant', content: 'Fixed based on feedback' }; + yield { type: 'result', sessionId: 'reject-no-poison-session' }; + }); + + const store = createMockStore(); + const mockDeps = createMockDeps(store); + const platform = createMockPlatform(); + + const workflowRun = makeWorkflowRun('reject-no-poison-run', { + metadata: { + approval: { + type: 'approval', + nodeId: 'review', + message: 'Approve this plan?', + onRejectPrompt: 'Fix based on: $REJECTION_REASON', + onRejectMaxAttempts: 3, 
+ }, + rejection_reason: 'Missing edge case handling', + rejection_count: 1, + }, + }); + + await executeDagWorkflow( + mockDeps, + platform, + 'conv-approval', + testDir, + { + name: 'approval-no-poison', + nodes: [ + { + id: 'review', + approval: { + message: 'Approve this plan?', + on_reject: { prompt: 'Fix based on: $REJECTION_REASON', max_attempts: 3 }, + }, + }, + ], + }, + workflowRun, + 'claude', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + minimalConfig + ); + + // The on_reject synthetic node must NOT produce a node_completed event with + // step_name equal to the approval gate's own ID ('review'). If it did, a + // subsequent resume would find the event via getCompletedDagNodeOutputs and + // skip the approval gate entirely, bypassing the human gate. + const eventCalls = (store.createWorkflowEvent as ReturnType).mock.calls; + const nodeCompletedEvents = eventCalls.filter( + (call: unknown[]) => (call[0] as Record).event_type === 'node_completed' + ); + const completedStepNames = nodeCompletedEvents.map( + (call: unknown[]) => (call[0] as Record).step_name + ); + expect(completedStepNames).not.toContain('review'); + + // The synthetic on_reject node MUST produce a node_completed event with the + // distinct ID 'review:on_reject'. This ensures the synthetic node itself is + // recorded as completed so it is not re-run on a subsequent resume. 
+ expect(completedStepNames.filter((n: unknown) => n === 'review:on_reject').length).toBe(1); + }); + it('on_reject cancels when max_attempts exhausted', async () => { const store = createMockStore(); const mockDeps = createMockDeps(store); @@ -4787,6 +5241,112 @@ describe('executeDagWorkflow -- approval node', () => { 1 ); }); + + it('approval message substitutes $nodeId.output.field references from upstream structured output', async () => { + // Repro for: approval gates were rendering literal "$gather-context.output.repo_name" + // instead of resolved values, breaking interactive workflows like atlas-onboard. + // Parity: prompt/bash/loop/cancel nodes already get substituteNodeOutputRefs; + // approval.message must too so the human sees concrete values. + const structuredJson = { + repo_name: 'hcr-els', + app_code: 'CCELS', + frontend_port: 3012, + }; + + const commandsDir = join(testDir, '.archon', 'commands'); + await mkdir(commandsDir, { recursive: true }); + await writeFile(join(commandsDir, 'gather-context.md'), 'Gather context: $USER_MESSAGE'); + + mockSendQueryDag.mockImplementation(function* () { + yield { type: 'assistant', content: JSON.stringify(structuredJson) }; + yield { type: 'result', sessionId: 'sid-approval-sub', structuredOutput: structuredJson }; + }); + + const store = createMockStore(); + const mockDeps = createMockDeps(store); + const platform = createMockPlatform(); + const workflowRun = makeWorkflowRun('approval-sub-run'); + + await executeDagWorkflow( + mockDeps, + platform, + 'conv-approval-sub', + testDir, + { + name: 'approval-sub-test', + nodes: [ + { + id: 'gather-context', + command: 'gather-context', + output_format: { + type: 'object', + properties: { + repo_name: { type: 'string' }, + app_code: { type: 'string' }, + frontend_port: { type: 'number' }, + }, + }, + }, + { + id: 'confirm', + depends_on: ['gather-context'], + approval: { + message: + 'Repo: $gather-context.output.repo_name | App: $gather-context.output.app_code | 
Port: $gather-context.output.frontend_port', + }, + }, + ], + }, + workflowRun, + 'claude', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + minimalConfig + ); + + // gather-context AI call ran once; approval node does NOT call AI + expect(mockSendQueryDag.mock.calls.length).toBe(1); + + // pauseWorkflowRun should receive the SUBSTITUTED message, not the literal placeholders + const pauseCalls = ( + store.pauseWorkflowRun as Mock<(id: string, ctx: Record) => Promise> + ).mock.calls; + expect(pauseCalls.length).toBe(1); + expect(pauseCalls[0][1]).toMatchObject({ + type: 'approval', + nodeId: 'confirm', + message: 'Repo: hcr-els | App: CCELS | Port: 3012', + }); + + // The fix touches FOUR emission sites (safeSendMessage / createWorkflowEvent / + // pauseWorkflowRun / event-emitter). Assert the other two reachable surfaces too — + // a future regression at any one of them would otherwise pass this test silently. + // (Per CodeRabbit review of PR coleam00/Archon#1426.) + + // (a) The chat-surface prompt emitted via platform.sendMessage must contain the + // substituted message and must NOT contain literal $gather-context.output refs. + const sentMessages = ( + platform.sendMessage as Mock<(...args: unknown[]) => Promise> + ).mock.calls.map((c: unknown[]) => c[1] as string); + expect(sentMessages.some(m => m.includes('Repo: hcr-els | App: CCELS | Port: 3012'))).toBe( + true + ); + expect(sentMessages.some(m => m.includes('$gather-context.output'))).toBe(false); + + // (b) The persisted approval_requested workflow event's data.message must be substituted. 
+ const approvalRequestedEvents = ( + store.createWorkflowEvent as Mock<() => Promise> + ).mock.calls.filter( + (c: unknown[]) => (c[0] as { event_type: string }).event_type === 'approval_requested' + ); + expect(approvalRequestedEvents.length).toBe(1); + expect((approvalRequestedEvents[0][0] as { data: { message: string } }).data.message).toBe( + 'Repo: hcr-els | App: CCELS | Port: 3012' + ); + }); }); describe('executeDagWorkflow -- env var injection', () => { let testDir: string; @@ -5224,6 +5784,7 @@ describe('executeDagWorkflow -- cost tracking', () => { let callCount = 0; mockSendQueryDag.mockImplementation(function* () { callCount++; + yield { type: 'assistant', content: `Step ${String(callCount)} output` }; yield { type: 'result', sessionId: `sid-${String(callCount)}`, cost: 0.001 }; }); @@ -5265,6 +5826,7 @@ describe('executeDagWorkflow -- cost tracking', () => { it('omits total_cost_usd from completeWorkflowRun when no cost yielded', async () => { mockSendQueryDag.mockImplementation(function* () { + yield { type: 'assistant', content: 'Some output' }; yield { type: 'result', sessionId: 'sid-no-cost' }; }); @@ -6079,3 +6641,161 @@ describe('shouldContinueStreamingForStatus', () => { expect(shouldContinueStreamingForStatus('invalid-status')).toBe(false); }); }); + +describe('executeDagWorkflow -- final status derivation', () => { + // Invariant: if ANY non-skipped node has failed status, the run must be + // marked 'failed' — never 'completed' — regardless of how many other nodes + // succeeded. This covers the anyFailed branch in executeDagWorkflow + // (dag-executor.ts ~line 2956), which had no direct test coverage. 
+ let testDir: string; + + beforeEach(async () => { + testDir = join( + tmpdir(), + `dag-status-test-${Date.now()}-${Math.random().toString(36).slice(2)}` + ); + await mkdir(testDir, { recursive: true }); + + mockSendQueryDag.mockClear(); + mockGetAgentProviderDag.mockClear(); + mockSendQueryDag.mockImplementation(function* () { + yield { type: 'assistant', content: 'DAG AI response' }; + yield { type: 'result', sessionId: 'dag-session-id' }; + }); + mockGetAgentProviderDag.mockImplementation(() => ({ + sendQuery: mockSendQueryDag, + getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, + })); + }); + + afterEach(async () => { + try { + await rm(testDir, { recursive: true, force: true }); + } catch { + // ignore cleanup errors + } + }); + + it('one success + one independent failure -> failWorkflowRun, not completeWorkflowRun', async () => { + const mockStore = createMockStore(); + const mockDeps = createMockDeps(mockStore); + const platform = createMockPlatform(); + const workflowRun = makeWorkflowRun('dag-status-run-1'); + + const nodes: DagNode[] = [ + { id: 'pass', bash: 'echo ok' } as BashNode, + { id: 'fail', bash: 'exit 1' } as BashNode, + ]; + + await executeDagWorkflow( + mockDeps, + platform, + 'conv-status', + testDir, + { name: 'status-test', nodes }, + workflowRun, + 'claude', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + minimalConfig + ); + + expect((mockStore.failWorkflowRun as ReturnType).mock.calls.length).toBe(1); + expect((mockStore.completeWorkflowRun as ReturnType).mock.calls.length).toBe(0); + expect(mockStore.failWorkflowRun).toHaveBeenCalledWith( + expect.anything(), + expect.stringContaining('fail') + ); + + // Confirm the failure message names the failing node + const sendMessage = platform.sendMessage as ReturnType; + const messages = sendMessage.mock.calls.map((call: unknown[]) => call[1] as string); + const failMsg = messages.find((m: string) => m.includes('completed with 
failures')); + expect(failMsg).toBeDefined(); + }); + + it('multiple successes + one failure -> failWorkflowRun, not completeWorkflowRun', async () => { + const mockStore = createMockStore(); + const mockDeps = createMockDeps(mockStore); + const platform = createMockPlatform(); + const workflowRun = makeWorkflowRun('dag-status-run-2'); + + const nodes: DagNode[] = [ + { id: 'a', bash: 'echo a' } as BashNode, + { id: 'b', bash: 'echo b' } as BashNode, + { id: 'c', bash: 'echo c' } as BashNode, + { id: 'fail', bash: 'exit 1' } as BashNode, + ]; + + await executeDagWorkflow( + mockDeps, + platform, + 'conv-status', + testDir, + { name: 'status-test-multi', nodes }, + workflowRun, + 'claude', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + minimalConfig + ); + + expect((mockStore.failWorkflowRun as ReturnType).mock.calls.length).toBe(1); + expect((mockStore.completeWorkflowRun as ReturnType).mock.calls.length).toBe(0); + expect(mockStore.failWorkflowRun).toHaveBeenCalledWith( + expect.anything(), + expect.stringContaining('fail') + ); + + const sendMessage = platform.sendMessage as ReturnType; + const messages = sendMessage.mock.calls.map((call: unknown[]) => call[1] as string); + const failMsg = messages.find((m: string) => m.includes('completed with failures')); + expect(failMsg).toBeDefined(); + }); + + it('trigger_rule: none_failed skips dependent node + anyFailed still marks run failed', async () => { + const mockStore = createMockStore(); + const mockDeps = createMockDeps(mockStore); + const platform = createMockPlatform(); + const workflowRun = makeWorkflowRun('dag-status-run-3'); + + // Layer 1: A and B run in parallel. B fails. + // Layer 2: C depends on B with trigger_rule: none_failed — so C is skipped. + // Expected: anyFailed=true (from B), so run must be marked failed even though C is only skipped. 
+ const nodes: DagNode[] = [ + { id: 'a', bash: 'echo a' } as BashNode, + { id: 'b', bash: 'exit 1' } as BashNode, + { id: 'c', bash: 'echo c', depends_on: ['b'], trigger_rule: 'none_failed' } as BashNode, + ]; + + await executeDagWorkflow( + mockDeps, + platform, + 'conv-status', + testDir, + { name: 'status-test-skip', nodes }, + workflowRun, + 'claude', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + minimalConfig + ); + + expect((mockStore.failWorkflowRun as ReturnType).mock.calls.length).toBe(1); + expect((mockStore.completeWorkflowRun as ReturnType).mock.calls.length).toBe(0); + expect(mockStore.failWorkflowRun).toHaveBeenCalledWith( + expect.anything(), + expect.stringContaining('b') + ); + }); +}); diff --git a/packages/workflows/src/dag-executor.ts b/packages/workflows/src/dag-executor.ts index 419a9066f6..1af6d0da52 100644 --- a/packages/workflows/src/dag-executor.ts +++ b/packages/workflows/src/dag-executor.ts @@ -7,7 +7,7 @@ */ import { readFile } from 'fs/promises'; import { isAbsolute, resolve as resolvePath } from 'path'; -import { execFileAsync } from '@archon/git'; +import { execFileAsync, resolveBashPath } from '@archon/git'; import { discoverScriptsForCwd } from './script-discovery'; import type { IWorkflowPlatform, @@ -21,7 +21,11 @@ import type { ProviderCapabilities, TokenUsage, } from '@archon/providers/types'; -import { getProviderCapabilities } from '@archon/providers'; +import { + getProviderCapabilities, + getRegisteredProviders, + isRegisteredProvider, +} from '@archon/providers'; import type { DagNode, ApprovalNode, @@ -49,7 +53,6 @@ import { formatToolCall } from './utils/tool-formatter'; import { createLogger } from '@archon/paths'; import { getWorkflowEventEmitter } from './event-emitter'; import { evaluateCondition } from './condition-evaluator'; -import { inferProviderFromModel, isModelCompatible } from './model-validation'; import { logNodeStart, logNodeComplete, @@ -341,7 +344,17 @@ 
async function resolveNodeProviderAndModel( model: string | undefined; options: SendQueryOptions | undefined; }> { - const provider: string = node.provider ?? inferProviderFromModel(node.model, workflowProvider); + // Provider is explicit: node.provider ?? workflow.provider. Model never + // influences provider selection. Model strings pass through to the SDK. + const provider: string = node.provider ?? workflowProvider; + if (!isRegisteredProvider(provider)) { + throw new Error( + `Node '${node.id}': unknown provider '${provider}'. ` + + `Registered: ${getRegisteredProviders() + .map(p => p.id) + .join(', ')}` + ); + } const providerAssistantConfig = config.assistants[provider]; const model: string | undefined = @@ -350,12 +363,6 @@ async function resolveNodeProviderAndModel( ? workflowModel : (providerAssistantConfig?.model as string | undefined)); - if (!isModelCompatible(provider, model)) { - throw new Error( - `Node '${node.id}': model "${model ?? 'default'}" is not compatible with provider "${provider}"` - ); - } - // Get provider capabilities for capability warnings (static lookup, no instantiation) const caps = getProviderCapabilities(provider); @@ -1101,6 +1108,49 @@ async function executeNodeInternal( return { state: 'failed', output: nodeOutputText, error: creditError }; } + // Empty assistant output is a failure for AI nodes — a provider stream + // that closed cleanly with zero content typically means a silent + // rejection or interruption that didn't produce a result.isError chunk. + // Bash/script/approval nodes don't reach this path; they have their + // own dispatch and never stream through this loop. + // + // Idle-timeout exits are exempt: the timeout warning at line 1017 has + // already told the user the node "completed via idle timeout"; flipping + // that to a failure here would directly contradict the on-screen message. 
+ if (nodeOutputText.trim() === '' && structuredOutput === undefined && !nodeIdleTimedOut) { + const duration = Date.now() - nodeStartTime; + const emptyError = `Node '${node.id}' produced no assistant output. The provider stream closed without yielding content — likely a silent provider rejection or stream interruption.`; + getLog().error({ nodeId: node.id, durationMs: duration }, 'dag.node_empty_output'); + await logNodeError(logDir, workflowRun.id, node.id, emptyError); + + deps.store + .createWorkflowEvent({ + workflow_run_id: workflowRun.id, + event_type: 'node_failed', + step_name: node.id, + data: { error: emptyError, duration_ms: duration }, + }) + .catch((err: Error) => { + getLog().error( + { err, workflowRunId: workflowRun.id, eventType: 'node_failed' }, + 'workflow_event_persist_failed' + ); + }); + + emitter.emit({ + type: 'node_failed', + runId: workflowRun.id, + nodeId: node.id, + nodeName: node.command ?? node.id, + error: emptyError, + }); + + lastNodeCancelCheck.delete(`${workflowRun.id}:${node.id}`); + lastNodeActivityUpdate.delete(`${workflowRun.id}:${node.id}`); + + return { state: 'failed', output: '', error: emptyError }; + } + const duration = Date.now() - nodeStartTime; getLog().info({ nodeId: node.id, durationMs: duration }, 'dag_node_completed'); await logNodeComplete(logDir, workflowRun.id, node.id, node.command ?? '', { @@ -1261,11 +1311,17 @@ async function executeBashNode( const finalScript = substituteNodeOutputRefs(substitutedScript, nodeOutputs, true); const timeout = node.timeout ?? SUBPROCESS_DEFAULT_TIMEOUT; - const subprocessEnv = - envVars && Object.keys(envVars).length > 0 ? { ...process.env, ...envVars } : undefined; + const subprocessEnv: NodeJS.ProcessEnv = { + ...process.env, + ARTIFACTS_DIR: artifactsDir, + LOG_DIR: logDir, + BASE_BRANCH: baseBranch, + ...(envVars ?? 
{}), + }; + const bashPath = resolveBashPath(); try { - const { stdout, stderr } = await execFileAsync('bash', ['-c', finalScript], { + const { stdout, stderr } = await execFileAsync(bashPath, ['-c', finalScript], { cwd, timeout, env: subprocessEnv, @@ -1318,7 +1374,7 @@ async function executeBashNode( if (isTimeout) { errorMsg = `Bash node '${node.id}' timed out after ${String(timeout)}ms`; } else if (err.message?.includes('ENOENT')) { - errorMsg = `Bash node '${node.id}' failed: bash executable not found in PATH`; + errorMsg = `Bash node '${node.id}' failed: bash executable not found at '${bashPath}'. Set ARCHON_BASH_PATH if Git Bash is installed elsewhere (e.g. user-scope installer at %LOCALAPPDATA%\\Programs\\Git\\bin\\bash.exe).`; } else if (err.message?.includes('EACCES')) { errorMsg = `Bash node '${node.id}' failed: permission denied (check cwd permissions)`; } else { @@ -1766,6 +1822,10 @@ async function executeLoopNode( // Build prompt — substituteWorkflowVariables throws if $BASE_BRANCH referenced but empty // Pass loopUserInput on the first resumed iteration; '' on all others (non-interactive // or subsequent iterations) so $LOOP_USER_INPUT substitutes to empty string explicitly. + // $LOOP_PREV_OUTPUT carries the previous iteration's cleaned output and is empty on + // the first iteration (no prior output exists). Across an interactive resume, the + // executor starts a fresh `lastIterationOutput` variable, so the first iteration of + // the resume also receives an empty $LOOP_PREV_OUTPUT. const { prompt: substitutedPrompt } = substituteWorkflowVariables( loop.prompt, workflowRun.id, @@ -1774,7 +1834,9 @@ async function executeLoopNode( baseBranch, docsDir, issueContext, - i === startIteration ? loopUserInput : '' + i === startIteration ? loopUserInput : '', + undefined, // rejectionReason + i === startIteration ? 
'' : lastIterationOutput ); const finalPrompt = substituteNodeOutputRefs(substitutedPrompt, nodeOutputs); @@ -1971,6 +2033,52 @@ async function executeLoopNode( ); } + // Empty assistant output is an iteration failure for AI loops — same + // contract as the single-shot AI-node guard in executeNodeInternal. A + // provider stream that closed cleanly with zero content typically means + // a silent rejection or interruption; left unchecked, an interactive + // loop would pause with a blank gate or burn the full max_iterations + // budget producing nothing. Idle-timeout exits are exempt — the + // notification above has already told the user the iteration completed + // via timeout, and flipping that to a failure would contradict it. + if (!iterationIdleTimedOut && fullOutput.trim() === '') { + const iterationDuration = Date.now() - iterationStart; + const emptyError = + 'Loop iteration produced no assistant output. The provider stream closed without yielding content — likely a silent provider rejection or stream interruption.'; + getLog().error( + { nodeId: node.id, iteration: i, durationMs: iterationDuration }, + 'loop_node.iteration_empty_output' + ); + getWorkflowEventEmitter().emit({ + type: 'loop_iteration_failed', + runId: workflowRun.id, + nodeId: node.id, + iteration: i, + error: emptyError, + }); + deps.store + .createWorkflowEvent({ + workflow_run_id: workflowRun.id, + event_type: 'loop_iteration_failed', + step_name: node.id, + data: { + iteration: i, + error: emptyError, + duration: iterationDuration, + nodeId: node.id, + }, + }) + .catch((evtErr: Error) => { + logEventStoreError(evtErr, i); + }); + return { + state: 'failed', + output: '', + error: `Loop iteration ${i} failed: ${emptyError}`, + costUsd: loopTotalCostUsd, + }; + } + // Batch mode: send accumulated output if (platform.getStreamingMode() === 'batch' && cleanOutput) { await safeSendMessage(platform, conversationId, cleanOutput, msgContext); @@ -2001,18 +2109,26 @@ async function 
executeLoopNode( nodeOutputs, true // escapedForBash ); - await execFileAsync('bash', ['-c', substitutedBash], { cwd }); + const loopBashPath = resolveBashPath(); + await execFileAsync(loopBashPath, ['-c', substitutedBash], { cwd }); bashComplete = true; // exit 0 = complete } catch (e) { const bashErr = e as NodeJS.ErrnoException; - // ENOENT or other system errors are unexpected — log them - if (bashErr.code === 'ENOENT') { - getLog().warn( + // System-level errors (ENOENT/EACCES) mean the bash binary itself is + // unreachable or unexecutable — that's environment breakage, not a + // condition-not-met outcome. Surface immediately so the loop fails + // fast instead of burning iterations against a broken binary. + if (bashErr.code === 'ENOENT' || bashErr.code === 'EACCES') { + getLog().error( { err: bashErr, nodeId: node.id, iteration: i }, 'loop_node.until_bash_exec_error' ); + throw new Error( + `Loop node '${node.id}' until_bash failed: cannot execute bash at '${resolveBashPath()}' (${bashErr.code}). Set ARCHON_BASH_PATH if Git Bash is installed elsewhere.` + ); } - bashComplete = false; // non-zero exit = not complete + // Non-zero exit from the bash script = condition not met yet, keep looping. + bashComplete = false; } } @@ -2249,9 +2365,21 @@ async function executeApprovalNode( rejectionReason ); - // Build a synthetic PromptNode to reuse executeNodeInternal + // Build a synthetic PromptNode to reuse executeNodeInternal. + // Use a distinct ID so the node_completed event written by executeNodeInternal + // does not collide with the approval gate's own ID in getCompletedDagNodeOutputs. + // If we used node.id here, a resumed run would find the event and treat the + // approval gate as already completed, bypassing the human gate entirely. + // + // Note: executeNodeInternal also emits node_started/node_completed WorkflowEmitterEvents + // with nodeId = `${node.id}:on_reject`. 
These flow through SSE into the web UI, where + // WorkflowExecution.tsx builds its nodeMap from all node_* events unconditionally. + // This means a transient `${node.id}:on_reject` phantom entry may appear in the UI's + // execution view during an on_reject cycle. This is cosmetic-only — the approval gate + // still re-presents correctly and the human gate contract is preserved. A follow-up can + // filter synthetic `:on_reject` IDs from the UI's nodeMap if needed. const syntheticNode: PromptNode = { - id: node.id, + id: `${node.id}:on_reject`, prompt: substituteNodeOutputRefs(substitutedPrompt, nodeOutputs), ...(node.depends_on ? { depends_on: node.depends_on } : {}), ...(node.idle_timeout ? { idle_timeout: node.idle_timeout } : {}), @@ -2294,9 +2422,12 @@ async function executeApprovalNode( // Fall through to re-pause at the approval gate } - // Standard approval gate — send message and pause + // Standard approval gate — send message and pause. + // Resolve $nodeId.output[.field] references so the human sees concrete values + // (parity with prompt/bash/loop/cancel nodes, which all run the same substitution). 
+ const renderedMessage = substituteNodeOutputRefs(node.approval.message, nodeOutputs); const approvalMsg = - `⏸ **Approval required**: ${node.approval.message}\n\n` + + `⏸ **Approval required**: ${renderedMessage}\n\n` + `Run ID: \`${workflowRun.id}\`\n` + `Approve: \`/workflow approve ${workflowRun.id}\` | Reject: \`/workflow reject ${workflowRun.id}\``; await safeSendMessage(platform, conversationId, approvalMsg, msgContext); @@ -2306,7 +2437,7 @@ async function executeApprovalNode( workflow_run_id: workflowRun.id, event_type: 'approval_requested', step_name: node.id, - data: { message: node.approval.message }, + data: { message: renderedMessage }, }) .catch((err: Error) => { getLog().error( @@ -2316,7 +2447,7 @@ async function executeApprovalNode( }); await deps.store.pauseWorkflowRun(workflowRun.id, { - message: node.approval.message, + message: renderedMessage, nodeId: node.id, type: 'approval', captureResponse: node.approval.capture_response, @@ -2328,7 +2459,7 @@ async function executeApprovalNode( type: 'approval_pending', runId: workflowRun.id, nodeId: node.id, - message: node.approval.message, + message: renderedMessage, }); // Return completed — the between-layer status check will see 'paused' and break. @@ -2584,9 +2715,19 @@ export async function executeDagWorkflow( // 3b. Loop node dispatch — manages its own AI sessions and iteration if (isLoopNode(node)) { - // Resolve per-node provider/model overrides (same logic as other node types) - const loopProvider: string = - node.provider ?? inferProviderFromModel(node.model, workflowProvider); + // Resolve per-node provider/model overrides (same logic as other node types). + // Provider is explicit; model passes through to the SDK. Throw on an + // unknown provider so the outer catch below emits the standard + // node_failed event + user-facing message — the same path + // resolveNodeProviderAndModel uses for non-loop nodes. + const loopProvider: string = node.provider ?? 
workflowProvider; + if (!isRegisteredProvider(loopProvider)) { + throw new Error( + `Node '${node.id}': unknown provider '${loopProvider}'. Registered: ${getRegisteredProviders() + .map(p => p.id) + .join(', ')}` + ); + } const loopAssistantConfig = config.assistants[loopProvider]; const loopModel: string | undefined = node.model ?? @@ -2594,17 +2735,6 @@ export async function executeDagWorkflow( ? workflowModel : (loopAssistantConfig?.model as string | undefined)); - if (!isModelCompatible(loopProvider, loopModel)) { - return { - nodeId: node.id, - output: { - state: 'failed' as const, - output: '', - error: `Node '${node.id}': model "${loopModel ?? 'default'}" is not compatible with provider "${loopProvider}"`, - }, - }; - } - const output = await executeLoopNode( deps, platform, diff --git a/packages/workflows/src/defaults/bundled-defaults.generated.ts b/packages/workflows/src/defaults/bundled-defaults.generated.ts index cd430f3d5a..43ffbb6f9b 100644 --- a/packages/workflows/src/defaults/bundled-defaults.generated.ts +++ b/packages/workflows/src/defaults/bundled-defaults.generated.ts @@ -55,24 +55,24 @@ export const BUNDLED_COMMANDS: Record = { // Bundled default workflows (20 total) export const BUNDLED_WORKFLOWS: Record = { - "archon-adversarial-dev": "name: archon-adversarial-dev\ndescription: |\n Use when: User wants to build a complete application from scratch using adversarial development.\n Triggers: \"adversarial dev\", \"adversarial development\", \"build with adversarial\", \"gan dev\",\n \"adversarial build\", \"build app adversarially\", \"adversarial coding\".\n Does: Three-role GAN-inspired development — Planner creates spec with sprints, then a state-machine\n loop alternates between Generator (builds code) and Evaluator (attacks it) with hard pass/fail\n thresholds. The evaluator's job is to BREAK what the generator builds. If any criterion scores\n below 7/10, the sprint goes back to the generator with adversarial feedback. 
Stops on sprint\n failure after max retries.\n NOT for: Bug fixes, PR reviews, refactoring existing code, simple one-off tasks.\n\n Based on Anthropic's harness design article for long-running application development.\n Separates planning, building, and evaluation into distinct roles with adversarial tension.\nprovider: claude\nmodel: sonnet\n\nnodes:\n # ─── Phase 1: Planning ───────────────────────────────────────────────\n - id: plan\n prompt: |\n You are a product planning expert. Your job is to take a short user prompt and expand it\n into a comprehensive product specification.\n\n ## User Request\n\n $ARGUMENTS\n\n ## Your Task\n\n Write a comprehensive product specification to the file `$ARTIFACTS_DIR/spec.md` using the Write tool.\n\n The spec MUST include ALL of the following sections:\n\n ### 1. Product Overview\n What the product does, who it's for, core value proposition.\n\n ### 2. Tech Stack\n Specific technologies, frameworks, and libraries. Be opinionated — pick concrete choices,\n not \"a modern framework.\" Include exact package names and versions where relevant.\n\n ### 3. Design Language\n Visual style, specific color hex codes, typography choices, component patterns, spacing system.\n\n ### 4. Feature List\n Every feature organized by priority. Be exhaustive.\n\n ### 5. Sprint Plan\n Features broken into 3-6 sprints, ordered by dependency and importance:\n - **Sprint 1** should establish the foundation (project setup, core data models, basic UI shell)\n - Each subsequent sprint builds on the previous\n - Label each sprint clearly: \"Sprint 1: Foundation\", \"Sprint 2: Core Features\", etc.\n - List the specific features/deliverables for each sprint\n\n Be specific and opinionated. The more concrete the spec (exact API paths, specific color codes,\n named libraries), the better the generator can build and the evaluator can test.\n\n IMPORTANT: Write the spec to `$ARTIFACTS_DIR/spec.md` using the Write tool. 
Do NOT just output\n it as conversation text.\n allowed_tools: [Read, Write, Glob, Grep]\n\n # ─── Phase 2: Workspace Initialization ───────────────────────────────\n - id: init-workspace\n depends_on: [plan]\n bash: |\n ARTIFACTS=\"$ARTIFACTS_DIR\"\n\n # Create directory structure for harness communication\n mkdir -p \"$ARTIFACTS/contracts\"\n mkdir -p \"$ARTIFACTS/feedback\"\n mkdir -p \"$ARTIFACTS/app\"\n\n # Initialize isolated git repo in app directory\n cd \"$ARTIFACTS/app\"\n git init -q\n git commit --allow-empty -m \"Initial commit: adversarial-dev workspace\" -q\n\n # Extract sprint count from spec (find highest \"Sprint N\" reference)\n SPEC=\"$ARTIFACTS/spec.md\"\n SPRINT_COUNT=3\n if [ -f \"$SPEC\" ]; then\n FOUND=$(grep -ioE 'sprint\\s+[0-9]+' \"$SPEC\" | grep -oE '[0-9]+' | sort -n | tail -1)\n if [ -n \"$FOUND\" ] && [ \"$FOUND\" -ge 1 ] 2>/dev/null; then\n SPRINT_COUNT=$FOUND\n fi\n if [ \"$SPRINT_COUNT\" -gt 10 ]; then\n SPRINT_COUNT=10\n fi\n fi\n\n # Write initial state machine file\n cat > \"$ARTIFACTS/state.json\" << 'STATEEOF'\n {\n \"phase\": \"negotiating\",\n \"sprint\": 1,\n \"totalSprints\": SPRINT_COUNT_PLACEHOLDER,\n \"retry\": 0,\n \"maxRetries\": 3,\n \"passThreshold\": 7,\n \"completedSprints\": [],\n \"status\": \"running\"\n }\n STATEEOF\n STATE_TMP=\"$ARTIFACTS/state.json.tmp\"\n sed \"s/SPRINT_COUNT_PLACEHOLDER/$SPRINT_COUNT/\" \"$ARTIFACTS/state.json\" > \"$STATE_TMP\"\n mv \"$STATE_TMP\" \"$ARTIFACTS/state.json\"\n\n echo \"{\\\"totalSprints\\\": $SPRINT_COUNT, \\\"appDir\\\": \\\"$ARTIFACTS/app\\\", \\\"artifactsDir\\\": \\\"$ARTIFACTS\\\"}\"\n timeout: 30000\n\n # ─── Phase 3: Adversarial Sprint Loop ────────────────────────────────\n #\n # State machine driven by $ARTIFACTS_DIR/state.json\n # Each iteration plays ONE role: negotiator, generator, or evaluator\n # fresh_context ensures genuine separation between roles\n #\n - id: adversarial-sprint\n depends_on: [init-workspace]\n idle_timeout: 600000\n model: 
claude-opus-4-6[1m]\n loop:\n prompt: |\n # Adversarial Development — Sprint Loop\n\n You are part of a GAN-inspired adversarial development system with three distinct roles.\n Each iteration you play ONE role, determined by the current phase in the state file.\n\n ## FIRST: Read State\n\n Read `$ARTIFACTS_DIR/state.json` to determine:\n - `phase` — which role you play this iteration\n - `sprint` — current sprint number\n - `totalSprints` — how many sprints total\n - `retry` — current retry attempt (0 = first try)\n - `maxRetries` — max retries before hard failure (default 3)\n - `passThreshold` — minimum score to pass (default 7)\n\n Then read `$ARTIFACTS_DIR/spec.md` for product context.\n\n ## Directory Layout\n\n - App source code: `$ARTIFACTS_DIR/app/`\n - Sprint contracts: `$ARTIFACTS_DIR/contracts/sprint-{N}.json`\n - Evaluation feedback: `$ARTIFACTS_DIR/feedback/sprint-{N}-round-{R}.json`\n - State machine: `$ARTIFACTS_DIR/state.json`\n\n ---\n\n ## ROLE: CONTRACT NEGOTIATOR (phase = \"negotiating\")\n\n You negotiate the success criteria for the current sprint. Play BOTH sides sequentially:\n\n **Step 1 — Generator's Proposal:**\n Read the spec carefully. Identify what Sprint {N} should deliver based on the sprint plan.\n Propose a sprint contract with 5-15 specific, testable criteria.\n\n Each criterion MUST be concrete and verifiable. Examples:\n - GOOD: \"GET /api/tasks returns 200 with JSON array; each item has id (number), title (string), status (string), createdAt (ISO date)\"\n - GOOD: \"Clicking the Add Task button opens a modal with title input, priority dropdown (low/medium/high), and due date picker\"\n - BAD: \"The API works well\"\n - BAD: \"Tasks can be managed\"\n\n **Step 2 — Evaluator's Tightening:**\n Now review your proposal as an adversary. For EACH criterion ask:\n - Is it specific enough to test programmatically?\n - What edge cases are missing? 
(empty inputs, special characters, concurrent requests)\n - Is the bar high enough, or would sloppy code pass?\n\n Tighten vague criteria. Add edge cases. Raise the bar.\n\n **Write the final contract** to `$ARTIFACTS_DIR/contracts/sprint-{N}.json`:\n ```json\n {\n \"sprintNumber\": ,\n \"features\": [\"feature1\", \"feature2\", ...],\n \"criteria\": [\n {\n \"name\": \"short-kebab-name\",\n \"description\": \"Specific, testable description of what must be true\",\n \"threshold\": 7\n }\n ]\n }\n ```\n\n **Update state.json**: Set `\"phase\": \"building\"`. Keep all other fields unchanged.\n\n ---\n\n ## ROLE: GENERATOR (phase = \"building\")\n\n You are a software engineer. Build features that MUST survive an adversarial evaluator\n who will actively try to break your code.\n\n **Read these files:**\n 1. `$ARTIFACTS_DIR/spec.md` — full product spec (design language, tech stack, all features)\n 2. `$ARTIFACTS_DIR/contracts/sprint-{N}.json` — the contract you must satisfy\n 3. If `retry` > 0: read `$ARTIFACTS_DIR/feedback/sprint-{N}-round-{R-1}.json` for the\n evaluator's previous feedback\n\n **If this is a RETRY (retry > 0):**\n Read the feedback CAREFULLY. 
Every failed criterion must be addressed.\n - If scores were close (5-6) and trending up: REFINE your approach\n - If scores were low (1-4) or the approach is fundamentally broken: PIVOT to a new strategy\n - Address EVERY feedback item — the evaluator WILL check\n - Re-verify each fix by running the code before committing\n\n **Build rules:**\n - All code goes in `$ARTIFACTS_DIR/app/`\n - Build ONE feature at a time, verify it works, then commit:\n ```bash\n cd $ARTIFACTS_DIR/app && git add -A && git commit -m \"feat: description of what was built\"\n ```\n - Install dependencies as needed (npm/bun/pip/etc)\n - Test your code — start the server, hit the endpoints, verify the UI renders\n - Think about what the evaluator will attack: edge cases, error handling, input validation\n - Build defensively — the evaluator's job is to break you\n\n **Update state.json**: Set `\"phase\": \"evaluating\"`. Keep all other fields unchanged.\n\n ---\n\n ## ROLE: EVALUATOR (phase = \"evaluating\")\n\n You are an ADVERSARIAL QA agent. Your mandate is to BREAK what the generator built.\n You are not helpful. You are not generous. You are an attacker.\n\n **CRITICAL CONSTRAINTS:**\n - You are READ-ONLY for source code. NEVER use Write or Edit on files in `$ARTIFACTS_DIR/app/`.\n - You MAY use Bash to run the app, curl endpoints, run test scripts, check behavior.\n - You MUST kill any background processes (servers, watchers) you start BEFORE finishing.\n Use: `pkill -f \"node\\|bun\\|python\\|npm\" 2>/dev/null || true`\n - You MUST score EVERY criterion in the contract. No skipping.\n\n **Scoring guidelines:**\n - **9-10**: Exceptional. Works perfectly including edge cases the contract didn't mention.\n - **7-8**: Solid. Meets the criterion as stated. Minor polish issues at most.\n - **5-6**: Partial. Core functionality exists but fails important edge cases or has bugs.\n - **3-4**: Weak. Barely functional. Major gaps.\n - **1-2**: Broken. 
Does not work or is not implemented.\n\n Do NOT grade on a curve. Do NOT give benefit of the doubt. A 7 means \"genuinely meets the bar.\"\n If something is broken, say it's broken.\n\n **Read**: `$ARTIFACTS_DIR/contracts/sprint-{N}.json` for the criteria.\n\n **For each criterion:**\n 1. Read the relevant source code\n 2. Run the application (start server, test endpoints, check rendered UI)\n 3. Try to BREAK it — invalid inputs, missing fields, edge cases, error handling gaps\n 4. Score it honestly\n\n **Write evaluation** to `$ARTIFACTS_DIR/feedback/sprint-{N}-round-{R}.json`:\n ```json\n {\n \"passed\": = passThreshold, false otherwise>,\n \"scores\": {\n \"criterion-name\": ,\n ...\n },\n \"feedback\": [\n {\n \"criterion\": \"criterion-name\",\n \"score\": <1-10>,\n \"details\": \"Specific findings. Include file paths, line numbers, exact error messages, curl commands that failed.\"\n }\n ],\n \"overallSummary\": \"What worked, what didn't, what the generator must fix.\"\n }\n ```\n\n **Determine pass/fail** — `passed` is `true` ONLY if every single score >= `passThreshold`.\n\n **Update state.json based on result:**\n\n **If PASSED (all criteria >= threshold):**\n - Add current sprint number to `completedSprints` array\n - If `sprint` < `totalSprints`: set `\"phase\": \"negotiating\"`, increment `\"sprint\"` by 1, set `\"retry\": 0`\n - If `sprint` == `totalSprints`: set `\"phase\": \"complete\"`, set `\"status\": \"complete\"`\n\n **If FAILED:**\n - If `retry` < `maxRetries`: set `\"phase\": \"building\"`, increment `\"retry\"` by 1\n - If `retry` >= `maxRetries`: set `\"phase\": \"failed\"`, set `\"status\": \"failed\"`\n\n **IMPORTANT**: Kill all background processes before finishing:\n ```bash\n pkill -f \"node|bun|python|npm|next|vite|webpack\" 2>/dev/null || true\n ```\n\n ---\n\n ## COMPLETION\n\n After updating state.json, check the `status` field:\n - If `\"status\": \"complete\"` → all sprints passed! 
Output: `ALL_SPRINTS_COMPLETE`\n - If `\"status\": \"failed\"` → sprint failed after max retries. Output: `ALL_SPRINTS_COMPLETE`\n - If `\"status\": \"running\"` → more work to do. Do NOT output any completion signal.\n\n until: ALL_SPRINTS_COMPLETE\n max_iterations: 60\n fresh_context: true\n until_bash: |\n grep -qE '\"status\"\\s*:\\s*\"(complete|failed)\"' \"$ARTIFACTS_DIR/state.json\"\n\n # ─── Phase 4: Report ─────────────────────────────────────────────────\n - id: report\n depends_on: [adversarial-sprint]\n trigger_rule: all_done\n context: fresh\n model: haiku\n prompt: |\n You are a project reporter. Generate a comprehensive summary of the adversarial development run.\n\n ## Read ALL of these files:\n 1. `$ARTIFACTS_DIR/state.json` — final state (tells you success/failure, sprint count)\n 2. `$ARTIFACTS_DIR/spec.md` — the original product spec\n 3. All files in `$ARTIFACTS_DIR/contracts/` — sprint contracts (use Glob to find them)\n 4. All files in `$ARTIFACTS_DIR/feedback/` — evaluation results (use Glob to find them)\n\n ## Generate a report covering:\n\n ### Build Summary\n - What application was built (from the spec)\n - Final status: did all sprints pass or did it fail? 
On which sprint?\n - Total sprints completed vs planned\n\n ### Per-Sprint Breakdown\n For each sprint that was attempted:\n - What the contract required (features + key criteria)\n - How many attempts were needed (retry count)\n - Final scores for each criterion\n - Key feedback that drove retries and improvements\n\n ### Quality Metrics\n - Average score across all final-round criteria\n - Which criteria required the most retries\n - Where the adversarial evaluator pushed quality the highest\n\n ### How to Run\n - The application code lives in: `$ARTIFACTS_DIR/app/`\n - Include the tech stack and how to start the app (from the spec)\n - Include any setup steps (install deps, env vars, etc.)\n\n Write this report to `$ARTIFACTS_DIR/report.md` AND output it as your response so the user\n sees it directly.\n allowed_tools: [Read, Write, Glob, Grep]\n", + "archon-adversarial-dev": "name: archon-adversarial-dev\ndescription: |\n Use when: User wants to build a complete application from scratch using adversarial development.\n Triggers: \"adversarial dev\", \"adversarial development\", \"build with adversarial\", \"gan dev\",\n \"adversarial build\", \"build app adversarially\", \"adversarial coding\".\n Does: Three-role GAN-inspired development — Planner creates spec with sprints, then a state-machine\n loop alternates between Generator (builds code) and Evaluator (attacks it) with hard pass/fail\n thresholds. The evaluator's job is to BREAK what the generator builds. If any criterion scores\n below 7/10, the sprint goes back to the generator with adversarial feedback. 
Stops on sprint\n failure after max retries.\n NOT for: Bug fixes, PR reviews, refactoring existing code, simple one-off tasks.\n\n Based on Anthropic's harness design article for long-running application development.\n Separates planning, building, and evaluation into distinct roles with adversarial tension.\nprovider: claude\nmodel: sonnet\n\nnodes:\n # ─── Phase 1: Planning ───────────────────────────────────────────────\n - id: plan\n prompt: |\n You are a product planning expert. Your job is to take a short user prompt and expand it\n into a comprehensive product specification.\n\n ## User Request\n\n $ARGUMENTS\n\n ## Your Task\n\n Write a comprehensive product specification to the file `$ARTIFACTS_DIR/spec.md` using the Write tool.\n\n The spec MUST include ALL of the following sections:\n\n ### 1. Product Overview\n What the product does, who it's for, core value proposition.\n\n ### 2. Tech Stack\n Specific technologies, frameworks, and libraries. Be opinionated — pick concrete choices,\n not \"a modern framework.\" Include exact package names and versions where relevant.\n\n ### 3. Design Language\n Visual style, specific color hex codes, typography choices, component patterns, spacing system.\n\n ### 4. Feature List\n Every feature organized by priority. Be exhaustive.\n\n ### 5. Sprint Plan\n Features broken into 3-6 sprints, ordered by dependency and importance:\n - **Sprint 1** should establish the foundation (project setup, core data models, basic UI shell)\n - Each subsequent sprint builds on the previous\n - Label each sprint clearly: \"Sprint 1: Foundation\", \"Sprint 2: Core Features\", etc.\n - List the specific features/deliverables for each sprint\n\n Be specific and opinionated. The more concrete the spec (exact API paths, specific color codes,\n named libraries), the better the generator can build and the evaluator can test.\n\n IMPORTANT: Write the spec to `$ARTIFACTS_DIR/spec.md` using the Write tool. 
Do NOT just output\n it as conversation text.\n allowed_tools: [Read, Write, Glob, Grep]\n\n # ─── Phase 2: Workspace Initialization ───────────────────────────────\n - id: init-workspace\n depends_on: [plan]\n bash: |\n ARTIFACTS=\"$ARTIFACTS_DIR\"\n\n # Create directory structure for harness communication\n mkdir -p \"$ARTIFACTS/contracts\"\n mkdir -p \"$ARTIFACTS/feedback\"\n mkdir -p \"$ARTIFACTS/app\"\n\n # Initialize isolated git repo in app directory\n cd \"$ARTIFACTS/app\"\n git init -q\n git commit --allow-empty -m \"Initial commit: adversarial-dev workspace\" -q\n\n # Extract sprint count from spec (find highest \"Sprint N\" reference)\n SPEC=\"$ARTIFACTS/spec.md\"\n SPRINT_COUNT=3\n if [ -f \"$SPEC\" ]; then\n FOUND=$(grep -ioE 'sprint\\s+[0-9]+' \"$SPEC\" | grep -oE '[0-9]+' | sort -n | tail -1)\n if [ -n \"$FOUND\" ] && [ \"$FOUND\" -ge 1 ] 2>/dev/null; then\n SPRINT_COUNT=$FOUND\n fi\n if [ \"$SPRINT_COUNT\" -gt 10 ]; then\n SPRINT_COUNT=10\n fi\n fi\n\n # Write initial state machine file\n cat > \"$ARTIFACTS/state.json\" << 'STATEEOF'\n {\n \"phase\": \"negotiating\",\n \"sprint\": 1,\n \"totalSprints\": SPRINT_COUNT_PLACEHOLDER,\n \"retry\": 0,\n \"maxRetries\": 3,\n \"passThreshold\": 7,\n \"completedSprints\": [],\n \"status\": \"running\"\n }\n STATEEOF\n STATE_TMP=\"$ARTIFACTS/state.json.tmp\"\n sed \"s/SPRINT_COUNT_PLACEHOLDER/$SPRINT_COUNT/\" \"$ARTIFACTS/state.json\" > \"$STATE_TMP\"\n mv \"$STATE_TMP\" \"$ARTIFACTS/state.json\"\n\n echo \"{\\\"totalSprints\\\": $SPRINT_COUNT, \\\"appDir\\\": \\\"$ARTIFACTS/app\\\", \\\"artifactsDir\\\": \\\"$ARTIFACTS\\\"}\"\n timeout: 30000\n\n # ─── Phase 3: Adversarial Sprint Loop ────────────────────────────────\n #\n # State machine driven by $ARTIFACTS_DIR/state.json\n # Each iteration plays ONE role: negotiator, generator, or evaluator\n # fresh_context ensures genuine separation between roles\n #\n - id: adversarial-sprint\n depends_on: [init-workspace]\n idle_timeout: 600000\n model: opus[1m]\n 
loop:\n prompt: |\n # Adversarial Development — Sprint Loop\n\n You are part of a GAN-inspired adversarial development system with three distinct roles.\n Each iteration you play ONE role, determined by the current phase in the state file.\n\n ## FIRST: Read State\n\n Read `$ARTIFACTS_DIR/state.json` to determine:\n - `phase` — which role you play this iteration\n - `sprint` — current sprint number\n - `totalSprints` — how many sprints total\n - `retry` — current retry attempt (0 = first try)\n - `maxRetries` — max retries before hard failure (default 3)\n - `passThreshold` — minimum score to pass (default 7)\n\n Then read `$ARTIFACTS_DIR/spec.md` for product context.\n\n ## Directory Layout\n\n - App source code: `$ARTIFACTS_DIR/app/`\n - Sprint contracts: `$ARTIFACTS_DIR/contracts/sprint-{N}.json`\n - Evaluation feedback: `$ARTIFACTS_DIR/feedback/sprint-{N}-round-{R}.json`\n - State machine: `$ARTIFACTS_DIR/state.json`\n\n ---\n\n ## ROLE: CONTRACT NEGOTIATOR (phase = \"negotiating\")\n\n You negotiate the success criteria for the current sprint. Play BOTH sides sequentially:\n\n **Step 1 — Generator's Proposal:**\n Read the spec carefully. Identify what Sprint {N} should deliver based on the sprint plan.\n Propose a sprint contract with 5-15 specific, testable criteria.\n\n Each criterion MUST be concrete and verifiable. Examples:\n - GOOD: \"GET /api/tasks returns 200 with JSON array; each item has id (number), title (string), status (string), createdAt (ISO date)\"\n - GOOD: \"Clicking the Add Task button opens a modal with title input, priority dropdown (low/medium/high), and due date picker\"\n - BAD: \"The API works well\"\n - BAD: \"Tasks can be managed\"\n\n **Step 2 — Evaluator's Tightening:**\n Now review your proposal as an adversary. For EACH criterion ask:\n - Is it specific enough to test programmatically?\n - What edge cases are missing? 
(empty inputs, special characters, concurrent requests)\n - Is the bar high enough, or would sloppy code pass?\n\n Tighten vague criteria. Add edge cases. Raise the bar.\n\n **Write the final contract** to `$ARTIFACTS_DIR/contracts/sprint-{N}.json`:\n ```json\n {\n \"sprintNumber\": ,\n \"features\": [\"feature1\", \"feature2\", ...],\n \"criteria\": [\n {\n \"name\": \"short-kebab-name\",\n \"description\": \"Specific, testable description of what must be true\",\n \"threshold\": 7\n }\n ]\n }\n ```\n\n **Update state.json**: Set `\"phase\": \"building\"`. Keep all other fields unchanged.\n\n ---\n\n ## ROLE: GENERATOR (phase = \"building\")\n\n You are a software engineer. Build features that MUST survive an adversarial evaluator\n who will actively try to break your code.\n\n **Read these files:**\n 1. `$ARTIFACTS_DIR/spec.md` — full product spec (design language, tech stack, all features)\n 2. `$ARTIFACTS_DIR/contracts/sprint-{N}.json` — the contract you must satisfy\n 3. If `retry` > 0: read `$ARTIFACTS_DIR/feedback/sprint-{N}-round-{R-1}.json` for the\n evaluator's previous feedback\n\n **If this is a RETRY (retry > 0):**\n Read the feedback CAREFULLY. 
Every failed criterion must be addressed.\n - If scores were close (5-6) and trending up: REFINE your approach\n - If scores were low (1-4) or the approach is fundamentally broken: PIVOT to a new strategy\n - Address EVERY feedback item — the evaluator WILL check\n - Re-verify each fix by running the code before committing\n\n **Build rules:**\n - All code goes in `$ARTIFACTS_DIR/app/`\n - Build ONE feature at a time, verify it works, then commit:\n ```bash\n cd $ARTIFACTS_DIR/app && git add -A && git commit -m \"feat: description of what was built\"\n ```\n - Install dependencies as needed (npm/bun/pip/etc)\n - Test your code — start the server, hit the endpoints, verify the UI renders\n - Think about what the evaluator will attack: edge cases, error handling, input validation\n - Build defensively — the evaluator's job is to break you\n\n **Update state.json**: Set `\"phase\": \"evaluating\"`. Keep all other fields unchanged.\n\n ---\n\n ## ROLE: EVALUATOR (phase = \"evaluating\")\n\n You are an ADVERSARIAL QA agent. Your mandate is to BREAK what the generator built.\n You are not helpful. You are not generous. You are an attacker.\n\n **CRITICAL CONSTRAINTS:**\n - You are READ-ONLY for source code. NEVER use Write or Edit on files in `$ARTIFACTS_DIR/app/`.\n - You MAY use Bash to run the app, curl endpoints, run test scripts, check behavior.\n - You MUST kill any background processes (servers, watchers) you start BEFORE finishing.\n Use: `pkill -f \"node\\|bun\\|python\\|npm\" 2>/dev/null || true`\n - You MUST score EVERY criterion in the contract. No skipping.\n\n **Scoring guidelines:**\n - **9-10**: Exceptional. Works perfectly including edge cases the contract didn't mention.\n - **7-8**: Solid. Meets the criterion as stated. Minor polish issues at most.\n - **5-6**: Partial. Core functionality exists but fails important edge cases or has bugs.\n - **3-4**: Weak. Barely functional. Major gaps.\n - **1-2**: Broken. 
Does not work or is not implemented.\n\n Do NOT grade on a curve. Do NOT give benefit of the doubt. A 7 means \"genuinely meets the bar.\"\n If something is broken, say it's broken.\n\n **Read**: `$ARTIFACTS_DIR/contracts/sprint-{N}.json` for the criteria.\n\n **For each criterion:**\n 1. Read the relevant source code\n 2. Run the application (start server, test endpoints, check rendered UI)\n 3. Try to BREAK it — invalid inputs, missing fields, edge cases, error handling gaps\n 4. Score it honestly\n\n **Write evaluation** to `$ARTIFACTS_DIR/feedback/sprint-{N}-round-{R}.json`:\n ```json\n {\n \"passed\": = passThreshold, false otherwise>,\n \"scores\": {\n \"criterion-name\": ,\n ...\n },\n \"feedback\": [\n {\n \"criterion\": \"criterion-name\",\n \"score\": <1-10>,\n \"details\": \"Specific findings. Include file paths, line numbers, exact error messages, curl commands that failed.\"\n }\n ],\n \"overallSummary\": \"What worked, what didn't, what the generator must fix.\"\n }\n ```\n\n **Determine pass/fail** — `passed` is `true` ONLY if every single score >= `passThreshold`.\n\n **Update state.json based on result:**\n\n **If PASSED (all criteria >= threshold):**\n - Add current sprint number to `completedSprints` array\n - If `sprint` < `totalSprints`: set `\"phase\": \"negotiating\"`, increment `\"sprint\"` by 1, set `\"retry\": 0`\n - If `sprint` == `totalSprints`: set `\"phase\": \"complete\"`, set `\"status\": \"complete\"`\n\n **If FAILED:**\n - If `retry` < `maxRetries`: set `\"phase\": \"building\"`, increment `\"retry\"` by 1\n - If `retry` >= `maxRetries`: set `\"phase\": \"failed\"`, set `\"status\": \"failed\"`\n\n **IMPORTANT**: Kill all background processes before finishing:\n ```bash\n pkill -f \"node|bun|python|npm|next|vite|webpack\" 2>/dev/null || true\n ```\n\n ---\n\n ## COMPLETION\n\n After updating state.json, check the `status` field:\n - If `\"status\": \"complete\"` → all sprints passed! 
Output: `ALL_SPRINTS_COMPLETE`\n - If `\"status\": \"failed\"` → sprint failed after max retries. Output: `ALL_SPRINTS_COMPLETE`\n - If `\"status\": \"running\"` → more work to do. Do NOT output any completion signal.\n\n until: ALL_SPRINTS_COMPLETE\n max_iterations: 60\n fresh_context: true\n until_bash: |\n grep -qE '\"status\"\\s*:\\s*\"(complete|failed)\"' \"$ARTIFACTS_DIR/state.json\"\n\n # ─── Phase 4: Report ─────────────────────────────────────────────────\n - id: report\n depends_on: [adversarial-sprint]\n trigger_rule: all_done\n context: fresh\n model: haiku\n prompt: |\n You are a project reporter. Generate a comprehensive summary of the adversarial development run.\n\n ## Read ALL of these files:\n 1. `$ARTIFACTS_DIR/state.json` — final state (tells you success/failure, sprint count)\n 2. `$ARTIFACTS_DIR/spec.md` — the original product spec\n 3. All files in `$ARTIFACTS_DIR/contracts/` — sprint contracts (use Glob to find them)\n 4. All files in `$ARTIFACTS_DIR/feedback/` — evaluation results (use Glob to find them)\n\n ## Generate a report covering:\n\n ### Build Summary\n - What application was built (from the spec)\n - Final status: did all sprints pass or did it fail? 
On which sprint?\n - Total sprints completed vs planned\n\n ### Per-Sprint Breakdown\n For each sprint that was attempted:\n - What the contract required (features + key criteria)\n - How many attempts were needed (retry count)\n - Final scores for each criterion\n - Key feedback that drove retries and improvements\n\n ### Quality Metrics\n - Average score across all final-round criteria\n - Which criteria required the most retries\n - Where the adversarial evaluator pushed quality the highest\n\n ### How to Run\n - The application code lives in: `$ARTIFACTS_DIR/app/`\n - Include the tech stack and how to start the app (from the spec)\n - Include any setup steps (install deps, env vars, etc.)\n\n Write this report to `$ARTIFACTS_DIR/report.md` AND output it as your response so the user\n sees it directly.\n allowed_tools: [Read, Write, Glob, Grep]\n", "archon-architect": "name: archon-architect\ndescription: |\n Use when: User wants an architectural sweep, complexity reduction, or codebase health improvement.\n Triggers: \"architect\", \"simplify codebase\", \"reduce complexity\", \"architectural sweep\",\n \"clean up architecture\", \"codebase health\", \"fix architecture\".\n Does: Scans codebase metrics -> analyzes architecture with principled lens -> plans targeted\n simplifications -> executes fixes with self-review loops (hooks) -> validates -> creates PR.\n NOT for: Single-file fixes, feature development, bug fixes, PR reviews.\n\n DAG workflow showcasing per-node hooks:\n - PostToolUse hooks create organic quality loops (lint after write, self-review)\n - PreToolUse hooks inject architectural principles before changes\n - Different nodes have different trust levels and steering\n\nprovider: claude\n\nnodes:\n # ═══════════════════════════════════════════════════════════════\n # PHASE 1: MEASURE\n # Gather raw metrics — file sizes, complexity hotspots, dependency fan-out\n # ═══════════════════════════════════════════════════════════════\n\n - id: 
scan-metrics\n bash: |\n echo \"=== FILE SIZE HOTSPOTS (top 30 largest source files) ===\"\n find . -name '*.ts' -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*' \\\n -exec wc -l {} + 2>/dev/null | sort -rn | head -30\n\n echo \"\"\n echo \"=== IMPORT FAN-OUT (files with most imports) ===\"\n for f in $(find . -name '*.ts' -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*'); do\n count=$(grep -c \"^import \" \"$f\" 2>/dev/null) || count=0\n if [ \"$count\" -gt 8 ]; then\n echo \"$count imports: $f\"\n fi\n done | sort -rn | head -20\n\n echo \"\"\n echo \"=== EXPORT FAN-OUT (files with most exports) ===\"\n for f in $(find . -name '*.ts' -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*'); do\n count=$(grep -c \"^export \" \"$f\" 2>/dev/null) || count=0\n if [ \"$count\" -gt 5 ]; then\n echo \"$count exports: $f\"\n fi\n done | sort -rn | head -20\n\n echo \"\"\n echo \"=== FUNCTION LENGTH HOTSPOTS (functions over 50 lines) ===\"\n grep -rn \"^\\(export \\)\\?\\(async \\)\\?function \\|=> {$\" \\\n --include='*.ts' --exclude-dir=node_modules --exclude-dir=.git --exclude-dir=dist . 2>/dev/null \\\n | head -30\n\n echo \"\"\n echo \"=== TYPE SAFETY GAPS ===\"\n echo \"any usage:\"\n grep -rn \": any\\b\\|as any\\b\" --include='*.ts' --exclude-dir=node_modules --exclude-dir=.git --exclude-dir=dist . 2>/dev/null | wc -l\n echo \"eslint-disable comments:\"\n grep -rn \"eslint-disable\" --include='*.ts' --exclude-dir=node_modules --exclude-dir=.git --exclude-dir=dist . 
2>/dev/null | wc -l\n timeout: 60000\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 2: ANALYZE\n # Read through hotspots with an architectural lens\n # Hooks inject assessment criteria after every file read\n # ═══════════════════════════════════════════════════════════════\n\n - id: analyze\n prompt: |\n You are a senior software architect performing a codebase health assessment.\n\n ## Codebase Metrics\n\n $scan-metrics.output\n\n ## User Focus\n\n $ARGUMENTS\n\n ## Instructions\n\n 1. Read the top 10-15 files flagged by the metrics above (largest, most imports, most exports)\n 2. For each file, assess the criteria injected after you read it (you'll see them)\n 3. Build a running list of architectural concerns\n 4. Focus on:\n - Modules doing too many things (SRP violations)\n - Abstractions that don't earn their complexity\n - Duplicated patterns that should be consolidated (Rule of Three)\n - God files or god functions\n - Leaky abstractions or tight coupling between layers\n - Dead code or unused exports\n 5. Do NOT suggest changes yet — only diagnose\n\n ## Output\n\n Write a structured assessment to $ARTIFACTS_DIR/architecture-assessment.md with:\n - Executive summary (3-5 sentences)\n - Top findings ranked by impact\n - For each finding: file, what's wrong, why it matters, estimated effort\n depends_on: [scan-metrics]\n context: fresh\n denied_tools: [Write, Edit, Bash]\n hooks:\n PostToolUse:\n - matcher: \"Read\"\n response:\n hookSpecificOutput:\n hookEventName: PostToolUse\n additionalContext: >\n For the file you just read, assess:\n (1) Single responsibility — does this module do exactly one thing?\n (2) Cognitive load — could a new team member understand this in 5 minutes?\n (3) Abstraction value — does every abstraction earn its complexity, or is it premature?\n (4) Dependency direction — does this file depend on things at its own level or below, not above?\n Add any concerns to your running list. 
Be specific — cite line ranges and function names.\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 3: PLAN\n # Prioritize and scope the changes — pure reasoning, no tools\n # ═══════════════════════════════════════════════════════════════\n\n - id: plan\n prompt: |\n You are planning targeted architectural improvements.\n\n ## Assessment\n\n $analyze.output\n\n ## Principles\n\n - KISS: prefer straightforward over clever\n - YAGNI: remove speculative abstractions\n - Rule of Three: only extract when a pattern appears 3+ times\n - Each change must be independently revertable\n - Do NOT mix refactoring with behavior changes\n - Scope to what can be done safely in one pass (max 5-7 files)\n\n ## Instructions\n\n 1. From the assessment, select the top 3-5 highest-impact, lowest-risk improvements\n 2. For each, write a precise plan: which file, what to change, why\n 3. Order them so each change is independent (no cascading dependencies between changes)\n 4. Estimate blast radius — how many other files are affected\n\n ## Output\n\n Write the plan as a numbered list. 
Be specific about exactly what code to change.\n Keep it concise — the implement node will follow this literally.\n depends_on: [analyze]\n allowed_tools: [Read]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 4: EXECUTE\n # Make the changes with hooks creating quality feedback loops\n # ═══════════════════════════════════════════════════════════════\n\n - id: simplify\n prompt: |\n You are implementing targeted architectural simplifications.\n\n ## Plan\n\n $plan.output\n\n ## Rules\n\n - Follow the plan exactly — do not add extra improvements you notice along the way\n - Each change must preserve existing behavior (refactor only, no feature changes)\n - After each file edit, you'll be prompted to validate — follow those instructions\n - If a change turns out to be harder than expected, skip it and move on\n - Commit each logical change separately with a clear commit message\n\n ## Instructions\n\n 1. Work through the plan items in order\n 2. For each item: read the file, make the change, follow the post-edit checklist\n 3. After all changes, do a final `git diff --stat` to verify scope\n depends_on: [plan]\n context: fresh\n hooks:\n PreToolUse:\n - matcher: \"Write|Edit\"\n response:\n hookSpecificOutput:\n hookEventName: PreToolUse\n additionalContext: >\n Before writing: Is this file in your plan? If not, explain why you're\n touching it. Check how many files import from this module — changes to\n widely-imported modules need extra scrutiny.\n PostToolUse:\n - matcher: \"Write|Edit\"\n response:\n systemMessage: >\n You just modified a file. Do these things NOW before moving on:\n 1. Run the type checker to verify your change compiles\n 2. Re-read the file you changed — is it ACTUALLY simpler, or did you just move complexity around?\n 3. State in ONE sentence why this change reduces complexity. 
If you cannot justify it, revert it.\n - matcher: \"Read\"\n response:\n hookSpecificOutput:\n hookEventName: PostToolUse\n additionalContext: >\n Before modifying this file, consider: will your change reduce or increase\n the number of concepts a reader needs to hold in their head?\n - matcher: \"Bash\"\n response:\n hookSpecificOutput:\n hookEventName: PostToolUse\n additionalContext: >\n Check the exit code. If the command failed, diagnose the root cause\n before attempting a fix. Do not blindly retry.\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 5: VALIDATE\n # Run full validation suite — bash only, cannot edit to \"fix\" failures\n # ═══════════════════════════════════════════════════════════════\n\n - id: validate\n bash: |\n echo \"=== TYPE CHECK ===\"\n bun run type-check 2>&1\n TC_EXIT=$?\n\n echo \"\"\n echo \"=== LINT ===\"\n bun run lint 2>&1\n LINT_EXIT=$?\n\n echo \"\"\n echo \"=== TESTS ===\"\n bun run test 2>&1\n TEST_EXIT=$?\n\n echo \"\"\n echo \"=== RESULTS ===\"\n echo \"Type check: $([ $TC_EXIT -eq 0 ] && echo 'PASS' || echo 'FAIL')\"\n echo \"Lint: $([ $LINT_EXIT -eq 0 ] && echo 'PASS' || echo 'FAIL')\"\n echo \"Tests: $([ $TEST_EXIT -eq 0 ] && echo 'PASS' || echo 'FAIL')\"\n\n # Always exit 0 so downstream nodes can read output and decide\n if [ $TC_EXIT -eq 0 ] && [ $LINT_EXIT -eq 0 ] && [ $TEST_EXIT -eq 0 ]; then\n echo \"VALIDATION_STATUS: PASS\"\n else\n echo \"VALIDATION_STATUS: FAIL\"\n fi\n depends_on: [simplify]\n timeout: 300000\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 6: FIX VALIDATION FAILURES (if any)\n # Only runs if validate failed — focused fix with same quality hooks\n # ═══════════════════════════════════════════════════════════════\n\n - id: fix-failures\n prompt: |\n Review the validation output below.\n\n ## Validation Output\n\n $validate.output\n\n ## Instructions\n\n If the output ends with \"VALIDATION_STATUS: PASS\", respond with\n \"All checks 
passed — no fixes needed.\" and stop.\n\n If there are failures:\n\n 1. Read the validation failures carefully\n 2. Fix ONLY what's broken — do not make additional improvements\n 3. If a fix requires changing behavior (not just fixing a type/lint error),\n revert the original change instead\n 4. Run the specific failing check after each fix to confirm it passes\n 5. After all fixes, run the full validation suite: `bun run validate`\n depends_on: [validate]\n context: fresh\n hooks:\n PostToolUse:\n - matcher: \"Write|Edit\"\n response:\n systemMessage: >\n You just made a fix. Run the specific failing validation check NOW\n to verify your fix works. Do not batch fixes — verify each one.\n PreToolUse:\n - matcher: \"Write|Edit\"\n response:\n hookSpecificOutput:\n hookEventName: PreToolUse\n additionalContext: >\n You are fixing validation failures only. Do not make any changes\n beyond what's needed to pass the failing checks. If in doubt, revert\n the original change that caused the failure.\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 7: CREATE PR\n # Hooks ensure this node only does git operations\n # ═══════════════════════════════════════════════════════════════\n\n - id: create-pr\n prompt: |\n Create a pull request for the architectural improvements.\n\n ## Context\n\n - Architecture assessment: $analyze.output\n - Plan: $plan.output\n - Validation: $validate.output\n\n ## Instructions\n\n 1. Stage all changes and create a single commit (or verify existing commits)\n 2. Push the branch: `git push -u origin HEAD`\n 3. Check if a PR already exists: `gh pr list --head $(git branch --show-current)`\n 4. Create the PR with:\n - Title: concise description of what was simplified (under 70 chars)\n - Body: use the format below\n 5. 
Save the PR URL to `$ARTIFACTS_DIR/.pr-url`\n\n ## PR Body Format\n\n ```markdown\n ## Architectural Sweep\n\n **Focus**: $ARGUMENTS\n\n ### Assessment\n\n [3-5 sentence summary from the architecture assessment]\n\n ### Changes\n\n [For each change: what file, what was simplified, why]\n\n ### Validation\n\n - [x] Type check passes\n - [x] Lint passes\n - [x] Tests pass\n - [x] Each change preserves existing behavior\n ```\n depends_on: [fix-failures]\n context: fresh\n hooks:\n PreToolUse:\n - matcher: \"Write|Edit\"\n response:\n hookSpecificOutput:\n hookEventName: PreToolUse\n permissionDecision: deny\n permissionDecisionReason: \"PR creation node — do not modify source files. Use only git and gh commands.\"\n PostToolUse:\n - matcher: \"Bash\"\n response:\n hookSpecificOutput:\n hookEventName: PostToolUse\n additionalContext: >\n Verify this command succeeded. If git push or gh pr create failed,\n read the error message carefully before retrying.\n", "archon-assist": "name: archon-assist\ndescription: |\n Use when: No other workflow matches the request.\n Handles: Questions, debugging, exploration, one-off tasks, explanations, CI failures, general help.\n Capability: Full Claude Code agent with all tools available.\n Note: Will inform user when assist mode is used for tracking.\n\nnodes:\n - id: assist\n command: archon-assist\n", "archon-comprehensive-pr-review": "name: archon-comprehensive-pr-review\ndescription: |\n Use when: User wants a comprehensive code review of a pull request with automatic fixes.\n Triggers: \"review this PR\", \"review PR #123\", \"comprehensive review\", \"full PR review\",\n \"review and fix\", \"check this PR\", \"code review\".\n Does: Syncs PR with main (rebase if needed) -> runs 5 specialized review agents in parallel ->\n synthesizes findings -> auto-fixes CRITICAL/HIGH issues -> reports remaining issues.\n NOT for: Quick questions about a PR, checking CI status, simple \"what changed\" queries.\n\n This workflow produces 
artifacts in $ARTIFACTS_DIR/../reviews/pr-{number}/ and posts\n a comprehensive review comment to the GitHub PR.\n\nnodes:\n - id: scope\n command: archon-pr-review-scope\n\n - id: sync\n command: archon-sync-pr-with-main\n depends_on: [scope]\n\n - id: code-review\n command: archon-code-review-agent\n depends_on: [sync]\n\n - id: error-handling\n command: archon-error-handling-agent\n depends_on: [sync]\n\n - id: test-coverage\n command: archon-test-coverage-agent\n depends_on: [sync]\n\n - id: comment-quality\n command: archon-comment-quality-agent\n depends_on: [sync]\n\n - id: docs-impact\n command: archon-docs-impact-agent\n depends_on: [sync]\n\n - id: synthesize\n command: archon-synthesize-review\n depends_on: [code-review, error-handling, test-coverage, comment-quality, docs-impact]\n trigger_rule: one_success\n\n - id: implement-fixes\n command: archon-implement-review-fixes\n depends_on: [synthesize]\n", "archon-create-issue": "name: archon-create-issue\ndescription: |\n Use when: User wants to report a bug or problem as a GitHub issue with automated reproduction.\n Triggers: \"create issue\", \"file a bug\", \"report this bug\", \"open an issue for\",\n \"create github issue\", \"report issue\", \"log this bug\".\n Does: Classifies problem area (haiku) -> gathers context in parallel (templates, git state, duplicates) ->\n investigates relevant code -> reproduces the issue using area-specific tools (agent-browser, CLI, DB queries) ->\n gates on reproduction success -> creates issue with full evidence OR reports back if cannot reproduce.\n NOT for: Feature requests, enhancements, or non-bug work. 
Only for bugs/problems.\n\n Reproduction gating: If the issue cannot be reproduced, the workflow does NOT create an issue.\n Instead, it reports what was tried and suggests next steps to the user.\n\nnodes:\n # ═══════════════════════════════════════════════════════════════\n # PHASE 1: CLASSIFY — Haiku classification of user's problem\n # ═══════════════════════════════════════════════════════════════\n\n - id: classify\n prompt: |\n You are a problem classifier for the Archon codebase. Analyze the user's\n description and determine the issue type and which area of the system is affected.\n\n ## User's Description\n $ARGUMENTS\n\n ## Area Definitions\n | Area | Packages | Indicators |\n |------|----------|------------|\n | web-ui | @archon/web, @archon/server (routes, web adapter) | UI rendering, SSE streaming, React components, browser behavior |\n | api-server | @archon/server (routes, middleware) | HTTP endpoints, response codes, request handling |\n | cli | @archon/cli | CLI commands, workflow invocation from terminal, output formatting |\n | isolation | @archon/isolation, @archon/git | Worktrees, branch operations, cleanup, environment lifecycle |\n | workflows | @archon/workflows | YAML parsing, DAG execution, variable substitution, node types |\n | database | @archon/core (db/) | SQLite/PostgreSQL queries, schema, data integrity, migrations |\n | adapters | @archon/adapters | Slack/Telegram/GitHub/Discord message handling, auth, polling |\n | core | @archon/core (orchestrator, handlers, clients) | Message routing, session management, AI client streaming |\n | other | Any package not covered above | Cross-cutting concerns, build tooling, config, unknown area |\n\n ## Classification Rules\n - Choose the MOST SPECIFIC area. 
\"SSE disconnects\" = web-ui (not api-server).\n - If ambiguous between two areas, pick the one closer to the user-facing symptom.\n - Use \"other\" only when the problem genuinely doesn't fit any specific area.\n - needs_server: Set to \"true\" if reproducing requires a running Archon server.\n Typically true for: web-ui, api-server, core, adapters.\n Typically false for: cli, isolation, workflows, database.\n For \"other\": use your judgment based on the description.\n - repro_hint: Extract the user's reproduction steps into a concise instruction.\n If no explicit steps given, infer the most likely way to trigger the issue.\n\n Provide reasoning for your classification.\n model: haiku\n allowed_tools: []\n output_format:\n type: object\n properties:\n type:\n type: string\n enum: [\"bug\", \"regression\", \"crash\", \"performance\", \"configuration\"]\n area:\n type: string\n enum: [\"web-ui\", \"api-server\", \"cli\", \"isolation\", \"workflows\", \"database\", \"adapters\", \"core\", \"other\"]\n title:\n type: string\n keywords:\n type: string\n repro_hint:\n type: string\n needs_server:\n type: string\n enum: [\"true\", \"false\"]\n required: [type, area, title, keywords, repro_hint, needs_server]\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 2: PARALLEL CONTEXT GATHERING\n # ═══════════════════════════════════════════════════════════════\n\n - id: fetch-template\n bash: |\n # Search for GitHub issue templates in standard locations\n TEMPLATES_FOUND=0\n\n # Check for issue template directory (YAML-based templates)\n if [ -d \".github/ISSUE_TEMPLATE\" ]; then\n echo \"=== Issue Templates Found ===\"\n for f in .github/ISSUE_TEMPLATE/*.md .github/ISSUE_TEMPLATE/*.yaml .github/ISSUE_TEMPLATE/*.yml; do\n if [ -f \"$f\" ]; then\n TEMPLATES_FOUND=$((TEMPLATES_FOUND + 1))\n echo \"--- Template: $f ---\"\n cat \"$f\"\n echo \"\"\n fi\n done\n fi\n\n # Check for single issue template\n for f in .github/ISSUE_TEMPLATE.md 
docs/ISSUE_TEMPLATE.md; do\n if [ -f \"$f\" ]; then\n TEMPLATES_FOUND=$((TEMPLATES_FOUND + 1))\n echo \"--- Template: $f ---\"\n cat \"$f\"\n fi\n done\n\n if [ \"$TEMPLATES_FOUND\" -eq 0 ]; then\n echo \"No issue templates found — will use standard format\"\n fi\n depends_on: [classify]\n\n - id: git-context\n bash: |\n echo \"=== Branch ===\"\n git branch --show-current\n\n echo \"=== Recent Commits (last 15) ===\"\n git log --oneline -15\n\n echo \"=== Working Tree Status ===\"\n git status --short\n\n echo \"=== Modified Files (last 3 commits) ===\"\n git diff --name-only HEAD~3..HEAD 2>/dev/null || echo \"(fewer than 3 commits)\"\n\n echo \"=== Environment ===\"\n echo \"Node: $(node --version 2>/dev/null || echo 'N/A')\"\n echo \"Bun: $(bun --version 2>/dev/null || echo 'N/A')\"\n echo \"OS: $(uname -s 2>/dev/null || echo 'Windows') $(uname -r 2>/dev/null || ver 2>/dev/null || echo '')\"\n echo \"Platform: $(uname -m 2>/dev/null || echo 'unknown')\"\n depends_on: [classify]\n\n - id: dedup-check\n bash: |\n KEYWORDS=$classify.output.keywords\n echo \"=== Searching for duplicates: $KEYWORDS ===\"\n\n echo \"--- Open Issues ---\"\n gh issue list --search \"$KEYWORDS\" --state open --limit 5 --json number,title,url,labels 2>/dev/null || echo \"No open matches\"\n\n echo \"--- Recently Closed ---\"\n gh issue list --search \"$KEYWORDS\" --state closed --limit 3 --json number,title,url,labels 2>/dev/null || echo \"No closed matches\"\n depends_on: [classify]\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 3: INVESTIGATE — Search codebase for related code\n # ═══════════════════════════════════════════════════════════════\n\n - id: investigate\n prompt: |\n You are a codebase investigator. 
Search for code related to the reported problem.\n\n ## Problem\n - **Area**: $classify.output.area\n - **Type**: $classify.output.type\n - **Title**: $classify.output.title\n - **Reproduction hint**: $classify.output.repro_hint\n\n ## Git Context\n $git-context.output\n\n ## Instructions\n\n 1. Based on the area, search the relevant packages:\n - web-ui: `packages/web/src/`, `packages/server/src/adapters/web/`, `packages/server/src/routes/`\n - api-server: `packages/server/src/routes/`, `packages/server/src/`\n - cli: `packages/cli/src/`\n - isolation: `packages/isolation/src/`, `packages/git/src/`\n - workflows: `packages/workflows/src/`\n - database: `packages/core/src/db/`\n - adapters: `packages/adapters/src/`\n - core: `packages/core/src/orchestrator/`, `packages/core/src/handlers/`\n - other: search broadly based on keywords — check `packages/*/src/`, config files, build scripts\n\n 2. Find: entry points, error handling paths, related type definitions, recent changes\n to the affected area (check git log for the specific files).\n\n 3. Write your findings to `$ARTIFACTS_DIR/issue-context.md` with this structure:\n ```\n # Codebase Investigation\n ## Relevant Files\n - `file:line` — description of what's there\n ## Error Handling\n - How errors are currently handled in this area\n ## Recent Changes\n - Any recent commits touching this code\n ## Suspected Root Cause\n - Based on code analysis, where the bug likely is\n ```\n\n Be thorough but focused. 
Only include files directly relevant to the reported problem.\n depends_on: [classify, git-context]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 4: REPRODUCE — Area-specific issue reproduction\n # ═══════════════════════════════════════════════════════════════\n\n - id: start-server\n bash: |\n # Allocate a free port using Bun's OS assignment\n PORT=$(bun -e \"const s = Bun.serve({port: 0, fetch: () => new Response('')}); console.log(s.port); s.stop()\")\n echo \"$PORT\" > \"$ARTIFACTS_DIR/.server-port\"\n\n # Start dev server in background\n PORT=$PORT bun run dev:server > \"$ARTIFACTS_DIR/.server-log\" 2>&1 &\n SERVER_PID=$!\n echo \"$SERVER_PID\" > \"$ARTIFACTS_DIR/.server-pid\"\n\n # Wait for server to be ready (up to 30s)\n for i in $(seq 1 30); do\n if curl -s \"http://localhost:$PORT/api/health\" > /dev/null 2>&1; then\n echo \"Server ready on port $PORT (PID: $SERVER_PID)\"\n exit 0\n fi\n sleep 1\n done\n\n echo \"WARNING: Server may not be fully ready after 30s (port $PORT, PID $SERVER_PID)\"\n echo \"Continuing anyway — reproduce node will handle connection errors\"\n depends_on: [classify]\n when: \"$classify.output.needs_server == 'true'\"\n timeout: 45000\n\n - id: reproduce\n prompt: |\n You are an issue reproduction specialist. Your job is to reproduce the reported\n problem and capture evidence (screenshots, command output, error messages).\n\n ## Problem Context\n - **Area**: $classify.output.area\n - **Type**: $classify.output.type\n - **Title**: $classify.output.title\n - **Reproduction hint**: $classify.output.repro_hint\n\n ## Investigation Findings\n $investigate.output\n\n ## Server Info\n If a server was started, read the port from: `cat \"$ARTIFACTS_DIR/.server-port\"`\n If the file doesn't exist, no server is running (area doesn't need one).\n\n ---\n\n ## Reproduction Playbooks\n\n Follow the playbook matching the area. Capture ALL evidence to `$ARTIFACTS_DIR/`.\n\n ### web-ui\n 1. 
Read the server port: `PORT=$(cat \"$ARTIFACTS_DIR/.server-port\" | tr -d '\\n')`\n 2. Open the app: `agent-browser open http://localhost:$PORT`\n 3. Take a baseline screenshot: `agent-browser screenshot \"$ARTIFACTS_DIR/repro-01-baseline.png\"`\n 4. Get interactive elements: `agent-browser snapshot -i`\n 5. Navigate to the area related to the issue (use @refs from snapshot)\n 6. Perform the actions described in the repro_hint\n 7. Screenshot each significant state: `agent-browser screenshot \"$ARTIFACTS_DIR/repro-02-action.png\"`\n 8. If an error appears, capture it: `agent-browser get text @errorElement`\n 9. Check browser console: `agent-browser console`\n 10. Check for JS errors: `agent-browser errors`\n 11. Final screenshot: `agent-browser screenshot \"$ARTIFACTS_DIR/repro-03-result.png\"`\n 12. Close browser: `agent-browser close`\n\n ### api-server\n 1. Read the server port: `PORT=$(cat \"$ARTIFACTS_DIR/.server-port\" | tr -d '\\n')`\n 2. Create a test conversation: `curl -s -X POST http://localhost:$PORT/api/conversations -H \"Content-Type: application/json\" -d '{}'`\n 3. Hit the problematic endpoint based on the repro_hint\n 4. Capture response codes and bodies: `curl -s -w \"\\nHTTP_CODE: %{http_code}\\n\" ...`\n 5. For SSE issues: `curl -s -N http://localhost:$PORT/api/stream/` (timeout after 10s)\n 6. Check server logs: `cat \"$ARTIFACTS_DIR/.server-log\" | tail -50`\n 7. Save all curl output to `$ARTIFACTS_DIR/repro-api-responses.txt`\n\n ### cli\n 1. Run the CLI command that should trigger the issue\n 2. Capture stdout and stderr separately:\n `bun run cli > \"$ARTIFACTS_DIR/repro-cli-stdout.txt\" 2> \"$ARTIFACTS_DIR/repro-cli-stderr.txt\"; echo \"EXIT_CODE: $?\" >> \"$ARTIFACTS_DIR/repro-cli-stdout.txt\"`\n 3. If workflow-related: `bun run cli workflow list --json > \"$ARTIFACTS_DIR/repro-workflow-list.json\" 2>&1`\n 4. If the command hangs, use timeout: `timeout 30 bun run cli `\n 5. Check for error messages in output\n\n ### isolation\n 1. 
Check current state: `bun run cli isolation list > \"$ARTIFACTS_DIR/repro-isolation-list.txt\" 2>&1`\n 2. Check git worktrees: `git worktree list > \"$ARTIFACTS_DIR/repro-worktree-list.txt\"`\n 3. Check branches: `git branch -a > \"$ARTIFACTS_DIR/repro-branches.txt\"`\n 4. Try the operation that should fail (based on repro_hint)\n 5. Capture the error output\n 6. Query isolation DB: `sqlite3 ~/.archon/archon.db \"SELECT * FROM remote_agent_isolation_environments ORDER BY created_at DESC LIMIT 10\" > \"$ARTIFACTS_DIR/repro-isolation-db.txt\" 2>&1`\n\n ### workflows\n 1. List workflows: `bun run cli workflow list --json > \"$ARTIFACTS_DIR/repro-workflow-list.json\" 2>&1`\n 2. If a specific workflow is mentioned, try running it:\n `bun run cli workflow run --no-worktree \"test input\" > \"$ARTIFACTS_DIR/repro-workflow-run.txt\" 2>&1`\n 3. If YAML parsing is the issue, try loading the definition directly\n 4. Check for error messages in execution output\n\n ### database\n 1. Check DB exists: `ls -la ~/.archon/archon.db 2>/dev/null`\n 2. Run targeted queries against affected tables:\n - `sqlite3 ~/.archon/archon.db \".schema \" > \"$ARTIFACTS_DIR/repro-db-schema.txt\"`\n - `sqlite3 ~/.archon/archon.db \"SELECT COUNT(*) FROM
\" > \"$ARTIFACTS_DIR/repro-db-counts.txt\"`\n 3. Check for the specific data condition described in the repro_hint\n 4. If PostgreSQL: use `psql $DATABASE_URL -c \"...\"` instead\n\n ### adapters\n 1. Read the server port: `PORT=$(cat \"$ARTIFACTS_DIR/.server-port\" | tr -d '\\n')`\n 2. Check adapter configuration: look for relevant env vars in `.env`\n 3. Check server startup logs: `cat \"$ARTIFACTS_DIR/.server-log\" | grep -i \"adapter\\|slack\\|telegram\\|github\\|discord\" | head -20`\n 4. If the adapter fails to initialize, capture the error\n 5. Test message routing via web API as a proxy:\n `curl -s -X POST http://localhost:$PORT/api/conversations//message -H \"Content-Type: application/json\" -d '{\"message\":\"/status\"}'`\n\n ### core\n 1. Read the server port: `PORT=$(cat \"$ARTIFACTS_DIR/.server-port\" | tr -d '\\n')`\n 2. Create a conversation: `curl -s -X POST http://localhost:$PORT/api/conversations -H \"Content-Type: application/json\" -d '{}'`\n 3. Send a message that triggers the issue:\n `curl -s -X POST http://localhost:$PORT/api/conversations//message -H \"Content-Type: application/json\" -d '{\"message\":\"\"}'`\n 4. Poll for responses: `curl -s http://localhost:$PORT/api/conversations//messages`\n 5. Check session state in DB: `sqlite3 ~/.archon/archon.db \"SELECT * FROM remote_agent_sessions WHERE conversation_id=''\" 2>/dev/null`\n 6. Check server logs: `cat \"$ARTIFACTS_DIR/.server-log\" | tail -50`\n\n ### other\n 1. Run `bun run validate` to check for any obvious failures — capture output:\n `bun run validate > \"$ARTIFACTS_DIR/repro-validate.txt\" 2>&1; echo \"EXIT_CODE: $?\" >> \"$ARTIFACTS_DIR/repro-validate.txt\"`\n 2. Search the codebase for keywords from the repro_hint:\n - Use Grep/Glob to find related files\n - Check recent git log for relevant changes\n 3. If the description implies a build or config issue:\n - Check `package.json` scripts, `tsconfig.json`, `.env.example`\n - Try running the relevant build/dev command\n 4. 
If the description implies a runtime issue:\n - Start the server (if `.server-port` file exists) and try to trigger the behavior\n - Check logs for errors\n 5. Document everything you tried, even if nothing reproduces clearly\n\n ---\n\n ## Output\n\n After following the playbook, write your findings to `$ARTIFACTS_DIR/reproduction-results.md`:\n\n ```markdown\n # Reproduction Results\n\n ## Status: [REPRODUCED | NOT_REPRODUCED | PARTIAL]\n\n ## Steps Taken\n 1. [step]\n 2. [step]\n\n ## Expected Behavior\n [what should happen]\n\n ## Actual Behavior\n [what actually happened — or \"could not trigger the reported behavior\"]\n\n ## Evidence Files\n - `$ARTIFACTS_DIR/repro-*.png` — screenshots (if web-ui)\n - `$ARTIFACTS_DIR/repro-*.txt` — command output\n - `$ARTIFACTS_DIR/repro-*.json` — structured data\n\n ## Environment\n [OS, versions, relevant config]\n\n ## Notes\n [any additional observations, suspected root cause refinements]\n ```\n\n CRITICAL: The Status line MUST be exactly one of: REPRODUCED, NOT_REPRODUCED, PARTIAL.\n This value is read by a downstream bash node to decide whether to create the issue.\n\n Even if you cannot fully reproduce the issue, document what you tried\n and what you observed. 
Partial reproduction is still valuable evidence.\n depends_on: [classify, git-context, investigate, start-server]\n context: fresh\n skills:\n - agent-browser\n trigger_rule: one_success\n idle_timeout: 300000\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 5: CLEANUP + GATE\n # ═══════════════════════════════════════════════════════════════\n\n - id: cleanup-server\n bash: |\n SERVER_PID=$(cat \"$ARTIFACTS_DIR/.server-pid\" 2>/dev/null | tr -d '\\n')\n SERVER_PORT=$(cat \"$ARTIFACTS_DIR/.server-port\" 2>/dev/null | tr -d '\\n')\n\n if [ -z \"$SERVER_PID\" ]; then\n echo \"No server was started — skipping cleanup\"\n exit 0\n fi\n\n echo \"Cleaning up server PID $SERVER_PID on port $SERVER_PORT...\"\n\n # Kill by PID (cross-platform)\n kill \"$SERVER_PID\" 2>/dev/null || taskkill //F //T //PID \"$SERVER_PID\" 2>/dev/null || true\n\n # Kill by port (fallback)\n if [ -n \"$SERVER_PORT\" ]; then\n fuser -k \"$SERVER_PORT/tcp\" 2>/dev/null || true\n lsof -ti:\"$SERVER_PORT\" 2>/dev/null | xargs kill -9 2>/dev/null || true\n netstat -ano 2>/dev/null | grep \":$SERVER_PORT \" | grep LISTENING | awk '{print $5}' | sort -u | while read pid; do\n taskkill //F //T //PID \"$pid\" 2>/dev/null || true\n done\n fi\n\n # Close any agent-browser session\n agent-browser close 2>/dev/null || true\n\n sleep 1\n echo \"Cleanup complete\"\n depends_on: [reproduce]\n trigger_rule: all_done\n\n - id: check-reproduction\n bash: |\n # Read the reproduction status from the results file\n if [ ! 
-f \"$ARTIFACTS_DIR/reproduction-results.md\" ]; then\n echo \"NOT_REPRODUCED\"\n exit 0\n fi\n\n STATUS=$(grep -oE '(NOT_REPRODUCED|REPRODUCED|PARTIAL)' \"$ARTIFACTS_DIR/reproduction-results.md\" | head -1)\n\n if [ -z \"$STATUS\" ]; then\n echo \"NOT_REPRODUCED\"\n else\n echo \"$STATUS\"\n fi\n depends_on: [cleanup-server]\n trigger_rule: all_done\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 6: BRANCH ON REPRODUCTION RESULT\n # ═══════════════════════════════════════════════════════════════\n\n - id: report-failure\n prompt: |\n The issue could not be reproduced. Report this to the user with actionable detail.\n\n ## Problem Description\n - **Title**: $classify.output.title\n - **Area**: $classify.output.area\n - **Type**: $classify.output.type\n - **Reproduction hint**: $classify.output.repro_hint\n\n ## What Was Tried\n $reproduce.output\n\n ## Investigation Findings\n $investigate.output\n\n ## Instructions\n\n Report to the user clearly:\n\n 1. **State upfront**: \"Could not reproduce the reported issue. No GitHub issue was created.\"\n\n 2. **Summarize what was tried**: List the specific steps the reproduce node took,\n based on the area playbook. Be concrete — \"Started server on port X, navigated to Y,\n clicked Z — no error appeared.\"\n\n 3. **Share what was found**: Include relevant findings from the investigation\n (code references, recent changes, suspected areas).\n\n 4. **Suggest next steps**:\n - Ask the user to provide more specific reproduction steps\n - Mention any environment-specific factors that might matter\n (OS, browser, database state, specific data conditions)\n - If the investigation found suspicious code, mention it as a lead\n - Suggest running with debug logging: `LOG_LEVEL=debug bun run dev`\n\n 5. **Offer to retry**: \"If you can provide more specific steps, run the workflow\n again with those details.\"\n\n Do NOT create a GitHub issue. 
The purpose of this node is to communicate back to the\n user so they can provide better information or investigate manually.\n depends_on: [check-reproduction]\n when: \"$check-reproduction.output == 'NOT_REPRODUCED'\"\n context: fresh\n\n - id: draft-issue\n prompt: |\n You are a technical writer drafting a GitHub issue. Assemble all gathered\n context into a clear, well-structured issue body.\n\n ## Classification\n - **Type**: $classify.output.type\n - **Area**: $classify.output.area\n - **Title**: $classify.output.title\n\n ## Issue Template\n If templates were found, use the most appropriate one as the structure:\n $fetch-template.output\n\n ## Duplicate Check Results\n $dedup-check.output\n\n ## Codebase Investigation\n $investigate.output\n\n ## Reproduction Results\n $reproduce.output\n\n ## Instructions\n\n 1. **Check duplicates first**: If the dedup-check found a clearly matching open issue,\n note this prominently at the top. Still draft the issue but add a note suggesting\n it may be a duplicate of #XYZ.\n\n 2. **Use the template** if one was found for bug reports. Fill every section with real data.\n\n 3. **Structure** (if no template):\n ```markdown\n ## Description\n [Clear 1-2 sentence description]\n\n ## Steps to Reproduce\n [Numbered steps from reproduction results]\n\n ## Expected Behavior\n [What should happen]\n\n ## Actual Behavior\n [What actually happened, with evidence]\n\n ## Environment\n - OS: [from git-context]\n - Bun: [version]\n - Node: [version]\n - Branch: [current branch]\n\n ## Relevant Code\n [Key file:line references from investigation]\n\n ## Additional Context\n [Screenshots, logs, database state — reference artifact files]\n ```\n\n 4. **Include reproduction evidence**:\n - If REPRODUCED: include full steps and all evidence\n - If PARTIAL: include what was observed, note incomplete reproduction\n\n 5. 
**Suggest labels** based on classification:\n - Area label: `area: web`, `area: cli`, `area: workflows`, etc.\n - Type label: `bug`, `regression`, `performance`, etc.\n\n 6. Write the complete issue body to `$ARTIFACTS_DIR/issue-draft.md`\n\n 7. Write a one-line suggested title to `$ARTIFACTS_DIR/.issue-title`\n\n 8. Write suggested labels (comma-separated) to `$ARTIFACTS_DIR/.issue-labels`\n depends_on: [check-reproduction, fetch-template, dedup-check, investigate]\n when: \"$check-reproduction.output != 'NOT_REPRODUCED'\"\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 7: CREATE ISSUE\n # ═══════════════════════════════════════════════════════════════\n\n - id: create-issue\n prompt: |\n Create the GitHub issue using the drafted content.\n\n ## Instructions\n\n 1. Read the draft: `cat \"$ARTIFACTS_DIR/issue-draft.md\"`\n 2. Read the title: `cat \"$ARTIFACTS_DIR/.issue-title\"`\n 3. Read suggested labels: `cat \"$ARTIFACTS_DIR/.issue-labels\"`\n\n 4. Check which labels actually exist in the repo:\n ```bash\n gh label list --json name -q '.[].name' | head -50\n ```\n Only use labels that exist. Skip any suggested label that doesn't match.\n\n 5. Create the issue:\n ```bash\n gh issue create \\\n --title \"$(cat \"$ARTIFACTS_DIR/.issue-title\")\" \\\n --body-file \"$ARTIFACTS_DIR/issue-draft.md\" \\\n --label \"label1,label2\"\n ```\n\n 6. Capture the result:\n ```bash\n ISSUE_URL=$(gh issue list --limit 1 --json url -q '.[0].url')\n echo \"$ISSUE_URL\" > \"$ARTIFACTS_DIR/.issue-url\"\n ```\n\n 7. 
Report to the user:\n - Issue URL\n - Title\n - Labels applied\n - Whether duplicates were found\n - Summary of reproduction results (reproduced/partial)\n depends_on: [draft-issue]\n context: fresh\n", - "archon-feature-development": "name: archon-feature-development\ndescription: |\n Use when: Implementing a feature from an existing plan.\n Input: Path to a plan file ($ARTIFACTS_DIR/plan.md) or GitHub issue containing a plan.\n Does: Implements the plan with validation loops -> creates pull request.\n NOT for: Creating plans (plans should be created separately), bug fixes, code reviews.\n\nnodes:\n - id: implement\n command: archon-implement\n model: claude-opus-4-6[1m]\n\n - id: create-pr\n command: archon-create-pr\n depends_on: [implement]\n context: fresh\n", - "archon-fix-github-issue": "name: archon-fix-github-issue\ndescription: |\n Use when: User wants to FIX, RESOLVE, or IMPLEMENT a solution for a GitHub issue.\n Triggers: \"fix this issue\", \"implement issue #123\", \"resolve this bug\", \"fix it\",\n \"fix issue\", \"resolve issue\", \"fix #123\".\n NOT for: Comprehensive multi-agent reviews (use archon-issue-review-full),\n questions about issues, CI failures, PR reviews, general exploration.\n\n DAG workflow that:\n 1. Classifies the issue (bug/feature/enhancement/etc)\n 2. Researches context (web research + codebase exploration via investigate/plan)\n 3. Routes to investigate (bugs) or plan (features) based on classification\n 4. Implements the fix/feature with validation\n 5. Creates a draft PR using the repo's PR template\n 6. Runs smart review (always code review + CLAUDE.md check, conditional additional agents)\n 7. Aggressively self-fixes all findings (tests, docs, error handling)\n 8. Simplifies changed code (implements fixes directly, not just reports)\n 9. 
Reports results back to the GitHub issue with follow-up suggestions\n\nprovider: claude\nmodel: sonnet\n\nnodes:\n # ═══════════════════════════════════════════════════════════════\n # PHASE 1: FETCH & CLASSIFY\n # ═══════════════════════════════════════════════════════════════\n\n - id: extract-issue-number\n prompt: |\n Find the GitHub issue number for this request.\n\n Request: $ARGUMENTS\n\n Rules:\n - If the message contains an explicit issue number (e.g., \"#709\", \"issue 709\", \"709\"), extract that number.\n - If the message is ambiguous (e.g., \"fix the SQLite timestamp bug\"), use `gh issue list` to search for matching issues and pick the best match.\n\n CRITICAL: Your final output must be ONLY the bare number with no quotes, no markdown, no explanation. Example correct output: 709\n\n - id: fetch-issue\n bash: |\n # Strip quotes, whitespace, markdown backticks from AI output\n ISSUE_NUM=$(echo \"$extract-issue-number.output\" | tr -d \"'\\\"\\`\\n \" | grep -oE '[0-9]+' | head -1)\n if [ -z \"$ISSUE_NUM\" ]; then\n echo \"Failed to extract issue number from: $extract-issue-number.output\" >&2\n exit 1\n fi\n gh issue view \"$ISSUE_NUM\" --json title,body,labels,comments,state,url,author\n depends_on: [extract-issue-number]\n\n - id: classify\n prompt: |\n You are an issue classifier. 
Analyze the GitHub issue below and determine its type.\n\n ## Issue Content\n\n $fetch-issue.output\n\n ## Classification Rules\n\n | Type | Indicators |\n |------|------------|\n | bug | \"broken\", \"error\", \"crash\", \"doesn't work\", stack traces, regression |\n | feature | \"add\", \"new\", \"support\", \"would be nice\", net-new capability |\n | enhancement | \"improve\", \"better\", \"update existing\", \"extend\", incremental improvement |\n | refactor | \"clean up\", \"simplify\", \"reorganize\", \"restructure\" |\n | chore | \"update deps\", \"upgrade\", \"maintenance\", \"CI/CD\" |\n | documentation | \"docs\", \"readme\", \"clarify\", \"examples\" |\n\n Provide reasoning for your classification.\n depends_on: [fetch-issue]\n model: haiku\n allowed_tools: []\n output_format:\n type: object\n properties:\n issue_type:\n type: string\n enum: [\"bug\", \"feature\", \"enhancement\", \"refactor\", \"chore\", \"documentation\"]\n title:\n type: string\n reasoning:\n type: string\n required: [issue_type, title, reasoning]\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 2: RESEARCH (parallel with PR template fetch)\n # ═══════════════════════════════════════════════════════════════\n\n - id: web-research\n command: archon-web-research\n depends_on: [classify]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 3: INVESTIGATE (bugs) / PLAN (features)\n # ═══════════════════════════════════════════════════════════════\n\n - id: investigate\n command: archon-investigate-issue\n depends_on: [classify, web-research]\n when: \"$classify.output.issue_type == 'bug'\"\n context: fresh\n\n - id: plan\n command: archon-create-plan\n depends_on: [classify, web-research]\n when: \"$classify.output.issue_type != 'bug'\"\n context: fresh\n\n # Bridge: ensure investigation.md exists for the implement step\n # archon-fix-issue reads from $ARTIFACTS_DIR/investigation.md\n # archon-create-plan writes to 
$ARTIFACTS_DIR/plan.md\n # This node copies plan.md → investigation.md when the plan path was taken\n - id: bridge-artifacts\n bash: |\n if [ -f \"$ARTIFACTS_DIR/plan.md\" ] && [ ! -f \"$ARTIFACTS_DIR/investigation.md\" ]; then\n cp \"$ARTIFACTS_DIR/plan.md\" \"$ARTIFACTS_DIR/investigation.md\"\n echo \"Bridged plan.md to investigation.md for implement step\"\n elif [ -f \"$ARTIFACTS_DIR/investigation.md\" ]; then\n echo \"investigation.md exists from investigate step\"\n else\n echo \"WARNING: No investigation.md or plan.md found — implement may fail\"\n fi\n depends_on: [investigate, plan]\n trigger_rule: one_success\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 4: IMPLEMENT\n # ═══════════════════════════════════════════════════════════════\n\n - id: implement\n command: archon-fix-issue\n depends_on: [bridge-artifacts]\n context: fresh\n model: claude-opus-4-6[1m]\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 5: VALIDATE\n # ═══════════════════════════════════════════════════════════════\n\n - id: validate\n command: archon-validate\n depends_on: [implement]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 6: CREATE DRAFT PR\n # ═══════════════════════════════════════════════════════════════\n\n - id: create-pr\n prompt: |\n Create a draft pull request for the current branch.\n\n ## Context\n\n - **Issue**: $ARGUMENTS\n - **Classification**: $classify.output\n - **Issue title**: $classify.output.title\n\n ## Instructions\n\n 1. Check git status — ensure all changes are committed. If uncommitted changes exist, stage and commit them.\n 2. Push the branch: `git push -u origin HEAD`\n 3. Read implementation artifacts from `$ARTIFACTS_DIR/` for context:\n - `$ARTIFACTS_DIR/investigation.md` or `$ARTIFACTS_DIR/plan.md`\n - `$ARTIFACTS_DIR/implementation.md`\n - `$ARTIFACTS_DIR/validation.md`\n 4. 
Check if a PR already exists for this branch: `gh pr list --head $(git branch --show-current)`\n - If PR exists, skip creation and capture its number\n 5. Look for the project's PR template at `.github/pull_request_template.md`, `.github/PULL_REQUEST_TEMPLATE.md`, or `docs/PULL_REQUEST_TEMPLATE.md`. Read whichever one exists.\n 6. Create a DRAFT PR: `gh pr create --draft --base $BASE_BRANCH`\n - Title: concise, imperative mood, under 70 chars\n - Body: if a PR template was found, fill in **every section** with details from the artifacts. Don't skip sections or leave placeholders. If no template, write a body with summary, changes, validation evidence, and `Fixes #...`.\n - Link to issue: include `Fixes #...` or `Closes #...`\n 7. Capture PR identifiers:\n ```bash\n PR_NUMBER=$(gh pr view --json number -q '.number')\n echo \"$PR_NUMBER\" > \"$ARTIFACTS_DIR/.pr-number\"\n PR_URL=$(gh pr view --json url -q '.url')\n echo \"$PR_URL\" > \"$ARTIFACTS_DIR/.pr-url\"\n ```\n depends_on: [validate]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 7: REVIEW\n # ═══════════════════════════════════════════════════════════════\n\n - id: review-scope\n command: archon-pr-review-scope\n depends_on: [create-pr]\n context: fresh\n\n - id: review-classify\n prompt: |\n You are a PR review classifier. Analyze the PR scope and determine\n which review agents should run.\n\n ## PR Scope\n\n $review-scope.output\n\n ## Rules\n\n - **Code review**: ALWAYS run. This is mandatory for every PR. 
It also checks\n the PR against CLAUDE.md rules and project conventions.\n - **Error handling**: Run if the diff touches code with try/catch, error handling,\n async/await, or adds new failure paths.\n - **Test coverage**: Run if the diff touches source code (not just tests, docs, or config).\n - **Comment quality**: Run if the diff adds or modifies comments, docstrings, JSDoc,\n or significant documentation within code files.\n - **Docs impact**: Run if the diff adds/removes/renames public APIs, commands, CLI flags,\n environment variables, or user-facing features.\n\n Provide your reasoning for each decision.\n depends_on: [review-scope]\n model: haiku\n allowed_tools: []\n context: fresh\n output_format:\n type: object\n properties:\n run_code_review:\n type: string\n enum: [\"true\", \"false\"]\n run_error_handling:\n type: string\n enum: [\"true\", \"false\"]\n run_test_coverage:\n type: string\n enum: [\"true\", \"false\"]\n run_comment_quality:\n type: string\n enum: [\"true\", \"false\"]\n run_docs_impact:\n type: string\n enum: [\"true\", \"false\"]\n reasoning:\n type: string\n required:\n - run_code_review\n - run_error_handling\n - run_test_coverage\n - run_comment_quality\n - run_docs_impact\n - reasoning\n\n # Code review always runs — mandatory\n - id: code-review\n command: archon-code-review-agent\n depends_on: [review-classify]\n context: fresh\n\n - id: error-handling\n command: archon-error-handling-agent\n depends_on: [review-classify]\n when: \"$review-classify.output.run_error_handling == 'true'\"\n context: fresh\n\n - id: test-coverage\n command: archon-test-coverage-agent\n depends_on: [review-classify]\n when: \"$review-classify.output.run_test_coverage == 'true'\"\n context: fresh\n\n - id: comment-quality\n command: archon-comment-quality-agent\n depends_on: [review-classify]\n when: \"$review-classify.output.run_comment_quality == 'true'\"\n context: fresh\n\n - id: docs-impact\n command: archon-docs-impact-agent\n depends_on: 
[review-classify]\n when: \"$review-classify.output.run_docs_impact == 'true'\"\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 8: SYNTHESIZE + SELF-FIX\n # ═══════════════════════════════════════════════════════════════\n\n - id: synthesize\n command: archon-synthesize-review\n depends_on: [code-review, error-handling, test-coverage, comment-quality, docs-impact]\n trigger_rule: one_success\n context: fresh\n\n - id: self-fix\n command: archon-self-fix-all\n depends_on: [synthesize]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 9: SIMPLIFY\n # ═══════════════════════════════════════════════════════════════\n\n - id: simplify\n command: archon-simplify-changes\n depends_on: [self-fix]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 10: REPORT\n # ═══════════════════════════════════════════════════════════════\n\n - id: report\n command: archon-issue-completion-report\n depends_on: [simplify]\n context: fresh\n", - "archon-idea-to-pr": "name: archon-idea-to-pr\ndescription: |\n Use when: You have a feature idea or description and want end-to-end development.\n Input: Feature description in natural language, or path to a PRD file\n Output: PR ready for merge with comprehensive review completed\n\n Full workflow:\n 1. Create comprehensive implementation plan with codebase analysis\n 2. Setup branch and extract scope limits\n 3. Verify plan research is still valid\n 4. Implement all tasks with type-checking\n 5. Run full validation suite\n 6. Create PR with template, mark ready\n 7. Comprehensive code review (5 parallel agents with scope limit awareness)\n 8. Synthesize and fix review findings\n 9. 
Final summary with decision matrix -> GitHub comment + follow-up recommendations\n\n NOT for: Executing existing plans (use archon-plan-to-pr), quick fixes, standalone reviews.\n\nnodes:\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 0: CREATE PLAN\n # ═══════════════════════════════════════════════════════════════════\n\n - id: create-plan\n command: archon-create-plan\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 1: SETUP\n # ═══════════════════════════════════════════════════════════════════\n\n - id: plan-setup\n command: archon-plan-setup\n depends_on: [create-plan]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 2: CONFIRM PLAN\n # ═══════════════════════════════════════════════════════════════════\n\n - id: confirm-plan\n command: archon-confirm-plan\n depends_on: [plan-setup]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 3: IMPLEMENT\n # ═══════════════════════════════════════════════════════════════════\n\n - id: implement-tasks\n command: archon-implement-tasks\n depends_on: [confirm-plan]\n context: fresh\n model: claude-opus-4-6[1m]\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 4: VALIDATE\n # ═══════════════════════════════════════════════════════════════════\n\n - id: validate\n command: archon-validate\n depends_on: [implement-tasks]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 5: FINALIZE PR\n # ═══════════════════════════════════════════════════════════════════\n\n - id: finalize-pr\n command: archon-finalize-pr\n depends_on: [validate]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 6: CODE REVIEW\n # ═══════════════════════════════════════════════════════════════════\n\n - id: review-scope\n command: 
archon-pr-review-scope\n depends_on: [finalize-pr]\n context: fresh\n\n - id: sync\n command: archon-sync-pr-with-main\n depends_on: [review-scope]\n context: fresh\n\n - id: code-review\n command: archon-code-review-agent\n depends_on: [sync]\n context: fresh\n\n - id: error-handling\n command: archon-error-handling-agent\n depends_on: [sync]\n context: fresh\n\n - id: test-coverage\n command: archon-test-coverage-agent\n depends_on: [sync]\n context: fresh\n\n - id: comment-quality\n command: archon-comment-quality-agent\n depends_on: [sync]\n context: fresh\n\n - id: docs-impact\n command: archon-docs-impact-agent\n depends_on: [sync]\n context: fresh\n\n - id: synthesize\n command: archon-synthesize-review\n depends_on: [code-review, error-handling, test-coverage, comment-quality, docs-impact]\n trigger_rule: one_success\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 7: FIX REVIEW ISSUES\n # ═══════════════════════════════════════════════════════════════════\n\n - id: implement-fixes\n command: archon-implement-review-fixes\n depends_on: [synthesize]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 8: FINAL SUMMARY & FOLLOW-UP\n # ═══════════════════════════════════════════════════════════════════\n\n - id: workflow-summary\n command: archon-workflow-summary\n depends_on: [implement-fixes]\n context: fresh\n", + "archon-feature-development": "name: archon-feature-development\ndescription: |\n Use when: Implementing a feature from an existing plan.\n Input: Path to a plan file ($ARTIFACTS_DIR/plan.md) or GitHub issue containing a plan.\n Does: Implements the plan with validation loops -> creates pull request.\n NOT for: Creating plans (plans should be created separately), bug fixes, code reviews.\n\nnodes:\n - id: implement\n command: archon-implement\n model: opus[1m]\n\n - id: create-pr\n command: archon-create-pr\n depends_on: [implement]\n context: 
fresh\n", + "archon-fix-github-issue": "name: archon-fix-github-issue\ndescription: |\n Use when: User wants to FIX, RESOLVE, or IMPLEMENT a solution for a GitHub issue.\n Triggers: \"fix this issue\", \"implement issue #123\", \"resolve this bug\", \"fix it\",\n \"fix issue\", \"resolve issue\", \"fix #123\".\n NOT for: Comprehensive multi-agent reviews (use archon-issue-review-full),\n questions about issues, CI failures, PR reviews, general exploration.\n\n DAG workflow that:\n 1. Classifies the issue (bug/feature/enhancement/etc)\n 2. Researches context (web research + codebase exploration via investigate/plan)\n 3. Routes to investigate (bugs) or plan (features) based on classification\n 4. Implements the fix/feature with validation\n 5. Creates a draft PR using the repo's PR template\n 6. Runs smart review (always code review + CLAUDE.md check, conditional additional agents)\n 7. Aggressively self-fixes all findings (tests, docs, error handling)\n 8. Simplifies changed code (implements fixes directly, not just reports)\n 9. Reports results back to the GitHub issue with follow-up suggestions\n\nprovider: claude\nmodel: sonnet\n\nnodes:\n # ═══════════════════════════════════════════════════════════════\n # PHASE 1: FETCH & CLASSIFY\n # ═══════════════════════════════════════════════════════════════\n\n - id: extract-issue-number\n prompt: |\n Find the GitHub issue number for this request.\n\n Request: $ARGUMENTS\n\n Rules:\n - If the message contains an explicit issue number (e.g., \"#709\", \"issue 709\", \"709\"), extract that number.\n - If the message is ambiguous (e.g., \"fix the SQLite timestamp bug\"), use `gh issue list` to search for matching issues and pick the best match.\n\n CRITICAL: Your final output must be ONLY the bare number with no quotes, no markdown, no explanation. 
Example correct output: 709\n\n - id: fetch-issue\n bash: |\n # Strip quotes, whitespace, markdown backticks from AI output\n ISSUE_NUM=$(echo \"$extract-issue-number.output\" | tr -d \"'\\\"\\`\\n \" | grep -oE '[0-9]+' | head -1)\n if [ -z \"$ISSUE_NUM\" ]; then\n echo \"Failed to extract issue number from: $extract-issue-number.output\" >&2\n exit 1\n fi\n gh issue view \"$ISSUE_NUM\" --json title,body,labels,comments,state,url,author\n depends_on: [extract-issue-number]\n\n - id: classify\n prompt: |\n You are an issue classifier. Analyze the GitHub issue below and determine its type.\n\n ## Issue Content\n\n $fetch-issue.output\n\n ## Classification Rules\n\n | Type | Indicators |\n |------|------------|\n | bug | \"broken\", \"error\", \"crash\", \"doesn't work\", stack traces, regression |\n | feature | \"add\", \"new\", \"support\", \"would be nice\", net-new capability |\n | enhancement | \"improve\", \"better\", \"update existing\", \"extend\", incremental improvement |\n | refactor | \"clean up\", \"simplify\", \"reorganize\", \"restructure\" |\n | chore | \"update deps\", \"upgrade\", \"maintenance\", \"CI/CD\" |\n | documentation | \"docs\", \"readme\", \"clarify\", \"examples\" |\n\n Provide reasoning for your classification.\n depends_on: [fetch-issue]\n model: haiku\n allowed_tools: []\n output_format:\n type: object\n properties:\n issue_type:\n type: string\n enum: [\"bug\", \"feature\", \"enhancement\", \"refactor\", \"chore\", \"documentation\"]\n title:\n type: string\n reasoning:\n type: string\n required: [issue_type, title, reasoning]\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 2: RESEARCH (parallel with PR template fetch)\n # ═══════════════════════════════════════════════════════════════\n\n - id: web-research\n command: archon-web-research\n depends_on: [classify]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 3: INVESTIGATE (bugs) / PLAN (features)\n # 
═══════════════════════════════════════════════════════════════\n\n - id: investigate\n command: archon-investigate-issue\n depends_on: [classify, web-research]\n when: \"$classify.output.issue_type == 'bug'\"\n context: fresh\n\n - id: plan\n command: archon-create-plan\n depends_on: [classify, web-research]\n when: \"$classify.output.issue_type != 'bug'\"\n context: fresh\n\n # Bridge: ensure investigation.md exists for the implement step\n # archon-fix-issue reads from $ARTIFACTS_DIR/investigation.md\n # archon-create-plan writes to $ARTIFACTS_DIR/plan.md\n # This node copies plan.md → investigation.md when the plan path was taken\n - id: bridge-artifacts\n bash: |\n if [ -f \"$ARTIFACTS_DIR/plan.md\" ] && [ ! -f \"$ARTIFACTS_DIR/investigation.md\" ]; then\n cp \"$ARTIFACTS_DIR/plan.md\" \"$ARTIFACTS_DIR/investigation.md\"\n echo \"Bridged plan.md to investigation.md for implement step\"\n elif [ -f \"$ARTIFACTS_DIR/investigation.md\" ]; then\n echo \"investigation.md exists from investigate step\"\n else\n echo \"WARNING: No investigation.md or plan.md found — implement may fail\"\n fi\n depends_on: [investigate, plan]\n trigger_rule: one_success\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 4: IMPLEMENT\n # ═══════════════════════════════════════════════════════════════\n\n - id: implement\n command: archon-fix-issue\n depends_on: [bridge-artifacts]\n context: fresh\n model: opus[1m]\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 5: VALIDATE\n # ═══════════════════════════════════════════════════════════════\n\n - id: validate\n command: archon-validate\n depends_on: [implement]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 6: CREATE DRAFT PR\n # ═══════════════════════════════════════════════════════════════\n\n - id: create-pr\n prompt: |\n Create a draft pull request for the current branch.\n\n ## Context\n\n - **Issue**: $ARGUMENTS\n - 
**Classification**: $classify.output\n - **Issue title**: $classify.output.title\n\n ## Instructions\n\n 1. Check git status — ensure all changes are committed. If uncommitted changes exist, stage and commit them.\n 2. Push the branch: `git push -u origin HEAD`\n 3. Read implementation artifacts from `$ARTIFACTS_DIR/` for context:\n - `$ARTIFACTS_DIR/investigation.md` or `$ARTIFACTS_DIR/plan.md`\n - `$ARTIFACTS_DIR/implementation.md`\n - `$ARTIFACTS_DIR/validation.md`\n 4. Check if a PR already exists for this branch: `gh pr list --head $(git branch --show-current)`\n - If PR exists, skip creation and capture its number\n 5. Look for the project's PR template at `.github/pull_request_template.md`, `.github/PULL_REQUEST_TEMPLATE.md`, or `docs/PULL_REQUEST_TEMPLATE.md`. Read whichever one exists.\n 6. Create a DRAFT PR: `gh pr create --draft --base $BASE_BRANCH`\n - Title: concise, imperative mood, under 70 chars\n - Body: if a PR template was found, fill in **every section** with details from the artifacts. Don't skip sections or leave placeholders. If no template, write a body with summary, changes, validation evidence, and `Fixes #...`.\n - Link to issue: include `Fixes #...` or `Closes #...`\n 7. Capture PR identifiers:\n ```bash\n PR_NUMBER=$(gh pr view --json number -q '.number')\n echo \"$PR_NUMBER\" > \"$ARTIFACTS_DIR/.pr-number\"\n PR_URL=$(gh pr view --json url -q '.url')\n echo \"$PR_URL\" > \"$ARTIFACTS_DIR/.pr-url\"\n ```\n depends_on: [validate]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 7: REVIEW\n # ═══════════════════════════════════════════════════════════════\n\n - id: review-scope\n command: archon-pr-review-scope\n depends_on: [create-pr]\n context: fresh\n\n - id: review-classify\n prompt: |\n You are a PR review classifier. Analyze the PR scope and determine\n which review agents should run.\n\n ## PR Scope\n\n $review-scope.output\n\n ## Rules\n\n - **Code review**: ALWAYS run. 
This is mandatory for every PR. It also checks\n the PR against CLAUDE.md rules and project conventions.\n - **Error handling**: Run if the diff touches code with try/catch, error handling,\n async/await, or adds new failure paths.\n - **Test coverage**: Run if the diff touches source code (not just tests, docs, or config).\n - **Comment quality**: Run if the diff adds or modifies comments, docstrings, JSDoc,\n or significant documentation within code files.\n - **Docs impact**: Run if the diff adds/removes/renames public APIs, commands, CLI flags,\n environment variables, or user-facing features.\n\n Provide your reasoning for each decision.\n depends_on: [review-scope]\n model: haiku\n allowed_tools: []\n context: fresh\n output_format:\n type: object\n properties:\n run_code_review:\n type: string\n enum: [\"true\", \"false\"]\n run_error_handling:\n type: string\n enum: [\"true\", \"false\"]\n run_test_coverage:\n type: string\n enum: [\"true\", \"false\"]\n run_comment_quality:\n type: string\n enum: [\"true\", \"false\"]\n run_docs_impact:\n type: string\n enum: [\"true\", \"false\"]\n reasoning:\n type: string\n required:\n - run_code_review\n - run_error_handling\n - run_test_coverage\n - run_comment_quality\n - run_docs_impact\n - reasoning\n\n # Code review always runs — mandatory\n - id: code-review\n command: archon-code-review-agent\n depends_on: [review-classify]\n context: fresh\n\n - id: error-handling\n command: archon-error-handling-agent\n depends_on: [review-classify]\n when: \"$review-classify.output.run_error_handling == 'true'\"\n context: fresh\n\n - id: test-coverage\n command: archon-test-coverage-agent\n depends_on: [review-classify]\n when: \"$review-classify.output.run_test_coverage == 'true'\"\n context: fresh\n\n - id: comment-quality\n command: archon-comment-quality-agent\n depends_on: [review-classify]\n when: \"$review-classify.output.run_comment_quality == 'true'\"\n context: fresh\n\n - id: docs-impact\n command: 
archon-docs-impact-agent\n depends_on: [review-classify]\n when: \"$review-classify.output.run_docs_impact == 'true'\"\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 8: SYNTHESIZE + SELF-FIX\n # ═══════════════════════════════════════════════════════════════\n\n - id: synthesize\n command: archon-synthesize-review\n depends_on: [code-review, error-handling, test-coverage, comment-quality, docs-impact]\n trigger_rule: one_success\n context: fresh\n\n - id: self-fix\n command: archon-self-fix-all\n depends_on: [synthesize]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 9: SIMPLIFY\n # ═══════════════════════════════════════════════════════════════\n\n - id: simplify\n command: archon-simplify-changes\n depends_on: [self-fix]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 10: REPORT\n # ═══════════════════════════════════════════════════════════════\n\n - id: report\n command: archon-issue-completion-report\n depends_on: [simplify]\n context: fresh\n", + "archon-idea-to-pr": "name: archon-idea-to-pr\ndescription: |\n Use when: You have a feature idea or description and want end-to-end development.\n Input: Feature description in natural language, or path to a PRD file\n Output: PR ready for merge with comprehensive review completed\n\n Full workflow:\n 1. Create comprehensive implementation plan with codebase analysis\n 2. Setup branch and extract scope limits\n 3. Verify plan research is still valid\n 4. Implement all tasks with type-checking\n 5. Run full validation suite\n 6. Create PR with template, mark ready\n 7. Comprehensive code review (5 parallel agents with scope limit awareness)\n 8. Synthesize and fix review findings\n 9. 
Final summary with decision matrix -> GitHub comment + follow-up recommendations\n\n NOT for: Executing existing plans (use archon-plan-to-pr), quick fixes, standalone reviews.\n\nnodes:\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 0: CREATE PLAN\n # ═══════════════════════════════════════════════════════════════════\n\n - id: create-plan\n command: archon-create-plan\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 1: SETUP\n # ═══════════════════════════════════════════════════════════════════\n\n - id: plan-setup\n command: archon-plan-setup\n depends_on: [create-plan]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 2: CONFIRM PLAN\n # ═══════════════════════════════════════════════════════════════════\n\n - id: confirm-plan\n command: archon-confirm-plan\n depends_on: [plan-setup]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 3: IMPLEMENT\n # ═══════════════════════════════════════════════════════════════════\n\n - id: implement-tasks\n command: archon-implement-tasks\n depends_on: [confirm-plan]\n context: fresh\n model: opus[1m]\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 4: VALIDATE\n # ═══════════════════════════════════════════════════════════════════\n\n - id: validate\n command: archon-validate\n depends_on: [implement-tasks]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 5: FINALIZE PR\n # ═══════════════════════════════════════════════════════════════════\n\n - id: finalize-pr\n command: archon-finalize-pr\n depends_on: [validate]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 6: CODE REVIEW\n # ═══════════════════════════════════════════════════════════════════\n\n - id: review-scope\n command: 
archon-pr-review-scope\n depends_on: [finalize-pr]\n context: fresh\n\n - id: sync\n command: archon-sync-pr-with-main\n depends_on: [review-scope]\n context: fresh\n\n - id: code-review\n command: archon-code-review-agent\n depends_on: [sync]\n context: fresh\n\n - id: error-handling\n command: archon-error-handling-agent\n depends_on: [sync]\n context: fresh\n\n - id: test-coverage\n command: archon-test-coverage-agent\n depends_on: [sync]\n context: fresh\n\n - id: comment-quality\n command: archon-comment-quality-agent\n depends_on: [sync]\n context: fresh\n\n - id: docs-impact\n command: archon-docs-impact-agent\n depends_on: [sync]\n context: fresh\n\n - id: synthesize\n command: archon-synthesize-review\n depends_on: [code-review, error-handling, test-coverage, comment-quality, docs-impact]\n trigger_rule: one_success\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 7: FIX REVIEW ISSUES\n # ═══════════════════════════════════════════════════════════════════\n\n - id: implement-fixes\n command: archon-implement-review-fixes\n depends_on: [synthesize]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 8: FINAL SUMMARY & FOLLOW-UP\n # ═══════════════════════════════════════════════════════════════════\n\n - id: workflow-summary\n command: archon-workflow-summary\n depends_on: [implement-fixes]\n context: fresh\n", "archon-interactive-prd": "name: archon-interactive-prd\ndescription: |\n Use when: User wants to create a PRD through guided conversation.\n Triggers: \"create a prd\", \"new prd\", \"interactive prd\", \"plan a feature\",\n \"product requirements\", \"write a prd\".\n NOT for: Autonomous PRD generation without human input (use archon-ralph-generate).\n\n Interactive workflow that guides the user through problem-first PRD creation:\n 1. Understand the idea → ask foundation questions → wait for answers\n 2. 
Research market & codebase → ask deep dive questions → wait for answers\n 3. Assess technical feasibility → ask scope questions → wait for answers\n 4. Generate PRD → validate technical claims against codebase → output\n\nprovider: claude\ninteractive: true\n\nnodes:\n # ═══════════════════════════════════════════════════════════════\n # PHASE 1: INITIATE — Understand the idea\n # ═══════════════════════════════════════════════════════════════\n\n - id: initiate\n model: sonnet\n prompt: |\n You are a sharp product manager starting a PRD creation process.\n You think from first principles — start with primitives, not features.\n\n The user wants to build: $ARGUMENTS\n\n If the input is clear, restate your understanding in 2-3 sentences and confirm:\n \"I understand you want to build: {restated understanding}. Is this correct?\"\n\n If the input is vague or empty, ask:\n \"What do you want to build? Describe the product, feature, or capability.\"\n\n Then present the Foundation Questions (all at once — the user will answer in the next step):\n\n **Foundation Questions:**\n\n 1. **Who** has this problem? Be specific — not just \"users\" but what type of person/role?\n 2. **What** problem are they facing? Describe the observable pain, not the assumed need.\n 3. **Why** can't they solve it today? What alternatives exist and why do they fail?\n 4. **Why now?** What changed that makes this worth building?\n 5. **How** will you know if you solved it? What would success look like?\n\n Keep it conversational. Don't generate any PRD content yet.\n\n # ═══════════════════════════════════════════════════════════════\n # GATE 1: User answers foundation questions\n # ═══════════════════════════════════════════════════════════════\n\n - id: foundation-gate\n approval:\n message: \"Answer the foundation questions above. 
Your answers will guide the research phase.\"\n capture_response: true\n depends_on: [initiate]\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 2: GROUNDING — Research market & codebase\n # ═══════════════════════════════════════════════════════════════\n\n - id: research\n model: sonnet\n prompt: |\n You are researching context for a PRD. Think from first principles —\n what already exists before proposing anything new.\n\n **The idea**: $ARGUMENTS\n\n **User's foundation answers**:\n $foundation-gate.output\n\n Research the landscape:\n\n 1. Search the web for similar products, competitors, and how others solve this problem\n 2. **Explore the codebase deeply** — find related existing functionality, APIs, UI components,\n database tables, and patterns. Read actual files, don't assume. Note exact file paths and\n what each file does.\n 3. Look for common patterns, anti-patterns, and recent trends\n\n **First principles rule**: Before suggesting anything new, verify what already exists.\n If there's an existing API endpoint, UI page, or component that partially solves the\n problem, note it explicitly. The best solution extends what exists, not replaces it.\n\n Present a summary to the user:\n\n **What I found:**\n - {Market insights — similar products, competitor approaches}\n - {What already exists in the codebase — specific files, endpoints, components}\n - {Key insight that might change the approach}\n\n Then ask the **Deep Dive Questions**:\n\n 1. **Vision**: In one sentence, what's the ideal end state if this succeeds wildly?\n 2. **Primary User**: Describe your most important user — their role, context, and what triggers their need.\n 3. **Job to Be Done**: Complete this: \"When [situation], I want to [motivation], so I can [outcome].\"\n 4. **Non-Users**: Who is explicitly NOT the target?\n 5. **Constraints**: What limitations exist? (time, budget, technical, regulatory)\n\n Does the research change or refine your thinking? 
Answer the deep dive questions.\n depends_on: [foundation-gate]\n\n # ═══════════════════════════════════════════════════════════════\n # GATE 2: User answers deep dive questions\n # ═══════════════════════════════════════════════════════════════\n\n - id: deepdive-gate\n approval:\n message: \"Answer the deep dive questions above (vision, primary user, JTBD, constraints). Add any adjustments from the research.\"\n capture_response: true\n depends_on: [research]\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 3: TECHNICAL GROUNDING — Feasibility from what exists\n # ═══════════════════════════════════════════════════════════════\n\n - id: technical\n model: sonnet\n prompt: |\n You are assessing technical feasibility for a PRD.\n Think from first principles — start with what exists, not what you'd build from scratch.\n\n **The idea**: $ARGUMENTS\n **Foundation answers**: $foundation-gate.output\n **Deep dive answers**: $deepdive-gate.output\n\n **CRITICAL**: Explore the codebase by READING actual files. Do not guess or assume.\n For every claim you make about the codebase, cite the exact file and line.\n\n 1. **What already exists** that partially solves this problem?\n - Read existing API endpoints, DB queries, UI components\n - Note exact function names, table schemas, component names\n - What data is already being collected/stored?\n 2. **What's the smallest change** to the existing system that solves the core problem?\n - Prefer extending existing files over creating new ones\n - Prefer using existing endpoints over creating new ones\n - Prefer adding to existing UI pages over new pages\n 3. **What are the actual primitives** we need?\n - A new DB query? An existing one that needs a parameter?\n - A new component? Or an existing component that needs a prop?\n - A new endpoint? Or an existing endpoint that already returns the data?\n 4. 
**What's the risk?**\n - Where could this go wrong?\n - What assumptions need validation?\n\n Present a summary:\n\n **What Already Exists (verified by reading code):**\n - {endpoint/component/query} at `{file:line}` — {what it does}\n - {endpoint/component/query} at `{file:line}` — {what it does}\n\n **Smallest Change to Solve the Problem:**\n - {change 1}: {extend/modify} `{file}` — {what to do}\n - {change 2}: {extend/modify} `{file}` — {what to do}\n\n **Technical Context:**\n - Feasibility: {HIGH/MEDIUM/LOW} because {reason}\n - Key risk: {main concern}\n - Estimated phases: {rough breakdown}\n\n Then ask the **Scope Questions**:\n\n 1. **MVP Definition**: What's the absolute minimum to test if this works?\n 2. **Must Have vs Nice to Have**: What 2-3 things MUST be in v1? What can wait?\n 3. **Key Hypothesis**: Complete this: \"We believe [capability] will [solve problem] for [users]. We'll know we're right when [measurable outcome].\"\n 4. **Out of Scope**: What are you explicitly NOT building?\n 5. **Open Questions**: What uncertainties could change the approach?\n depends_on: [deepdive-gate]\n\n # ═══════════════════════════════════════════════════════════════\n # GATE 3: User answers scope questions\n # ═══════════════════════════════════════════════════════════════\n\n - id: scope-gate\n approval:\n message: \"Answer the scope questions above (MVP, must-haves, hypothesis, exclusions). 
This is the final input before PRD generation.\"\n capture_response: true\n depends_on: [technical]\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 4: GENERATE — Write the PRD\n # ═══════════════════════════════════════════════════════════════\n\n - id: generate\n model: sonnet\n prompt: |\n You are generating a PRD from the user's guided inputs.\n\n **The idea**: $ARGUMENTS\n **Foundation answers**: $foundation-gate.output\n **Deep dive answers**: $deepdive-gate.output\n **Scope answers**: $scope-gate.output\n\n Generate a complete PRD file at `$ARTIFACTS_DIR/prds/{kebab-case-name}.prd.md`.\n\n First create the directory:\n ```bash\n mkdir -p $ARTIFACTS_DIR/prds\n ```\n\n **First principles rule**: Before writing the Technical Approach section, READ the\n actual codebase files you're referencing. Verify:\n - File paths exist\n - Function/component names are correct\n - API endpoints you reference actually exist (or note they need to be created)\n - DB table and column names match the schema\n - Event type names match the constants in the code\n\n The PRD must include ALL of these sections, filled from the user's answers:\n\n 1. **Problem Statement** — from foundation answers (who/what/why)\n 2. **Evidence** — from research findings and user's evidence\n 3. **Proposed Solution** — synthesized from all inputs. Prefer extending existing\n primitives over creating new ones.\n 4. **Key Hypothesis** — from scope answers\n 5. **What We're NOT Building** — from scope answers\n 6. **Success Metrics** — from foundation \"how will you know\" + scope\n 7. **Open Questions** — from scope answers\n 8. **Users & Context** — from deep dive (primary user, JTBD, non-users)\n 9. **Solution Detail** — MoSCoW table from scope must-haves, MVP definition\n 10. **Technical Approach** — from technical feasibility. MUST reference actual\n verified file paths, function names, and schemas. Mark anything unverified\n as \"needs verification\".\n 11. 
**Implementation Phases** — from technical breakdown, with status table\n and parallel opportunities\n 12. **Decisions Log** — key decisions made during the conversation\n\n **Rules:**\n - If info is missing, write \"TBD — needs research\" not filler\n - Be specific and concrete, not generic\n - Every file path in Technical Approach must be verified by reading the file\n - Prefer \"extend X\" over \"create new Y\" in implementation phases\n\n After writing the file, output the file path only — the validator will check it.\n depends_on: [scope-gate]\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 5: VALIDATE — Check technical claims against codebase\n # ═══════════════════════════════════════════════════════════════\n\n - id: validate\n model: sonnet\n prompt: |\n You are a technical validator checking a PRD for accuracy.\n\n Read the PRD file that was just generated. The generate node output the file path:\n $generate.output\n\n Find the PRD file — check `$ARTIFACTS_DIR/prds/` for the most recently created `.prd.md` file:\n ```bash\n ls -t $ARTIFACTS_DIR/prds/*.prd.md | head -1\n ```\n\n Read the entire PRD, then verify EVERY technical claim against the actual codebase:\n\n **Check 1: File paths** — For every file referenced in \"Technical Approach\" and\n \"Implementation Phases\", verify it exists. If it doesn't, note the correction.\n\n **Check 2: API endpoints** — For every endpoint mentioned, check if it already exists\n in `packages/server/src/routes/api.ts`. If it does, the PRD should say \"extend\" not \"create\".\n If the PRD proposes a new endpoint for data that an existing endpoint already returns,\n flag it.\n\n **Check 3: DB schemas** — For every table/column referenced, verify the actual names\n in the migration files or schema code. 
Check event type names against the\n `WORKFLOW_EVENT_TYPES` constant.\n\n **Check 4: UI components** — For every component referenced, verify it exists.\n If the PRD proposes a new page but an existing page already serves a similar purpose,\n flag it.\n\n **Check 5: Function/type names** — Verify function names, type names, and interface\n names are correct.\n\n After checking, if there are ANY corrections needed:\n 1. Edit the PRD file directly — fix incorrect names, paths, and references\n 2. Add a `## Validation Notes` section at the bottom documenting what was corrected\n\n If everything checks out, add:\n ```\n ## Validation Notes\n\n All technical references verified against codebase. No corrections needed.\n ```\n\n Output a summary of what was checked and corrected:\n\n ```\n ## PRD Validated\n\n **File**: `{prd-path}`\n **Checks**: {N} file paths, {N} endpoints, {N} DB references, {N} components\n **Corrections**: {count}\n {list corrections if any}\n\n To start implementation: `/prp-plan {prd-path}`\n ```\n depends_on: [generate]\n", "archon-issue-review-full": "name: archon-issue-review-full\ndescription: |\n Use when: User wants a FULL, COMPREHENSIVE fix + review pipeline for a GitHub issue.\n Triggers: \"full review\", \"comprehensive fix\", \"fix with full review\", \"deep review\", \"issue review full\".\n NOT for: Simple issue fixes (use archon-fix-github-issue instead),\n questions about issues, CI failures, PR reviews, general exploration.\n\n Full workflow:\n 1. Investigate issue -> root cause analysis, implementation plan\n 2. Implement fix -> code changes, tests, PR creation\n 3. Comprehensive review -> 5 parallel agents with scope awareness\n 4. Fix review issues -> address CRITICAL/HIGH findings\n 5. 
Final summary -> decision matrix, follow-up recommendations\n\nnodes:\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 1: INVESTIGATE\n # ═══════════════════════════════════════════════════════════════════\n\n - id: investigate\n command: archon-investigate-issue\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 2: IMPLEMENT\n # ═══════════════════════════════════════════════════════════════════\n\n - id: implement\n command: archon-implement-issue\n depends_on: [investigate]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 3: CODE REVIEW\n # ═══════════════════════════════════════════════════════════════════\n\n - id: review-scope\n command: archon-pr-review-scope\n depends_on: [implement]\n context: fresh\n\n - id: sync\n command: archon-sync-pr-with-main\n depends_on: [review-scope]\n context: fresh\n\n - id: code-review\n command: archon-code-review-agent\n depends_on: [sync]\n context: fresh\n\n - id: error-handling\n command: archon-error-handling-agent\n depends_on: [sync]\n context: fresh\n\n - id: test-coverage\n command: archon-test-coverage-agent\n depends_on: [sync]\n context: fresh\n\n - id: comment-quality\n command: archon-comment-quality-agent\n depends_on: [sync]\n context: fresh\n\n - id: docs-impact\n command: archon-docs-impact-agent\n depends_on: [sync]\n context: fresh\n\n - id: synthesize\n command: archon-synthesize-review\n depends_on: [code-review, error-handling, test-coverage, comment-quality, docs-impact]\n trigger_rule: one_success\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 4: FIX REVIEW ISSUES\n # ═══════════════════════════════════════════════════════════════════\n\n - id: implement-fixes\n command: archon-implement-review-fixes\n depends_on: [synthesize]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 5: 
FINAL SUMMARY\n # ═══════════════════════════════════════════════════════════════════\n\n - id: summary\n command: archon-workflow-summary\n depends_on: [implement-fixes]\n context: fresh\n", - "archon-piv-loop": "name: archon-piv-loop\ndescription: |\n Use when: User wants guided Plan-Implement-Validate development with human-in-the-loop.\n Triggers: \"piv\", \"piv loop\", \"plan implement validate\", \"guided development\",\n \"structured development\", \"build a feature\", \"develop with review\".\n NOT for: Autonomous implementation without planning (use archon-feature-development).\n NOT for: PRD creation (use archon-interactive-prd).\n NOT for: Ralph story-based implementation (use archon-ralph-dag).\n\n Interactive PIV loop workflow — the foundational AI coding methodology:\n 1. EXPLORE: Iterative conversation with human to understand the problem (arbitrary rounds)\n 2. PLAN: Create structured plan -> iterative review & revision (arbitrary rounds)\n 3. IMPLEMENT: Autonomous task-by-task implementation from plan (Ralph loop)\n 4. VALIDATE: Automated code review -> iterative human feedback & fixes (arbitrary rounds)\n\n The PIV loop comes AFTER a PRD exists. 
Each PIV loop focuses on ONE granular feature or bug fix.\n Input: A description of what to build, a path to an existing plan, or a GitHub issue number.\n\nprovider: claude\ninteractive: true\n\nnodes:\n # ═══════════════════════════════════════════════════════════════\n # PHASE 1: EXPLORE — Iterative exploration with human\n # Understand the idea, explore the codebase, converge on approach\n # Loops until the user says they're ready to create the plan.\n # ═══════════════════════════════════════════════════════════════\n\n - id: explore\n loop:\n prompt: |\n # PIV Loop — Exploration\n\n You are a senior engineering partner in an iterative exploration session.\n Your goal: DEEPLY UNDERSTAND what to build before any code is written.\n\n **User's request**: $ARGUMENTS\n **User's latest input**: $LOOP_USER_INPUT\n\n ---\n\n ## If this is the FIRST iteration (no user input yet):\n\n ### Step 1: Parse the Input\n\n Determine what the user provided:\n\n **If it's a file path** (ends in `.md`, `.plan.md`, or `.prd.md`):\n - Read the file\n - If it's an existing plan → summarize it and ask if they want to refine or proceed\n - If it's a PRD → identify the specific phase/feature to focus on\n\n **If it's a GitHub issue** (`#123` format):\n - Fetch it: `gh issue view {number} --json title,body,labels,comments`\n - Summarize the issue context\n\n **If it's free text**:\n - This is a feature idea or bug description. Use it directly.\n\n ### Step 2: Explore the Codebase\n\n Before asking questions, DO YOUR HOMEWORK:\n\n 1. **Read CLAUDE.md** — understand project conventions, architecture, and constraints\n 2. **Search for related code** — find existing implementations similar to what the user wants\n 3. **Read key files** — understand the current state of code the user wants to change\n 4. 
**Check recent git history** — `git log --oneline -20` for recent changes in the area\n\n ### Step 3: Present Your Understanding\n\n ```\n ## What I Understand\n\n You want to: {restated understanding in 2-3 sentences}\n\n ## What Already Exists\n\n - {file:line} — {what it does and how it relates}\n - {file:line} — {what it does and how it relates}\n - {pattern/component} — {how it could be extended or reused}\n\n ## Initial Architecture Thoughts\n\n Based on what exists, I'm thinking:\n - {approach 1 — extend existing X}\n - {approach 2 — if approach 1 doesn't work}\n - {key architectural decision that needs your input}\n ```\n\n ### Step 4: Ask Targeted Questions\n\n Ask 4-6 questions focused on DECISIONS, not information gathering:\n - Scope boundaries, architecture preferences, tech decisions\n - Constraints, existing code extension vs fresh build, testing expectations\n - Reference actual code you found — don't ask generic questions\n\n ---\n\n ## If the user has provided input (subsequent iterations):\n\n ### Step 1: Process Their Response\n\n Read their answers carefully. 
Identify:\n - Decisions they've made\n - Areas they want you to explore further\n - Questions they asked YOU back (answer these with evidence!)\n\n ### Step 2: Do Targeted Research\n\n Based on their response:\n - If they mentioned specific technologies → research best practices\n - If they pointed you to specific code → read it thoroughly\n - If they asked you to explore an area → do a thorough investigation\n - If they made architecture decisions → validate against the codebase\n\n ### Step 3: Present Updated Understanding\n\n Show what you learned, answer their questions with file:line references,\n and present your refined architecture recommendation.\n\n ### Step 4: Converge or Continue\n\n **If there are still important open questions:**\n Ask 2-4 focused questions about remaining ambiguities.\n\n **If the picture is clear and you have enough to create a plan:**\n Present a final implementation summary:\n\n ```\n ## Implementation Summary\n\n ### What We're Building\n {Clear, specific description}\n\n ### Scope Boundary\n - IN: {what's included}\n - OUT: {what's explicitly excluded}\n\n ### Architecture\n - {key decisions}\n\n ### Files That Will Change\n - `{file}` — {what changes and why}\n\n ### Success Criteria\n - [ ] {specific, testable criterion}\n - [ ] All validation passes\n\n ### Key Risks\n - {risk — and mitigation}\n ```\n\n Then tell the user: \"I have a clear picture. Say **ready** and I'll create\n the structured implementation plan, or share any final thoughts.\"\n\n **CRITICAL — READ THIS CAREFULLY**:\n - NEVER output PLAN_READY unless the user's LATEST message contains\n an EXPLICIT phrase like \"ready\", \"create the plan\", \"let's go\", \"proceed\", or \"I'm done\".\n - If the user asked a question → do NOT emit the signal. Answer the question.\n - If the user gave feedback or requested changes → do NOT emit the signal. Address it.\n - If the user said \"also check X\" or \"one more thing\" → do NOT emit the signal. 
Explore it.\n - If you are unsure whether the user is approving → do NOT emit the signal. Ask them.\n - The ONLY correct time to emit the signal is when the user's message CLEARLY means\n \"stop exploring, I'm ready for you to create the plan.\"\n until: PLAN_READY\n max_iterations: 15\n interactive: true\n gate_message: |\n Answer the questions above, ask me to explore specific areas,\n or say \"ready\" when you're satisfied with the exploration.\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 2: PLAN — Create the structured implementation plan\n # ═══════════════════════════════════════════════════════════════\n\n - id: create-plan\n model: sonnet\n depends_on: [explore]\n context: fresh\n prompt: |\n # PIV Loop — Create Structured Plan\n\n You are creating a structured implementation plan from a completed exploration phase.\n This plan will be the SOLE GUIDE for the implementation agent — it must be complete,\n specific, and actionable.\n\n **Original request**: $ARGUMENTS\n **Final exploration summary**: $explore.output\n\n ---\n\n ## Step 1: Read the Codebase (Again)\n\n Before writing the plan, verify your understanding is current:\n\n 1. **Read CLAUDE.md** — capture all relevant conventions\n 2. **Read every file you plan to change** — note exact current state\n 3. **Read example test files** — understand testing patterns\n 4. **Check for any recent changes** — `git log --oneline -10`\n\n ## Step 2: Determine Plan Location\n\n Generate a kebab-case slug from the feature name.\n Save to `.claude/archon/plans/{slug}.plan.md`.\n\n ```bash\n mkdir -p .claude/archon/plans\n ```\n\n ## Step 3: Write the Plan\n\n Use this template. 
Fill EVERY section with specific, verified information.\n\n ```markdown\n # Feature: {Title}\n\n ## Summary\n {1-2 sentences: what changes and why}\n\n ## Mission\n {The core goal in one clear statement}\n\n ## Success Criteria\n - [ ] {Specific, testable criterion}\n - [ ] All validation passes (`bun run validate` or equivalent)\n - [ ] No regressions in existing tests\n\n ## Scope\n ### In Scope\n - {What we ARE building}\n ### Out of Scope\n - {What we are NOT building — and why}\n\n ## Codebase Context\n ### Key Files\n | File | Role | Action |\n |------|------|--------|\n | `{path}` | {what it does} | CREATE / UPDATE |\n\n ### Patterns to Follow\n {Actual code snippets from the codebase to mirror}\n\n ## Architecture\n - {Decision 1 — with rationale}\n - {Decision 2 — with rationale}\n\n ## Task List\n Execute in order. Each task is atomic and independently verifiable.\n\n ### Task 1: {ACTION} `{file path}`\n **Action**: CREATE / UPDATE\n **Details**: {Exact changes — specific enough for an agent with no context}\n **Pattern**: Follow `{source file}:{lines}`\n **Validate**: `{command to verify this task}`\n\n ## Testing Strategy\n | Test File | Test Cases | Validates |\n |-----------|-----------|-----------|\n | `{path}` | {cases} | {what it validates} |\n\n ## Validation Commands\n 1. Type check: `{command}`\n 2. Lint: `{command}`\n 3. Tests: `{command}`\n 4. Full validation: `{command}`\n\n ## Risks\n | Risk | Impact | Mitigation |\n |------|--------|------------|\n | {risk} | {HIGH/MED/LOW} | {specific mitigation} |\n ```\n\n ## Step 4: Verify the Plan\n\n 1. Check every file path referenced — verify they exist\n 2. Check every pattern cited — verify the code matches\n 3. Check task ordering — ensure dependencies are respected\n 4. 
Check completeness — could an agent with NO context implement this?\n\n ## Step 5: Report\n\n ```\n ## Plan Created\n\n **File**: `.claude/archon/plans/{slug}.plan.md`\n **Tasks**: {count}\n **Files to change**: {count}\n\n Key decisions:\n - {decision 1}\n - {decision 2}\n\n Please review the plan and provide feedback.\n ```\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 2b: PLAN — Iterative plan refinement\n # Review and revise the plan as many times as needed.\n # ═══════════════════════════════════════════════════════════════\n\n - id: refine-plan\n depends_on: [create-plan]\n loop:\n prompt: |\n # PIV Loop — Plan Refinement\n\n The user is reviewing the implementation plan and providing feedback.\n\n **User's feedback**: $LOOP_USER_INPUT\n\n ---\n\n ## Step 1: Find and Read the Plan\n\n ```bash\n ls -t .claude/archon/plans/*.plan.md 2>/dev/null | head -1\n ```\n\n Read the entire plan file. Also read CLAUDE.md for conventions.\n\n ## Step 2: Process Feedback\n\n **If there is no user feedback yet** (first iteration, $LOOP_USER_INPUT is empty):\n - Read the plan carefully\n - Present a summary of the plan's key decisions and task list\n - Ask the user to review and provide feedback\n - Do NOT emit the completion signal on the first iteration\n\n **If the user EXPLICITLY approved** (said \"approved\", \"looks good\", \"let's go\", etc.):\n - Make no changes\n - Output: \"Plan approved. 
Proceeding to implementation.\"\n - Signal completion: PLAN_APPROVED\n\n **If the user provided specific feedback:**\n - Parse each piece of feedback\n - Edit the plan file directly:\n - Add/remove/modify tasks as requested\n - Update success criteria if needed\n - Adjust testing strategy if needed\n - Re-verify file paths and patterns after changes\n\n **CRITICAL**: NEVER emit PLAN_APPROVED unless the user's latest\n message EXPLICITLY says \"approved\", \"looks good\", \"ship it\", or similar approval.\n Questions, feedback, and requests for changes are NOT approval.\n\n ## Step 3: Show Changes\n\n ```\n ## Plan Revised\n\n Changes made:\n - {change 1}\n - {change 2}\n\n Updated stats:\n - Tasks: {count}\n - Files to change: {count}\n\n Review the updated plan and provide more feedback, or say \"approved\" to proceed.\n ```\n until: PLAN_APPROVED\n max_iterations: 10\n interactive: true\n gate_message: |\n Review the plan document. Provide specific feedback on what to change,\n or say \"approved\" to begin implementation.\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 3: IMPLEMENT — Setup\n # Read the plan, prepare the environment\n # ═══════════════════════════════════════════════════════════════\n\n - id: implement-setup\n depends_on: [refine-plan]\n bash: |\n set -e\n\n PLAN_FILE=$(ls -t .claude/archon/plans/*.plan.md 2>/dev/null | head -1)\n\n if [ -z \"$PLAN_FILE\" ]; then\n echo \"ERROR: No plan file found in .claude/archon/plans/\"\n exit 1\n fi\n\n # Install dependencies if needed\n if [ -f \"bun.lock\" ] || [ -f \"bun.lockb\" ]; then\n echo \"Installing dependencies...\"\n bun install --frozen-lockfile 2>&1 | tail -3\n elif [ -f \"package-lock.json\" ]; then\n npm ci 2>&1 | tail -3\n elif [ -f \"yarn.lock\" ]; then\n yarn install --frozen-lockfile 2>&1 | tail -3\n elif [ -f \"pnpm-lock.yaml\" ]; then\n pnpm install --frozen-lockfile 2>&1 | tail -3\n fi\n\n echo \"BRANCH=$(git branch --show-current)\"\n echo 
\"GIT_ROOT=$(git rev-parse --show-toplevel)\"\n echo \"PLAN_FILE=$PLAN_FILE\"\n\n echo \"=== PLAN_START ===\"\n cat \"$PLAN_FILE\"\n echo \"\"\n echo \"=== PLAN_END ===\"\n\n TASK_COUNT=$(grep -c \"^### Task [0-9]\" \"$PLAN_FILE\" || true)\n echo \"TASK_COUNT=${TASK_COUNT:-0}\"\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 3b: IMPLEMENT — Task-by-Task Loop (Ralph pattern)\n # Fresh context each iteration. Reads plan from disk.\n # One task per iteration. Validates before committing.\n # ═══════════════════════════════════════════════════════════════\n\n - id: implement\n depends_on: [implement-setup]\n idle_timeout: 600000\n model: claude-opus-4-6[1m]\n loop:\n prompt: |\n # PIV Loop — Implementation Agent\n\n You are an autonomous coding agent in a FRESH session — no memory of previous iterations.\n Your job: Read the plan from disk, implement ONE task, validate, commit, update tracking, exit.\n\n **Golden Rule**: If validation fails, fix it before committing. Never commit broken code.\n\n ---\n\n ## Phase 0: CONTEXT — Load State\n\n The setup node produced this context:\n\n $implement-setup.output\n\n **User's original request**: $USER_MESSAGE\n\n ---\n\n ### 0.1 Parse Plan File\n\n Extract the `PLAN_FILE=...` line from the context above.\n\n ### 0.2 Read Current State (from disk — not from context above)\n\n The context above is a snapshot from before the loop started. Previous iterations\n may have changed things. **You MUST re-read from disk:**\n\n 1. **Read the plan file** — your implementation guide\n 2. **Read progress tracking** — check if `.claude/archon/plans/progress.txt` exists\n 3. 
**Read CLAUDE.md** — project conventions and constraints\n\n ### 0.3 Check Git State\n\n ```bash\n git log --oneline -10\n git status\n ```\n\n ---\n\n ## Phase 1: SELECT — Pick Next Task\n\n From the plan file, identify tasks by `### Task N:` headers.\n Cross-reference with commits from previous iterations and progress tracking.\n\n **If ALL tasks are complete** → Skip to Phase 5 (Completion).\n\n ### Announce Selection\n\n ```\n -- Task Selected ------------------------------------------------\n Task: {N} — {task title}\n Action: {CREATE / UPDATE}\n File: {file path}\n -----------------------------------------------------------------\n ```\n\n ---\n\n ## Phase 2: IMPLEMENT — Execute the Task\n\n 1. Read the file you're about to change (if it exists)\n 2. Read the pattern file referenced in the plan\n 3. Make changes following the plan EXACTLY\n 4. Type-check after each file: `bun run type-check 2>&1 || true`\n\n ---\n\n ## Phase 3: VALIDATE — Verify the Task\n\n ```bash\n bun run type-check && bun run lint && bun run test && bun run format:check\n ```\n\n If validation fails: fix, re-run (up to 3 attempts). If unfixable, note in progress\n tracking and do NOT commit broken code.\n\n ---\n\n ## Phase 4: COMMIT — Save Changes\n\n ```bash\n git add -A\n git diff --cached --stat\n git commit -m \"$(cat <<'EOF'\n {type}: {task description}\n\n PIV Task {N}: {brief details}\n EOF\n )\"\n ```\n\n Track progress in `.claude/archon/plans/progress.txt`:\n ```\n ## Task {N}: {title} — COMPLETED\n Date: {ISO date}\n Files: {list}\n Commit: {short hash}\n ---\n ```\n\n ---\n\n ## Phase 5: COMPLETE — Check All Tasks\n\n If ALL tasks are done:\n 1. Run full validation: `bun run validate 2>&1`\n 2. Push: `git push -u origin HEAD`\n 3. Signal: `COMPLETE`\n\n If tasks remain, report status and end normally. 
The loop engine starts a fresh iteration.\n until: COMPLETE\n max_iterations: 15\n fresh_context: true\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 4: VALIDATE — Automated code review\n # Review all changes against the plan\n # ═══════════════════════════════════════════════════════════════\n\n - id: code-review\n model: sonnet\n depends_on: [implement]\n context: fresh\n prompt: |\n # PIV Loop — Automated Code Review\n\n The implementation phase is complete. Review ALL changes against the plan.\n\n **Implementation output**: $implement.output\n\n ---\n\n ## Step 1: Find and Read the Plan\n\n ```bash\n ls -t .claude/archon/plans/*.plan.md 2>/dev/null | head -1\n ```\n\n ## Step 2: Review All Changes\n\n ```bash\n git log --oneline --no-merges $(git merge-base HEAD $BASE_BRANCH)..HEAD\n git diff $BASE_BRANCH..HEAD --stat\n git diff $BASE_BRANCH..HEAD\n ```\n\n ## Step 3: Check Against Plan\n\n For EACH task: was it implemented correctly? Do success criteria hold?\n For EACH file: check quality, security, patterns, CLAUDE.md compliance.\n\n ## Step 4: Run Validation\n\n ```bash\n bun run validate 2>&1 || (bun run type-check && bun run lint && bun run test && bun run format:check)\n ```\n\n ## Step 5: Fix Obvious Issues\n\n Fix type errors, lint warnings, missing imports, formatting. 
Commit any fixes:\n ```bash\n git add -A && git commit -m \"fix: address code review findings\" 2>/dev/null || true\n ```\n\n ## Step 6: Present Review\n\n ```\n ## Code Review Complete\n\n ### Implementation Status\n | Task | Status | Notes |\n |------|--------|-------|\n | {task} | DONE / PARTIAL / MISSING | {notes} |\n\n ### Validation Results\n - Type-check: PASS / FAIL\n - Lint: PASS / FAIL\n - Tests: PASS / FAIL\n - Format: PASS / FAIL\n\n ### Code Quality Findings\n {Issues found, or \"No issues found.\"}\n\n ### Recommendation\n {READY FOR REVIEW / NEEDS FIXES}\n ```\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 4b: VALIDATE — Iterative human feedback & fixes\n # The user tests the implementation and provides feedback.\n # Loops until the user approves.\n # ═══════════════════════════════════════════════════════════════\n\n - id: fix-feedback\n depends_on: [code-review]\n loop:\n prompt: |\n # PIV Loop — Address Validation Feedback\n\n The human has reviewed the implementation and provided feedback.\n\n **Human's feedback**: $LOOP_USER_INPUT\n\n ---\n\n ## Step 1: Read Context\n\n ```bash\n ls -t .claude/archon/plans/*.plan.md 2>/dev/null | head -1\n ```\n\n Read the plan file and CLAUDE.md for conventions.\n\n ## Step 2: Process Feedback\n\n **If there is no user feedback yet** (first iteration, $LOOP_USER_INPUT is empty):\n - Present the code review results and ask the user to test the implementation\n - Do NOT emit the completion signal on the first iteration\n\n **If the user EXPLICITLY approved** (said \"approved\", \"looks good\", \"ship it\", etc.):\n - Output: \"Implementation approved!\"\n - Signal: VALIDATED\n\n **CRITICAL**: NEVER emit VALIDATED unless the user's latest\n message EXPLICITLY says \"approved\", \"looks good\", \"ship it\", or similar approval.\n\n **If the user provided specific feedback:**\n 1. Read the relevant files\n 2. Understand each issue\n 3. Make the fixes\n 4. 
Type-check after each change\n\n ## Step 3: Full Validation\n\n ```bash\n bun run validate 2>&1 || (bun run type-check && bun run lint && bun run test && bun run format:check)\n ```\n\n ## Step 4: Commit Fixes\n\n ```bash\n git add -A\n git commit -m \"$(cat <<'EOF'\n fix: address review feedback\n\n Changes:\n - {fix 1}\n - {fix 2}\n EOF\n )\"\n ```\n\n ## Step 5: Report\n\n ```\n ## Feedback Addressed\n\n Changes made:\n - {fix 1}\n - {fix 2}\n\n Validation: {PASS / FAIL with details}\n\n Review again, or say \"approved\" to finalize.\n ```\n until: VALIDATED\n max_iterations: 10\n interactive: true\n gate_message: |\n Test the implementation yourself and review the code changes.\n Provide specific feedback on what needs fixing, or say \"approved\" to finalize.\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 5: FINALIZE — Push, create PR, generate summary\n # ═══════════════════════════════════════════════════════════════\n\n - id: finalize\n model: sonnet\n depends_on: [fix-feedback]\n context: fresh\n prompt: |\n # PIV Loop — Finalize\n\n The implementation has been approved. 
Push changes and create a PR.\n\n ---\n\n ## Step 1: Push Changes\n\n ```bash\n git push -u origin HEAD 2>&1 || true\n ```\n\n ## Step 2: Generate Summary\n\n ```bash\n git log --oneline --no-merges $(git merge-base HEAD $BASE_BRANCH)..HEAD\n git diff --stat $(git merge-base HEAD $BASE_BRANCH)..HEAD\n ```\n\n Read the plan file and progress tracking for context.\n\n ## Step 3: Create PR (if not already created)\n\n ```bash\n gh pr view HEAD --json url 2>/dev/null || echo \"NO_PR\"\n ```\n\n If no PR exists:\n\n ```bash\n cat .github/pull_request_template.md 2>/dev/null || echo \"NO_TEMPLATE\"\n ```\n\n Create with `gh pr create --draft --base $BASE_BRANCH`:\n - Title from the plan's feature name\n - Body summarizing the implementation\n - Use a HEREDOC for the body\n\n ## Step 4: Output Summary\n\n ```\n ===============================================================\n PIV LOOP — COMPLETE\n ===============================================================\n\n Feature: {from plan}\n Plan: {plan file path}\n Branch: {branch name}\n PR: {url}\n\n -- Tasks Completed -----------------------------------------------\n {list from progress tracking}\n\n -- Commits -------------------------------------------------------\n {git log output}\n\n -- Files Changed -------------------------------------------------\n {git diff --stat output}\n\n -- Validation ----------------------------------------------------\n All checks passed.\n ===============================================================\n ```\n", - "archon-plan-to-pr": "name: archon-plan-to-pr\ndescription: |\n Use when: You have an existing implementation plan and want to execute it end-to-end.\n Input: Path to a plan file ($ARTIFACTS_DIR/plan.md or .agents/plans/*.md)\n Output: PR ready for merge with comprehensive review completed\n\n Full workflow:\n 1. Read plan, setup branch, extract scope limits\n 2. Verify plan research is still valid\n 3. Implement all tasks with type-checking\n 4. Run full validation suite\n 5. 
Create PR with template, mark ready\n 6. Comprehensive code review (5 parallel agents with scope limit awareness)\n 7. Synthesize and fix review findings\n 8. Final summary with decision matrix -> GitHub comment + follow-up recommendations\n\n NOT for: Creating plans from scratch (use archon-idea-to-pr), quick fixes, standalone reviews.\n\nnodes:\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 1: SETUP\n # ═══════════════════════════════════════════════════════════════════\n\n - id: plan-setup\n command: archon-plan-setup\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 2: CONFIRM PLAN\n # ═══════════════════════════════════════════════════════════════════\n\n - id: confirm-plan\n command: archon-confirm-plan\n depends_on: [plan-setup]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 3: IMPLEMENT\n # ═══════════════════════════════════════════════════════════════════\n\n - id: implement-tasks\n command: archon-implement-tasks\n depends_on: [confirm-plan]\n context: fresh\n model: claude-opus-4-6[1m]\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 4: VALIDATE\n # ═══════════════════════════════════════════════════════════════════\n\n - id: validate\n command: archon-validate\n depends_on: [implement-tasks]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 5: FINALIZE PR\n # ═══════════════════════════════════════════════════════════════════\n\n - id: finalize-pr\n command: archon-finalize-pr\n depends_on: [validate]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 6: CODE REVIEW\n # ═══════════════════════════════════════════════════════════════════\n\n - id: review-scope\n command: archon-pr-review-scope\n depends_on: [finalize-pr]\n context: fresh\n\n - id: sync\n command: 
archon-sync-pr-with-main\n depends_on: [review-scope]\n context: fresh\n\n - id: code-review\n command: archon-code-review-agent\n depends_on: [sync]\n context: fresh\n\n - id: error-handling\n command: archon-error-handling-agent\n depends_on: [sync]\n context: fresh\n\n - id: test-coverage\n command: archon-test-coverage-agent\n depends_on: [sync]\n context: fresh\n\n - id: comment-quality\n command: archon-comment-quality-agent\n depends_on: [sync]\n context: fresh\n\n - id: docs-impact\n command: archon-docs-impact-agent\n depends_on: [sync]\n context: fresh\n\n - id: synthesize\n command: archon-synthesize-review\n depends_on: [code-review, error-handling, test-coverage, comment-quality, docs-impact]\n trigger_rule: one_success\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 7: FIX REVIEW ISSUES\n # ═══════════════════════════════════════════════════════════════════\n\n - id: implement-fixes\n command: archon-implement-review-fixes\n depends_on: [synthesize]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 8: FINAL SUMMARY & FOLLOW-UP\n # ═══════════════════════════════════════════════════════════════════\n\n - id: workflow-summary\n command: archon-workflow-summary\n depends_on: [implement-fixes]\n context: fresh\n", - "archon-ralph-dag": "name: archon-ralph-dag\ndescription: |\n Use when: User wants to run a Ralph implementation loop.\n Triggers: \"ralph\", \"run ralph\", \"ralph dag\", \"run ralph dag\".\n\n DAG workflow that:\n 1. Detects input: existing prd.json, existing prd.md (needs stories), or raw idea\n 2. Generates prd.md + prd.json if needed (explores codebase, breaks into stories)\n 3. Validates PRD files, reads project context, installs dependencies\n 4. Runs Ralph loop (fresh context per iteration) implementing one story per iteration\n 5. 
Creates PR and reports completion\n\n Accepts: An idea description, a path to an existing prd.md, or a directory with prd.md + prd.json\n\nprovider: claude\n\nnodes:\n # ═══════════════════════════════════════════════════════════════\n # NODE 1: DETECT INPUT\n # Determines what the user provided: full PRD, partial PRD, or idea\n # ═══════════════════════════════════════════════════════════════\n\n - id: detect-input\n model: haiku\n prompt: |\n # Detect Ralph Input\n\n **User input**: $ARGUMENTS\n\n Determine what the user provided and prepare the PRD directory. Follow these steps exactly:\n\n ## Step 1: Detect worktree\n\n Run `git worktree list --porcelain` to check if you're in a worktree.\n If you see multiple entries, you ARE in a worktree. The first entry (the one without \"branch\" pointing to your current branch) is the **main repo root**. Save it — you'll need it to find files.\n\n ## Step 2: Classify the input\n\n Look at the user input above. It's one of three things:\n\n **Case A — Ralph directory path** (contains `.archon/ralph/`):\n Extract the directory. Check if both `prd.json` and `prd.md` exist there (try locally first, then in the main repo root if in a worktree).\n\n **Case B — File path** (ends in `.md`):\n This is an external PRD file. Find it:\n 1. Try the path as-is (relative to cwd)\n 2. Try it as an absolute path\n 3. If in a worktree, try it relative to the **main repo root** from Step 1\n Once found, read the file to confirm it's a PRD.\n\n **Case C — Free text**:\n Not a file path — it's a feature idea.\n\n ## Step 3: Auto-discover existing ralph PRDs\n\n If the input didn't point to a specific path, check if `.archon/ralph/` contains any `prd.json` files:\n ```bash\n find .archon/ralph -name \"prd.json\" -type f 2>/dev/null\n ```\n\n ## Step 4: Take action based on classification\n\n **If Case A and both files exist** → output `ready` (no further action needed)\n\n **If Case B (external PRD found)**:\n 1. 
Derive a kebab-case slug from the PRD filename or title (e.g., `workflow-lifecycle-overhaul`)\n 2. Create the ralph directory: `mkdir -p .archon/ralph/{slug}`\n 3. Copy the PRD content to `.archon/ralph/{slug}/prd.md`\n 4. Output `external_prd` with the new prd_dir\n\n **If Case C or auto-discovered ralph dir has prd.md but no prd.json** → output `needs_generation`\n\n ## Output\n\n Your final output MUST be exactly one JSON object:\n ```json\n {\"input_type\": \"ready|external_prd|needs_generation\", \"prd_dir\": \".archon/ralph/{slug}\"}\n ```\n output_format:\n type: object\n properties:\n input_type:\n type: string\n enum: [ready, external_prd, needs_generation]\n prd_dir:\n type: string\n required: [input_type, prd_dir]\n\n # ═══════════════════════════════════════════════════════════════\n # NODE 2: GENERATE PRD\n # Scenario 1: User has an idea → generate prd.md + prd.json\n # Scenario 2: User has prd.md → generate prd.json with stories\n # Skipped if prd.json already exists\n # ═══════════════════════════════════════════════════════════════\n\n - id: generate-prd\n depends_on: [detect-input]\n when: \"$detect-input.output.input_type != 'ready'\"\n command: archon-ralph-generate\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # NODE 3: VALIDATE & SETUP\n # Finds PRD directory, reads all state files, installs deps,\n # verifies the environment is ready for implementation.\n # ═══════════════════════════════════════════════════════════════\n\n - id: validate-prd\n depends_on: [detect-input, generate-prd]\n trigger_rule: one_success\n bash: |\n set -e\n\n # ── 1. Find PRD directory (passed from detect-input) ──────\n PRD_DIR=$detect-input.output.prd_dir\n\n # If detect-input didn't know the PRD dir (generated from scratch), discover it\n if [ -z \"$PRD_DIR\" ] || [ ! 
-f \"$PRD_DIR/prd.json\" ]; then\n FOUND=$(find .archon/ralph -name \"prd.json\" -type f 2>/dev/null | head -1)\n if [ -n \"$FOUND\" ]; then\n PRD_DIR=$(dirname \"$FOUND\")\n fi\n fi\n\n if [ -z \"$PRD_DIR\" ] || [ ! -f \"$PRD_DIR/prd.json\" ]; then\n echo \"ERROR: No prd.json found after generation step.\"\n echo \"Check the generate-prd node output for errors.\"\n exit 1\n fi\n\n if [ ! -f \"$PRD_DIR/prd.md\" ]; then\n echo \"ERROR: prd.md not found in $PRD_DIR\"\n exit 1\n fi\n\n # ── 2. Install dependencies (worktrees lack node_modules) ──\n if [ -f \"bun.lock\" ] || [ -f \"bun.lockb\" ]; then\n echo \"Installing dependencies (bun)...\"\n bun install --frozen-lockfile 2>&1 | tail -3\n elif [ -f \"package-lock.json\" ]; then\n echo \"Installing dependencies (npm)...\"\n npm ci 2>&1 | tail -3\n elif [ -f \"yarn.lock\" ]; then\n echo \"Installing dependencies (yarn)...\"\n yarn install --frozen-lockfile 2>&1 | tail -3\n elif [ -f \"pnpm-lock.yaml\" ]; then\n echo \"Installing dependencies (pnpm)...\"\n pnpm install --frozen-lockfile 2>&1 | tail -3\n fi\n\n # ── 3. Git state ──────────────────────────────────────────\n echo \"BRANCH=$(git branch --show-current)\"\n echo \"GIT_ROOT=$(git rev-parse --show-toplevel)\"\n\n # ── 4. Output PRD context ─────────────────────────────────\n echo \"PRD_DIR=$PRD_DIR\"\n echo \"=== PRD_JSON_START ===\"\n cat \"$PRD_DIR/prd.json\"\n echo \"\"\n echo \"=== PRD_JSON_END ===\"\n echo \"=== PRD_MD_START ===\"\n cat \"$PRD_DIR/prd.md\"\n echo \"\"\n echo \"=== PRD_MD_END ===\"\n echo \"=== PROGRESS_START ===\"\n if [ -f \"$PRD_DIR/progress.txt\" ]; then\n cat \"$PRD_DIR/progress.txt\"\n else\n echo \"(no progress yet)\"\n fi\n echo \"\"\n echo \"=== PROGRESS_END ===\"\n\n # ── 5. 
Summary ────────────────────────────────────────────\n TOTAL=$(grep -c '\"passes\"' \"$PRD_DIR/prd.json\" || true)\n DONE=$(grep -c '\"passes\": true' \"$PRD_DIR/prd.json\" || true)\n TOTAL=${TOTAL:-0}\n DONE=${DONE:-0}\n echo \"STORIES_TOTAL=$TOTAL\"\n echo \"STORIES_DONE=$DONE\"\n echo \"STORIES_REMAINING=$(( TOTAL - DONE ))\"\n\n # ═══════════════════════════════════════════════════════════════\n # NODE 4: RALPH IMPLEMENTATION LOOP\n # Fresh context each iteration. Reads PRD state from disk.\n # One story per iteration. Validates before committing.\n # ═══════════════════════════════════════════════════════════════\n\n - id: implement\n depends_on: [validate-prd]\n idle_timeout: 600000\n model: claude-opus-4-6[1m]\n loop:\n prompt: |\n # Ralph Agent — Autonomous Story Implementation\n\n You are an autonomous coding agent in a FRESH session — you have no memory of previous iterations.\n Your job: Read state from disk, implement ONE story, validate, commit, update tracking, exit.\n\n **Golden Rule**: If validation fails, fix it before committing. Never commit broken code. Never skip validation.\n\n ---\n\n ## Phase 0: CONTEXT — Load Project State\n\n The upstream setup node produced this context:\n\n $validate-prd.output\n\n **User message**: $USER_MESSAGE\n\n ---\n\n ### 0.1 Parse PRD Directory\n\n Extract the `PRD_DIR=...` line from the context above. This is the directory containing your PRD files.\n Store this path — use it for ALL file operations below.\n\n ### 0.2 Read Current State (from disk, not from context above)\n\n The context above is a snapshot from before the loop started. Previous iterations may have changed files.\n **You MUST re-read from disk to get the current state:**\n\n 1. **Read `{prd-dir}/progress.txt`** — your only link to previous iterations\n - Check the `## Codebase Patterns` section FIRST for learnings from prior iterations\n - Check recent entries for gotchas to avoid\n 2. 
**Read `{prd-dir}/prd.json`** — the source of truth for story completion state\n 3. **Read `{prd-dir}/prd.md`** — full requirements, technical patterns, acceptance criteria\n\n ### 0.3 Read Project Rules\n\n ```bash\n cat CLAUDE.md\n ```\n\n Note all coding standards, patterns, and rules. Follow them exactly.\n\n **PHASE_0_CHECKPOINT:**\n - [ ] PRD directory identified\n - [ ] progress.txt read (or noted as absent)\n - [ ] prd.json read — know which stories pass/fail\n - [ ] prd.md read — understand requirements\n - [ ] CLAUDE.md rules noted\n\n ---\n\n ## Phase 1: SELECT — Pick Next Story\n\n ### 1.1 Find Eligible Story\n\n From `prd.json`, find the **highest priority** story where:\n - `passes` is `false`\n - ALL stories in `dependsOn` have `passes: true`\n\n **If ALL stories have `passes: true`** → Skip to Phase 6 (Completion).\n\n **If no eligible stories exist** (all remaining are blocked):\n ```\n BLOCKED: No eligible stories. Remaining stories and their blockers:\n - {story-id}: blocked by {dep-id} (passes: false)\n ```\n End normally. The loop will terminate on max_iterations.\n\n ### 1.2 Announce Selection\n\n ```\n ── Story Selected ──────────────────────────────────\n ID: {story-id}\n Title: {story-title}\n Priority: {priority}\n Dependencies: {deps or \"none\"}\n\n Acceptance Criteria:\n - {criterion 1}\n - {criterion 2}\n - ...\n ────────────────────────────────────────────────────\n ```\n\n After announcing the selected story, emit the story started event:\n ```bash\n bun run cli workflow event emit --run-id $WORKFLOW_ID --type ralph_story_started --data '{\"story_id\":\"{story-id}\",\"title\":\"{story-title}\"}' || true\n ```\n\n **PHASE_1_CHECKPOINT:**\n - [ ] Eligible story found (or all complete / all blocked)\n - [ ] Acceptance criteria understood\n - [ ] Dependencies verified as complete\n\n ---\n\n ## Phase 2: IMPLEMENT — Code the Story\n\n ### 2.1 Explore Before Coding\n\n Before writing any code:\n 1. 
Read all files you plan to modify — understand current state\n 2. Check `## Codebase Patterns` in progress.txt for discovered patterns\n 3. Look for similar implementations in the codebase to mirror\n 4. Read the `technicalNotes` field from the story in prd.json\n\n ### 2.2 Implementation Rules\n\n **DO:**\n - Implement ONLY the selected story — one story per iteration\n - Follow existing code patterns exactly (naming, structure, imports, error handling)\n - Match the project's coding standards from CLAUDE.md\n - Write or update tests as required by acceptance criteria\n - Keep changes minimal and focused\n\n **DON'T:**\n - Refactor unrelated code\n - Add improvements not in the acceptance criteria\n - Change formatting of lines you didn't modify\n - Install new dependencies without justification from prd.md\n - Touch files unrelated to this story\n - Over-engineer — do the simplest thing that satisfies the criteria\n\n ### 2.3 Verify Types After Each File\n\n After modifying each file, run:\n ```bash\n bun run type-check\n ```\n\n **If types fail:**\n 1. Read the error carefully\n 2. Fix the type issue in your code\n 3. Re-run type-check\n 4. Do NOT proceed to the next file until types pass\n\n **PHASE_2_CHECKPOINT:**\n - [ ] Only the selected story was implemented\n - [ ] Types compile after each file change\n - [ ] Tests written/updated as needed\n - [ ] No unrelated changes\n\n ---\n\n ## Phase 3: VALIDATE — Full Verification\n\n ### 3.1 Static Analysis\n\n ```bash\n bun run type-check && bun run lint\n ```\n\n **Must pass with zero errors and zero warnings.**\n\n **If lint fails:**\n 1. Run `bun run lint:fix` for auto-fixable issues\n 2. Manually fix remaining issues\n 3. Re-run lint\n 4. Proceed only when clean\n\n ### 3.2 Tests\n\n ```bash\n bun run test\n ```\n\n **All tests must pass.**\n\n **If tests fail:**\n 1. Read the failure output\n 2. Determine: bug in your implementation or pre-existing failure?\n 3. 
If your bug → fix the implementation (not the test)\n 4. If pre-existing → note it but don't fix unrelated tests\n 5. Re-run tests\n 6. Repeat until green\n\n ### 3.3 Format Check\n\n ```bash\n bun run format:check\n ```\n\n **If formatting fails:**\n ```bash\n bun run format\n ```\n\n ### 3.4 Verify Acceptance Criteria\n\n Go through EACH acceptance criterion from the story:\n - Is it satisfied by your implementation?\n - Can you verify it (read the code, run a command, check a file)?\n\n If a criterion is NOT met, go back to Phase 2 and fix it.\n\n **PHASE_3_CHECKPOINT:**\n - [ ] Type-check passes\n - [ ] Lint passes (0 errors, 0 warnings)\n - [ ] All tests pass\n - [ ] Format is clean\n - [ ] Every acceptance criterion verified\n\n ---\n\n ## Phase 4: COMMIT — Save Changes\n\n ### 4.1 Review Staged Changes\n\n ```bash\n git add -A\n git status\n git diff --cached --stat\n ```\n\n Verify only expected files are staged. If unexpected files appear, investigate before committing.\n\n ### 4.2 Write Commit Message\n\n ```bash\n git commit -m \"$(cat <<'EOF'\n feat: {story-title}\n\n Implements {story-id} from PRD.\n\n Changes:\n - {change 1}\n - {change 2}\n - {change 3}\n EOF\n )\"\n ```\n\n **Commit message rules:**\n - Prefix: `feat:` for features, `fix:` for bugs, `refactor:` for refactors\n - Title: the story title (not the PRD name)\n - Body: list the actual changes made\n - Do NOT include AI attribution\n\n **PHASE_4_CHECKPOINT:**\n - [ ] Only expected files committed\n - [ ] Commit message is clear and accurate\n - [ ] Working directory is clean after commit\n\n ---\n\n ## Phase 5: TRACK — Update Progress Files\n\n ### 5.1 Update prd.json\n\n Set `passes: true` and add a note for the completed story:\n\n ```json\n {\n \"id\": \"{story-id}\",\n \"passes\": true,\n \"notes\": \"Implemented in iteration {N}. 
Files: {list}.\"\n }\n ```\n\n After updating prd.json, emit the story completed event:\n ```bash\n bun run cli workflow event emit --run-id $WORKFLOW_ID --type ralph_story_completed --data '{\"story_id\":\"{story-id}\",\"title\":\"{story-title}\"}' || true\n ```\n\n ### 5.2 Update progress.txt\n\n **Append** to `{prd-dir}/progress.txt`:\n\n ```\n ## {ISO Date} — {story-id}: {story-title}\n\n **Status**: PASSED\n **Files changed**:\n - {file1} — {what changed}\n - {file2} — {what changed}\n\n **Acceptance criteria verified**:\n - [x] {criterion 1}\n - [x] {criterion 2}\n\n **Learnings**:\n - {Any pattern discovered}\n - {Any gotcha encountered}\n - {Any deviation from expected approach}\n\n ---\n ```\n\n ### 5.3 Update Codebase Patterns (if applicable)\n\n If you discovered a **reusable pattern** that future iterations should know about, **prepend** it to the `## Codebase Patterns` section at the TOP of progress.txt.\n\n Format:\n ```\n ## Codebase Patterns\n\n ### {Pattern Name}\n - **Where**: `{file:lines}`\n - **Pattern**: {description}\n - **Example**: `{code snippet}`\n ```\n\n If the `## Codebase Patterns` section doesn't exist yet, create it at the top of the file.\n\n **PHASE_5_CHECKPOINT:**\n - [ ] prd.json updated with `passes: true`\n - [ ] progress.txt appended with iteration details\n - [ ] Codebase patterns updated (if applicable)\n\n ---\n\n ## Phase 6: COMPLETE — Check All Stories\n\n ### 6.1 Re-read prd.json\n\n ```bash\n cat {prd-dir}/prd.json\n ```\n\n Count stories where `passes: false`.\n\n ### 6.2 If ALL Stories Pass\n\n 1. **Push the branch:**\n ```bash\n git push -u origin HEAD\n ```\n\n 2. **Read the PR template:**\n Look for a PR template in the repo — check `.github/pull_request_template.md`, `.github/PULL_REQUEST_TEMPLATE.md`, and `docs/pull_request_template.md`. Read whichever one exists.\n\n If a template was found, fill in **every section** using the context from this implementation. 
Don't skip sections or leave placeholders — fill them honestly based on the actual changes (summary, architecture, validation evidence, security, compatibility, rollback, etc.).\n\n If no template was found, write a summary with: problem, what changed, stories table, and validation evidence.\n\n 3. **Create a draft PR** using `gh pr create --draft --base $BASE_BRANCH --title \"feat: {PRD feature name}\"` with the filled-in template as the body. Use a HEREDOC for the body.\n\n 4. **Output completion signal:**\n ```\n COMPLETE\n ```\n\n ### 6.3 If Stories Remain\n\n Report status and end normally:\n ```\n ── Iteration Complete ──────────────────────────────\n Story completed: {story-id} — {story-title}\n Stories remaining: {count}\n Next eligible: {next-story-id} — {next-story-title}\n ────────────────────────────────────────────────────\n ```\n\n The loop engine will start the next iteration with a fresh context.\n\n ---\n\n ## Handling Edge Cases\n\n ### Validation fails repeatedly\n - If type-check or tests fail 3+ times on the same error, step back\n - Re-read the acceptance criteria — you may be misunderstanding the requirement\n - Check if the story is too large (needs breaking down)\n - Note the blocker in progress.txt and end the iteration\n\n ### Story is too large for one iteration\n - Implement the minimum viable subset that satisfies the most critical acceptance criteria\n - Set `passes: true` only if ALL criteria are met\n - If you can't meet all criteria, leave `passes: false` and note what's done in progress.txt\n - The next iteration will pick it up and continue\n\n ### Pre-existing test failures\n - If tests were failing BEFORE your changes, note them but don't fix unrelated code\n - Run only the test files related to your changes if the full suite has pre-existing issues\n - Document pre-existing failures in progress.txt\n\n ### Dependency install fails\n - Check if `bun.lock` or equivalent exists\n - Try `bun install` without `--frozen-lockfile`\n 
- Note the issue in progress.txt\n\n ### Git state is dirty at iteration start\n - This shouldn't happen (fresh worktree), but if it does:\n - Run `git status` to understand what's dirty\n - If it's leftover from a failed previous iteration, commit or stash\n - Never discard changes silently\n\n ### Blocked stories — all remaining have unmet dependencies\n - Report the dependency chain in your output\n - Check if a dependency was incorrectly left as `passes: false`\n - If a dependency should be `passes: true` (the code exists and works), fix prd.json\n - Otherwise, end the iteration — the loop will exhaust max_iterations\n\n ---\n\n ## File Format Reference\n\n ### prd.json Schema\n\n ```json\n {\n \"feature\": \"Feature Name\",\n \"issueNumber\": 123,\n \"userStories\": [\n {\n \"id\": \"US-001\",\n \"title\": \"Short title\",\n \"description\": \"As a..., I want..., so that...\",\n \"acceptanceCriteria\": [\"criterion 1\", \"criterion 2\"],\n \"technicalNotes\": \"Implementation hints\",\n \"dependsOn\": [\"US-000\"],\n \"priority\": 1,\n \"passes\": false,\n \"notes\": \"\"\n }\n ]\n }\n ```\n\n ### progress.txt Format\n\n ```\n ## Codebase Patterns\n\n ### {Pattern Name}\n - Where: `file:lines`\n - Pattern: description\n - Example: `code`\n\n ---\n\n ## {Date} — {story-id}: {title}\n\n **Status**: PASSED\n **Files changed**: ...\n **Acceptance criteria verified**: ...\n **Learnings**: ...\n\n ---\n ```\n\n ---\n\n ## Success Criteria\n\n - **ONE_STORY**: Exactly one story implemented per iteration\n - **VALIDATED**: Type-check + lint + tests + format all pass before commit\n - **COMMITTED**: Changes committed with clear message\n - **TRACKED**: prd.json and progress.txt updated accurately\n - **PATTERNS_SHARED**: Discovered patterns added to progress.txt for future iterations\n - **NO_SCOPE_CREEP**: No unrelated changes, no refactoring, no \"improvements\"\n until: COMPLETE\n max_iterations: 15\n fresh_context: true\n\n # 
═══════════════════════════════════════════════════════════════\n # NODE 5: COMPLETION REPORT\n # Reads final state and produces a summary.\n # ═══════════════════════════════════════════════════════════════\n\n - id: report\n depends_on: [implement]\n prompt: |\n # Completion Report\n\n The Ralph implementation loop has finished. Generate a completion report.\n\n ## Context\n\n **Loop output (last iteration):**\n\n $implement.output\n\n **Setup context:**\n\n $validate-prd.output\n\n ---\n\n ## Instructions\n\n ### 1. Read Final State\n\n Extract the `PRD_DIR=...` from the setup context above.\n Read the CURRENT files from disk:\n\n ```bash\n cat {prd-dir}/prd.json\n cat {prd-dir}/progress.txt\n ```\n\n ### 2. Gather Git Info\n\n ```bash\n git log --oneline --no-merges $(git merge-base HEAD $BASE_BRANCH)..HEAD\n git diff --stat $(git merge-base HEAD $BASE_BRANCH)..HEAD\n ```\n\n ### 3. Check PR Status\n\n ```bash\n gh pr view HEAD --json url,number,state 2>/dev/null || echo \"No PR found\"\n ```\n\n ### 4. Generate Report\n\n Output this format:\n\n ```\n ═══════════════════════════════════════════════════════\n RALPH DAG — COMPLETION REPORT\n ═══════════════════════════════════════════════════════\n\n Feature: {feature name from prd.json}\n PRD: {prd-dir}\n Branch: {branch name}\n PR: {url or \"not created\"}\n\n ── Stories ─────────────────────────────────────────\n\n | ID | Title | Status |\n |----|-------|--------|\n {for each story from prd.json}\n\n Total: {N}/{M} stories passing\n\n ── Commits ─────────────────────────────────────────\n\n {git log output}\n\n ── Files Changed ─────────────────────────────────\n\n {git diff --stat output}\n\n ── Patterns Discovered ─────────────────────────────\n\n {from ## Codebase Patterns in progress.txt, or \"None\"}\n\n ═══════════════════════════════════════════════════════\n ```\n\n Keep it factual. 
No commentary — just the data.\n", - "archon-refactor-safely": "name: archon-refactor-safely\ndescription: |\n Use when: User wants to refactor code safely with continuous validation and behavior preservation.\n Triggers: \"refactor\", \"refactor safely\", \"split this file\", \"extract module\", \"break up\",\n \"decompose\", \"safe refactor\", \"split file\", \"extract into modules\".\n Does: Scans refactoring scope -> analyzes impact (read-only) -> plans ordered task list ->\n executes with type-check hooks after every edit -> validates full suite ->\n verifies behavior preservation (read-only) -> creates PR with before/after comparison.\n NOT for: Bug fixes (use archon-fix-github-issue), feature development (use archon-feature-development),\n general architecture sweeps (use archon-architect), PR reviews.\n\n Key safety features:\n - Analysis and verification nodes are read-only (denied_tools: [Write, Edit, Bash])\n - PreToolUse hooks check if each edit is in the plan\n - PostToolUse hooks force type-check after every file change\n - Behavior verification confirms no logic changes after refactoring\n\nprovider: claude\n\nnodes:\n # ═══════════════════════════════════════════════════════════════\n # PHASE 1: SCAN — Find files matching the refactoring target\n # ═══════════════════════════════════════════════════════════════\n\n - id: scan-scope\n bash: |\n echo \"=== REFACTORING TARGET ===\"\n echo \"User request: $ARGUMENTS\"\n echo \"\"\n\n echo \"=== FILE SIZE ANALYSIS (source files by size) ===\"\n find . -name '*.ts' -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*' -not -name '*.test.ts' -not -name '*.d.ts' \\\n -exec wc -l {} + 2>/dev/null | sort -rn | head -30\n echo \"\"\n\n echo \"=== FILES OVER 500 LINES ===\"\n find . 
-name '*.ts' -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*' -not -name '*.test.ts' -not -name '*.d.ts' \\\n -exec sh -c 'lines=$(wc -l < \"$1\"); if [ \"$lines\" -gt 500 ]; then echo \"$lines $1\"; fi' _ {} \\; 2>/dev/null | sort -rn\n echo \"\"\n\n echo \"=== FUNCTION COUNT PER FILE (top 20) ===\"\n for f in $(find . -name '*.ts' -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*' -not -name '*.test.ts' -not -name '*.d.ts'); do\n count=$(grep -cE '^\\s*(export\\s+)?(async\\s+)?function\\s|=>\\s*\\{' \"$f\" 2>/dev/null) || count=0\n if [ \"$count\" -gt 5 ]; then\n echo \"$count functions: $f\"\n fi\n done | sort -rn | head -20\n echo \"\"\n\n echo \"=== EXPORT ANALYSIS (files with many exports) ===\"\n for f in $(find . -name '*.ts' -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*' -not -name '*.test.ts' -not -name '*.d.ts'); do\n count=$(grep -c \"^export \" \"$f\" 2>/dev/null) || count=0\n if [ \"$count\" -gt 5 ]; then\n echo \"$count exports: $f\"\n fi\n done | sort -rn | head -20\n timeout: 60000\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 2: ANALYZE IMPACT — Read-only deep analysis\n # Maps call sites, identifies risk areas, understands dependencies\n # ═══════════════════════════════════════════════════════════════\n\n - id: analyze-impact\n prompt: |\n You are a senior software engineer analyzing code for a safe refactoring.\n\n ## Refactoring Request\n\n $ARGUMENTS\n\n ## Codebase Scan Results\n\n $scan-scope.output\n\n ## Instructions\n\n 1. Identify the PRIMARY file(s) targeted for refactoring based on the user's request\n and the scan results above\n 2. Read each target file thoroughly — understand every function, type, and export\n 3. 
For each target file, map ALL call sites:\n - Use Grep to find every import of the target file across the codebase\n - Track which specific exports are used and where\n - Note any dynamic imports or re-exports through index files\n 4. Identify risk areas:\n - Functions with complex internal dependencies (shared closures, module-level state)\n - Circular dependencies between functions in the file\n - Any module-level side effects (top-level `const`, initialization code)\n - Exports that are part of the public API vs internal-only\n 5. Check for existing tests:\n - Find test files for the target module(s)\n - Note what's tested and what isn't\n\n ## Output\n\n Write a thorough impact analysis to `$ARTIFACTS_DIR/impact-analysis.md` with:\n\n ### Target Files\n - File path, line count, function count\n - List of all exported symbols with brief descriptions\n\n ### Dependency Map\n - Which files import from the target (with specific imports used)\n - Which files the target imports from\n\n ### Risk Assessment\n - Module-level state or side effects\n - Complex internal dependencies between functions\n - Public API surface that must be preserved exactly\n\n ### Test Coverage\n - Existing test files and what they cover\n - Critical paths that must remain tested\n\n ### Recommended Decomposition Strategy\n - Suggested module boundaries (which functions group together)\n - Rationale for each grouping (cohesion, shared dependencies)\n depends_on: [scan-scope]\n context: fresh\n denied_tools: [Write, Edit, Bash]\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 3: PLAN REFACTOR — Ordered task list with rollback strategy\n # Read-only: produces the plan, does not execute it\n # ═══════════════════════════════════════════════════════════════\n\n - id: plan-refactor\n prompt: |\n You are planning a safe refactoring. 
You must produce a precise, ordered plan\n that another agent will follow literally.\n\n ## Impact Analysis\n\n $analyze-impact.output\n\n ## Refactoring Goal\n\n $ARGUMENTS\n\n ## Principles\n\n - **Behavior preservation**: The refactoring must NOT change any behavior — only structure\n - **Incremental**: Each step must leave the codebase in a compilable state\n - **Reversible**: Each step can be independently reverted\n - **No mixed concerns**: Do not combine refactoring with bug fixes or improvements\n - **Preserve public API**: All existing exports must remain accessible from the same import paths\n - **Maximum file size**: Target 500 lines or fewer per file after refactoring\n\n ## Instructions\n\n 1. Read the impact analysis from `$ARTIFACTS_DIR/impact-analysis.md`\n 2. Read the target file(s) to understand the current structure\n 3. Design the decomposition:\n - Group related functions into cohesive modules\n - Identify shared utilities, types, and constants\n - Plan the new file structure with descriptive names\n 4. 
Write an ordered task list where each task is:\n - Independent and leaves code compilable after completion\n - Specific about what to extract and where\n - Clear about import updates needed\n\n ## Output\n\n Write the plan to `$ARTIFACTS_DIR/refactor-plan.md` with:\n\n ### File Structure (Before)\n ```\n [current structure with line counts]\n ```\n\n ### File Structure (After)\n ```\n [planned structure with estimated line counts]\n ```\n\n ### Ordered Tasks\n\n For each task:\n ```\n ## Task N: [brief description]\n\n **Action**: CREATE | EXTRACT | UPDATE\n **Source**: [source file]\n **Target**: [target file]\n **What moves**:\n - function functionName (lines X-Y)\n - type TypeName (lines X-Y)\n\n **Import updates needed**:\n - [file]: change import from [old] to [new]\n\n **Rollback**: [how to undo this specific step]\n ```\n\n ### Validation Commands\n - Type check: `bun run type-check`\n - Lint: `bun run lint`\n - Tests: `bun run test`\n - Format: `bun run format:check`\n depends_on: [analyze-impact]\n context: fresh\n denied_tools: [Write, Edit, Bash]\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 4: EXECUTE REFACTOR — Implements the plan with guardrails\n # Hooks enforce type-check after every edit and plan adherence\n # ═══════════════════════════════════════════════════════════════\n\n - id: execute-refactor\n model: claude-opus-4-6[1m]\n prompt: |\n You are executing a refactoring plan with strict safety guardrails.\n\n ## Plan\n\n Read the full plan from `$ARTIFACTS_DIR/refactor-plan.md` — follow it LITERALLY.\n\n ## Rules\n\n - **Follow the plan exactly** — do not add extra improvements or cleanups\n - **One task at a time** — complete each task fully before starting the next\n - **Type-check after every file change** — you'll be prompted to do this after each edit\n - **Preserve all behavior** — refactoring means moving code, not changing it\n - **Preserve the public API** — if the original file exported something, it 
must still be\n importable from the same path (use re-exports in the original file if needed)\n - **Update all import sites** — every file that imported from the original must be updated\n - **Commit after each logical task** — one commit per plan task with a clear message\n\n ## Process for Each Task\n\n 1. Read the plan task\n 2. Read the source file to understand current state\n 3. Create the new file (if extracting) with the functions/types being moved\n 4. Update the source file to remove the moved code and add imports from the new file\n 5. Update the original file's exports to re-export from the new module (API preservation)\n 6. Use Grep to find and update ALL import sites across the codebase\n 7. Run `bun run type-check` to verify (you'll be reminded by hooks)\n 8. Commit: `git add -A && git commit -m \"refactor: [task description]\"`\n 9. Move to next task\n\n ## Handling Problems\n\n - If type-check fails after a change: fix it immediately before proceeding\n - If a task is more complex than planned: complete it anyway, note the deviation\n - If you discover the plan missed an import site: update it and note it\n - NEVER skip a task — complete them in order\n depends_on: [plan-refactor]\n context: fresh\n hooks:\n PreToolUse:\n - matcher: \"Write|Edit\"\n response:\n hookSpecificOutput:\n hookEventName: PreToolUse\n additionalContext: >\n Before modifying this file: Is this file in your refactoring plan\n ($ARTIFACTS_DIR/refactor-plan.md)? If it's not a planned target file\n AND not a file that imports from the target, explain why you're touching it.\n Unplanned changes increase risk.\n PostToolUse:\n - matcher: \"Write|Edit\"\n response:\n systemMessage: >\n You just modified a file. STOP and do these things NOW before making any\n other changes:\n 1. Run `bun run type-check` to verify the change compiles\n 2. If type-check fails, fix the error immediately\n 3. 
Verify you preserved the exact same behavior — no logic changes, only structural moves\n Only proceed to the next change after type-check passes.\n - matcher: \"Bash\"\n response:\n hookSpecificOutput:\n hookEventName: PostToolUse\n additionalContext: >\n Check the exit code. If type-check or any validation failed, fix the issue\n before continuing. Do not accumulate broken state.\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 5: VALIDATE — Full test suite (bash, no AI escape hatch)\n # ═══════════════════════════════════════════════════════════════\n\n - id: validate\n bash: |\n echo \"=== TYPE CHECK ===\"\n bun run type-check 2>&1\n TC_EXIT=$?\n\n echo \"\"\n echo \"=== LINT ===\"\n bun run lint 2>&1\n LINT_EXIT=$?\n\n echo \"\"\n echo \"=== FORMAT CHECK ===\"\n bun run format:check 2>&1\n FMT_EXIT=$?\n\n echo \"\"\n echo \"=== TESTS ===\"\n bun run test 2>&1\n TEST_EXIT=$?\n\n echo \"\"\n echo \"=== FILE SIZE CHECK ===\"\n echo \"Files still over 500 lines:\"\n find . 
-name '*.ts' -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*' -not -name '*.test.ts' -not -name '*.d.ts' \\\n -exec sh -c 'lines=$(wc -l < \"$1\"); if [ \"$lines\" -gt 500 ]; then echo \"$lines $1\"; fi' _ {} \\; 2>/dev/null | sort -rn\n echo \"\"\n\n echo \"=== RESULTS ===\"\n echo \"Type check: $([ $TC_EXIT -eq 0 ] && echo 'PASS' || echo 'FAIL')\"\n echo \"Lint: $([ $LINT_EXIT -eq 0 ] && echo 'PASS' || echo 'FAIL')\"\n echo \"Format: $([ $FMT_EXIT -eq 0 ] && echo 'PASS' || echo 'FAIL')\"\n echo \"Tests: $([ $TEST_EXIT -eq 0 ] && echo 'PASS' || echo 'FAIL')\"\n\n if [ $TC_EXIT -eq 0 ] && [ $LINT_EXIT -eq 0 ] && [ $FMT_EXIT -eq 0 ] && [ $TEST_EXIT -eq 0 ]; then\n echo \"VALIDATION_STATUS: PASS\"\n else\n echo \"VALIDATION_STATUS: FAIL\"\n fi\n depends_on: [execute-refactor]\n timeout: 300000\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 6: FIX VALIDATION FAILURES (if any)\n # Only does real work if validation failed\n # ═══════════════════════════════════════════════════════════════\n\n - id: fix-failures\n prompt: |\n Review the validation output below.\n\n ## Validation Output\n\n $validate.output\n\n ## Instructions\n\n If the output ends with \"VALIDATION_STATUS: PASS\", respond with\n \"All checks passed — no fixes needed.\" and stop.\n\n If there are failures:\n\n 1. Read the validation failures carefully\n 2. Fix ONLY what's broken — do not make additional improvements\n 3. If a fix requires changing behavior (not just fixing a type/lint error),\n revert the original change instead\n 4. Run the specific failing check after each fix to confirm it passes\n 5. 
After all fixes, run the full validation suite: `bun run validate`\n\n If there are files still over 500 lines, note them but do NOT attempt further\n splitting in this node — that would require a new plan cycle.\n depends_on: [validate]\n context: fresh\n hooks:\n PostToolUse:\n - matcher: \"Write|Edit\"\n response:\n systemMessage: >\n You just made a fix. Run the specific failing validation check NOW\n to verify your fix works. Do not batch fixes — verify each one.\n PreToolUse:\n - matcher: \"Write|Edit\"\n response:\n hookSpecificOutput:\n hookEventName: PreToolUse\n additionalContext: >\n You are fixing validation failures only. Do not make any changes\n beyond what's needed to pass the failing checks. If in doubt, revert\n the original change that caused the failure.\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 7: VERIFY BEHAVIOR — Read-only confirmation\n # Ensures the refactoring preserved behavior by tracing call paths\n # ═══════════════════════════════════════════════════════════════\n\n - id: verify-behavior\n prompt: |\n You are a code reviewer verifying that a refactoring preserved exact behavior.\n You can ONLY read files — you cannot make any changes.\n\n ## Refactoring Plan\n\n Read the plan from `$ARTIFACTS_DIR/refactor-plan.md` to understand what was intended.\n\n ## Instructions\n\n 1. Use Grep and Glob to find all files in the new module locations listed in\n the plan, then Read each one. (Note: Bash is denied in this read-only node,\n so use Grep/Glob/Read to discover changes instead of git commands.)\n 2. For each new file created by the refactoring:\n - Verify the extracted functions match the originals exactly (no logic changes)\n - Check that all types and interfaces are preserved\n 3. For the original file(s):\n - Verify re-exports exist for all symbols that were previously exported\n - Confirm no function bodies were changed (only moved)\n 4. 
For all import sites updated:\n - Verify imports resolve to the correct new locations\n - Check that no import was missed\n 5. Verify the public API is preserved:\n - Any code that imported from the original file should still work unchanged\n - Re-exports in the original file should cover all moved symbols\n\n ## Output\n\n Write your verification report to `$ARTIFACTS_DIR/behavior-verification.md`:\n\n ### Verdict: PASS | FAIL\n\n ### Functions Verified\n | Function | Original Location | New Location | Behavior Preserved |\n |----------|------------------|--------------|-------------------|\n | funcName | file.ts:42 | new-file.ts:10 | Yes/No |\n\n ### Public API Check\n - [ ] All original exports still accessible from original import path\n - [ ] Re-exports correctly configured\n\n ### Import Sites Updated\n - [ ] All N import sites verified\n\n ### Issues Found\n [List any behavior changes detected, or \"None — refactoring is behavior-preserving\"]\n depends_on: [fix-failures]\n context: fresh\n denied_tools: [Write, Edit, Bash]\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 8: CREATE PR — Detailed description with before/after\n # ═══════════════════════════════════════════════════════════════\n\n - id: create-pr\n prompt: |\n Create a pull request for the refactoring.\n\n ## Context\n\n - **Refactoring goal**: $ARGUMENTS\n - **Impact analysis**: Read `$ARTIFACTS_DIR/impact-analysis.md`\n - **Refactoring plan**: Read `$ARTIFACTS_DIR/refactor-plan.md`\n - **Validation**: $validate.output\n - **Behavior verification**: Read `$ARTIFACTS_DIR/behavior-verification.md`\n\n ## Instructions\n\n 1. Stage all changes and create a final commit if there are uncommitted changes\n 2. Push the branch: `git push -u origin HEAD`\n 3. Check if a PR already exists: `gh pr list --head $(git branch --show-current)`\n 4. Create the PR with the format below\n 5. 
Save the PR URL to `$ARTIFACTS_DIR/.pr-url`\n\n ## PR Format\n\n - **Title**: `refactor: [concise description]` (under 70 chars)\n - **Body**:\n\n ```markdown\n ## Refactoring: [goal]\n\n ### Motivation\n\n [Why this refactoring was needed — file sizes, complexity, maintainability]\n\n ### Before\n\n ```\n [Original file structure with line counts from the plan]\n ```\n\n ### After\n\n ```\n [New file structure with line counts]\n ```\n\n ### Changes\n\n [For each new module: what was extracted and why it's a cohesive unit]\n\n ### Safety\n\n - [x] Type check passes\n - [x] Lint passes\n - [x] Tests pass (all existing tests still green)\n - [x] Public API preserved (re-exports maintain backward compatibility)\n - [x] Behavior verification passed (read-only audit confirmed no logic changes)\n - [x] Each task committed separately for easy review/revert\n\n ### Review Guide\n\n Each commit represents one extraction step. Review commits individually for easiest review.\n All commits are behavior-preserving structural moves.\n ```\n depends_on: [verify-behavior]\n context: fresh\n hooks:\n PreToolUse:\n - matcher: \"Write|Edit\"\n response:\n hookSpecificOutput:\n hookEventName: PreToolUse\n permissionDecision: deny\n permissionDecisionReason: \"PR creation node — do not modify source files. Use only git and gh commands.\"\n PostToolUse:\n - matcher: \"Bash\"\n response:\n hookSpecificOutput:\n hookEventName: PostToolUse\n additionalContext: >\n Verify this command succeeded. 
If git push or gh pr create failed,\n read the error message carefully before retrying.\n", + "archon-piv-loop": "name: archon-piv-loop\ndescription: |\n Use when: User wants guided Plan-Implement-Validate development with human-in-the-loop.\n Triggers: \"piv\", \"piv loop\", \"plan implement validate\", \"guided development\",\n \"structured development\", \"build a feature\", \"develop with review\".\n NOT for: Autonomous implementation without planning (use archon-feature-development).\n NOT for: PRD creation (use archon-interactive-prd).\n NOT for: Ralph story-based implementation (use archon-ralph-dag).\n\n Interactive PIV loop workflow — the foundational AI coding methodology:\n 1. EXPLORE: Iterative conversation with human to understand the problem (arbitrary rounds)\n 2. PLAN: Create structured plan -> iterative review & revision (arbitrary rounds)\n 3. IMPLEMENT: Autonomous task-by-task implementation from plan (Ralph loop)\n 4. VALIDATE: Automated code review -> iterative human feedback & fixes (arbitrary rounds)\n\n The PIV loop comes AFTER a PRD exists. 
Each PIV loop focuses on ONE granular feature or bug fix.\n Input: A description of what to build, a path to an existing plan, or a GitHub issue number.\n\nprovider: claude\ninteractive: true\n\nnodes:\n # ═══════════════════════════════════════════════════════════════\n # PHASE 1: EXPLORE — Iterative exploration with human\n # Understand the idea, explore the codebase, converge on approach\n # Loops until the user says they're ready to create the plan.\n # ═══════════════════════════════════════════════════════════════\n\n - id: explore\n loop:\n prompt: |\n # PIV Loop — Exploration\n\n You are a senior engineering partner in an iterative exploration session.\n Your goal: DEEPLY UNDERSTAND what to build before any code is written.\n\n **User's request**: $ARGUMENTS\n **User's latest input**: $LOOP_USER_INPUT\n\n ---\n\n ## If this is the FIRST iteration (no user input yet):\n\n ### Step 1: Parse the Input\n\n Determine what the user provided:\n\n **If it's a file path** (ends in `.md`, `.plan.md`, or `.prd.md`):\n - Read the file\n - If it's an existing plan → summarize it and ask if they want to refine or proceed\n - If it's a PRD → identify the specific phase/feature to focus on\n\n **If it's a GitHub issue** (`#123` format):\n - Fetch it: `gh issue view {number} --json title,body,labels,comments`\n - Summarize the issue context\n\n **If it's free text**:\n - This is a feature idea or bug description. Use it directly.\n\n ### Step 2: Explore the Codebase\n\n Before asking questions, DO YOUR HOMEWORK:\n\n 1. **Read CLAUDE.md** — understand project conventions, architecture, and constraints\n 2. **Search for related code** — find existing implementations similar to what the user wants\n 3. **Read key files** — understand the current state of code the user wants to change\n 4. 
**Check recent git history** — `git log --oneline -20` for recent changes in the area\n\n ### Step 3: Present Your Understanding\n\n ```\n ## What I Understand\n\n You want to: {restated understanding in 2-3 sentences}\n\n ## What Already Exists\n\n - {file:line} — {what it does and how it relates}\n - {file:line} — {what it does and how it relates}\n - {pattern/component} — {how it could be extended or reused}\n\n ## Initial Architecture Thoughts\n\n Based on what exists, I'm thinking:\n - {approach 1 — extend existing X}\n - {approach 2 — if approach 1 doesn't work}\n - {key architectural decision that needs your input}\n ```\n\n ### Step 4: Ask Targeted Questions\n\n Ask 4-6 questions focused on DECISIONS, not information gathering:\n - Scope boundaries, architecture preferences, tech decisions\n - Constraints, existing code extension vs fresh build, testing expectations\n - Reference actual code you found — don't ask generic questions\n\n ---\n\n ## If the user has provided input (subsequent iterations):\n\n ### Step 1: Process Their Response\n\n Read their answers carefully. 
Identify:\n - Decisions they've made\n - Areas they want you to explore further\n - Questions they asked YOU back (answer these with evidence!)\n\n ### Step 2: Do Targeted Research\n\n Based on their response:\n - If they mentioned specific technologies → research best practices\n - If they pointed you to specific code → read it thoroughly\n - If they asked you to explore an area → do a thorough investigation\n - If they made architecture decisions → validate against the codebase\n\n ### Step 3: Present Updated Understanding\n\n Show what you learned, answer their questions with file:line references,\n and present your refined architecture recommendation.\n\n ### Step 4: Converge or Continue\n\n **If there are still important open questions:**\n Ask 2-4 focused questions about remaining ambiguities.\n\n **If the picture is clear and you have enough to create a plan:**\n Present a final implementation summary:\n\n ```\n ## Implementation Summary\n\n ### What We're Building\n {Clear, specific description}\n\n ### Scope Boundary\n - IN: {what's included}\n - OUT: {what's explicitly excluded}\n\n ### Architecture\n - {key decisions}\n\n ### Files That Will Change\n - `{file}` — {what changes and why}\n\n ### Success Criteria\n - [ ] {specific, testable criterion}\n - [ ] All validation passes\n\n ### Key Risks\n - {risk — and mitigation}\n ```\n\n Then tell the user: \"I have a clear picture. Say **ready** and I'll create\n the structured implementation plan, or share any final thoughts.\"\n\n **CRITICAL — READ THIS CAREFULLY**:\n - NEVER output PLAN_READY unless the user's LATEST message contains\n an EXPLICIT phrase like \"ready\", \"create the plan\", \"let's go\", \"proceed\", or \"I'm done\".\n - If the user asked a question → do NOT emit the signal. Answer the question.\n - If the user gave feedback or requested changes → do NOT emit the signal. Address it.\n - If the user said \"also check X\" or \"one more thing\" → do NOT emit the signal. 
Explore it.\n - If you are unsure whether the user is approving → do NOT emit the signal. Ask them.\n - The ONLY correct time to emit the signal is when the user's message CLEARLY means\n \"stop exploring, I'm ready for you to create the plan.\"\n until: PLAN_READY\n max_iterations: 15\n interactive: true\n gate_message: |\n Answer the questions above, ask me to explore specific areas,\n or say \"ready\" when you're satisfied with the exploration.\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 2: PLAN — Create the structured implementation plan\n # ═══════════════════════════════════════════════════════════════\n\n - id: create-plan\n model: sonnet\n depends_on: [explore]\n context: fresh\n prompt: |\n # PIV Loop — Create Structured Plan\n\n You are creating a structured implementation plan from a completed exploration phase.\n This plan will be the SOLE GUIDE for the implementation agent — it must be complete,\n specific, and actionable.\n\n **Original request**: $ARGUMENTS\n **Final exploration summary**: $explore.output\n\n ---\n\n ## Step 1: Read the Codebase (Again)\n\n Before writing the plan, verify your understanding is current:\n\n 1. **Read CLAUDE.md** — capture all relevant conventions\n 2. **Read every file you plan to change** — note exact current state\n 3. **Read example test files** — understand testing patterns\n 4. **Check for any recent changes** — `git log --oneline -10`\n\n ## Step 2: Plan File Location\n\n Save the plan to `$ARTIFACTS_DIR/plan.md`.\n The directory already exists (pre-created by the workflow executor).\n\n ## Step 3: Write the Plan\n\n Use this template. 
Fill EVERY section with specific, verified information.\n\n ```markdown\n # Feature: {Title}\n\n ## Summary\n {1-2 sentences: what changes and why}\n\n ## Mission\n {The core goal in one clear statement}\n\n ## Success Criteria\n - [ ] {Specific, testable criterion}\n - [ ] All validation passes (`bun run validate` or equivalent)\n - [ ] No regressions in existing tests\n\n ## Scope\n ### In Scope\n - {What we ARE building}\n ### Out of Scope\n - {What we are NOT building — and why}\n\n ## Codebase Context\n ### Key Files\n | File | Role | Action |\n |------|------|--------|\n | `{path}` | {what it does} | CREATE / UPDATE |\n\n ### Patterns to Follow\n {Actual code snippets from the codebase to mirror}\n\n ## Architecture\n - {Decision 1 — with rationale}\n - {Decision 2 — with rationale}\n\n ## Task List\n Execute in order. Each task is atomic and independently verifiable.\n\n ### Task 1: {ACTION} `{file path}`\n **Action**: CREATE / UPDATE\n **Details**: {Exact changes — specific enough for an agent with no context}\n **Pattern**: Follow `{source file}:{lines}`\n **Validate**: `{command to verify this task}`\n\n ## Testing Strategy\n | Test File | Test Cases | Validates |\n |-----------|-----------|-----------|\n | `{path}` | {cases} | {what it validates} |\n\n ## Validation Commands\n 1. Type check: `{command}`\n 2. Lint: `{command}`\n 3. Tests: `{command}`\n 4. Full validation: `{command}`\n\n ## Risks\n | Risk | Impact | Mitigation |\n |------|--------|------------|\n | {risk} | {HIGH/MED/LOW} | {specific mitigation} |\n ```\n\n ## Step 4: Verify the Plan\n\n 1. Check every file path referenced — verify they exist\n 2. Check every pattern cited — verify the code matches\n 3. Check task ordering — ensure dependencies are respected\n 4. 
Check completeness — could an agent with NO context implement this?\n\n ## Step 5: Report\n\n ```\n ## Plan Created\n\n **File**: `$ARTIFACTS_DIR/plan.md`\n **Tasks**: {count}\n **Files to change**: {count}\n\n Key decisions:\n - {decision 1}\n - {decision 2}\n\n Please review the plan and provide feedback.\n ```\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 2b: PLAN — Iterative plan refinement\n # Review and revise the plan as many times as needed.\n # ═══════════════════════════════════════════════════════════════\n\n - id: refine-plan\n depends_on: [create-plan]\n loop:\n prompt: |\n # PIV Loop — Plan Refinement\n\n The user is reviewing the implementation plan and providing feedback.\n\n **User's feedback**: $LOOP_USER_INPUT\n\n ---\n\n ## Step 1: Read the Plan\n\n Read `$ARTIFACTS_DIR/plan.md` and CLAUDE.md for conventions.\n\n ## Step 2: Process Feedback\n\n **If there is no user feedback yet** (first iteration, $LOOP_USER_INPUT is empty):\n - Read the plan carefully\n - Present a summary of the plan's key decisions and task list\n - Ask the user to review and provide feedback\n - Do NOT emit the completion signal on the first iteration\n\n **If the user EXPLICITLY approved** (said \"approved\", \"looks good\", \"let's go\", etc.):\n - Make no changes\n - Output: \"Plan approved. 
Proceeding to implementation.\"\n - Signal completion: PLAN_APPROVED\n\n **If the user provided specific feedback:**\n - Parse each piece of feedback\n - Edit the plan file directly:\n - Add/remove/modify tasks as requested\n - Update success criteria if needed\n - Adjust testing strategy if needed\n - Re-verify file paths and patterns after changes\n\n **CRITICAL**: NEVER emit PLAN_APPROVED unless the user's latest\n message EXPLICITLY says \"approved\", \"looks good\", \"ship it\", or similar approval.\n Questions, feedback, and requests for changes are NOT approval.\n\n ## Step 3: Show Changes\n\n ```\n ## Plan Revised\n\n Changes made:\n - {change 1}\n - {change 2}\n\n Updated stats:\n - Tasks: {count}\n - Files to change: {count}\n\n Review the updated plan and provide more feedback, or say \"approved\" to proceed.\n ```\n until: PLAN_APPROVED\n max_iterations: 10\n interactive: true\n gate_message: |\n Review the plan document. Provide specific feedback on what to change,\n or say \"approved\" to begin implementation.\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 3: IMPLEMENT — Setup\n # Read the plan, prepare the environment\n # ═══════════════════════════════════════════════════════════════\n\n - id: implement-setup\n depends_on: [refine-plan]\n bash: |\n set -e\n\n PLAN_FILE=\"$ARTIFACTS_DIR/plan.md\"\n\n if [ ! 
-f \"$PLAN_FILE\" ]; then\n echo \"ERROR: No plan file found at $ARTIFACTS_DIR/plan.md\"\n exit 1\n fi\n\n # Install dependencies if needed\n if [ -f \"bun.lock\" ] || [ -f \"bun.lockb\" ]; then\n echo \"Installing dependencies...\"\n bun install --frozen-lockfile 2>&1 | tail -3\n elif [ -f \"package-lock.json\" ]; then\n npm ci 2>&1 | tail -3\n elif [ -f \"yarn.lock\" ]; then\n yarn install --frozen-lockfile 2>&1 | tail -3\n elif [ -f \"pnpm-lock.yaml\" ]; then\n pnpm install --frozen-lockfile 2>&1 | tail -3\n fi\n\n echo \"BRANCH=$(git branch --show-current)\"\n echo \"GIT_ROOT=$(git rev-parse --show-toplevel)\"\n echo \"PLAN_FILE=$PLAN_FILE\"\n\n echo \"=== PLAN_START ===\"\n cat \"$PLAN_FILE\"\n echo \"\"\n echo \"=== PLAN_END ===\"\n\n # NOTE: grep -c prints the count (including 0) itself; \"|| echo 0\" would\n # double-output on no-match (grep exits 1), yielding \"0\\n0\" and breaking\n # the integer test below. Use \"|| true\" and default only if grep produced\n # no output at all (e.g. unreadable file).\n TASK_COUNT=$(grep -c \"^### Task [0-9]\" \"$PLAN_FILE\" 2>/dev/null || true)\n TASK_COUNT=${TASK_COUNT:-0}\n if [ \"$TASK_COUNT\" -eq 0 ]; then\n echo \"ERROR: No '### Task N:' sections found in $PLAN_FILE. Plan may be malformed.\"\n exit 1\n fi\n echo \"TASK_COUNT=${TASK_COUNT}\"\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 3b: IMPLEMENT — Task-by-Task Loop (Ralph pattern)\n # Fresh context each iteration. Reads plan from disk.\n # One task per iteration. Validates before committing.\n # ═══════════════════════════════════════════════════════════════\n\n - id: implement\n depends_on: [implement-setup]\n idle_timeout: 600000\n model: claude-opus-4-6[1m]\n loop:\n prompt: |\n # PIV Loop — Implementation Agent\n\n You are an autonomous coding agent in a FRESH session — no memory of previous iterations.\n Your job: Read the plan from disk, implement ONE task, validate, commit, update tracking, exit.\n\n **Golden Rule**: If validation fails, fix it before committing. 
Never commit broken code.\n\n ---\n\n ## Phase 0: CONTEXT — Load State\n\n The setup node produced this context:\n\n $implement-setup.output\n\n **User's original request**: $USER_MESSAGE\n\n ---\n\n ### 0.1 Parse Plan File\n\n Extract the `PLAN_FILE=...` line from the context above.\n\n ### 0.2 Read Current State (from disk — not from context above)\n\n The context above is a snapshot from before the loop started. Previous iterations\n may have changed things. **You MUST re-read from disk:**\n\n 1. **Read the plan file** — your implementation guide\n 2. **Read progress tracking** — check if `$ARTIFACTS_DIR/progress.txt` exists\n 3. **Read CLAUDE.md** — project conventions and constraints\n\n ### 0.3 Check Git State\n\n ```bash\n git log --oneline -10\n git status\n ```\n\n ---\n\n ## Phase 1: SELECT — Pick Next Task\n\n From the plan file, identify tasks by `### Task N:` headers.\n Cross-reference with commits from previous iterations and progress tracking.\n\n **If ALL tasks are complete** → Skip to Phase 5 (Completion).\n\n ### Announce Selection\n\n ```\n -- Task Selected ------------------------------------------------\n Task: {N} — {task title}\n Action: {CREATE / UPDATE}\n File: {file path}\n -----------------------------------------------------------------\n ```\n\n ---\n\n ## Phase 2: IMPLEMENT — Execute the Task\n\n 1. Read the file you're about to change (if it exists)\n 2. Read the pattern file referenced in the plan\n 3. Make changes following the plan EXACTLY\n 4. Type-check after each file: `bun run type-check 2>&1 || true`\n\n ---\n\n ## Phase 3: VALIDATE — Verify the Task\n\n ```bash\n bun run type-check && bun run lint && bun run test && bun run format:check\n ```\n\n If validation fails: fix, re-run (up to 3 attempts). 
If unfixable, note in progress\n tracking and do NOT commit broken code.\n\n ---\n\n ## Phase 4: COMMIT — Save Changes\n\n ```bash\n git add -A\n git diff --cached --stat\n git commit -m \"$(cat <<'EOF'\n {type}: {task description}\n\n PIV Task {N}: {brief details}\n EOF\n )\"\n ```\n\n Track progress in `$ARTIFACTS_DIR/progress.txt`:\n ```\n ## Task {N}: {title} — COMPLETED\n Date: {ISO date}\n Files: {list}\n Commit: {short hash}\n ---\n ```\n\n ---\n\n ## Phase 5: COMPLETE — Check All Tasks\n\n If ALL tasks are done:\n 1. Run full validation: `bun run validate 2>&1`\n 2. Push: `git push -u origin HEAD`\n 3. Signal: `COMPLETE`\n\n If tasks remain, report status and end normally. The loop engine starts a fresh iteration.\n until: COMPLETE\n max_iterations: 15\n fresh_context: true\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 4: VALIDATE — Automated code review\n # Review all changes against the plan\n # ═══════════════════════════════════════════════════════════════\n\n - id: code-review\n model: sonnet\n depends_on: [implement]\n context: fresh\n prompt: |\n # PIV Loop — Automated Code Review\n\n The implementation phase is complete. Review ALL changes against the plan.\n\n **Implementation output**: $implement.output\n\n ---\n\n ## Step 1: Read the Plan\n\n Read `$ARTIFACTS_DIR/plan.md` to understand the intended implementation.\n\n ## Step 2: Review All Changes\n\n ```bash\n git log --oneline --no-merges $(git merge-base HEAD $BASE_BRANCH)..HEAD\n git diff $BASE_BRANCH..HEAD --stat\n git diff $BASE_BRANCH..HEAD\n ```\n\n ## Step 3: Check Against Plan\n\n For EACH task: was it implemented correctly? 
Do success criteria hold?\n For EACH file: check quality, security, patterns, CLAUDE.md compliance.\n\n ## Step 4: Run Validation\n\n ```bash\n bun run validate 2>&1 || (bun run type-check && bun run lint && bun run test && bun run format:check)\n ```\n\n ## Step 5: Fix Obvious Issues\n\n Fix type errors, lint warnings, missing imports, formatting. Commit any fixes:\n ```bash\n git add -A && git commit -m \"fix: address code review findings\" || true\n ```\n\n ## Step 6: Present Review\n\n ```\n ## Code Review Complete\n\n ### Implementation Status\n | Task | Status | Notes |\n |------|--------|-------|\n | {task} | DONE / PARTIAL / MISSING | {notes} |\n\n ### Validation Results\n - Type-check: PASS / FAIL\n - Lint: PASS / FAIL\n - Tests: PASS / FAIL\n - Format: PASS / FAIL\n\n ### Code Quality Findings\n {Issues found, or \"No issues found.\"}\n\n ### Recommendation\n {READY FOR REVIEW / NEEDS FIXES}\n ```\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 4b: VALIDATE — Iterative human feedback & fixes\n # The user tests the implementation and provides feedback.\n # Loops until the user approves.\n # ═══════════════════════════════════════════════════════════════\n\n - id: fix-feedback\n depends_on: [code-review]\n loop:\n prompt: |\n # PIV Loop — Address Validation Feedback\n\n The human has reviewed the implementation and provided feedback.\n\n **Human's feedback**: $LOOP_USER_INPUT\n\n ---\n\n ## Step 1: Read Context\n\n Read `$ARTIFACTS_DIR/plan.md` and CLAUDE.md for conventions.\n\n ## Step 2: Process Feedback\n\n **If there is no user feedback yet** (first iteration, $LOOP_USER_INPUT is empty):\n - Present the code review results and ask the user to test the implementation\n - Do NOT emit the completion signal on the first iteration\n\n **If the user EXPLICITLY approved** (said \"approved\", \"looks good\", \"ship it\", etc.):\n - Output: \"Implementation approved!\"\n - Signal: VALIDATED\n\n **CRITICAL**: NEVER emit VALIDATED 
unless the user's latest\n message EXPLICITLY says \"approved\", \"looks good\", \"ship it\", or similar approval.\n\n **If the user provided specific feedback:**\n 1. Read the relevant files\n 2. Understand each issue\n 3. Make the fixes\n 4. Type-check after each change\n\n ## Step 3: Full Validation\n\n ```bash\n bun run validate 2>&1 || (bun run type-check && bun run lint && bun run test && bun run format:check)\n ```\n\n ## Step 4: Commit Fixes\n\n ```bash\n git add -A\n git commit -m \"$(cat <<'EOF'\n fix: address review feedback\n\n Changes:\n - {fix 1}\n - {fix 2}\n EOF\n )\"\n ```\n\n ## Step 5: Report\n\n ```\n ## Feedback Addressed\n\n Changes made:\n - {fix 1}\n - {fix 2}\n\n Validation: {PASS / FAIL with details}\n\n Review again, or say \"approved\" to finalize.\n ```\n until: VALIDATED\n max_iterations: 10\n interactive: true\n gate_message: |\n Test the implementation yourself and review the code changes.\n Provide specific feedback on what needs fixing, or say \"approved\" to finalize.\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 5: FINALIZE — Push, create PR, generate summary\n # ═══════════════════════════════════════════════════════════════\n\n - id: finalize\n model: sonnet\n depends_on: [fix-feedback]\n context: fresh\n prompt: |\n # PIV Loop — Finalize\n\n The implementation has been approved. 
Push changes and create a PR.\n\n ---\n\n ## Step 1: Push Changes\n\n ```bash\n git push -u origin HEAD 2>&1 || echo \"WARNING: Push failed — verify remote authentication and branch state before creating the PR.\"\n ```\n\n ## Step 2: Generate Summary\n\n ```bash\n git log --oneline --no-merges $(git merge-base HEAD $BASE_BRANCH)..HEAD\n git diff --stat $(git merge-base HEAD $BASE_BRANCH)..HEAD\n ```\n\n Read `$ARTIFACTS_DIR/plan.md` and `$ARTIFACTS_DIR/progress.txt` for context.\n\n ## Step 3: Create PR (if not already created)\n\n ```bash\n gh pr view HEAD --json url 2>/dev/null || echo \"NO_PR\"\n ```\n\n If no PR exists:\n\n ```bash\n cat .github/pull_request_template.md 2>/dev/null || echo \"NO_TEMPLATE\"\n ```\n\n Create with `gh pr create --draft --base $BASE_BRANCH`:\n - Title from the plan's feature name\n - Body summarizing the implementation\n - Use a HEREDOC for the body\n\n ## Step 4: Output Summary\n\n ```\n ===============================================================\n PIV LOOP — COMPLETE\n ===============================================================\n\n Feature: {from plan}\n Plan: {plan file path}\n Branch: {branch name}\n PR: {url}\n\n -- Tasks Completed -----------------------------------------------\n {list from progress tracking}\n\n -- Commits -------------------------------------------------------\n {git log output}\n\n -- Files Changed -------------------------------------------------\n {git diff --stat output}\n\n -- Validation ----------------------------------------------------\n All checks passed.\n ===============================================================\n ```\n", + "archon-plan-to-pr": "name: archon-plan-to-pr\ndescription: |\n Use when: You have an existing implementation plan and want to execute it end-to-end.\n Input: Path to a plan file ($ARTIFACTS_DIR/plan.md or .agents/plans/*.md)\n Output: PR ready for merge with comprehensive review completed\n\n Full workflow:\n 1. 
Read plan, setup branch, extract scope limits\n 2. Verify plan research is still valid\n 3. Implement all tasks with type-checking\n 4. Run full validation suite\n 5. Create PR with template, mark ready\n 6. Comprehensive code review (5 parallel agents with scope limit awareness)\n 7. Synthesize and fix review findings\n 8. Final summary with decision matrix -> GitHub comment + follow-up recommendations\n\n NOT for: Creating plans from scratch (use archon-idea-to-pr), quick fixes, standalone reviews.\n\nnodes:\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 1: SETUP\n # ═══════════════════════════════════════════════════════════════════\n\n - id: plan-setup\n command: archon-plan-setup\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 2: CONFIRM PLAN\n # ═══════════════════════════════════════════════════════════════════\n\n - id: confirm-plan\n command: archon-confirm-plan\n depends_on: [plan-setup]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 3: IMPLEMENT\n # ═══════════════════════════════════════════════════════════════════\n\n - id: implement-tasks\n command: archon-implement-tasks\n depends_on: [confirm-plan]\n context: fresh\n model: opus[1m]\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 4: VALIDATE\n # ═══════════════════════════════════════════════════════════════════\n\n - id: validate\n command: archon-validate\n depends_on: [implement-tasks]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 5: FINALIZE PR\n # ═══════════════════════════════════════════════════════════════════\n\n - id: finalize-pr\n command: archon-finalize-pr\n depends_on: [validate]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 6: CODE REVIEW\n # 
═══════════════════════════════════════════════════════════════════\n\n - id: review-scope\n command: archon-pr-review-scope\n depends_on: [finalize-pr]\n context: fresh\n\n - id: sync\n command: archon-sync-pr-with-main\n depends_on: [review-scope]\n context: fresh\n\n - id: code-review\n command: archon-code-review-agent\n depends_on: [sync]\n context: fresh\n\n - id: error-handling\n command: archon-error-handling-agent\n depends_on: [sync]\n context: fresh\n\n - id: test-coverage\n command: archon-test-coverage-agent\n depends_on: [sync]\n context: fresh\n\n - id: comment-quality\n command: archon-comment-quality-agent\n depends_on: [sync]\n context: fresh\n\n - id: docs-impact\n command: archon-docs-impact-agent\n depends_on: [sync]\n context: fresh\n\n - id: synthesize\n command: archon-synthesize-review\n depends_on: [code-review, error-handling, test-coverage, comment-quality, docs-impact]\n trigger_rule: one_success\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 7: FIX REVIEW ISSUES\n # ═══════════════════════════════════════════════════════════════════\n\n - id: implement-fixes\n command: archon-implement-review-fixes\n depends_on: [synthesize]\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════════\n # PHASE 8: FINAL SUMMARY & FOLLOW-UP\n # ═══════════════════════════════════════════════════════════════════\n\n - id: workflow-summary\n command: archon-workflow-summary\n depends_on: [implement-fixes]\n context: fresh\n", + "archon-ralph-dag": "name: archon-ralph-dag\ndescription: |\n Use when: User wants to run a Ralph implementation loop.\n Triggers: \"ralph\", \"run ralph\", \"ralph dag\", \"run ralph dag\".\n\n DAG workflow that:\n 1. Detects input: existing prd.json, existing prd.md (needs stories), or raw idea\n 2. Generates prd.md + prd.json if needed (explores codebase, breaks into stories)\n 3. 
Validates PRD files, reads project context, installs dependencies\n 4. Runs Ralph loop (fresh context per iteration) implementing one story per iteration\n 5. Creates PR and reports completion\n\n Accepts: An idea description, a path to an existing prd.md, or a directory with prd.md + prd.json\n\nprovider: claude\n\nnodes:\n # ═══════════════════════════════════════════════════════════════\n # NODE 1: DETECT INPUT\n # Determines what the user provided: full PRD, partial PRD, or idea\n # ═══════════════════════════════════════════════════════════════\n\n - id: detect-input\n model: haiku\n prompt: |\n # Detect Ralph Input\n\n **User input**: $ARGUMENTS\n\n Determine what the user provided and prepare the PRD directory. Follow these steps exactly:\n\n ## Step 1: Detect worktree\n\n Run `git worktree list --porcelain` to check if you're in a worktree.\n If you see multiple entries, you ARE in a worktree. The first entry (the one without \"branch\" pointing to your current branch) is the **main repo root**. Save it — you'll need it to find files.\n\n ## Step 2: Classify the input\n\n Look at the user input above. It's one of three things:\n\n **Case A — Ralph directory path** (contains `.archon/ralph/`):\n Extract the directory. Check if both `prd.json` and `prd.md` exist there (try locally first, then in the main repo root if in a worktree).\n\n **Case B — File path** (ends in `.md`):\n This is an external PRD file. Find it:\n 1. Try the path as-is (relative to cwd)\n 2. Try it as an absolute path\n 3. 
If in a worktree, try it relative to the **main repo root** from Step 1\n Once found, read the file to confirm it's a PRD.\n\n **Case C — Free text**:\n Not a file path — it's a feature idea.\n\n ## Step 3: Auto-discover existing ralph PRDs\n\n If the input didn't point to a specific path, check if `.archon/ralph/` contains any `prd.json` files:\n ```bash\n find .archon/ralph -name \"prd.json\" -type f 2>/dev/null\n ```\n\n ## Step 4: Take action based on classification\n\n **If Case A and both files exist** → output `ready` (no further action needed)\n\n **If Case B (external PRD found)**:\n 1. Derive a kebab-case slug from the PRD filename or title (e.g., `workflow-lifecycle-overhaul`)\n 2. Create the ralph directory: `mkdir -p .archon/ralph/{slug}`\n 3. Copy the PRD content to `.archon/ralph/{slug}/prd.md`\n 4. Output `external_prd` with the new prd_dir\n\n **If Case C or auto-discovered ralph dir has prd.md but no prd.json** → output `needs_generation`\n\n ## Output\n\n Your final output MUST be exactly one JSON object:\n ```json\n {\"input_type\": \"ready|external_prd|needs_generation\", \"prd_dir\": \".archon/ralph/{slug}\"}\n ```\n output_format:\n type: object\n properties:\n input_type:\n type: string\n enum: [ready, external_prd, needs_generation]\n prd_dir:\n type: string\n required: [input_type, prd_dir]\n\n # ═══════════════════════════════════════════════════════════════\n # NODE 2: GENERATE PRD\n # Scenario 1: User has an idea → generate prd.md + prd.json\n # Scenario 2: User has prd.md → generate prd.json with stories\n # Skipped if prd.json already exists\n # ═══════════════════════════════════════════════════════════════\n\n - id: generate-prd\n depends_on: [detect-input]\n when: \"$detect-input.output.input_type != 'ready'\"\n command: archon-ralph-generate\n context: fresh\n\n # ═══════════════════════════════════════════════════════════════\n # NODE 3: VALIDATE & SETUP\n # Finds PRD directory, reads all state files, installs deps,\n # verifies 
the environment is ready for implementation.\n # ═══════════════════════════════════════════════════════════════\n\n - id: validate-prd\n depends_on: [detect-input, generate-prd]\n trigger_rule: one_success\n bash: |\n set -e\n\n # ── 1. Find PRD directory (passed from detect-input) ──────\n PRD_DIR=$detect-input.output.prd_dir\n\n # If detect-input didn't know the PRD dir (generated from scratch), discover it\n if [ -z \"$PRD_DIR\" ] || [ ! -f \"$PRD_DIR/prd.json\" ]; then\n FOUND=$(find .archon/ralph -name \"prd.json\" -type f 2>/dev/null | head -1)\n if [ -n \"$FOUND\" ]; then\n PRD_DIR=$(dirname \"$FOUND\")\n fi\n fi\n\n if [ -z \"$PRD_DIR\" ] || [ ! -f \"$PRD_DIR/prd.json\" ]; then\n echo \"ERROR: No prd.json found after generation step.\"\n echo \"Check the generate-prd node output for errors.\"\n exit 1\n fi\n\n if [ ! -f \"$PRD_DIR/prd.md\" ]; then\n echo \"ERROR: prd.md not found in $PRD_DIR\"\n exit 1\n fi\n\n # ── 2. Install dependencies (worktrees lack node_modules) ──\n if [ -f \"bun.lock\" ] || [ -f \"bun.lockb\" ]; then\n echo \"Installing dependencies (bun)...\"\n bun install --frozen-lockfile 2>&1 | tail -3\n elif [ -f \"package-lock.json\" ]; then\n echo \"Installing dependencies (npm)...\"\n npm ci 2>&1 | tail -3\n elif [ -f \"yarn.lock\" ]; then\n echo \"Installing dependencies (yarn)...\"\n yarn install --frozen-lockfile 2>&1 | tail -3\n elif [ -f \"pnpm-lock.yaml\" ]; then\n echo \"Installing dependencies (pnpm)...\"\n pnpm install --frozen-lockfile 2>&1 | tail -3\n fi\n\n # ── 3. Git state ──────────────────────────────────────────\n echo \"BRANCH=$(git branch --show-current)\"\n echo \"GIT_ROOT=$(git rev-parse --show-toplevel)\"\n\n # ── 4. 
Output PRD context ─────────────────────────────────\n echo \"PRD_DIR=$PRD_DIR\"\n echo \"=== PRD_JSON_START ===\"\n cat \"$PRD_DIR/prd.json\"\n echo \"\"\n echo \"=== PRD_JSON_END ===\"\n echo \"=== PRD_MD_START ===\"\n cat \"$PRD_DIR/prd.md\"\n echo \"\"\n echo \"=== PRD_MD_END ===\"\n echo \"=== PROGRESS_START ===\"\n if [ -f \"$PRD_DIR/progress.txt\" ]; then\n cat \"$PRD_DIR/progress.txt\"\n else\n echo \"(no progress yet)\"\n fi\n echo \"\"\n echo \"=== PROGRESS_END ===\"\n\n # ── 5. Summary ────────────────────────────────────────────\n TOTAL=$(grep -c '\"passes\"' \"$PRD_DIR/prd.json\" || true)\n DONE=$(grep -c '\"passes\": true' \"$PRD_DIR/prd.json\" || true)\n TOTAL=${TOTAL:-0}\n DONE=${DONE:-0}\n echo \"STORIES_TOTAL=$TOTAL\"\n echo \"STORIES_DONE=$DONE\"\n echo \"STORIES_REMAINING=$(( TOTAL - DONE ))\"\n\n # ═══════════════════════════════════════════════════════════════\n # NODE 4: RALPH IMPLEMENTATION LOOP\n # Fresh context each iteration. Reads PRD state from disk.\n # One story per iteration. Validates before committing.\n # ═══════════════════════════════════════════════════════════════\n\n - id: implement\n depends_on: [validate-prd]\n idle_timeout: 600000\n model: opus[1m]\n loop:\n prompt: |\n # Ralph Agent — Autonomous Story Implementation\n\n You are an autonomous coding agent in a FRESH session — you have no memory of previous iterations.\n Your job: Read state from disk, implement ONE story, validate, commit, update tracking, exit.\n\n **Golden Rule**: If validation fails, fix it before committing. Never commit broken code. Never skip validation.\n\n ---\n\n ## Phase 0: CONTEXT — Load Project State\n\n The upstream setup node produced this context:\n\n $validate-prd.output\n\n **User message**: $USER_MESSAGE\n\n ---\n\n ### 0.1 Parse PRD Directory\n\n Extract the `PRD_DIR=...` line from the context above. 
This is the directory containing your PRD files.\n Store this path — use it for ALL file operations below.\n\n ### 0.2 Read Current State (from disk, not from context above)\n\n The context above is a snapshot from before the loop started. Previous iterations may have changed files.\n **You MUST re-read from disk to get the current state:**\n\n 1. **Read `{prd-dir}/progress.txt`** — your only link to previous iterations\n - Check the `## Codebase Patterns` section FIRST for learnings from prior iterations\n - Check recent entries for gotchas to avoid\n 2. **Read `{prd-dir}/prd.json`** — the source of truth for story completion state\n 3. **Read `{prd-dir}/prd.md`** — full requirements, technical patterns, acceptance criteria\n\n ### 0.3 Read Project Rules\n\n ```bash\n cat CLAUDE.md\n ```\n\n Note all coding standards, patterns, and rules. Follow them exactly.\n\n **PHASE_0_CHECKPOINT:**\n - [ ] PRD directory identified\n - [ ] progress.txt read (or noted as absent)\n - [ ] prd.json read — know which stories pass/fail\n - [ ] prd.md read — understand requirements\n - [ ] CLAUDE.md rules noted\n\n ---\n\n ## Phase 1: SELECT — Pick Next Story\n\n ### 1.1 Find Eligible Story\n\n From `prd.json`, find the **highest priority** story where:\n - `passes` is `false`\n - ALL stories in `dependsOn` have `passes: true`\n\n **If ALL stories have `passes: true`** → Skip to Phase 6 (Completion).\n\n **If no eligible stories exist** (all remaining are blocked):\n ```\n BLOCKED: No eligible stories. Remaining stories and their blockers:\n - {story-id}: blocked by {dep-id} (passes: false)\n ```\n End normally. 
The loop will terminate on max_iterations.\n\n ### 1.2 Announce Selection\n\n ```\n ── Story Selected ──────────────────────────────────\n ID: {story-id}\n Title: {story-title}\n Priority: {priority}\n Dependencies: {deps or \"none\"}\n\n Acceptance Criteria:\n - {criterion 1}\n - {criterion 2}\n - ...\n ────────────────────────────────────────────────────\n ```\n\n After announcing the selected story, emit the story started event:\n ```bash\n bun run cli workflow event emit --run-id $WORKFLOW_ID --type ralph_story_started --data '{\"story_id\":\"{story-id}\",\"title\":\"{story-title}\"}' || true\n ```\n\n **PHASE_1_CHECKPOINT:**\n - [ ] Eligible story found (or all complete / all blocked)\n - [ ] Acceptance criteria understood\n - [ ] Dependencies verified as complete\n\n ---\n\n ## Phase 2: IMPLEMENT — Code the Story\n\n ### 2.1 Explore Before Coding\n\n Before writing any code:\n 1. Read all files you plan to modify — understand current state\n 2. Check `## Codebase Patterns` in progress.txt for discovered patterns\n 3. Look for similar implementations in the codebase to mirror\n 4. Read the `technicalNotes` field from the story in prd.json\n\n ### 2.2 Implementation Rules\n\n **DO:**\n - Implement ONLY the selected story — one story per iteration\n - Follow existing code patterns exactly (naming, structure, imports, error handling)\n - Match the project's coding standards from CLAUDE.md\n - Write or update tests as required by acceptance criteria\n - Keep changes minimal and focused\n\n **DON'T:**\n - Refactor unrelated code\n - Add improvements not in the acceptance criteria\n - Change formatting of lines you didn't modify\n - Install new dependencies without justification from prd.md\n - Touch files unrelated to this story\n - Over-engineer — do the simplest thing that satisfies the criteria\n\n ### 2.3 Verify Types After Each File\n\n After modifying each file, run:\n ```bash\n bun run type-check\n ```\n\n **If types fail:**\n 1. 
Read the error carefully\n 2. Fix the type issue in your code\n 3. Re-run type-check\n 4. Do NOT proceed to the next file until types pass\n\n **PHASE_2_CHECKPOINT:**\n - [ ] Only the selected story was implemented\n - [ ] Types compile after each file change\n - [ ] Tests written/updated as needed\n - [ ] No unrelated changes\n\n ---\n\n ## Phase 3: VALIDATE — Full Verification\n\n ### 3.1 Static Analysis\n\n ```bash\n bun run type-check && bun run lint\n ```\n\n **Must pass with zero errors and zero warnings.**\n\n **If lint fails:**\n 1. Run `bun run lint:fix` for auto-fixable issues\n 2. Manually fix remaining issues\n 3. Re-run lint\n 4. Proceed only when clean\n\n ### 3.2 Tests\n\n ```bash\n bun run test\n ```\n\n **All tests must pass.**\n\n **If tests fail:**\n 1. Read the failure output\n 2. Determine: bug in your implementation or pre-existing failure?\n 3. If your bug → fix the implementation (not the test)\n 4. If pre-existing → note it but don't fix unrelated tests\n 5. Re-run tests\n 6. Repeat until green\n\n ### 3.3 Format Check\n\n ```bash\n bun run format:check\n ```\n\n **If formatting fails:**\n ```bash\n bun run format\n ```\n\n ### 3.4 Verify Acceptance Criteria\n\n Go through EACH acceptance criterion from the story:\n - Is it satisfied by your implementation?\n - Can you verify it (read the code, run a command, check a file)?\n\n If a criterion is NOT met, go back to Phase 2 and fix it.\n\n **PHASE_3_CHECKPOINT:**\n - [ ] Type-check passes\n - [ ] Lint passes (0 errors, 0 warnings)\n - [ ] All tests pass\n - [ ] Format is clean\n - [ ] Every acceptance criterion verified\n\n ---\n\n ## Phase 4: COMMIT — Save Changes\n\n ### 4.1 Review Staged Changes\n\n ```bash\n git add -A\n git status\n git diff --cached --stat\n ```\n\n Verify only expected files are staged. 
If unexpected files appear, investigate before committing.\n\n ### 4.2 Write Commit Message\n\n ```bash\n git commit -m \"$(cat <<'EOF'\n feat: {story-title}\n\n Implements {story-id} from PRD.\n\n Changes:\n - {change 1}\n - {change 2}\n - {change 3}\n EOF\n )\"\n ```\n\n **Commit message rules:**\n - Prefix: `feat:` for features, `fix:` for bugs, `refactor:` for refactors\n - Title: the story title (not the PRD name)\n - Body: list the actual changes made\n - Do NOT include AI attribution\n\n **PHASE_4_CHECKPOINT:**\n - [ ] Only expected files committed\n - [ ] Commit message is clear and accurate\n - [ ] Working directory is clean after commit\n\n ---\n\n ## Phase 5: TRACK — Update Progress Files\n\n ### 5.1 Update prd.json\n\n Set `passes: true` and add a note for the completed story:\n\n ```json\n {\n \"id\": \"{story-id}\",\n \"passes\": true,\n \"notes\": \"Implemented in iteration {N}. Files: {list}.\"\n }\n ```\n\n After updating prd.json, emit the story completed event:\n ```bash\n bun run cli workflow event emit --run-id $WORKFLOW_ID --type ralph_story_completed --data '{\"story_id\":\"{story-id}\",\"title\":\"{story-title}\"}' || true\n ```\n\n ### 5.2 Update progress.txt\n\n **Append** to `{prd-dir}/progress.txt`:\n\n ```\n ## {ISO Date} — {story-id}: {story-title}\n\n **Status**: PASSED\n **Files changed**:\n - {file1} — {what changed}\n - {file2} — {what changed}\n\n **Acceptance criteria verified**:\n - [x] {criterion 1}\n - [x] {criterion 2}\n\n **Learnings**:\n - {Any pattern discovered}\n - {Any gotcha encountered}\n - {Any deviation from expected approach}\n\n ---\n ```\n\n ### 5.3 Update Codebase Patterns (if applicable)\n\n If you discovered a **reusable pattern** that future iterations should know about, **prepend** it to the `## Codebase Patterns` section at the TOP of progress.txt.\n\n Format:\n ```\n ## Codebase Patterns\n\n ### {Pattern Name}\n - **Where**: `{file:lines}`\n - **Pattern**: {description}\n - **Example**: `{code snippet}`\n 
```\n\n If the `## Codebase Patterns` section doesn't exist yet, create it at the top of the file.\n\n **PHASE_5_CHECKPOINT:**\n - [ ] prd.json updated with `passes: true`\n - [ ] progress.txt appended with iteration details\n - [ ] Codebase patterns updated (if applicable)\n\n ---\n\n ## Phase 6: COMPLETE — Check All Stories\n\n ### 6.1 Re-read prd.json\n\n ```bash\n cat {prd-dir}/prd.json\n ```\n\n Count stories where `passes: false`.\n\n ### 6.2 If ALL Stories Pass\n\n 1. **Push the branch:**\n ```bash\n git push -u origin HEAD\n ```\n\n 2. **Read the PR template:**\n Look for a PR template in the repo — check `.github/pull_request_template.md`, `.github/PULL_REQUEST_TEMPLATE.md`, and `docs/pull_request_template.md`. Read whichever one exists.\n\n If a template was found, fill in **every section** using the context from this implementation. Don't skip sections or leave placeholders — fill them honestly based on the actual changes (summary, architecture, validation evidence, security, compatibility, rollback, etc.).\n\n If no template was found, write a summary with: problem, what changed, stories table, and validation evidence.\n\n 3. **Create a draft PR** using `gh pr create --draft --base $BASE_BRANCH --title \"feat: {PRD feature name}\"` with the filled-in template as the body. Use a HEREDOC for the body.\n\n 4. 
**Output completion signal:**\n ```\n COMPLETE\n ```\n\n ### 6.3 If Stories Remain\n\n Report status and end normally:\n ```\n ── Iteration Complete ──────────────────────────────\n Story completed: {story-id} — {story-title}\n Stories remaining: {count}\n Next eligible: {next-story-id} — {next-story-title}\n ────────────────────────────────────────────────────\n ```\n\n The loop engine will start the next iteration with a fresh context.\n\n ---\n\n ## Handling Edge Cases\n\n ### Validation fails repeatedly\n - If type-check or tests fail 3+ times on the same error, step back\n - Re-read the acceptance criteria — you may be misunderstanding the requirement\n - Check if the story is too large (needs breaking down)\n - Note the blocker in progress.txt and end the iteration\n\n ### Story is too large for one iteration\n - Implement the minimum viable subset that satisfies the most critical acceptance criteria\n - Set `passes: true` only if ALL criteria are met\n - If you can't meet all criteria, leave `passes: false` and note what's done in progress.txt\n - The next iteration will pick it up and continue\n\n ### Pre-existing test failures\n - If tests were failing BEFORE your changes, note them but don't fix unrelated code\n - Run only the test files related to your changes if the full suite has pre-existing issues\n - Document pre-existing failures in progress.txt\n\n ### Dependency install fails\n - Check if `bun.lock` or equivalent exists\n - Try `bun install` without `--frozen-lockfile`\n - Note the issue in progress.txt\n\n ### Git state is dirty at iteration start\n - This shouldn't happen (fresh worktree), but if it does:\n - Run `git status` to understand what's dirty\n - If it's leftover from a failed previous iteration, commit or stash\n - Never discard changes silently\n\n ### Blocked stories — all remaining have unmet dependencies\n - Report the dependency chain in your output\n - Check if a dependency was incorrectly left as `passes: false`\n - If a 
dependency should be `passes: true` (the code exists and works), fix prd.json\n - Otherwise, end the iteration — the loop will exhaust max_iterations\n\n ---\n\n ## File Format Reference\n\n ### prd.json Schema\n\n ```json\n {\n \"feature\": \"Feature Name\",\n \"issueNumber\": 123,\n \"userStories\": [\n {\n \"id\": \"US-001\",\n \"title\": \"Short title\",\n \"description\": \"As a..., I want..., so that...\",\n \"acceptanceCriteria\": [\"criterion 1\", \"criterion 2\"],\n \"technicalNotes\": \"Implementation hints\",\n \"dependsOn\": [\"US-000\"],\n \"priority\": 1,\n \"passes\": false,\n \"notes\": \"\"\n }\n ]\n }\n ```\n\n ### progress.txt Format\n\n ```\n ## Codebase Patterns\n\n ### {Pattern Name}\n - Where: `file:lines`\n - Pattern: description\n - Example: `code`\n\n ---\n\n ## {Date} — {story-id}: {title}\n\n **Status**: PASSED\n **Files changed**: ...\n **Acceptance criteria verified**: ...\n **Learnings**: ...\n\n ---\n ```\n\n ---\n\n ## Success Criteria\n\n - **ONE_STORY**: Exactly one story implemented per iteration\n - **VALIDATED**: Type-check + lint + tests + format all pass before commit\n - **COMMITTED**: Changes committed with clear message\n - **TRACKED**: prd.json and progress.txt updated accurately\n - **PATTERNS_SHARED**: Discovered patterns added to progress.txt for future iterations\n - **NO_SCOPE_CREEP**: No unrelated changes, no refactoring, no \"improvements\"\n until: COMPLETE\n max_iterations: 15\n fresh_context: true\n\n # ═══════════════════════════════════════════════════════════════\n # NODE 5: COMPLETION REPORT\n # Reads final state and produces a summary.\n # ═══════════════════════════════════════════════════════════════\n\n - id: report\n depends_on: [implement]\n prompt: |\n # Completion Report\n\n The Ralph implementation loop has finished. 
Generate a completion report.\n\n ## Context\n\n **Loop output (last iteration):**\n\n $implement.output\n\n **Setup context:**\n\n $validate-prd.output\n\n ---\n\n ## Instructions\n\n ### 1. Read Final State\n\n Extract the `PRD_DIR=...` from the setup context above.\n Read the CURRENT files from disk:\n\n ```bash\n cat {prd-dir}/prd.json\n cat {prd-dir}/progress.txt\n ```\n\n ### 2. Gather Git Info\n\n ```bash\n git log --oneline --no-merges $(git merge-base HEAD $BASE_BRANCH)..HEAD\n git diff --stat $(git merge-base HEAD $BASE_BRANCH)..HEAD\n ```\n\n ### 3. Check PR Status\n\n ```bash\n gh pr view HEAD --json url,number,state 2>/dev/null || echo \"No PR found\"\n ```\n\n ### 4. Generate Report\n\n Output this format:\n\n ```\n ═══════════════════════════════════════════════════════\n RALPH DAG — COMPLETION REPORT\n ═══════════════════════════════════════════════════════\n\n Feature: {feature name from prd.json}\n PRD: {prd-dir}\n Branch: {branch name}\n PR: {url or \"not created\"}\n\n ── Stories ─────────────────────────────────────────\n\n | ID | Title | Status |\n |----|-------|--------|\n {for each story from prd.json}\n\n Total: {N}/{M} stories passing\n\n ── Commits ─────────────────────────────────────────\n\n {git log output}\n\n ── Files Changed ─────────────────────────────────\n\n {git diff --stat output}\n\n ── Patterns Discovered ─────────────────────────────\n\n {from ## Codebase Patterns in progress.txt, or \"None\"}\n\n ═══════════════════════════════════════════════════════\n ```\n\n Keep it factual. 
No commentary — just the data.\n", + "archon-refactor-safely": "name: archon-refactor-safely\ndescription: |\n Use when: User wants to refactor code safely with continuous validation and behavior preservation.\n Triggers: \"refactor\", \"refactor safely\", \"split this file\", \"extract module\", \"break up\",\n \"decompose\", \"safe refactor\", \"split file\", \"extract into modules\".\n Does: Scans refactoring scope -> analyzes impact (read-only) -> plans ordered task list ->\n executes with type-check hooks after every edit -> validates full suite ->\n verifies behavior preservation (read-only) -> creates PR with before/after comparison.\n NOT for: Bug fixes (use archon-fix-github-issue), feature development (use archon-feature-development),\n general architecture sweeps (use archon-architect), PR reviews.\n\n Key safety features:\n - Analysis and verification nodes are read-only (denied_tools: [Write, Edit, Bash])\n - PreToolUse hooks check if each edit is in the plan\n - PostToolUse hooks force type-check after every file change\n - Behavior verification confirms no logic changes after refactoring\n\nprovider: claude\n\nnodes:\n # ═══════════════════════════════════════════════════════════════\n # PHASE 1: SCAN — Find files matching the refactoring target\n # ═══════════════════════════════════════════════════════════════\n\n - id: scan-scope\n bash: |\n echo \"=== REFACTORING TARGET ===\"\n echo \"User request: $ARGUMENTS\"\n echo \"\"\n\n echo \"=== FILE SIZE ANALYSIS (source files by size) ===\"\n find . -name '*.ts' -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*' -not -name '*.test.ts' -not -name '*.d.ts' \\\n -exec wc -l {} + 2>/dev/null | sort -rn | head -30\n echo \"\"\n\n echo \"=== FILES OVER 500 LINES ===\"\n find . 
-name '*.ts' -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*' -not -name '*.test.ts' -not -name '*.d.ts' \\\n -exec sh -c 'lines=$(wc -l < \"$1\"); if [ \"$lines\" -gt 500 ]; then echo \"$lines $1\"; fi' _ {} \\; 2>/dev/null | sort -rn\n echo \"\"\n\n echo \"=== FUNCTION COUNT PER FILE (top 20) ===\"\n for f in $(find . -name '*.ts' -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*' -not -name '*.test.ts' -not -name '*.d.ts'); do\n count=$(grep -cE '^\\s*(export\\s+)?(async\\s+)?function\\s|=>\\s*\\{' \"$f\" 2>/dev/null) || count=0\n if [ \"$count\" -gt 5 ]; then\n echo \"$count functions: $f\"\n fi\n done | sort -rn | head -20\n echo \"\"\n\n echo \"=== EXPORT ANALYSIS (files with many exports) ===\"\n for f in $(find . -name '*.ts' -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*' -not -name '*.test.ts' -not -name '*.d.ts'); do\n count=$(grep -c \"^export \" \"$f\" 2>/dev/null) || count=0\n if [ \"$count\" -gt 5 ]; then\n echo \"$count exports: $f\"\n fi\n done | sort -rn | head -20\n timeout: 60000\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 2: ANALYZE IMPACT — Read-only deep analysis\n # Maps call sites, identifies risk areas, understands dependencies\n # ═══════════════════════════════════════════════════════════════\n\n - id: analyze-impact\n prompt: |\n You are a senior software engineer analyzing code for a safe refactoring.\n\n ## Refactoring Request\n\n $ARGUMENTS\n\n ## Codebase Scan Results\n\n $scan-scope.output\n\n ## Instructions\n\n 1. Identify the PRIMARY file(s) targeted for refactoring based on the user's request\n and the scan results above\n 2. Read each target file thoroughly — understand every function, type, and export\n 3. 
For each target file, map ALL call sites:\n - Use Grep to find every import of the target file across the codebase\n - Track which specific exports are used and where\n - Note any dynamic imports or re-exports through index files\n 4. Identify risk areas:\n - Functions with complex internal dependencies (shared closures, module-level state)\n - Circular dependencies between functions in the file\n - Any module-level side effects (top-level `const`, initialization code)\n - Exports that are part of the public API vs internal-only\n 5. Check for existing tests:\n - Find test files for the target module(s)\n - Note what's tested and what isn't\n\n ## Output\n\n Write a thorough impact analysis to `$ARTIFACTS_DIR/impact-analysis.md` with:\n\n ### Target Files\n - File path, line count, function count\n - List of all exported symbols with brief descriptions\n\n ### Dependency Map\n - Which files import from the target (with specific imports used)\n - Which files the target imports from\n\n ### Risk Assessment\n - Module-level state or side effects\n - Complex internal dependencies between functions\n - Public API surface that must be preserved exactly\n\n ### Test Coverage\n - Existing test files and what they cover\n - Critical paths that must remain tested\n\n ### Recommended Decomposition Strategy\n - Suggested module boundaries (which functions group together)\n - Rationale for each grouping (cohesion, shared dependencies)\n depends_on: [scan-scope]\n context: fresh\n denied_tools: [Write, Edit, Bash]\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 3: PLAN REFACTOR — Ordered task list with rollback strategy\n # Read-only: produces the plan, does not execute it\n # ═══════════════════════════════════════════════════════════════\n\n - id: plan-refactor\n prompt: |\n You are planning a safe refactoring. 
You must produce a precise, ordered plan\n that another agent will follow literally.\n\n ## Impact Analysis\n\n $analyze-impact.output\n\n ## Refactoring Goal\n\n $ARGUMENTS\n\n ## Principles\n\n - **Behavior preservation**: The refactoring must NOT change any behavior — only structure\n - **Incremental**: Each step must leave the codebase in a compilable state\n - **Reversible**: Each step can be independently reverted\n - **No mixed concerns**: Do not combine refactoring with bug fixes or improvements\n - **Preserve public API**: All existing exports must remain accessible from the same import paths\n - **Maximum file size**: Target 500 lines or fewer per file after refactoring\n\n ## Instructions\n\n 1. Read the impact analysis from `$ARTIFACTS_DIR/impact-analysis.md`\n 2. Read the target file(s) to understand the current structure\n 3. Design the decomposition:\n - Group related functions into cohesive modules\n - Identify shared utilities, types, and constants\n - Plan the new file structure with descriptive names\n 4. 
Write an ordered task list where each task is:\n - Independent and leaves code compilable after completion\n - Specific about what to extract and where\n - Clear about import updates needed\n\n ## Output\n\n Write the plan to `$ARTIFACTS_DIR/refactor-plan.md` with:\n\n ### File Structure (Before)\n ```\n [current structure with line counts]\n ```\n\n ### File Structure (After)\n ```\n [planned structure with estimated line counts]\n ```\n\n ### Ordered Tasks\n\n For each task:\n ```\n ## Task N: [brief description]\n\n **Action**: CREATE | EXTRACT | UPDATE\n **Source**: [source file]\n **Target**: [target file]\n **What moves**:\n - function functionName (lines X-Y)\n - type TypeName (lines X-Y)\n\n **Import updates needed**:\n - [file]: change import from [old] to [new]\n\n **Rollback**: [how to undo this specific step]\n ```\n\n ### Validation Commands\n - Type check: `bun run type-check`\n - Lint: `bun run lint`\n - Tests: `bun run test`\n - Format: `bun run format:check`\n depends_on: [analyze-impact]\n context: fresh\n denied_tools: [Write, Edit, Bash]\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 4: EXECUTE REFACTOR — Implements the plan with guardrails\n # Hooks enforce type-check after every edit and plan adherence\n # ═══════════════════════════════════════════════════════════════\n\n - id: execute-refactor\n model: opus[1m]\n prompt: |\n You are executing a refactoring plan with strict safety guardrails.\n\n ## Plan\n\n Read the full plan from `$ARTIFACTS_DIR/refactor-plan.md` — follow it LITERALLY.\n\n ## Rules\n\n - **Follow the plan exactly** — do not add extra improvements or cleanups\n - **One task at a time** — complete each task fully before starting the next\n - **Type-check after every file change** — you'll be prompted to do this after each edit\n - **Preserve all behavior** — refactoring means moving code, not changing it\n - **Preserve the public API** — if the original file exported something, it must still 
be\n importable from the same path (use re-exports in the original file if needed)\n - **Update all import sites** — every file that imported from the original must be updated\n - **Commit after each logical task** — one commit per plan task with a clear message\n\n ## Process for Each Task\n\n 1. Read the plan task\n 2. Read the source file to understand current state\n 3. Create the new file (if extracting) with the functions/types being moved\n 4. Update the source file to remove the moved code and add imports from the new file\n 5. Update the original file's exports to re-export from the new module (API preservation)\n 6. Use Grep to find and update ALL import sites across the codebase\n 7. Run `bun run type-check` to verify (you'll be reminded by hooks)\n 8. Commit: `git add -A && git commit -m \"refactor: [task description]\"`\n 9. Move to next task\n\n ## Handling Problems\n\n - If type-check fails after a change: fix it immediately before proceeding\n - If a task is more complex than planned: complete it anyway, note the deviation\n - If you discover the plan missed an import site: update it and note it\n - NEVER skip a task — complete them in order\n depends_on: [plan-refactor]\n context: fresh\n hooks:\n PreToolUse:\n - matcher: \"Write|Edit\"\n response:\n hookSpecificOutput:\n hookEventName: PreToolUse\n additionalContext: >\n Before modifying this file: Is this file in your refactoring plan\n ($ARTIFACTS_DIR/refactor-plan.md)? If it's not a planned target file\n AND not a file that imports from the target, explain why you're touching it.\n Unplanned changes increase risk.\n PostToolUse:\n - matcher: \"Write|Edit\"\n response:\n systemMessage: >\n You just modified a file. STOP and do these things NOW before making any\n other changes:\n 1. Run `bun run type-check` to verify the change compiles\n 2. If type-check fails, fix the error immediately\n 3. 
Verify you preserved the exact same behavior — no logic changes, only structural moves\n Only proceed to the next change after type-check passes.\n - matcher: \"Bash\"\n response:\n hookSpecificOutput:\n hookEventName: PostToolUse\n additionalContext: >\n Check the exit code. If type-check or any validation failed, fix the issue\n before continuing. Do not accumulate broken state.\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 5: VALIDATE — Full test suite (bash, no AI escape hatch)\n # ═══════════════════════════════════════════════════════════════\n\n - id: validate\n bash: |\n echo \"=== TYPE CHECK ===\"\n bun run type-check 2>&1\n TC_EXIT=$?\n\n echo \"\"\n echo \"=== LINT ===\"\n bun run lint 2>&1\n LINT_EXIT=$?\n\n echo \"\"\n echo \"=== FORMAT CHECK ===\"\n bun run format:check 2>&1\n FMT_EXIT=$?\n\n echo \"\"\n echo \"=== TESTS ===\"\n bun run test 2>&1\n TEST_EXIT=$?\n\n echo \"\"\n echo \"=== FILE SIZE CHECK ===\"\n echo \"Files still over 500 lines:\"\n find . 
-name '*.ts' -not -path '*/node_modules/*' -not -path '*/.git/*' -not -path '*/dist/*' -not -name '*.test.ts' -not -name '*.d.ts' \\\n -exec sh -c 'lines=$(wc -l < \"$1\"); if [ \"$lines\" -gt 500 ]; then echo \"$lines $1\"; fi' _ {} \\; 2>/dev/null | sort -rn\n echo \"\"\n\n echo \"=== RESULTS ===\"\n echo \"Type check: $([ $TC_EXIT -eq 0 ] && echo 'PASS' || echo 'FAIL')\"\n echo \"Lint: $([ $LINT_EXIT -eq 0 ] && echo 'PASS' || echo 'FAIL')\"\n echo \"Format: $([ $FMT_EXIT -eq 0 ] && echo 'PASS' || echo 'FAIL')\"\n echo \"Tests: $([ $TEST_EXIT -eq 0 ] && echo 'PASS' || echo 'FAIL')\"\n\n if [ $TC_EXIT -eq 0 ] && [ $LINT_EXIT -eq 0 ] && [ $FMT_EXIT -eq 0 ] && [ $TEST_EXIT -eq 0 ]; then\n echo \"VALIDATION_STATUS: PASS\"\n else\n echo \"VALIDATION_STATUS: FAIL\"\n fi\n depends_on: [execute-refactor]\n timeout: 300000\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 6: FIX VALIDATION FAILURES (if any)\n # Only does real work if validation failed\n # ═══════════════════════════════════════════════════════════════\n\n - id: fix-failures\n prompt: |\n Review the validation output below.\n\n ## Validation Output\n\n $validate.output\n\n ## Instructions\n\n If the output ends with \"VALIDATION_STATUS: PASS\", respond with\n \"All checks passed — no fixes needed.\" and stop.\n\n If there are failures:\n\n 1. Read the validation failures carefully\n 2. Fix ONLY what's broken — do not make additional improvements\n 3. If a fix requires changing behavior (not just fixing a type/lint error),\n revert the original change instead\n 4. Run the specific failing check after each fix to confirm it passes\n 5. 
After all fixes, run the full validation suite: `bun run validate`\n\n If there are files still over 500 lines, note them but do NOT attempt further\n splitting in this node — that would require a new plan cycle.\n depends_on: [validate]\n context: fresh\n hooks:\n PostToolUse:\n - matcher: \"Write|Edit\"\n response:\n systemMessage: >\n You just made a fix. Run the specific failing validation check NOW\n to verify your fix works. Do not batch fixes — verify each one.\n PreToolUse:\n - matcher: \"Write|Edit\"\n response:\n hookSpecificOutput:\n hookEventName: PreToolUse\n additionalContext: >\n You are fixing validation failures only. Do not make any changes\n beyond what's needed to pass the failing checks. If in doubt, revert\n the original change that caused the failure.\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 7: VERIFY BEHAVIOR — Read-only confirmation\n # Ensures the refactoring preserved behavior by tracing call paths\n # ═══════════════════════════════════════════════════════════════\n\n - id: verify-behavior\n prompt: |\n You are a code reviewer verifying that a refactoring preserved exact behavior.\n You can ONLY read files — you cannot make any changes.\n\n ## Refactoring Plan\n\n Read the plan from `$ARTIFACTS_DIR/refactor-plan.md` to understand what was intended.\n\n ## Instructions\n\n 1. Use Grep and Glob to find all files in the new module locations listed in\n the plan, then Read each one. (Note: Bash is denied in this read-only node,\n so use Grep/Glob/Read to discover changes instead of git commands.)\n 2. For each new file created by the refactoring:\n - Verify the extracted functions match the originals exactly (no logic changes)\n - Check that all types and interfaces are preserved\n 3. For the original file(s):\n - Verify re-exports exist for all symbols that were previously exported\n - Confirm no function bodies were changed (only moved)\n 4. 
For all import sites updated:\n - Verify imports resolve to the correct new locations\n - Check that no import was missed\n 5. Verify the public API is preserved:\n - Any code that imported from the original file should still work unchanged\n - Re-exports in the original file should cover all moved symbols\n\n ## Output\n\n Write your verification report to `$ARTIFACTS_DIR/behavior-verification.md`:\n\n ### Verdict: PASS | FAIL\n\n ### Functions Verified\n | Function | Original Location | New Location | Behavior Preserved |\n |----------|------------------|--------------|-------------------|\n | funcName | file.ts:42 | new-file.ts:10 | Yes/No |\n\n ### Public API Check\n - [ ] All original exports still accessible from original import path\n - [ ] Re-exports correctly configured\n\n ### Import Sites Updated\n - [ ] All N import sites verified\n\n ### Issues Found\n [List any behavior changes detected, or \"None — refactoring is behavior-preserving\"]\n depends_on: [fix-failures]\n context: fresh\n denied_tools: [Write, Edit, Bash]\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 8: CREATE PR — Detailed description with before/after\n # ═══════════════════════════════════════════════════════════════\n\n - id: create-pr\n prompt: |\n Create a pull request for the refactoring.\n\n ## Context\n\n - **Refactoring goal**: $ARGUMENTS\n - **Impact analysis**: Read `$ARTIFACTS_DIR/impact-analysis.md`\n - **Refactoring plan**: Read `$ARTIFACTS_DIR/refactor-plan.md`\n - **Validation**: $validate.output\n - **Behavior verification**: Read `$ARTIFACTS_DIR/behavior-verification.md`\n\n ## Instructions\n\n 1. Stage all changes and create a final commit if there are uncommitted changes\n 2. Push the branch: `git push -u origin HEAD`\n 3. Check if a PR already exists: `gh pr list --head $(git branch --show-current)`\n 4. Create the PR with the format below\n 5. 
Save the PR URL to `$ARTIFACTS_DIR/.pr-url`\n\n ## PR Format\n\n - **Title**: `refactor: [concise description]` (under 70 chars)\n - **Body**:\n\n ```markdown\n ## Refactoring: [goal]\n\n ### Motivation\n\n [Why this refactoring was needed — file sizes, complexity, maintainability]\n\n ### Before\n\n ```\n [Original file structure with line counts from the plan]\n ```\n\n ### After\n\n ```\n [New file structure with line counts]\n ```\n\n ### Changes\n\n [For each new module: what was extracted and why it's a cohesive unit]\n\n ### Safety\n\n - [x] Type check passes\n - [x] Lint passes\n - [x] Tests pass (all existing tests still green)\n - [x] Public API preserved (re-exports maintain backward compatibility)\n - [x] Behavior verification passed (read-only audit confirmed no logic changes)\n - [x] Each task committed separately for easy review/revert\n\n ### Review Guide\n\n Each commit represents one extraction step. Review commits individually for easiest review.\n All commits are behavior-preserving structural moves.\n ```\n depends_on: [verify-behavior]\n context: fresh\n hooks:\n PreToolUse:\n - matcher: \"Write|Edit\"\n response:\n hookSpecificOutput:\n hookEventName: PreToolUse\n permissionDecision: deny\n permissionDecisionReason: \"PR creation node — do not modify source files. Use only git and gh commands.\"\n PostToolUse:\n - matcher: \"Bash\"\n response:\n hookSpecificOutput:\n hookEventName: PostToolUse\n additionalContext: >\n Verify this command succeeded. 
If git push or gh pr create failed,\n read the error message carefully before retrying.\n", "archon-remotion-generate": "name: archon-remotion-generate\ndescription: |\n Use when: User wants to generate or modify a Remotion video composition using AI.\n Triggers: \"create a video\", \"generate video\", \"remotion\", \"make an animation\",\n \"video about\", \"animate\".\n Does: AI writes Remotion React code -> renders preview stills -> renders full video ->\n summarizes the output.\n Requires: A Remotion project in the working directory (src/index.ts, src/Root.tsx).\n Optional: Install the remotion-best-practices skill for higher quality output:\n npx skills add remotion-dev/skills\n\nnodes:\n # ── Layer 0: Check project structure ──────────────────────────────────\n - id: check-project\n bash: |\n if [ ! -f \"src/index.ts\" ] || [ ! -f \"src/Root.tsx\" ]; then\n echo \"ERROR: Not a Remotion project. Expected src/index.ts and src/Root.tsx.\"\n echo \"Run 'npx create-video@latest' first, then run this workflow from that directory.\"\n exit 1\n fi\n echo \"Remotion project detected.\"\n npx remotion compositions src/index.ts 2>&1 | tail -5\n echo \"\"\n echo \"PROJECT_READY\"\n timeout: 60000\n\n # ── Layer 1: Generate composition code ────────────────────────────────\n - id: generate\n prompt: |\n You are working in a Remotion video project. 
The project root is the current directory.\n\n Find and read the existing composition files to understand the project structure.\n Look in src/ for Root.tsx and any composition components.\n\n Now create or modify the composition to match this request:\n\n $ARGUMENTS\n\n Rules:\n - Use useCurrentFrame() and interpolate()/spring() for ALL animations\n - Never use CSS transitions, Math.random(), setTimeout, or Date.now()\n - Use AbsoluteFill for layout, Sequence for scene timing\n - Use the <Img> component from 'remotion' (not native <img>) for images\n - Keep dimensions 1920x1080 at 30 fps unless the user specifies otherwise\n - Update the Zod schema and defaultProps in Root.tsx if you change props\n - Use even numbers for width/height (required for MP4)\n - Always clamp interpolations: extrapolateLeft: 'clamp', extrapolateRight: 'clamp'\n\n After writing the code, read it back to verify it looks correct.\n depends_on: [check-project]\n skills:\n - remotion-best-practices\n allowed_tools:\n - Read\n - Write\n - Edit\n - Glob\n\n # ── Layer 2: Render preview stills ────────────────────────────────────\n - id: render-preview\n bash: |\n mkdir -p out\n COMP_ID=$(npx remotion compositions src/index.ts 2>&1 | grep -E '^\\S' | head -1 | awk '{print $1}')\n if [ -z \"$COMP_ID\" ]; then\n echo \"RENDER_FAILED: Could not detect composition ID\"\n exit 1\n fi\n echo \"Composition: $COMP_ID\"\n\n DURATION=$(npx remotion compositions src/index.ts 2>&1 | grep -E '^\\S' | head -1 | awk '{print $4}')\n MID_FRAME=$(( ${DURATION:-150} / 2 ))\n LATE_FRAME=$(( ${DURATION:-150} * 3 / 4 ))\n\n echo \"Rendering preview stills at frames 1, $MID_FRAME, $LATE_FRAME...\"\n npx remotion still src/index.ts \"$COMP_ID\" out/preview-early.png --frame=1 2>&1 | tail -2\n npx remotion still src/index.ts \"$COMP_ID\" out/preview-mid.png --frame=$MID_FRAME 2>&1 | tail -2\n npx remotion still src/index.ts \"$COMP_ID\" out/preview-late.png --frame=$LATE_FRAME 2>&1 | tail -2\n RESULT=$?\n\n if [ $RESULT -eq 0 ]; 
then\n echo \"\"\n echo \"RENDER_SUCCESS\"\n ls -la out/preview-*.png\n else\n echo \"RENDER_FAILED\"\n fi\n depends_on: [generate]\n timeout: 120000\n\n # ── Layer 3: Render full video ────────────────────────────────────────\n - id: render-video\n bash: |\n COMP_ID=$(npx remotion compositions src/index.ts 2>&1 | grep -E '^\\S' | head -1 | awk '{print $1}')\n echo \"Rendering full video: $COMP_ID\"\n npx remotion render src/index.ts \"$COMP_ID\" out/video.mp4 --codec=h264 --crf=18 2>&1 | tail -10\n RESULT=$?\n\n if [ $RESULT -eq 0 ]; then\n echo \"\"\n echo \"VIDEO_RENDER_SUCCESS\"\n ls -la out/video.mp4\n else\n echo \"VIDEO_RENDER_FAILED\"\n fi\n depends_on: [render-preview]\n timeout: 300000\n\n # ── Layer 4: Summary ──────────────────────────────────────────────────\n - id: summary\n prompt: |\n A Remotion video was generated and rendered.\n\n Original request: $ARGUMENTS\n\n Preview render: $render-preview.output\n Video render: $render-video.output\n\n Read the generated composition code and the preview stills (out/preview-early.png,\n out/preview-mid.png, out/preview-late.png) to verify the output.\n\n Summarize:\n 1. What the video contains (based on code and stills)\n 2. Whether the renders succeeded\n 3. 
Where the output file is (out/video.mp4)\n depends_on: [render-video]\n allowed_tools:\n - Read\n model: haiku\n", "archon-resolve-conflicts": "name: archon-resolve-conflicts\ndescription: |\n Use when: PR has merge conflicts that need resolution.\n Triggers: \"resolve conflicts\", \"fix merge conflicts\", \"rebase this PR\", \"resolve this\",\n \"fix conflicts\", \"merge conflicts\", \"rebase and fix\".\n Does: Fetches latest base branch -> analyzes conflicts -> auto-resolves simple conflicts ->\n presents options for complex conflicts -> commits and pushes resolution.\n NOT for: PRs without conflicts, general rebasing without conflicts, squashing commits.\n\n This workflow helps resolve merge conflicts by analyzing the conflicting changes,\n automatically resolving where intent is clear, and presenting options for complex conflicts.\n\nnodes:\n - id: resolve\n command: archon-resolve-merge-conflicts\n", "archon-smart-pr-review": "name: archon-smart-pr-review\ndescription: |\n Use when: User wants a smart, efficient PR review that adapts to PR complexity.\n Triggers: \"smart review\", \"review this PR\", \"review PR #123\", \"efficient review\",\n \"smart PR review\", \"quick review\".\n Does: Gathers PR scope -> classifies complexity -> routes to only relevant review agents ->\n synthesizes findings -> auto-fixes CRITICAL/HIGH issues.\n NOT for: When you explicitly want ALL review agents (use archon-comprehensive-pr-review instead).\n\n Unlike the comprehensive review, this workflow classifies the PR first and only runs\n the review agents that are relevant. A 3-line typo fix skips test-coverage and docs-impact.\n\nnodes:\n - id: scope\n command: archon-pr-review-scope\n\n - id: sync\n command: archon-sync-pr-with-main\n depends_on: [scope]\n\n - id: classify\n prompt: |\n You are a PR complexity classifier. 
Analyze the PR scope below and determine\n which review agents should run.\n\n ## PR Scope\n $scope.output\n\n ## Rules\n - **Code review**: Always run unless the diff is empty or only touches non-code files\n (e.g. README-only, config-only, or .yaml-only changes).\n - **Error handling**: Run if the diff touches code with try/catch, error handling,\n async/await, or adds new failure paths.\n - **Test coverage**: Run if the diff touches source code (not just tests, docs, or config).\n - **Comment quality**: Run if the diff adds or modifies comments, docstrings, JSDoc,\n or significant documentation within code files.\n - **Docs impact**: Run if the diff adds/removes/renames public APIs, commands, CLI flags,\n environment variables, or user-facing features.\n\n Classify the PR complexity:\n - **trivial**: Typo fixes, formatting, single-line changes, version bumps\n - **small**: 1-3 files, straightforward logic, no architectural changes\n - **medium**: 4-10 files, moderate logic changes, some cross-cutting concerns\n - **large**: 10+ files, architectural changes, new subsystems, complex refactors\n\n Provide your reasoning for each decision.\n depends_on: [scope]\n model: haiku\n allowed_tools: []\n output_format:\n type: object\n properties:\n run_code_review:\n type: string\n enum: [\"true\", \"false\"]\n run_error_handling:\n type: string\n enum: [\"true\", \"false\"]\n run_test_coverage:\n type: string\n enum: [\"true\", \"false\"]\n run_comment_quality:\n type: string\n enum: [\"true\", \"false\"]\n run_docs_impact:\n type: string\n enum: [\"true\", \"false\"]\n complexity:\n type: string\n enum: [\"trivial\", \"small\", \"medium\", \"large\"]\n reasoning:\n type: string\n required:\n - run_code_review\n - run_error_handling\n - run_test_coverage\n - run_comment_quality\n - run_docs_impact\n - complexity\n - reasoning\n\n - id: code-review\n command: archon-code-review-agent\n depends_on: [classify, sync]\n when: \"$classify.output.run_code_review == 'true'\"\n\n 
- id: error-handling\n command: archon-error-handling-agent\n depends_on: [classify, sync]\n when: \"$classify.output.run_error_handling == 'true'\"\n\n - id: test-coverage\n command: archon-test-coverage-agent\n depends_on: [classify, sync]\n when: \"$classify.output.run_test_coverage == 'true'\"\n\n - id: comment-quality\n command: archon-comment-quality-agent\n depends_on: [classify, sync]\n when: \"$classify.output.run_comment_quality == 'true'\"\n\n - id: docs-impact\n command: archon-docs-impact-agent\n depends_on: [classify, sync]\n when: \"$classify.output.run_docs_impact == 'true'\"\n\n - id: synthesize\n command: archon-synthesize-review\n depends_on: [code-review, error-handling, test-coverage, comment-quality, docs-impact]\n trigger_rule: one_success\n\n - id: implement-fixes\n command: archon-implement-review-fixes\n depends_on: [synthesize]\n\n # Optional: push notification when review completes.\n # To enable, create .archon/mcp/ntfy.json — see docs/mcp-servers.md\n - id: check-ntfy\n bash: \"test -f .archon/mcp/ntfy.json && echo 'true' || echo 'false'\"\n depends_on: [implement-fixes]\n\n - id: notify\n depends_on: [check-ntfy, synthesize, implement-fixes]\n when: \"$check-ntfy.output == 'true'\"\n trigger_rule: all_success\n mcp: .archon/mcp/ntfy.json\n allowed_tools: []\n prompt: |\n Send a push notification summarizing the PR review results.\n\n Review synthesis:\n $synthesize.output\n\n Fix results:\n $implement-fixes.output\n\n Send with:\n - title: \"PR Review Complete\"\n - message: 1-2 sentence summary — verdict and issue count. 
Short enough for a lock screen.\n - priority: 3 if ready to merge, 4 if needs fixes, 5 if critical issues remain\n", "archon-test-loop-dag": "name: archon-test-loop-dag\ndescription: |\n Use when: User explicitly says \"test-loop-dag\" or \"run test-loop-dag\".\n IMPORTANT: This is a DAG workflow with a loop node that iterates until completion.\n NOT for: General testing questions or debugging.\n Does: Initializes a counter, iterates until it reaches 3, then reports completion.\n\nnodes:\n - id: setup\n bash: |\n echo \"0\" > .archon/test-loop-dag-counter.txt\n echo \"Counter initialized to 0\"\n\n - id: loop-counter\n depends_on: [setup]\n loop:\n prompt: |\n You are testing the loop node functionality within a DAG workflow.\n\n ## Your Task\n\n 1. Read the file `.archon/test-loop-dag-counter.txt`\n 2. Parse the current counter value\n 3. Increment it by 1\n 4. Write the new value back to the file\n 5. Report the current iteration\n\n ## User Intent\n\n $USER_MESSAGE\n\n ## Completion Criteria\n\n - If the counter reaches 3 or higher, output: COMPLETE\n - Otherwise, just report your progress and end normally\n\n ## Important\n\n Be concise. Just do the task and report the counter value.\n until: COMPLETE\n max_iterations: 5\n fresh_context: false\n\n - id: report\n depends_on: [loop-counter]\n prompt: |\n The loop counter test has completed. The loop node output was:\n\n $loop-counter.output\n\n Read `.archon/test-loop-dag-counter.txt` and confirm the final counter value.\n Report: \"Test loop DAG completed successfully. 
Final counter: {value}\"\n", "archon-validate-pr": "name: archon-validate-pr\ndescription: |\n Use when: User wants a thorough PR validation that tests both main (bug present) and feature branch (bug fixed).\n Triggers: \"validate PR\", \"validate pr #123\", \"test this PR\", \"verify PR\", \"full PR validation\",\n \"validate pull request\", \"test PR end-to-end\".\n Does: Fetches PR info -> finds free ports -> parallel code review (main vs feature) ->\n E2E test on main (reproduce bug) -> E2E test on feature (verify fix) -> final verdict report.\n NOT for: Quick code-only reviews (use archon-smart-pr-review), fixing issues, general exploration.\n\n This workflow is designed for running in parallel — each instance finds its own free ports\n to avoid conflicts. Produces artifacts in $ARTIFACTS_DIR/ and posts a validation report.\n\nprovider: claude\nmodel: opus\n\nnodes:\n # ═══════════════════════════════════════════════════════════════\n # PHASE 1: SETUP — Fetch PR info and allocate ports\n # ═══════════════════════════════════════════════════════════════\n\n - id: fetch-pr\n bash: |\n # Extract PR number from arguments\n PR_NUMBER=$(echo \"$ARGUMENTS\" | grep -oE '/pull/[0-9]+' | grep -oE '[0-9]+' | head -1)\n # Fallback: extract first number if no URL path found (e.g., \"validate PR 42\")\n if [ -z \"$PR_NUMBER\" ]; then\n PR_NUMBER=$(echo \"$ARGUMENTS\" | grep -oE '[0-9]+' | head -1)\n fi\n if [ -z \"$PR_NUMBER\" ]; then\n # Try getting PR from current branch\n PR_NUMBER=$(gh pr view --json number -q '.number' 2>/dev/null)\n fi\n\n if [ -z \"$PR_NUMBER\" ]; then\n echo \"ERROR: No PR number found in arguments: $ARGUMENTS\"\n exit 1\n fi\n\n echo \"$PR_NUMBER\" > \"$ARTIFACTS_DIR/.pr-number\"\n\n # Fetch full PR details\n gh pr view \"$PR_NUMBER\" --json number,title,body,url,headRefName,baseRefName,files,additions,deletions,changedFiles,state,author,labels,isDraft\n\n - id: find-ports\n bash: |\n # Use Bun to let the OS pick truly free ports (cross-platform: 
Linux, macOS, Windows)\n BACKEND_PORT=$(bun -e \"const s = Bun.serve({port: 0, fetch: () => new Response('')}); console.log(s.port); s.stop()\")\n FRONTEND_PORT=$(bun -e \"const s = Bun.serve({port: 0, fetch: () => new Response('')}); console.log(s.port); s.stop()\")\n\n echo \"$BACKEND_PORT\" > \"$ARTIFACTS_DIR/.backend-port\"\n echo \"$FRONTEND_PORT\" > \"$ARTIFACTS_DIR/.frontend-port\"\n\n echo \"BACKEND_PORT=$BACKEND_PORT\"\n echo \"FRONTEND_PORT=$FRONTEND_PORT\"\n\n - id: resolve-paths\n bash: |\n # Resolve canonical repo path (main branch) vs worktree path (feature branch)\n CANONICAL_REPO=$(git rev-parse --path-format=absolute --git-common-dir 2>/dev/null | sed 's|/\\.git$||')\n WORKTREE_PATH=$(pwd)\n FEATURE_BRANCH=$(git branch --show-current)\n\n # Get PR branch info\n PR_NUMBER=$(cat \"$ARTIFACTS_DIR/.pr-number\")\n PR_HEAD=$(gh pr view \"$PR_NUMBER\" --json headRefName -q '.headRefName')\n PR_BASE=$(gh pr view \"$PR_NUMBER\" --json baseRefName -q '.baseRefName')\n\n echo \"$CANONICAL_REPO\" > \"$ARTIFACTS_DIR/.canonical-repo\"\n echo \"$WORKTREE_PATH\" > \"$ARTIFACTS_DIR/.worktree-path\"\n echo \"$FEATURE_BRANCH\" > \"$ARTIFACTS_DIR/.feature-branch\"\n echo \"$PR_HEAD\" > \"$ARTIFACTS_DIR/.pr-head\"\n echo \"$PR_BASE\" > \"$ARTIFACTS_DIR/.pr-base\"\n\n echo \"CANONICAL_REPO=$CANONICAL_REPO\"\n echo \"WORKTREE_PATH=$WORKTREE_PATH\"\n echo \"FEATURE_BRANCH=$FEATURE_BRANCH\"\n echo \"PR_HEAD=$PR_HEAD\"\n echo \"PR_BASE=$PR_BASE\"\n depends_on: [fetch-pr]\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 2: CODE REVIEW — Parallel analysis of main vs feature\n # ═══════════════════════════════════════════════════════════════\n\n - id: code-review-main\n command: archon-validate-pr-code-review-main\n depends_on: [fetch-pr, resolve-paths]\n context: fresh\n\n - id: code-review-feature\n command: archon-validate-pr-code-review-feature\n depends_on: [fetch-pr, resolve-paths, code-review-main]\n context: fresh\n\n # 
═══════════════════════════════════════════════════════════════\n # PHASE 3: E2E TESTING — Sequential (after code reviews finish)\n # ═══════════════════════════════════════════════════════════════\n\n - id: classify-testability\n prompt: |\n You are a PR testability classifier. Determine whether this PR's changes can be\n validated via browser E2E testing, or if it requires code-review-only validation.\n\n ## PR Details\n\n $fetch-pr.output\n\n ## Rules\n\n - **e2e_testable**: Changes affect the Web UI (components, hooks, styles, API routes\n that serve the frontend, SSE streaming, layout, user-visible behavior). These can be\n validated by starting Archon and using agent-browser to interact with the UI.\n - **code_review_only**: Changes are purely backend logic, CLI-only, workflow engine,\n database schemas, git operations, build tooling, tests, documentation, or other\n non-UI code. No visual validation possible.\n\n Consider: even if a change is backend, if it affects what the frontend displays\n (e.g., API response format changes, SSE event changes), it IS e2e_testable.\n depends_on: [fetch-pr]\n model: haiku\n allowed_tools: []\n output_format:\n type: object\n properties:\n testable:\n type: string\n enum: [\"e2e_testable\", \"code_review_only\"]\n reasoning:\n type: string\n test_plan:\n type: string\n required: [testable, reasoning, test_plan]\n\n - id: e2e-test-main\n command: archon-validate-pr-e2e-main\n depends_on: [classify-testability, find-ports, resolve-paths, code-review-main, code-review-feature]\n when: \"$classify-testability.output.testable == 'e2e_testable'\"\n context: fresh\n idle_timeout: 1800000\n\n - id: e2e-test-feature\n command: archon-validate-pr-e2e-feature\n depends_on: [e2e-test-main, find-ports, resolve-paths]\n when: \"$classify-testability.output.testable == 'e2e_testable'\"\n context: fresh\n idle_timeout: 1800000\n\n # ═══════════════════════════════════════════════════════════════\n # PHASE 4: FINAL REPORT — Synthesize all 
findings\n # ═══════════════════════════════════════════════════════════════\n\n - id: cleanup-processes\n bash: |\n # Safety net: kill any orphaned processes from E2E testing\n # This runs after E2E nodes complete (or timeout/fail) to prevent process accumulation\n BACKEND_PORT=$(cat \"$ARTIFACTS_DIR/.backend-port\" 2>/dev/null | tr -d '\\n')\n FRONTEND_PORT=$(cat \"$ARTIFACTS_DIR/.frontend-port\" 2>/dev/null | tr -d '\\n')\n\n if [ -z \"$BACKEND_PORT\" ] || [ -z \"$FRONTEND_PORT\" ]; then\n echo \"No port files found — skipping cleanup\"\n exit 0\n fi\n\n echo \"Cleaning up ports $BACKEND_PORT and $FRONTEND_PORT...\"\n\n # Kill by all recorded PID files\n for pidfile in \"$ARTIFACTS_DIR\"/.e2e-*-pid; do\n if [ -f \"$pidfile\" ]; then\n PID=$(cat \"$pidfile\" | tr -d '\\n')\n echo \"Killing PID $PID from $pidfile\"\n kill \"$PID\" 2>/dev/null || taskkill //F //T //PID \"$PID\" 2>/dev/null || true\n fi\n done\n\n # Kill by port (cross-platform fallback)\n for PORT in $BACKEND_PORT $FRONTEND_PORT; do\n fuser -k \"$PORT/tcp\" 2>/dev/null || true\n lsof -ti:\"$PORT\" 2>/dev/null | xargs kill -9 2>/dev/null || true\n netstat -ano 2>/dev/null | grep \":$PORT \" | grep LISTENING | awk '{print $5}' | sort -u | while read pid; do\n taskkill //F //T //PID \"$pid\" 2>/dev/null || true\n done\n done\n\n # pkill fallback: catch processes that escaped PID/port cleanup\n pkill -f \"PORT=$BACKEND_PORT.*bun\" 2>/dev/null || true\n pkill -f \"vite.*port.*$FRONTEND_PORT\" 2>/dev/null || true\n\n # Close this workflow's browser session only (scoped by session ID)\n BROWSER_SESSION=$(cat \"$ARTIFACTS_DIR/.browser-session\" 2>/dev/null | tr -d '\\n')\n if [ -n \"$BROWSER_SESSION\" ]; then\n agent-browser --session \"$BROWSER_SESSION\" close 2>/dev/null || true\n fi\n\n # Remove main E2E worktree if it still exists (safety net)\n CANONICAL_REPO=$(cat \"$ARTIFACTS_DIR/.canonical-repo\" 2>/dev/null | tr -d '\\n')\n MAIN_E2E_PATH=$(cat \"$ARTIFACTS_DIR/.e2e-main-worktree\" 2>/dev/null | tr 
-d '\\n')\n if [ -n \"$MAIN_E2E_PATH\" ] && [ -n \"$CANONICAL_REPO\" ] && [ -d \"$MAIN_E2E_PATH\" ]; then\n echo \"Removing leftover main E2E worktree: $MAIN_E2E_PATH\"\n git -C \"$CANONICAL_REPO\" worktree remove \"$MAIN_E2E_PATH\" --force 2>/dev/null || rm -rf \"$MAIN_E2E_PATH\"\n fi\n\n sleep 1\n echo \"Process cleanup complete\"\n depends_on: [e2e-test-main, e2e-test-feature]\n trigger_rule: all_done\n\n - id: final-report\n command: archon-validate-pr-report\n depends_on: [code-review-main, code-review-feature, e2e-test-main, e2e-test-feature, classify-testability, cleanup-processes]\n trigger_rule: all_done\n context: fresh\n", - "archon-workflow-builder": "name: archon-workflow-builder\ndescription: |\n Use when: User wants to create a new custom workflow for their project.\n Triggers: \"build me a workflow\", \"create a workflow\", \"generate a workflow\",\n \"new workflow\", \"make a workflow for\", \"workflow builder\".\n Does: Scans codebase -> extracts intent (JSON) -> generates YAML -> validates -> saves.\n NOT for: Editing existing workflows or creating non-workflow files.\n\nnodes:\n - id: scan-codebase\n bash: |\n echo \"=== Existing Commands ===\"\n if [ -d \".archon/commands\" ]; then\n find .archon/commands -type f -name \"*.md\" 2>/dev/null | head -30\n else\n echo \"(no .archon/commands/ directory)\"\n fi\n\n echo \"\"\n echo \"=== Existing Workflows ===\"\n if [ -d \".archon/workflows\" ]; then\n find .archon/workflows -type f \\( -name \"*.yaml\" -o -name \"*.yml\" \\) 2>/dev/null | head -30\n else\n echo \"(no .archon/workflows/ directory)\"\n fi\n\n echo \"\"\n echo \"=== Package Info ===\"\n if [ -f \"package.json\" ]; then\n grep -E '\"name\"|\"scripts\"' package.json | head -10\n else\n echo \"(no package.json)\"\n fi\n\n echo \"\"\n echo \"=== Project Context (CLAUDE.md first 50 lines) ===\"\n if [ -f \"CLAUDE.md\" ]; then\n head -50 CLAUDE.md\n else\n echo \"(no CLAUDE.md)\"\n fi\n\n - id: extract-intent\n prompt: |\n You are a 
workflow design classifier. Given a user's description of what they want\n a workflow to do, extract structured intent.\n\n ## User's Request\n $ARGUMENTS\n\n ## Codebase Context\n $scan-codebase.output\n\n ## Instructions\n\n Analyze the user's request and the existing codebase to determine:\n 1. A kebab-case workflow name (e.g., \"lint-and-test\", \"deploy-staging\")\n 2. A description following the Archon pattern (Use when / Triggers / Does / NOT for)\n 3. Trigger phrases the router should match\n 4. A list of proposed nodes with their types and purposes\n 5. Whether this should be a simple DAG or include a loop node\n\n Be specific and concrete. Each proposed node should have a clear type\n (bash, prompt, command, or loop) and a one-line description of what it does.\n model: haiku\n allowed_tools: []\n output_format:\n type: object\n properties:\n workflow_name:\n type: string\n description:\n type: string\n trigger_phrases:\n type: string\n proposed_nodes:\n type: string\n execution_mode:\n type: string\n enum: [\"dag\", \"loop\"]\n required: [workflow_name, description, trigger_phrases, proposed_nodes, execution_mode]\n depends_on: [scan-codebase]\n\n - id: generate-yaml\n prompt: |\n You are an Archon workflow author. 
Generate a complete, valid workflow YAML file\n based on the structured intent provided.\n\n ## Intent\n - **Name**: $extract-intent.output.workflow_name\n - **Description**: $extract-intent.output.description\n - **Trigger Phrases**: $extract-intent.output.trigger_phrases\n - **Proposed Nodes**: $extract-intent.output.proposed_nodes\n - **Execution Mode**: $extract-intent.output.execution_mode\n\n ## Original User Request\n $ARGUMENTS\n\n ## Archon Workflow YAML Schema Reference\n\n A workflow YAML file has this structure:\n\n ```yaml\n name: workflow-name\n description: |\n Use when: ...\n Triggers: ...\n Does: ...\n NOT for: ...\n\n # Optional top-level settings:\n # provider: claude (or codex)\n # model: sonnet (or haiku, opus, etc.)\n # interactive: true (forces foreground execution in web UI)\n\n nodes:\n - id: node-id-kebab-case\n # Choose ONE of: prompt, bash, command, loop\n\n # --- prompt node (AI-executed) ---\n prompt: |\n Instructions for the AI...\n # Optional: model, allowed_tools, denied_tools, output_format, context, idle_timeout\n\n # --- bash node (shell script, no AI, stdout = $.output) ---\n bash: |\n #!/bin/bash\n set -e\n echo \"result\"\n\n # --- command node (references a .archon/commands/ file) ---\n command: command-name\n\n # --- loop node (iterative AI execution) ---\n loop:\n prompt: |\n Instructions repeated each iteration...\n until: COMPLETION_SIGNAL\n max_iterations: 10\n fresh_context: true # optional: reset context each iteration\n\n # Common options for all node types:\n depends_on: [other-node-id] # DAG edges\n when: \"$.output == 'value'\" # conditional execution\n trigger_rule: all_success # all_success | one_success | all_done\n timeout: 120000 # ms, for bash nodes\n ```\n\n ## Variable Reference\n - `$ARGUMENTS` — user's input text\n - `$ARTIFACTS_DIR` — pre-created directory for workflow artifacts\n - `$.output` — stdout from a bash node or AI response from a prompt node\n - `$.output.field` — JSON field from a node with 
output_format\n - `$BASE_BRANCH` — base git branch\n\n ## Rules\n 1. The `name:` field MUST match: $extract-intent.output.workflow_name\n 2. The `description:` MUST follow the \"Use when / Triggers / Does / NOT for\" pattern\n 3. Every node MUST have a unique kebab-case `id`\n 4. Use `depends_on` to define execution order\n 5. Use `bash` nodes for deterministic operations (file checks, git commands, installs)\n 6. Use `prompt` nodes for AI reasoning tasks\n 7. Use `output_format` on prompt nodes when downstream nodes need structured data\n 8. Use `allowed_tools: []` on classification/analysis nodes that don't need tools\n 9. Use `denied_tools: [Edit, Bash]` when a node should only use Write (not edit existing files)\n 10. Prefer `model: haiku` for simple classification tasks to save cost\n\n ## Output\n\n Write the complete workflow YAML to: `$ARTIFACTS_DIR/generated-workflow.yaml`\n\n Use the Write tool. Do NOT use Edit or Bash. The file must be valid YAML and follow\n all the patterns above.\n denied_tools: [Edit, Bash]\n depends_on: [extract-intent]\n\n - id: validate-yaml\n bash: |\n FILE=\"$ARTIFACTS_DIR/generated-workflow.yaml\"\n\n if [ ! -f \"$FILE\" ]; then\n echo \"ERROR: generated-workflow.yaml not found at $FILE\"\n exit 1\n fi\n\n if [ ! -s \"$FILE\" ]; then\n echo \"ERROR: generated-workflow.yaml is empty\"\n exit 1\n fi\n\n if ! grep -q \"^name:\" \"$FILE\"; then\n echo \"ERROR: missing 'name:' field\"\n exit 1\n fi\n\n if ! grep -q \"^nodes:\" \"$FILE\"; then\n echo \"ERROR: missing 'nodes:' field\"\n exit 1\n fi\n\n echo \"VALID\"\n depends_on: [generate-yaml]\n\n - id: save-or-report\n prompt: |\n You are a workflow installer. Save the generated workflow and report to the user.\n\n ## Workflow Details\n - **Name**: $extract-intent.output.workflow_name\n - **Trigger Phrases**: $extract-intent.output.trigger_phrases\n\n ## Instructions\n\n 1. Read the generated workflow from `$ARTIFACTS_DIR/generated-workflow.yaml`\n 2. 
Create the directory `.archon/workflows/` if it doesn't exist (use Bash: `mkdir -p .archon/workflows/`)\n 3. Save the workflow to `.archon/workflows/$extract-intent.output.workflow_name.yaml`\n Use the Write tool to write the file.\n 4. Report to the user:\n - Workflow name and file location\n - Trigger phrases that will invoke it\n - How to run it: `bun run cli workflow run $extract-intent.output.workflow_name \"your input\"`\n - How to test it: `bun run cli validate workflows $extract-intent.output.workflow_name`\n depends_on: [validate-yaml]\n", + "archon-workflow-builder": "name: archon-workflow-builder\ndescription: |\n Use when: User wants to create a new custom workflow for their project.\n Triggers: \"build me a workflow\", \"create a workflow\", \"generate a workflow\",\n \"new workflow\", \"make a workflow for\", \"workflow builder\".\n Does: Scans codebase -> extracts intent (JSON) -> generates YAML -> validates -> saves.\n NOT for: Editing existing workflows or creating non-workflow files.\n\nnodes:\n - id: scan-codebase\n bash: |\n echo \"=== Existing Commands ===\"\n if [ -d \".archon/commands\" ]; then\n find .archon/commands -type f -name \"*.md\" 2>/dev/null | head -30\n else\n echo \"(no .archon/commands/ directory)\"\n fi\n\n echo \"\"\n echo \"=== Existing Workflows ===\"\n if [ -d \".archon/workflows\" ]; then\n find .archon/workflows -type f \\( -name \"*.yaml\" -o -name \"*.yml\" \\) 2>/dev/null | head -30\n else\n echo \"(no .archon/workflows/ directory)\"\n fi\n\n echo \"\"\n echo \"=== Package Info ===\"\n if [ -f \"package.json\" ]; then\n grep -E '\"name\"|\"scripts\"' package.json | head -10\n else\n echo \"(no package.json)\"\n fi\n\n echo \"\"\n echo \"=== Project Context (CLAUDE.md first 50 lines) ===\"\n if [ -f \"CLAUDE.md\" ]; then\n head -50 CLAUDE.md\n else\n echo \"(no CLAUDE.md)\"\n fi\n\n - id: extract-intent\n prompt: |\n You are a workflow design classifier. 
Given a user's description of what they want\n a workflow to do, extract structured intent.\n\n ## User's Request\n $ARGUMENTS\n\n ## Codebase Context\n $scan-codebase.output\n\n ## Instructions\n\n Analyze the user's request and the existing codebase to determine:\n 1. A kebab-case workflow name (e.g., \"lint-and-test\", \"deploy-staging\")\n 2. A description following the Archon pattern (Use when / Triggers / Does / NOT for)\n 3. Trigger phrases the router should match\n 4. A list of proposed nodes with their types and purposes\n 5. Whether this should be a simple DAG or include a loop node\n\n Be specific and concrete. Each proposed node should have a clear type\n (bash, prompt, command, script, loop, or approval) and a one-line\n description of what it does.\n model: haiku\n allowed_tools: []\n output_format:\n type: object\n properties:\n workflow_name:\n type: string\n description:\n type: string\n trigger_phrases:\n type: string\n proposed_nodes:\n type: string\n execution_mode:\n type: string\n enum: [\"dag\", \"loop\"]\n required: [workflow_name, description, trigger_phrases, proposed_nodes, execution_mode]\n depends_on: [scan-codebase]\n\n - id: generate-yaml\n prompt: |\n You are an Archon workflow author. 
Generate a complete, valid workflow YAML file\n based on the structured intent provided.\n\n ## Intent\n - **Name**: $extract-intent.output.workflow_name\n - **Description**: $extract-intent.output.description\n - **Trigger Phrases**: $extract-intent.output.trigger_phrases\n - **Proposed Nodes**: $extract-intent.output.proposed_nodes\n - **Execution Mode**: $extract-intent.output.execution_mode\n\n ## Original User Request\n $ARGUMENTS\n\n ## Archon Workflow YAML Schema Reference\n\n A workflow YAML file has this structure:\n\n ```yaml\n name: workflow-name\n description: |\n Use when: ...\n Triggers: ...\n Does: ...\n NOT for: ...\n\n # Optional top-level settings:\n # provider: claude (or codex)\n # model: sonnet (or haiku, opus, etc.)\n # interactive: true (forces foreground execution in web UI)\n\n nodes:\n - id: node-id-kebab-case\n # Choose ONE of: prompt, bash, command, script, loop, approval\n\n # --- prompt node (AI-executed) ---\n prompt: |\n Instructions for the AI...\n # Optional: model, allowed_tools, denied_tools, output_format, context, idle_timeout\n\n # --- bash node (shell script, no AI, stdout = $.output) ---\n bash: |\n #!/bin/bash\n set -e\n echo \"result\"\n\n # --- command node (references a .archon/commands/ file) ---\n command: command-name\n\n # --- script node (TypeScript via bun, or Python via uv — no AI, stdout = $.output) ---\n # Use for deterministic data transforms the shell would mangle (JSON parsing, etc.)\n script: |\n // JSON is valid JS expression syntax — assign directly (String.raw breaks on backticks)\n const data = $other-node.output;\n console.log(JSON.stringify({ count: data.items.length }));\n runtime: bun # required: 'bun' (.ts/.js) or 'uv' (.py)\n # deps: [requests] # uv only\n # Or reference a named script in .archon/scripts/:\n # script: extract-labels # no extension; bun resolves .ts/.js, uv resolves .py\n\n # --- loop node (iterative AI execution) ---\n loop:\n prompt: |\n Instructions repeated each iteration...\n 
until: COMPLETION_SIGNAL\n max_iterations: 10\n fresh_context: true # optional: reset context each iteration\n\n # --- approval node (human gate — pauses workflow) ---\n approval:\n message: \"Review the plan above. Approve to continue.\"\n # capture_response: true # store reviewer comment as $.output\n\n # Common options for all node types:\n depends_on: [other-node-id] # DAG edges\n when: \"$.output == 'value'\" # conditional execution\n trigger_rule: all_success # all_success | one_success | all_done\n timeout: 120000 # ms, for bash and script nodes\n ```\n\n ## Variable Reference\n - `$ARGUMENTS` — user's input text\n - `$ARTIFACTS_DIR` — pre-created directory for workflow artifacts\n - `$.output` — stdout from a bash/script node or AI response from a prompt node\n - `$.output.field` — JSON field from a node with output_format\n - `$BASE_BRANCH` — base git branch\n\n ## Rules\n 1. The `name:` field MUST match: $extract-intent.output.workflow_name\n 2. The `description:` MUST follow the \"Use when / Triggers / Does / NOT for\" pattern\n 3. Every node MUST have a unique kebab-case `id`\n 4. Use `depends_on` to define execution order\n 5. Use `bash` nodes for deterministic shell operations (file checks, git commands, installs)\n 6. Use `script` nodes for typed data transforms (TypeScript JSON parsing, Python with deps)\n — stdout is captured as output, stderr is forwarded as a warning.\n $nodeId.output is NOT shell-quoted in script bodies.\n - **TypeScript/bun**: assign directly — `const data = $nodeId.output;`\n (JSON is valid JS expression syntax; avoid String.raw — it breaks on backticks)\n - **Python/uv**: use json.loads — `import json; data = json.loads(\"\"\"$nodeId.output\"\"\")`\n Never interpolate into shell syntax.\n 7. Use `prompt` nodes for AI reasoning tasks\n 8. Use `approval` nodes to pause for human review at risky gates (plan→execute boundary, destructive actions)\n 9. 
Use `output_format` on prompt nodes when downstream nodes need structured data\n 10. Use `allowed_tools: []` on classification/analysis nodes that don't need tools\n 11. Use `denied_tools: [Edit, Bash]` when a node should only use Write (not edit existing files)\n 12. Prefer `model: haiku` for simple classification tasks to save cost\n\n ## Output\n\n Write the complete workflow YAML to: `$ARTIFACTS_DIR/generated-workflow.yaml`\n\n Use the Write tool. Do NOT use Edit or Bash. The file must be valid YAML and follow\n all the patterns above.\n denied_tools: [Edit, Bash]\n depends_on: [extract-intent]\n\n - id: validate-yaml\n bash: |\n FILE=\"$ARTIFACTS_DIR/generated-workflow.yaml\"\n\n if [ ! -f \"$FILE\" ]; then\n echo \"ERROR: generated-workflow.yaml not found at $FILE\"\n exit 1\n fi\n\n if [ ! -s \"$FILE\" ]; then\n echo \"ERROR: generated-workflow.yaml is empty\"\n exit 1\n fi\n\n if ! grep -q \"^name:\" \"$FILE\"; then\n echo \"ERROR: missing 'name:' field\"\n exit 1\n fi\n\n if ! grep -q \"^nodes:\" \"$FILE\"; then\n echo \"ERROR: missing 'nodes:' field\"\n exit 1\n fi\n\n echo \"VALID\"\n depends_on: [generate-yaml]\n\n - id: save-or-report\n prompt: |\n You are a workflow installer. Save the generated workflow and report to the user.\n\n ## Workflow Details\n - **Name**: $extract-intent.output.workflow_name\n - **Trigger Phrases**: $extract-intent.output.trigger_phrases\n\n ## Instructions\n\n 1. Read the generated workflow from `$ARTIFACTS_DIR/generated-workflow.yaml`\n 2. Create the directory `.archon/workflows/` if it doesn't exist (use Bash: `mkdir -p .archon/workflows/`)\n 3. Save the workflow to `.archon/workflows/$extract-intent.output.workflow_name.yaml`\n Use the Write tool to write the file.\n 4. 
Report to the user:\n - Workflow name and file location\n - Trigger phrases that will invoke it\n - How to run it: `bun run cli workflow run $extract-intent.output.workflow_name \"your input\"`\n - How to test it: `bun run cli validate workflows $extract-intent.output.workflow_name`\n depends_on: [validate-yaml]\n", }; diff --git a/packages/workflows/src/executor-preamble.test.ts b/packages/workflows/src/executor-preamble.test.ts index 4739770940..a5b16dfb83 100644 --- a/packages/workflows/src/executor-preamble.test.ts +++ b/packages/workflows/src/executor-preamble.test.ts @@ -68,6 +68,14 @@ mock.module('./event-emitter', () => ({ getWorkflowEventEmitter: mock(() => mockEmitter), })); +// --------------------------------------------------------------------------- +// Bootstrap provider registry (executor calls isRegisteredProvider at workflow level) +// --------------------------------------------------------------------------- + +import { registerBuiltinProviders, clearRegistry } from '@archon/providers'; +clearRegistry(); +registerBuiltinProviders(); + // --------------------------------------------------------------------------- // Import after mocks // --------------------------------------------------------------------------- diff --git a/packages/workflows/src/executor-shared.test.ts b/packages/workflows/src/executor-shared.test.ts index 17c93cc605..85d6211a37 100644 --- a/packages/workflows/src/executor-shared.test.ts +++ b/packages/workflows/src/executor-shared.test.ts @@ -252,6 +252,50 @@ describe('substituteWorkflowVariables', () => { ); expect(prompt).toBe('Fix: '); }); + + it('replaces $LOOP_PREV_OUTPUT with the previous iteration output', () => { + const { prompt } = substituteWorkflowVariables( + 'Last pass said:\n$LOOP_PREV_OUTPUT', + 'run-1', + 'msg', + '/tmp', + 'main', + 'docs/', + undefined, + undefined, + undefined, + 'QA failed: 2 type errors in users.ts' + ); + expect(prompt).toBe('Last pass said:\nQA failed: 2 type errors in users.ts'); + }); 
+ + it('clears $LOOP_PREV_OUTPUT when not provided (first iteration)', () => { + const { prompt } = substituteWorkflowVariables( + 'Previous output: $LOOP_PREV_OUTPUT (end)', + 'run-1', + 'msg', + '/tmp', + 'main', + 'docs/' + ); + expect(prompt).toBe('Previous output: (end)'); + }); + + it('does not affect prompts that omit $LOOP_PREV_OUTPUT', () => { + const { prompt } = substituteWorkflowVariables( + 'Plain prompt with no loop variable.', + 'run-1', + 'msg', + '/tmp', + 'main', + 'docs/', + undefined, + undefined, + undefined, + 'unused previous output' + ); + expect(prompt).toBe('Plain prompt with no loop variable.'); + }); }); describe('buildPromptWithContext', () => { diff --git a/packages/workflows/src/executor-shared.ts b/packages/workflows/src/executor-shared.ts index e88700d9cb..ff4d3836de 100644 --- a/packages/workflows/src/executor-shared.ts +++ b/packages/workflows/src/executor-shared.ts @@ -275,6 +275,9 @@ export const CONTEXT_VAR_PATTERN_STR = * - $LOOP_USER_INPUT - User feedback from interactive loop approval. Only populated on the * first iteration of a resumed interactive loop; empty string on all other iterations. * - $REJECTION_REASON - Reviewer feedback from approval node rejection (on_reject prompts only). + * - $LOOP_PREV_OUTPUT - Cleaned output of the previous loop iteration. Empty string on the + * first iteration (no prior output exists). Useful for fresh_context loops that need + * to reference what the previous pass produced or why it failed. * * When issueContext is undefined, context variables are replaced with empty string * to avoid sending literal "$CONTEXT" to the AI. 
@@ -288,7 +291,8 @@ export function substituteWorkflowVariables( docsDir: string, issueContext?: string, loopUserInput?: string, - rejectionReason?: string + rejectionReason?: string, + loopPrevOutput?: string ): { prompt: string; contextSubstituted: boolean } { // Fail fast if the prompt references $BASE_BRANCH but no base branch could be resolved if (!baseBranch && prompt.includes('$BASE_BRANCH')) { @@ -310,7 +314,8 @@ export function substituteWorkflowVariables( .replace(/\$BASE_BRANCH/g, baseBranch) .replace(/\$DOCS_DIR/g, resolvedDocsDir) .replace(/\$LOOP_USER_INPUT/g, loopUserInput ?? '') - .replace(/\$REJECTION_REASON/g, rejectionReason ?? ''); + .replace(/\$REJECTION_REASON/g, rejectionReason ?? '') + .replace(/\$LOOP_PREV_OUTPUT/g, loopPrevOutput ?? ''); // Check if context variables exist (use fresh regex to avoid lastIndex issues) const hasContextVariables = new RegExp(CONTEXT_VAR_PATTERN_STR).test(result); diff --git a/packages/workflows/src/executor.test.ts b/packages/workflows/src/executor.test.ts index 0c8b626d5a..92d9cf5b81 100644 --- a/packages/workflows/src/executor.test.ts +++ b/packages/workflows/src/executor.test.ts @@ -298,6 +298,43 @@ describe('executeWorkflow', () => { expect(sentMessage).toContain('--branch'); }); + it('skips path-lock check when mutates_checkout is false', async () => { + const getActiveSpy = mock(async () => + makeRun({ id: 'other-run', status: 'running' as const }) + ); + const store = makeStore({ getActiveWorkflowRunByPath: getActiveSpy }); + const deps = makeDeps(store); + const result = await executeWorkflow( + deps, + makePlatform(), + 'conv-1', + '/tmp', + makeWorkflow({ mutates_checkout: false }), + 'test message', + 'db-conv-1' + ); + // Guard skipped: spy never called, run succeeds + expect(getActiveSpy).not.toHaveBeenCalled(); + expect(result.workflowRunId).toBe('run-123'); + }); + + it('still enforces path lock when mutates_checkout is true', async () => { + const otherRun = makeRun({ id: 'other-run-456', 
status: 'running' as const }); + const store = makeStore({ getActiveWorkflowRunByPath: mock(async () => otherRun) }); + const deps = makeDeps(store); + const result = await executeWorkflow( + deps, + makePlatform(), + 'conv-1', + '/tmp', + makeWorkflow({ mutates_checkout: true }), + 'test message', + 'db-conv-1' + ); + expect(result.success).toBe(false); + expect(result.error).toContain('already active'); + }); + it('still returns failure when guard self-cancel update throws (best-effort)', async () => { const selfRun = makeRun({ id: 'self-run', status: 'pending' }); const otherRun = makeRun({ id: 'other-run', status: 'running' }); @@ -431,10 +468,11 @@ describe('executeWorkflow', () => { expect(mockExecuteDagWorkflow).toHaveBeenCalledTimes(1); }); - it('infers claude provider when workflow sets a claude model alias', async () => { + it('passes workflow.model through unchanged when workflow.provider is unset', async () => { const store = makeStore(); const deps = makeDeps(store); - // config.assistant defaults to 'claude', model 'sonnet' is a claude alias + // Provider falls back to config.assistant ('claude'); model is forwarded + // verbatim. The SDK is the source of truth for what model strings work. await executeWorkflow( deps, makePlatform(), @@ -447,7 +485,26 @@ describe('executeWorkflow', () => { expect(mockExecuteDagWorkflow).toHaveBeenCalledTimes(1); }); - it('throws when model is incompatible with explicit provider', async () => { + it('passes provider+model through to the SDK without re-routing on model name', async () => { + // Provider is explicit; the model string is forwarded verbatim to + // whichever SDK the resolved provider names. A workflow that sets + // provider:codex with a Claude-looking model gets the request handed + // to the codex SDK as-is — the SDK decides whether to accept it. 
+ const store = makeStore(); + const deps = makeDeps(store); + await executeWorkflow( + deps, + makePlatform(), + 'conv-1', + '/tmp', + makeWorkflow({ provider: 'codex', model: 'sonnet' }), + 'test message', + 'db-conv-1' + ); + expect(mockExecuteDagWorkflow).toHaveBeenCalledTimes(1); + }); + + it('throws when workflow.provider is not a registered provider', async () => { const store = makeStore(); const deps = makeDeps(store); await expect( @@ -456,11 +513,11 @@ describe('executeWorkflow', () => { makePlatform(), 'conv-1', '/tmp', - makeWorkflow({ provider: 'codex', model: 'sonnet' }), + makeWorkflow({ provider: 'claud', model: 'sonnet' }), 'test message', 'db-conv-1' ) - ).rejects.toThrow('not compatible'); + ).rejects.toThrow(/unknown provider 'claud'/); }); }); diff --git a/packages/workflows/src/executor.ts b/packages/workflows/src/executor.ts index 39b75e00c7..77226621bf 100644 --- a/packages/workflows/src/executor.ts +++ b/packages/workflows/src/executor.ts @@ -13,7 +13,7 @@ import { executeDagWorkflow } from './dag-executor'; import { logWorkflowStart, logWorkflowError } from './logger'; import { formatDuration, parseDbTimestamp } from './utils/duration'; import { getWorkflowEventEmitter } from './event-emitter'; -import { inferProviderFromModel, isModelCompatible } from './model-validation'; +import { isRegisteredProvider, getRegisteredProviders } from '@archon/providers'; import { classifyError } from './executor-shared'; /** Lazy-initialized logger (deferred so test mocks can intercept createLogger) */ @@ -276,29 +276,21 @@ export async function executeWorkflow( const docsDir = config.docsPath ?? 'docs/'; - // Resolve provider and model once (used by all nodes) - // When workflow sets a model but not a provider, infer provider from the model. - // e.g. model: sonnet → provider: claude, even if config.assistant is codex. 
- let resolvedProvider: string; - let providerSource: string; - if (workflow.provider) { - resolvedProvider = workflow.provider; - providerSource = 'workflow definition'; - } else if (workflow.model) { - resolvedProvider = inferProviderFromModel(workflow.model, config.assistant); - providerSource = 'inferred from workflow model'; - } else { - resolvedProvider = config.assistant; - providerSource = 'config'; - } - const assistantDefaults = config.assistants[resolvedProvider]; - const resolvedModel = workflow.model ?? (assistantDefaults?.model as string | undefined); - if (!isModelCompatible(resolvedProvider, resolvedModel)) { + // Resolve provider and model once (used by all nodes). + // Provider is explicit: node.provider ?? workflow.provider ?? config.assistant. + // Model strings pass through to the SDK as-is — the SDK validates at request time. + const resolvedProvider: string = workflow.provider ?? config.assistant; + const providerSource = workflow.provider ? 'workflow definition' : 'config'; + if (!isRegisteredProvider(resolvedProvider)) { throw new Error( - `Model "${resolvedModel}" is not compatible with provider "${resolvedProvider}". ` + - 'Update your workflow or config.' + `Workflow '${workflow.name}': unknown provider '${resolvedProvider}'. ` + + `Registered: ${getRegisteredProviders() + .map(p => p.id) + .join(', ')}` ); } + const assistantDefaults = config.assistants[resolvedProvider]; + const resolvedModel = workflow.model ?? (assistantDefaults?.model as string | undefined); getLog().info( { @@ -477,92 +469,97 @@ export async function executeWorkflow( // Path-lock guard: ensure no other workflow run holds this working_path. // + // Skipped when `workflow.mutates_checkout` is false — the author asserts + // that concurrent runs will not race (e.g. all writes are per-run-scoped). 
+ // // Runs after workflowRun is finalized (pre-created, resumed, or freshly // created) so we always have self-ID + started_at for the deterministic // older-wins tiebreaker. The query treats `pending` rows older than 5 min // as orphaned, so leaks from crashed dispatches or resume orphans don't // permanently block the path. - try { - const activeWorkflow = await deps.store.getActiveWorkflowRunByPath(cwd, { - id: workflowRun.id, - startedAt: new Date(parseDbTimestamp(workflowRun.started_at)), - }); - if (activeWorkflow) { - // The lock query found another active row that wins the older-wins - // tiebreaker. Mark our own row terminal so it falls out of the - // active set immediately — without this, our row sits as - // pending/running and blocks the path until the 5-min stale window - // (or never, if we'd already promoted it to running via resume). + if (workflow.mutates_checkout !== false) { + try { + const activeWorkflow = await deps.store.getActiveWorkflowRunByPath(cwd, { + id: workflowRun.id, + startedAt: new Date(parseDbTimestamp(workflowRun.started_at)), + }); + if (activeWorkflow) { + // The lock query found another active row that wins the older-wins + // tiebreaker. Mark our own row terminal so it falls out of the + // active set immediately — without this, our row sits as + // pending/running and blocks the path until the 5-min stale window + // (or never, if we'd already promoted it to running via resume). + await deps.store + .updateWorkflowRun(workflowRun.id, { status: 'cancelled' }) + .catch((cleanupErr: Error) => { + getLog().warn( + { err: cleanupErr, workflowRunId: workflowRun?.id, cwd }, + 'workflow.guard_self_cancel_failed' + ); + }); + + const elapsedMs = Date.now() - parseDbTimestamp(activeWorkflow.started_at); + const duration = formatDuration(elapsedMs); + const shortId = activeWorkflow.id.slice(0, 8); + + // Status-aware copy. 
The lock query returns running, paused, and + // fresh-pending rows — telling the user to "wait for it to finish" + // is wrong for `paused` (waiting on user action via approve/reject). + let stateLine: string; + let actionLines: string; + if (activeWorkflow.status === 'paused') { + stateLine = `paused waiting for user input (${duration} since started, run \`${shortId}\`)`; + actionLines = + `• Approve it: \`/workflow approve ${shortId}\`\n` + + `• Reject it: \`/workflow reject ${shortId}\`\n` + + `• Cancel it: \`/workflow cancel ${shortId}\`\n` + + '• Use a different branch: `--branch `'; + } else { + const verb = activeWorkflow.status === 'pending' ? 'starting' : 'running'; + stateLine = `${verb} ${duration}, run \`${shortId}\``; + actionLines = + '• Wait for it to finish: `/workflow status`\n' + + `• Cancel it: \`/workflow cancel ${shortId}\`\n` + + '• Use a different branch: `--branch `'; + } + await sendCriticalMessage( + platform, + conversationId, + `❌ **This worktree is in use** by \`${activeWorkflow.workflow_name}\` ` + + `(${stateLine}).\n${actionLines}` + ); + return { + success: false, + error: `Workflow already active on this path (${activeWorkflow.status}): ${activeWorkflow.workflow_name}`, + }; + } + } catch (error) { + const err = error as Error; + getLog().error( + { err, conversationId, cwd, pendingRunId: workflowRun.id }, + 'db_active_workflow_check_failed' + ); + // Release the lock token. workflowRun is finalized at this point + // (pre-created or resumed or freshly created) and would otherwise sit + // as pending/running, blocking the path. For pending the 5-min stale + // window would clear it eventually; for a row already promoted to + // running (e.g., resumed), nothing would clear it without manual + // intervention. 
await deps.store .updateWorkflowRun(workflowRun.id, { status: 'cancelled' }) .catch((cleanupErr: Error) => { getLog().warn( - { err: cleanupErr, workflowRunId: workflowRun?.id, cwd }, - 'workflow.guard_self_cancel_failed' + { err: cleanupErr, workflowRunId: workflowRun?.id }, + 'workflow.guard_query_failure_cleanup_failed' ); }); - - const elapsedMs = Date.now() - parseDbTimestamp(activeWorkflow.started_at); - const duration = formatDuration(elapsedMs); - const shortId = activeWorkflow.id.slice(0, 8); - - // Status-aware copy. The lock query returns running, paused, and - // fresh-pending rows — telling the user to "wait for it to finish" - // is wrong for `paused` (waiting on user action via approve/reject). - let stateLine: string; - let actionLines: string; - if (activeWorkflow.status === 'paused') { - stateLine = `paused waiting for user input (${duration} since started, run \`${shortId}\`)`; - actionLines = - `• Approve it: \`/workflow approve ${shortId}\`\n` + - `• Reject it: \`/workflow reject ${shortId}\`\n` + - `• Cancel it: \`/workflow cancel ${shortId}\`\n` + - '• Use a different branch: `--branch `'; - } else { - const verb = activeWorkflow.status === 'pending' ? 'starting' : 'running'; - stateLine = `${verb} ${duration}, run \`${shortId}\``; - actionLines = - '• Wait for it to finish: `/workflow status`\n' + - `• Cancel it: \`/workflow cancel ${shortId}\`\n` + - '• Use a different branch: `--branch `'; - } await sendCriticalMessage( platform, conversationId, - `❌ **This worktree is in use** by \`${activeWorkflow.workflow_name}\` ` + - `(${stateLine}).\n${actionLines}` + '❌ **Workflow blocked**: Unable to verify if another workflow is running (database error). Please try again in a moment.' 
); - return { - success: false, - error: `Workflow already active on this path (${activeWorkflow.status}): ${activeWorkflow.workflow_name}`, - }; + return { success: false, error: 'Database error checking for active workflow' }; } - } catch (error) { - const err = error as Error; - getLog().error( - { err, conversationId, cwd, pendingRunId: workflowRun.id }, - 'db_active_workflow_check_failed' - ); - // Release the lock token. workflowRun is finalized at this point - // (pre-created or resumed or freshly created) and would otherwise sit - // as pending/running, blocking the path. For pending the 5-min stale - // window would clear it eventually; for a row already promoted to - // running (e.g., resumed), nothing would clear it without manual - // intervention. - await deps.store - .updateWorkflowRun(workflowRun.id, { status: 'cancelled' }) - .catch((cleanupErr: Error) => { - getLog().warn( - { err: cleanupErr, workflowRunId: workflowRun?.id }, - 'workflow.guard_query_failure_cleanup_failed' - ); - }); - await sendCriticalMessage( - platform, - conversationId, - '❌ **Workflow blocked**: Unable to verify if another workflow is running (database error). Please try again in a moment.' 
- ); - return { success: false, error: 'Database error checking for active workflow' }; } // Resolve external artifact and log directories diff --git a/packages/workflows/src/loader.test.ts b/packages/workflows/src/loader.test.ts index 127b2690b7..219670f6bf 100644 --- a/packages/workflows/src/loader.test.ts +++ b/packages/workflows/src/loader.test.ts @@ -28,7 +28,7 @@ mock.module('@archon/paths', () => ({ createLogger: mock(() => mockLogger), })); -// Bootstrap provider registry (needed by isModelCompatible in dag-node schema) +// Bootstrap provider registry (needed by isRegisteredProvider checks at load time) import { registerBuiltinProviders, clearRegistry } from '@archon/providers'; clearRegistry(); registerBuiltinProviders(); @@ -120,6 +120,110 @@ describe('Workflow Loader', () => { expect(result.workflows[0].workflow.worktree).toBeUndefined(); }); + it('should parse explicit tags array', async () => { + const workflowDir = join(testDir, '.archon', 'workflows'); + await mkdir(workflowDir, { recursive: true }); + const yaml = `name: review-mr\ndescription: GitLab MR review\ntags: [GitLab, Review]\nnodes:\n - id: n\n prompt: p\n`; + await writeFile(join(workflowDir, 'review-mr.yaml'), yaml); + const result = await discoverWorkflows(testDir, { loadDefaults: false }); + expect(result.workflows[0].workflow.tags).toEqual(['GitLab', 'Review']); + }); + + it('should omit tags when not present', async () => { + const workflowDir = join(testDir, '.archon', 'workflows'); + await mkdir(workflowDir, { recursive: true }); + const yaml = `name: test\ndescription: no tags\nnodes:\n - id: n\n prompt: p\n`; + await writeFile(join(workflowDir, 'test.yaml'), yaml); + const result = await discoverWorkflows(testDir, { loadDefaults: false }); + expect(result.workflows[0].workflow.tags).toBeUndefined(); + }); + + it('should preserve explicit empty tags array (suppresses inference)', async () => { + const workflowDir = join(testDir, '.archon', 'workflows'); + await mkdir(workflowDir, 
{ recursive: true }); + const yaml = `name: test\ndescription: no tags wanted\ntags: []\nnodes:\n - id: n\n prompt: p\n`; + await writeFile(join(workflowDir, 'test.yaml'), yaml); + const result = await discoverWorkflows(testDir, { loadDefaults: false }); + expect(result.workflows[0].workflow.tags).toEqual([]); + }); + + it('should trim and dedupe tags', async () => { + const workflowDir = join(testDir, '.archon', 'workflows'); + await mkdir(workflowDir, { recursive: true }); + const yaml = `name: test\ndescription: messy tags\ntags: ["GitLab", "GitLab ", " GitLab ", "Review"]\nnodes:\n - id: n\n prompt: p\n`; + await writeFile(join(workflowDir, 'test.yaml'), yaml); + const result = await discoverWorkflows(testDir, { loadDefaults: false }); + expect(result.workflows[0].workflow.tags).toEqual(['GitLab', 'Review']); + }); + + it('should filter non-string tag entries', async () => { + const workflowDir = join(testDir, '.archon', 'workflows'); + await mkdir(workflowDir, { recursive: true }); + // YAML coerces unquoted scalars: 123 → number, null → null + const yaml = `name: test\ndescription: mixed\ntags:\n - GitLab\n - 123\n - null\n - Review\nnodes:\n - id: n\n prompt: p\n`; + await writeFile(join(workflowDir, 'test.yaml'), yaml); + const result = await discoverWorkflows(testDir, { loadDefaults: false }); + expect(result.workflows[0].workflow.tags).toEqual(['GitLab', 'Review']); + }); + + it('should reduce all-blank tags to empty array (still suppresses inference)', async () => { + const workflowDir = join(testDir, '.archon', 'workflows'); + await mkdir(workflowDir, { recursive: true }); + const yaml = `name: test\ndescription: blanks\ntags: ["", " "]\nnodes:\n - id: n\n prompt: p\n`; + await writeFile(join(workflowDir, 'test.yaml'), yaml); + const result = await discoverWorkflows(testDir, { loadDefaults: false }); + expect(result.workflows[0].workflow.tags).toEqual([]); + }); + + it('should ignore tags when not an array', async () => { + const workflowDir = 
join(testDir, '.archon', 'workflows'); + await mkdir(workflowDir, { recursive: true }); + // Authoring mistake: scalar instead of list — discarded, workflow still loads + const yaml = `name: test\ndescription: scalar tags\ntags: GitLab\nnodes:\n - id: n\n prompt: p\n`; + await writeFile(join(workflowDir, 'test.yaml'), yaml); + const result = await discoverWorkflows(testDir, { loadDefaults: false }); + expect(result.workflows).toHaveLength(1); + expect(result.workflows[0].workflow.tags).toBeUndefined(); + }); + + it('should parse mutates_checkout: false correctly', async () => { + const workflowDir = join(testDir, '.archon', 'workflows'); + await mkdir(workflowDir, { recursive: true }); + const yaml = `name: test\ndescription: read-only workflow\nmutates_checkout: false\nnodes:\n - id: n\n prompt: p\n`; + await writeFile(join(workflowDir, 'test.yaml'), yaml); + const result = await discoverWorkflows(testDir, { loadDefaults: false }); + expect(result.workflows[0].workflow.mutates_checkout).toBe(false); + }); + + it('should parse mutates_checkout: true correctly', async () => { + const workflowDir = join(testDir, '.archon', 'workflows'); + await mkdir(workflowDir, { recursive: true }); + const yaml = `name: test\ndescription: explicit true\nmutates_checkout: true\nnodes:\n - id: n\n prompt: p\n`; + await writeFile(join(workflowDir, 'test.yaml'), yaml); + const result = await discoverWorkflows(testDir, { loadDefaults: false }); + expect(result.workflows[0].workflow.mutates_checkout).toBe(true); + }); + + it('should omit mutates_checkout when not set', async () => { + const workflowDir = join(testDir, '.archon', 'workflows'); + await mkdir(workflowDir, { recursive: true }); + const yaml = `name: test\ndescription: no field\nnodes:\n - id: n\n prompt: p\n`; + await writeFile(join(workflowDir, 'test.yaml'), yaml); + const result = await discoverWorkflows(testDir, { loadDefaults: false }); + expect(result.workflows[0].workflow.mutates_checkout).toBeUndefined(); + }); + + 
it('should warn and omit mutates_checkout for invalid value', async () => { + const workflowDir = join(testDir, '.archon', 'workflows'); + await mkdir(workflowDir, { recursive: true }); + // YAML string "yes" is not a boolean — should be dropped and field omitted + const yaml = `name: test\ndescription: typo\nmutates_checkout: "yes"\nnodes:\n - id: n\n prompt: p\n`; + await writeFile(join(workflowDir, 'test.yaml'), yaml); + const result = await discoverWorkflows(testDir, { loadDefaults: false }); + expect(result.workflows).toHaveLength(1); + expect(result.workflows[0].workflow.mutates_checkout).toBeUndefined(); + }); + it('should parse valid DAG workflow YAML', async () => { const workflowDir = join(testDir, '.archon', 'workflows'); await mkdir(workflowDir, { recursive: true }); @@ -222,13 +326,13 @@ nodes: expect(workflows[0].provider).toBeUndefined(); }); - it('should treat invalid provider as undefined (executor handles fallback)', async () => { + it('should reject unknown provider at load time', async () => { const workflowDir = join(testDir, '.archon', 'workflows'); await mkdir(workflowDir, { recursive: true }); const yamlInvalidProvider = `name: invalid-provider description: Invalid provider specified -provider: invalid +provider: claud nodes: - id: test command: test @@ -236,33 +340,37 @@ nodes: await writeFile(join(workflowDir, 'test.yaml'), yamlInvalidProvider); const result = await discoverWorkflows(testDir, { loadDefaults: false }); - const workflows = result.workflows.map(ws => ws.workflow); - // Unknown providers are accepted (validated against registry at execution time) - expect(workflows).toHaveLength(1); - expect(workflows[0].provider).toBe('invalid'); + expect(result.workflows).toHaveLength(0); + expect(result.errors).toHaveLength(1); + expect(result.errors[0].errorType).toBe('validation_error'); + expect(result.errors[0].error).toContain("Unknown provider 'claud'"); }); - it('should reject claude model with codex provider at load time', async () 
=> { + it('should accept any model string with a known provider (SDK validates at run time)', async () => { + // Whatever the user wrote in `model:` passes through to the SDK; the + // SDK is the source of truth for what model strings exist. Errors + // surface at run time, not load time. const workflowDir = join(testDir, '.archon', 'workflows'); await mkdir(workflowDir, { recursive: true }); - const invalidYaml = `name: invalid-model -description: Invalid model/provider pairing -provider: codex -model: sonnet + const yaml = `name: any-model +description: Any model string with a known provider +provider: claude +model: claude-opus-4-7[1m] nodes: - id: test command: test `; - await writeFile(join(workflowDir, 'invalid.yaml'), invalidYaml); + await writeFile(join(workflowDir, 'any-model.yaml'), yaml); const result = await discoverWorkflows(testDir, { loadDefaults: false }); + const workflows = result.workflows.map(ws => ws.workflow); - expect(result.workflows).toHaveLength(0); - expect(result.errors).toHaveLength(1); - expect(result.errors[0].errorType).toBe('validation_error'); - expect(result.errors[0].error).toContain('not compatible'); + expect(result.errors).toHaveLength(0); + expect(workflows).toHaveLength(1); + expect(workflows[0].provider).toBe('claude'); + expect(workflows[0].model).toBe('claude-opus-4-7[1m]'); }); it('should parse codex options fields', async () => { diff --git a/packages/workflows/src/loader.ts b/packages/workflows/src/loader.ts index e4d53bfdc2..3109c680d7 100644 --- a/packages/workflows/src/loader.ts +++ b/packages/workflows/src/loader.ts @@ -4,7 +4,7 @@ import type { WorkflowDefinition, WorkflowLoadError, DagNode, WorkflowNodeHooks } from './schemas'; import { isLoopNode, isApprovalNode, isCancelNode, isScriptNode } from './schemas'; import { createLogger } from '@archon/paths'; -import { isModelCompatible } from './model-validation'; +import { isRegisteredProvider, getRegisteredProviders } from '@archon/providers'; import { 
dagNodeSchema, BASH_NODE_AI_FIELDS, @@ -61,28 +61,27 @@ function parseDagNode(raw: unknown, index: number, errors: string[]): DagNode | const node = result.data; // Warn about AI-specific fields on non-AI nodes (runtime behavior, not schema errors) - let nodeType: string | undefined; - let aiFields: readonly string[] | undefined; + let nonAiNode: { type: string; fields: readonly string[] } | undefined; if (isCancelNode(node)) { - nodeType = 'cancel'; - aiFields = BASH_NODE_AI_FIELDS; + nonAiNode = { type: 'cancel', fields: BASH_NODE_AI_FIELDS }; } else if (isApprovalNode(node)) { - nodeType = 'approval'; - aiFields = BASH_NODE_AI_FIELDS; + nonAiNode = { type: 'approval', fields: BASH_NODE_AI_FIELDS }; } else if (isLoopNode(node)) { - nodeType = 'loop'; - aiFields = LOOP_NODE_AI_FIELDS; + nonAiNode = { type: 'loop', fields: LOOP_NODE_AI_FIELDS }; } else if (isScriptNode(node)) { - nodeType = 'script'; - aiFields = SCRIPT_NODE_AI_FIELDS; + nonAiNode = { type: 'script', fields: SCRIPT_NODE_AI_FIELDS }; } else if ('bash' in node && typeof node.bash === 'string') { - nodeType = 'bash'; - aiFields = BASH_NODE_AI_FIELDS; + nonAiNode = { type: 'bash', fields: BASH_NODE_AI_FIELDS }; } - if (nodeType !== undefined && aiFields !== undefined) { - const presentAiFields = aiFields.filter(f => (raw as Record)[f] !== undefined); + if (nonAiNode) { + const presentAiFields = nonAiNode.fields.filter( + f => (raw as Record)[f] !== undefined + ); if (presentAiFields.length > 0) { - getLog().warn({ id: node.id, fields: presentAiFields }, `${nodeType}_node_ai_fields_ignored`); + getLog().warn( + { id: node.id, fields: presentAiFields }, + `${nonAiNode.type}_node_ai_fields_ignored` + ); } } @@ -278,17 +277,36 @@ export function parseWorkflow(content: string, filename: string): ParseResult { typeof raw.provider === 'string' && raw.provider.length > 0 ? raw.provider : undefined; const model = typeof raw.model === 'string' ? 
raw.model : undefined; - // Validate model/provider compatibility at workflow level - if (provider && model && !isModelCompatible(provider, model)) { + // Validate provider identity at load time, both at the workflow level and + // per node. Model strings are NOT validated — they pass through to the SDK + // at run time, which is the source of truth for what model names exist + // (vendor SDKs ship new models faster than Archon can update). + if (provider && !isRegisteredProvider(provider)) { return { workflow: null, error: { filename, - error: `Model "${model}" is not compatible with provider "${provider}"`, + error: `Unknown provider '${provider}'. Registered: ${getRegisteredProviders() + .map(p => p.id) + .join(', ')}`, errorType: 'validation_error', }, }; } + for (const node of dagNodes) { + if (node.provider !== undefined && !isRegisteredProvider(node.provider)) { + return { + workflow: null, + error: { + filename, + error: `Node '${node.id}': unknown provider '${node.provider}'. Registered: ${getRegisteredProviders() + .map(p => p.id) + .join(', ')}`, + errorType: 'validation_error', + }, + }; + } + } // Validate modelReasoningEffort — warn and ignore invalid values (preserve original behavior) const modelReasoningEffortResult = modelReasoningEffortSchema.safeParse( @@ -361,6 +379,40 @@ export function parseWorkflow(content: string, filename: string): ParseResult { } } + // Parse mutates_checkout — boolean, omitted means true (run the path-lock guard). + // Same parse/warn pattern as `interactive` (invalid non-boolean values are dropped). + // When false, the executor skips the path-lock guard and allows concurrent runs on the same checkout. 
+ let mutatesCheckout: boolean | undefined; + if (raw.mutates_checkout !== undefined) { + if (typeof raw.mutates_checkout === 'boolean') { + mutatesCheckout = raw.mutates_checkout; + } else { + getLog().warn( + { filename, value: raw.mutates_checkout }, + 'invalid_mutates_checkout_value_ignored' + ); + } + } + + // Parse optional tags — type-narrow, trim, and dedupe so authors can't + // ship ["GitLab", "GitLab ", " GitLab "] as three distinct values + // (dedupe is whitespace-insensitive but case-sensitive). + // An explicit empty array is preserved (suppresses keyword inference in the + // UI); an absent or invalid block leaves `tags` undefined (falls back to + // inference). Same warn-and-ignore pattern as the worktree block above. + let tags: string[] | undefined; + if (Array.isArray(raw.tags)) { + tags = [ + ...new Set( + raw.tags + .filter((t): t is string => typeof t === 'string') + .map(t => t.trim()) + .filter(t => t.length > 0) + ), + ]; + } else if (raw.tags !== undefined) { + getLog().warn({ filename, value: raw.tags }, 'invalid_tags_block_ignored'); + } + + return { workflow: { name: raw.name, @@ -371,8 +423,10 @@ export function parseWorkflow(content: string, filename: string): ParseResult { webSearchMode, additionalDirectories, interactive, + ...(mutatesCheckout !== undefined ? { mutates_checkout: mutatesCheckout } : {}), nodes: dagNodes, + ...(worktreePolicy ? { worktree: worktreePolicy } : {}), + ...(tags !== undefined ?
{ tags } : {}), }, error: null, }; diff --git a/packages/workflows/src/model-validation.test.ts b/packages/workflows/src/model-validation.test.ts deleted file mode 100644 index 2247fd7c05..0000000000 --- a/packages/workflows/src/model-validation.test.ts +++ /dev/null @@ -1,80 +0,0 @@ -import { describe, it, expect, beforeAll } from 'bun:test'; -import { registerBuiltinProviders, clearRegistry } from '@archon/providers'; -import { isModelCompatible, inferProviderFromModel } from './model-validation'; - -// Bootstrap registry once for all tests (idempotent) -beforeAll(() => { - clearRegistry(); - registerBuiltinProviders(); -}); - -describe('model-validation (registry-driven)', () => { - describe('isModelCompatible', () => { - it('should accept any model when model is undefined', () => { - expect(isModelCompatible('claude')).toBe(true); - expect(isModelCompatible('codex')).toBe(true); - }); - - it('should accept Claude models with claude provider', () => { - expect(isModelCompatible('claude', 'sonnet')).toBe(true); - expect(isModelCompatible('claude', 'opus')).toBe(true); - expect(isModelCompatible('claude', 'haiku')).toBe(true); - expect(isModelCompatible('claude', 'inherit')).toBe(true); - expect(isModelCompatible('claude', 'claude-opus-4-6')).toBe(true); - }); - - it('should reject non-Claude models with claude provider', () => { - expect(isModelCompatible('claude', 'gpt-5.3-codex')).toBe(false); - expect(isModelCompatible('claude', 'gpt-4')).toBe(false); - }); - - it('should accept Codex/OpenAI models with codex provider', () => { - expect(isModelCompatible('codex', 'gpt-5.3-codex')).toBe(true); - expect(isModelCompatible('codex', 'gpt-5.2-codex')).toBe(true); - expect(isModelCompatible('codex', 'gpt-4')).toBe(true); - expect(isModelCompatible('codex', 'o1-mini')).toBe(true); - }); - - it('should reject Claude models with codex provider', () => { - expect(isModelCompatible('codex', 'sonnet')).toBe(false); - expect(isModelCompatible('codex', 'opus')).toBe(false); 
- expect(isModelCompatible('codex', 'claude-opus-4-6')).toBe(false); - }); - - it('should handle empty string model', () => { - // Empty string is falsy, so treated as "no model specified" - expect(isModelCompatible('claude', '')).toBe(true); - expect(isModelCompatible('codex', '')).toBe(true); - }); - - it('should throw on unknown providers (fail-fast)', () => { - expect(() => isModelCompatible('my-llm', 'any-model')).toThrow(/Unknown provider 'my-llm'/); - }); - }); - - describe('inferProviderFromModel', () => { - it('should return default when model is undefined', () => { - expect(inferProviderFromModel(undefined, 'claude')).toBe('claude'); - expect(inferProviderFromModel(undefined, 'codex')).toBe('codex'); - }); - - it('should return default when model is empty string', () => { - expect(inferProviderFromModel('', 'claude')).toBe('claude'); - expect(inferProviderFromModel('', 'codex')).toBe('codex'); - }); - - it('should infer claude from Claude model names', () => { - expect(inferProviderFromModel('sonnet', 'codex')).toBe('claude'); - expect(inferProviderFromModel('opus', 'codex')).toBe('claude'); - expect(inferProviderFromModel('haiku', 'codex')).toBe('claude'); - expect(inferProviderFromModel('inherit', 'codex')).toBe('claude'); - expect(inferProviderFromModel('claude-opus-4-6', 'codex')).toBe('claude'); - }); - - it('should infer codex from non-Claude model names', () => { - expect(inferProviderFromModel('gpt-5.3-codex', 'claude')).toBe('codex'); - expect(inferProviderFromModel('gpt-4', 'claude')).toBe('codex'); - expect(inferProviderFromModel('o1-mini', 'claude')).toBe('codex'); - }); - }); -}); diff --git a/packages/workflows/src/model-validation.ts b/packages/workflows/src/model-validation.ts deleted file mode 100644 index 0140defce5..0000000000 --- a/packages/workflows/src/model-validation.ts +++ /dev/null @@ -1,41 +0,0 @@ -/** - * Registry-driven model validation. 
- * - * All provider/model compatibility checks delegate to ProviderRegistration entries - * in the provider registry. No hardcoded provider knowledge lives here. - */ -import { getRegistration, getRegisteredProviders, isRegisteredProvider } from '@archon/providers'; - -/** - * Infer provider from a model name by iterating BUILT-IN registrations only. - * Community providers must be selected explicitly via `provider:` in YAML. - * - * Returns undefined if no built-in provider matches (caller falls back to config default). - */ -export function inferProviderFromModel(model: string | undefined, defaultProvider: string): string { - if (!model) return defaultProvider; - - for (const reg of getRegisteredProviders()) { - if (reg.builtIn && reg.isModelCompatible(model)) return reg.id; - } - - // No built-in matched — fall back to default - return defaultProvider; -} - -/** - * Check if a model is compatible with a provider using the registry. - * Returns true if no model is specified (any provider accepts no-model). - * Throws on unknown providers (fail-fast — matches getProviderCapabilities behavior). - */ -export function isModelCompatible(provider: string, model?: string): boolean { - if (!model) return true; - if (!isRegisteredProvider(provider)) { - throw new Error( - `Unknown provider '${provider}'. 
Registered providers: ${getRegisteredProviders() - .map(p => p.id) - .join(', ')}` - ); - } - return getRegistration(provider).isModelCompatible(model); -} diff --git a/packages/workflows/src/schemas/dag-node.ts b/packages/workflows/src/schemas/dag-node.ts index d41c6270c3..794f14ea78 100644 --- a/packages/workflows/src/schemas/dag-node.ts +++ b/packages/workflows/src/schemas/dag-node.ts @@ -15,7 +15,6 @@ import { stepRetryConfigSchema } from './retry'; import { loopNodeConfigSchema } from './loop'; import { workflowNodeHooksSchema } from './hooks'; import { isValidCommandName } from '../command-validation'; -import { isModelCompatible } from '../model-validation'; // --------------------------------------------------------------------------- // TriggerRule @@ -365,10 +364,13 @@ export const LOOP_NODE_AI_FIELDS: readonly string[] = BASH_NODE_AI_FIELDS.filter * - Non-empty id * - Exactly one of command/prompt/bash/loop (mutual exclusivity) * - command name validity (via isValidCommandName) - * - Model/provider compatibility (via isModelCompatible) * - idle_timeout must be a finite positive number * - retry not allowed on loop nodes * - timeout on bash must be positive + * + * Note: provider identity is validated in loader.ts (workflow-level) and + * dag-executor.ts (node-level). Model strings are passed through to the SDK + * unchanged — the SDK is the source of truth for what model names exist. 
*/ export const dagNodeSchema = dagNodeBaseSchema .extend({ @@ -522,24 +524,6 @@ path: ['idle_timeout'], }); } - - // Provider/model compatibility (AI nodes only) - if (!hasBash && !hasLoop && !hasScript && data.provider && data.model) { - try { - if (!isModelCompatible(data.provider, data.model)) { - ctx.addIssue({ - code: z.ZodIssueCode.custom, - message: `model "${data.model}" is not compatible with provider "${data.provider}"`, - }); - } - } catch (e) { - // isModelCompatible throws on unknown providers — surface as a validation issue - ctx.addIssue({ - code: z.ZodIssueCode.custom, - message: (e as Error).message, - }); - } - } }) .transform((data): DagNode => { const id = data.id.trim(); diff --git a/packages/workflows/src/schemas/workflow.ts b/packages/workflows/src/schemas/workflow.ts index 40771af578..d177a38ef3 100644 --- a/packages/workflows/src/schemas/workflow.ts +++ b/packages/workflows/src/schemas/workflow.ts @@ -68,6 +68,14 @@ export const workflowBaseSchema = z.object({ betas: z.array(z.string().min(1)).nonempty("'betas' must be a non-empty array").optional(), sandbox: sandboxSettingsSchema.optional(), worktree: workflowWorktreePolicySchema.optional(), + /** + * When `false`, the engine skips the path-exclusive lock for this workflow, + * allowing N concurrent runs on the same live checkout. The author asserts + * that concurrent runs will not race (e.g. all writes are per-run-scoped). + * Defaults to `true` (safe: serialize runs on the same path). + */ + mutates_checkout: z.boolean().optional(), + tags: z.array(z.string().min(1)).optional(), }); export type WorkflowBase = z.infer<typeof workflowBaseSchema>;