diff --git a/.agents/skills/archon/SKILL.md b/.agents/skills/archon/SKILL.md new file mode 100644 index 0000000000..628071bfcd --- /dev/null +++ b/.agents/skills/archon/SKILL.md @@ -0,0 +1,255 @@ +--- +name: archon +description: | + Use when the user wants Codex to run or monitor Archon workflows, initialize + Archon in a repo, create or edit Archon commands/workflows, inspect Archon CLI + behavior, or customize Archon for Codex usage rather than handling the task + directly in the current session. + Triggers: "use archon", "run archon", "archon workflow", "archon assist", + "codex archon assist", "have archon handle this", "use archon codex", + "archon init", "create an archon workflow", "create an archon command", + "archon config", "archon variables", "archon cli". + Also use when the user wants help choosing the Codex-safe Archon workflow or + authoring/customization surface for a task. + NOT for: Direct local implementation when the user wants Codex to do the work here + without handing off to Archon or without using Archon surfaces. +--- + +# Archon For Codex + +Archon runs long-form workflows through its own CLI and workflow engine. In Codex, +this skill exists to route work into the right Archon workflow and to avoid +Claude-specific workflow names or assumptions. + +This skill is intentionally narrower than the full Archon product surface: + +- it is Codex-first +- it covers workflow operation, debugging, and Archon customization +- it does not try to duplicate setup/install or broad platform-adapter docs + +Direct workflow routing comes first. + +- If the task clearly matches a specific Codex-safe workflow, run that workflow. +- Use `archon-assist-codex` only when no narrower Codex-safe workflow fits. +- Do not route guided implementation or interactive review loops through assist + first just to "get into Archon." 
+ +## First Step + +Check the available workflows before suggesting or running one: + +```bash +archon workflow list --json +``` + +If `archon` is unavailable, report that the Archon CLI is not installed or not on +`PATH`. Do not perform setup unless the user explicitly asks. + +## Routing + +Choose the smallest surface that matches the user's need: + +| Intent | Action | +| --- | --- | +| pick or run a Codex-safe workflow | continue in this file | +| monitor an active workflow | read `references/monitoring.md` | +| debug a confusing, failed, or stalled run | read `references/log-debugging.md` | +| relay an interactive workflow cleanly | read `references/interactive-workflows.md` | +| initialize `.archon/` in a repo | read `references/repo-init.md` | +| inspect variable substitution | read `references/variables.md` | +| create or edit Archon commands | read `references/authoring-commands.md` | +| create or edit Archon workflow YAML | read `references/workflow-dag.md` | +| inspect Archon CLI surfaces | read `references/cli-commands.md` | +| inspect or modify Archon config | read `references/configuration.md` | +| inspect Codex vs Claude capability boundaries | read `references/codex-capability-crosswalk.md` | + +## Codex Naming Convention + +Prefer Archon workflows ending in `-codex` when they exist. That suffix indicates +the workflow has been tuned or separated for Codex behavior. + +Known Codex-specific lanes in this repo: + +- `archon-assist-codex` for general Archon help, debugging, exploration, and + one-off questions when no narrower Codex-safe lane fits +- `archon-piv-loop-codex` for guided Plan-Implement-Validate workflows with + Codex + +If the user asks for a general Archon task and a Codex-specific workflow exists, +prefer that workflow over the Claude/default variant. + +If the user explicitly names a Claude-tuned workflow, respect that request but +warn when the workflow includes Claude-only features that Codex ignores. 
+ +## Codex Limitations In Archon + +Archon already warns when a Codex workflow node contains Claude-only features. +Plan around those limits instead of assuming they work: + +- node-level `skills` +- node-level `hooks` +- node-level `mcp` +- node-level `allowed_tools` +- node-level `denied_tools` + +When a workflow relies on those features, prefer a `-codex` workflow if one +exists. Otherwise tell the user the workflow may run with degraded behavior on +Codex. + +## Running Workflows + +Use explicit workflow names whenever possible. + +General Codex assist: + +```bash +archon workflow run archon-assist-codex --branch "<branch-name>" +``` + +Guided Codex PIV: + +```bash +archon workflow run archon-piv-loop-codex --branch "<branch-name>" +``` + +Rules: + +1. Use `--branch` unless the user explicitly wants `--no-worktree`. +2. Use descriptive branch names, for example `assist/codex-readme` or + `piv/codex-auth-refactor`. +3. For substantial implementation work, interactive refinement, or any guided + human-in-the-loop build request, prefer `archon-piv-loop-codex` over + `archon-assist-codex`. +4. For read-only questions or exploration, `--no-worktree` is acceptable. +5. Prefer one Archon workflow per command rather than combining unrelated tasks. +6. Treat Archon workflows as long-running jobs. Keep the run ID, working path, + and current status available for follow-up checks instead of assuming the + launch command alone is the full observability surface. + +## Interactive Operator Protocol + +Use this protocol for interactive workflows such as `archon-piv-loop-codex`. + +### Launch + +1. Run the workflow directly with `archon workflow run ...`. +2. Capture: + - workflow name + - run ID + - working path + - branch name if available +3. Immediately verify the launched run with `archon workflow status --json`. + +### State Machine + +Treat Codex as the human-facing operator for the workflow run until it reaches a +terminal state. 
+ +| Status | Action | +| --- | --- | +| `running` | keep monitoring; report only meaningful changes | +| `paused` | fetch the latest workflow output, relay it directly, wait for the user's answer | +| `completed` | report the terminal result and stop | +| `failed` | report the failure evidence and stop | + +### Post-Transition Rule + +After every `archon workflow run`, `archon workflow approve`, `archon workflow reject`, +or `archon workflow resume`: + +1. check `archon workflow status --json` +2. continue until the run is back at one of: + - `paused` + - `completed` + - `failed` + +Do not stop after recording approval or rejection alone. The control loop is not +done until the workflow either pauses again or reaches a terminal state. + +### Pause Detection Rule + +For interactive loops, treat a new human checkpoint as real only when the run is +currently `paused`. + +Track the paused fingerprint: + +- `approval.nodeId` +- `approval.iteration` +- `approval.message` + +Important nuance: + +- approval metadata can persist while the run is `running` +- do not treat `metadata.approval` by itself as proof that the loop has paused again +- workflow truth comes from the current `status`, not from stale approval metadata + +### Surface Boundaries + +- `archon workflow run ...` is the correct direct CLI surface for interactive workflows +- `archon chat ...` is single-shot orchestration, not a persistent multi-turn workflow chat +- web foreground runs can resume from natural-language replies in the same thread +- CLI `workflow approve` and `workflow reject` auto-resume the run +- `/workflow approve` is a different surface; do not assume it behaves like the CLI command + +## Monitoring + +Start with: + +```bash +archon workflow status --json +``` + +Default live-monitoring cadence: + +- check once shortly after launch to confirm the run exists +- if the user is actively waiting, re-check about every 30 seconds + +Rationale: + +- the web client already has a 15 second 
fallback poll, but CLI monitoring is + heavier because each check is a full Archon CLI invocation with database + access + +State handling: + +- `running`: keep monitoring, surface only meaningful progress +- `paused`: read the latest workflow output and relay it transparently +- `completed` or `failed`: report the terminal result and stop polling +- `running` with unchanged `last_activity_at` plus no new JSONL activity for 5 + minutes: report a possible stall, not a confirmed failure + +When an interactive workflow pauses, do not summarize the workflow's question. +Read the latest output and pass the user's answer back through the Archon +approval or reject command rather than trying to continue locally. + +When a paused checkpoint is tied to a mutable artifact such as a plan-review +loop, reopen the current artifact from disk before relaying any state summary. +Do not assume a previously read file path or artifact contents are still the +latest truth. + +If the user explicitly wants unattended follow-up and the current Codex surface +supports thread heartbeat automations, attach one to the current thread and have +it report only meaningful changes: approval gates, terminal state changes, or a +possible stall. If automation is unavailable on the current surface, continue +with in-session polling instead. + +Read `references/monitoring.md` for the detailed monitoring contract and +`references/interactive-workflows.md` for the transparent-relay loop. + +## Repo Guidance + +Do not assume Codex auto-loaded `CLAUDE.md` even if a fallback filename is +configured globally. If repo conventions are load-bearing for the delegated task, +read `CLAUDE.md` explicitly before recommending or running the workflow. 
+ +For Archon customization requests, keep the boundary clear: + +- use the shared Archon authoring docs for commands, workflows, variables, and + repo initialization +- use `references/configuration.md` for repo and global Archon config changes +- use `references/codex-capability-crosswalk.md` whenever provider capability + differences are load-bearing +- do not imply that Claude-only per-node controls automatically become Codex + node features +- keep `archon chat` documented as single-shot orchestration rather than a + persistent workflow conversation diff --git a/.agents/skills/archon/agents/openai.yaml b/.agents/skills/archon/agents/openai.yaml new file mode 100644 index 0000000000..aef294ba55 --- /dev/null +++ b/.agents/skills/archon/agents/openai.yaml @@ -0,0 +1,8 @@ +interface: + display_name: "Archon" + short_description: "Run Codex-safe Archon workflows from this repo." + default_prompt: "Use Archon to " + +policy: + allow_implicit_invocation: true + diff --git a/.agents/skills/archon/examples/command-template.md b/.agents/skills/archon/examples/command-template.md new file mode 100644 index 0000000000..035f6c9cd8 --- /dev/null +++ b/.agents/skills/archon/examples/command-template.md @@ -0,0 +1,55 @@ +--- +description: Template for a Codex-safe Archon command +argument-hint: +--- + +# Command Name + +**Workflow ID**: $WORKFLOW_ID + +User request: $ARGUMENTS +Artifacts directory: $ARTIFACTS_DIR +Base branch: $BASE_BRANCH + +## Phase 1: Load + +Gather the context you actually need: + +- read any required files from the repository +- read prior artifacts from `$ARTIFACTS_DIR` if this command depends on earlier steps +- confirm the expected output before making changes + +### Phase 1 Checkpoint + +- [ ] request understood +- [ ] required inputs loaded +- [ ] expected output identified + +## Phase 2: Execute + +Perform the main task of this command. 
+ +Keep the prompt explicit about: + +- what to inspect +- what to change or produce +- how to validate the result + +### Phase 2 Checkpoint + +- [ ] main task completed +- [ ] relevant validation run or intentionally skipped with reason + +## Phase 3: Report + +If downstream nodes need durable output, write it into `$ARTIFACTS_DIR/output.md` +with: + +- what was done +- key findings or decisions +- blockers or follow-up notes + +### Phase 3 Checkpoint + +- [ ] durable output written when needed +- [ ] summary ready for the next step or the user diff --git a/.agents/skills/archon/examples/dag-workflow.yaml b/.agents/skills/archon/examples/dag-workflow.yaml new file mode 100644 index 0000000000..56c013b83e --- /dev/null +++ b/.agents/skills/archon/examples/dag-workflow.yaml @@ -0,0 +1,80 @@ +# Example Codex-safe Archon workflow +# +# Demonstrates: +# - bash node +# - structured output +# - conditional routing +# - command node +# - loop node +# - validation node +# +# This is a reference example. Adjust nodes, dependencies, and outputs to the +# actual task rather than copying it literally. + +name: codex-smart-issue-fix +description: | + Classify a GitHub issue, route to the right investigation path, implement the + change with Codex, and summarize validation results. +provider: codex +model: gpt-5.4 + +nodes: + - id: fetch-issue + bash: | + issue_num=$(echo "$ARGUMENTS" | grep -oE '[0-9]+' | head -1) + if [ -z "$issue_num" ]; then + echo "No issue number found in: $ARGUMENTS" + exit 1 + fi + gh issue view "$issue_num" --json title,body,labels + timeout: 15000 + + - id: classify + prompt: | + Classify this issue as bug or feature and return structured output. 
+ + $fetch-issue.output + depends_on: [fetch-issue] + output_format: + type: object + properties: + issue_type: + type: string + enum: [bug, feature] + title: + type: string + required: [issue_type, title] + + - id: investigate-bug + command: investigate-bug + depends_on: [classify] + when: "$classify.output.issue_type == 'bug'" + + - id: plan-feature + command: plan-feature + depends_on: [classify] + when: "$classify.output.issue_type == 'feature'" + + - id: implement + depends_on: [investigate-bug, plan-feature] + trigger_rule: one_success + loop: + prompt: | + Implement the next required change for: $classify.output.title + + Read the relevant artifacts from $ARTIFACTS_DIR. + When implementation and validation are complete, output: + DONE + until: DONE + max_iterations: 5 + fresh_context: false + until_bash: "bun run test" + + - id: summarize + prompt: | + Summarize the implementation and validation outcome for: + $classify.output.title + + Implementation result: + $implement.output + depends_on: [implement] diff --git a/.agents/skills/archon/references/authoring-commands.md b/.agents/skills/archon/references/authoring-commands.md new file mode 100644 index 0000000000..41bc9ed014 --- /dev/null +++ b/.agents/skills/archon/references/authoring-commands.md @@ -0,0 +1,118 @@ +# Authoring Archon Command Files For Codex + +Command files are Markdown prompt templates. They are shared Archon primitives, +not Claude-only assets. + +## File Location + +```text +.archon/commands/ +├── my-command.md +├── review-code.md +└── defaults/ + └── archon-assist-codex.md +``` + +Commands are referenced by name without the `.md` extension from workflow YAML. 
+ +## File Format + +```markdown +--- +description: One-line description of what this command does +argument-hint: +--- + +# Command Title + +**Workflow ID**: $WORKFLOW_ID + +User request: $ARGUMENTS +Artifacts: $ARTIFACTS_DIR + +## Phase 1: Load + +[Gather the needed context] + +## Phase 2: Execute + +[Do the work] + +## Phase 3: Report + +[Summarize or write artifacts] +``` + +The full file content, including frontmatter, becomes the prompt. + +## Frontmatter Fields + +| Field | Required | Description | +| --- | --- | --- | +| `description` | recommended | Human-readable description used in listings | +| `argument-hint` | optional | Expected argument shape such as `` or `(no arguments)` | + +## Discovery And Priority + +When a workflow references `command: my-command`, Archon resolves in this order: + +1. `.archon/commands/my-command.md` +2. `.archon/commands/defaults/my-command.md` +3. bundled defaults shipped with Archon + +First match wins. + +## Variable Use + +Most common variables: + +- `$ARGUMENTS` +- `$ARTIFACTS_DIR` +- `$WORKFLOW_ID` +- `$BASE_BRANCH` + +See `variables.md` for the full reference. + +## Recommended Structure + +For non-trivial commands, keep the prompt phased: + +1. load context +2. analyze or execute +3. validate if relevant +4. report or write artifacts + +Use short checklists when they materially help the workflow stay deterministic. + +## Artifact Conventions + +If downstream nodes need the result, write it into `$ARTIFACTS_DIR` instead of +leaving it only in free-form assistant output. 
+ +Common patterns: + +- `$ARTIFACTS_DIR/plan.md` +- `$ARTIFACTS_DIR/investigation.md` +- `$ARTIFACTS_DIR/implementation.md` +- `$ARTIFACTS_DIR/validation.md` + +## Authoring Rules For Codex + +- keep commands provider-neutral unless a prompt truly depends on provider + behavior +- do not assume Claude-only node controls such as per-node hooks or skills +- do not hardcode local-only paths when `$ARTIFACTS_DIR` is the intended output + surface +- do not assume prior conversational memory when the calling node uses fresh + context + +## Anti-Patterns + +- vague instructions that do not define expected outputs +- commands that produce no durable artifact when downstream nodes need one +- prompts that assume Claude-specific tools or behavior without saying so +- monolithic prompts with no clear responsibility + +## Example + +See `examples/command-template.md` for a Codex-safe starter template. diff --git a/.agents/skills/archon/references/cli-commands.md b/.agents/skills/archon/references/cli-commands.md new file mode 100644 index 0000000000..28d0e91ada --- /dev/null +++ b/.agents/skills/archon/references/cli-commands.md @@ -0,0 +1,158 @@ +# Archon CLI Command Reference For Codex + +Use this when the user wants the real Archon CLI surface rather than a skill +summary. + +All commands except `version` and `chat` are normally run from within a git +repository. + +## Workflow Commands + +### `archon workflow list` + +```bash +archon workflow list +archon workflow list --json +``` + +Use this first when choosing a Codex-safe workflow. 
### `archon workflow run <workflow-name> [message]` + +Examples: + +```bash +archon workflow run archon-assist-codex --branch assist/codex-readme "Explain the current workflow surface" +archon workflow run archon-piv-loop-codex --branch piv/codex-auth "Implement auth from the approved plan" +archon workflow run my-workflow --branch feat/dark-mode "Add dark mode" +archon workflow run quick-fix --no-worktree "Fix the typo in README" +archon workflow run my-workflow --resume +``` + +Key flags: + +| Flag | Description | +| --- | --- | +| `--branch <name>` | create or reuse a worktree branch | +| `--from <branch>` | choose the base branch for a new worktree | +| `--no-worktree` | run in the live checkout | +| `--resume` | resume the last failed run | +| `--cwd <path>` | override working directory | + +Important: + +- default behavior creates an isolated worktree automatically +- `--branch` and `--no-worktree` conflict +- `--resume` and `--branch` conflict + +### `archon workflow status` + +```bash +archon workflow status +archon workflow status --verbose +archon workflow status --json +``` + +Use `--json` as the source of truth for current workflow state. + +### `archon workflow approve` + +```bash +archon workflow approve "<feedback>" +``` + +Use for paused workflows that need human feedback. The CLI approve path records +the response and resumes the run. + +### `archon workflow reject` + +```bash +archon workflow reject "<feedback>" +``` + +Use for paused workflows that need rejection or rework feedback. + +### `archon workflow resume` + +```bash +archon workflow resume +``` + +Use when the run failed and should be resumed from its failure point. + +## Validation Commands + +### `archon validate workflows [name]` + +```bash +archon validate workflows +archon validate workflows my-workflow +archon validate workflows my-workflow --json +``` + +This checks workflow syntax, dependency structure, resource resolution, and +provider-compatibility warnings. 
### `archon validate commands [name]` + +```bash +archon validate commands +archon validate commands my-command +``` + +Use after creating or editing command files. + +## Isolation Commands + +### `archon isolation list` + +```bash +archon isolation list +``` + +Shows active worktree environments. + +### `archon isolation cleanup` + +```bash +archon isolation cleanup +archon isolation cleanup 14 +archon isolation cleanup --merged +``` + +## Other Commands + +### `archon complete <branch>` + +```bash +archon complete feature-auth +archon complete feature-auth --force +``` + +Completes a branch lifecycle by removing the worktree and branch state. + +### `archon version` + +```bash +archon version +``` + +### `archon chat <message>` + +```bash +archon chat "What workflows are available?" +``` + +Important: + +- `archon chat` is single-shot orchestration +- it is not a persistent multi-turn workflow conversation +- interactive workflow control should stay on `archon workflow run/status/approve/reject` + +## Useful Environment Variables + +| Variable | Purpose | +| --- | --- | +| `ARCHON_HOME` | override Archon home directory | +| `LOG_LEVEL` | control Archon process log verbosity | +| `DATABASE_URL` | use PostgreSQL instead of SQLite | diff --git a/.agents/skills/archon/references/codex-capability-crosswalk.md b/.agents/skills/archon/references/codex-capability-crosswalk.md new file mode 100644 index 0000000000..490e770aa5 --- /dev/null +++ b/.agents/skills/archon/references/codex-capability-crosswalk.md @@ -0,0 +1,137 @@ +# Codex Capability Crosswalk For Archon + +Use this document when the question is not "how do I write workflow YAML?" but +"what actually survives the Claude-to-Codex translation?" + +This is a capability crosswalk, not a feature-sales guide. Treat it as the +truth table for Codex-safe Archon authoring. 
+ +## Crosswalk + +| Feature | Claude in workflow YAML | Codex in workflow YAML | Codex real surface | Meaning | +| --- | --- | --- | --- | --- | +| `provider` | supported | supported | workflow or node field | real parity | +| `model` | supported | supported | workflow or node field | real parity, including loop node provider/model overrides | +| `output_format` | supported | supported | workflow YAML | real parity with structured-output caveats | +| `retry` | supported | supported | workflow YAML | real parity except loop-node retry stays invalid | +| `hooks` | supported per-node | ignored | none | no parity | +| `mcp` | supported per-node | ignored per-node | Codex global config | global-only, not equivalent | +| `skills` | supported per-node | ignored per-node | global or repo `.agents/skills/` | global/repo discovery, not equivalent | +| `allowed_tools` | supported per-node | ignored | Codex config / MCP config | global-only, not equivalent | +| `denied_tools` | supported per-node | ignored | Codex config / MCP config | global-only, not equivalent | +| `modelReasoningEffort` | not the same field | supported for Codex | workflow YAML or Archon config | workflow-level override with config fallback | +| `webSearchMode` | not the same field | supported for Codex | workflow YAML or Archon config | workflow-level override with config fallback | +| `additionalDirectories` | not the same field | supported for Codex | workflow YAML or Archon config | workflow-level override with config fallback | + +## Feature Notes + +### `provider` and `model` + +These are real workflow controls for Codex. + +- node-level `provider` and `model` overrides are honored for normal nodes +- loop nodes also resolve and pass node-level `provider` and `model` overrides + into loop execution + +That means the parity boundary is not "loops ignore model overrides." The real +boundary is in Claude-only controls such as hooks, per-node MCP, per-node +skills, and node-level tool restrictions. 
+ +### `output_format` + +This is real Codex parity, not a degraded fallback. + +Archon maps workflow `output_format:` to the Codex client's structured-output +path. Downstream field references such as `$node.output.field` are valid when +Codex returns structured output as expected. + +Operational nuance: + +- if Codex returns non-JSON output, Archon warns that field-based downstream + conditions may misbehave +- this is still a supported feature, but not a promise that every prompt will + always produce clean structured output + +### `retry` + +This remains a shared workflow feature for command, prompt, and bash nodes. +Loop-node retry is still not valid. + +### `modelReasoningEffort`, `webSearchMode`, and `additionalDirectories` + +These are real workflow-level Codex tuning fields. + +- if the workflow sets them, execution uses the workflow value +- if the workflow omits them, execution falls back to + `config.assistants.codex.*` +- they remain workflow-level controls, not node-level controls + +Current precedence: + +1. workflow YAML +2. `config.assistants.codex.*` +3. SDK defaults + +Archon config still matters as the default source when the workflow does not set +these fields: + +- `assistants.codex.modelReasoningEffort` +- `assistants.codex.webSearchMode` +- `assistants.codex.additionalDirectories` + +### `hooks` + +There is no Codex node-level equivalent in Archon. If a workflow depends on +hooks for guardrails or tool interception, do not call it Codex-safe. + +### `mcp` + +Important distinction: + +- Claude: `mcp:` is a node-level workflow surface +- Codex: Archon ignores `mcp:` on a node + +For Codex, MCP belongs in Codex configuration rather than workflow YAML. That +means the workflow cannot assume one node has one MCP setup and another node has +a different one in the same fine-grained way. 
+ +### `skills` + +Important distinction: + +- Claude: `skills:` is a node-level workflow surface +- Codex: Archon ignores `skills:` on a node + +Codex skill discovery is global or repo-local, not a workflow node isolation +mechanism. + +### `allowed_tools` and `denied_tools` + +These are Claude node-level controls. Archon warns and ignores them on Codex. + +If the desired effect is tool restriction for Codex, that belongs in Codex's +own configuration surface, not in Archon workflow YAML as a per-node contract. + +## Codex Global Surfaces That Are Real + +These are real Codex-side configuration surfaces even though they are not node +parity features: + +- global or repo skill discovery under `.agents/skills/` +- Codex MCP configuration in Codex config +- Codex assistant defaults in Archon config: + - `assistants.codex.model` + - `assistants.codex.modelReasoningEffort` + - `assistants.codex.webSearchMode` + - `assistants.codex.additionalDirectories` + - `assistants.codex.codexBinaryPath` + +## Authoring Rules + +1. If a workflow depends on per-node hooks, per-node MCP, per-node skills, or + per-node tool restrictions, do not present it as Codex-safe. +2. If Codex has a global-only equivalent, document that boundary explicitly. +3. Prefer a dedicated `-codex` workflow when the original workflow depends on + Claude-oriented node controls. +4. Treat this crosswalk as code-backed contract, not as an aspirational parity + promise. diff --git a/.agents/skills/archon/references/configuration.md b/.agents/skills/archon/references/configuration.md new file mode 100644 index 0000000000..c1cb594fd3 --- /dev/null +++ b/.agents/skills/archon/references/configuration.md @@ -0,0 +1,200 @@ +# Archon Configuration Guide For Codex + +Use this guide when the user wants to view, explain, create, or modify Archon +configuration for Codex-first usage. 
+ +## Configuration Levels + +Archon has two config levels: + +- global config: `~/.archon/config.yaml` +- repo config: `/.archon/config.yaml` + +Precedence is: + +1. environment variables +2. repo config +3. global config +4. built-in defaults + +## When To Use Which Level + +Use repo config when the change is project-specific: + +- prefer Codex only in one repo +- set one repo's base branch +- set one repo's `copyFiles` +- disable bundled defaults for one repo +- add repo-specific Codex defaults + +Use workflow YAML when one Codex workflow should run with its own tuning: + +- set workflow-specific `modelReasoningEffort` +- set workflow-specific `webSearchMode` +- set workflow-specific `additionalDirectories` + +Use global config when the change is user-wide: + +- prefer Codex by default across repos +- set default Codex model or reasoning effort +- set global Codex `webSearchMode` +- set global `additionalDirectories` +- set global streaming or bot preferences + +## Reading Current Config + +Global config: + +```bash +sed -n '1,220p' ~/.archon/config.yaml +``` + +Repo config: + +```bash +sed -n '1,220p' .archon/config.yaml +``` + +If a file does not exist: + +- global config is auto-created on first Archon run +- repo config is optional; Archon falls back to defaults + +## Most Important Codex Settings + +### Global config example + +```yaml +defaultAssistant: codex + +assistants: + codex: + model: gpt-5.4 + modelReasoningEffort: medium + webSearchMode: live + additionalDirectories: + - /absolute/path/to/other/repo + codexBinaryPath: /absolute/path/to/codex +``` + +### Repo config example + +```yaml +assistant: codex + +assistants: + codex: + model: gpt-5.4 + modelReasoningEffort: high + webSearchMode: live + +worktree: + baseBranch: main + copyFiles: + - .env + - .env.local + +commands: + folder: .archon/commands + +defaults: + loadDefaultCommands: true + loadDefaultWorkflows: true +``` + +## Key Fields + +### Global config fields + +| Field | Meaning | +| --- | 
--- | +| `defaultAssistant` | default assistant when a repo does not override it | +| `assistants.codex.model` | default Codex model | +| `assistants.codex.modelReasoningEffort` | default Codex reasoning effort | +| `assistants.codex.webSearchMode` | default Codex web search mode | +| `assistants.codex.additionalDirectories` | extra writable directories for Codex sessions | +| `assistants.codex.codexBinaryPath` | explicit Codex CLI path, mainly relevant in compiled Archon builds | +| `botName` | bot display name | +| `streaming.*` | platform response mode | +| `concurrency.maxConversations` | max parallel conversations | + +### Repo config fields + +| Field | Meaning | +| --- | --- | +| `assistant` | repo-level assistant override | +| `assistants.codex.*` | repo-level Codex defaults | +| `commands.folder` | extra command folder search path | +| `worktree.baseBranch` | base branch used for worktree creation and `$BASE_BRANCH` | +| `worktree.copyFiles` | ignored files copied into new worktrees | +| `defaults.loadDefaultCommands` | whether bundled commands are loaded at runtime | +| `defaults.loadDefaultWorkflows` | whether bundled workflows are loaded at runtime | +| `docs.path` | repo docs path used by workflow surfaces that care about docs | +| `env` | per-project env vars merged into workflow execution config; most relevant when a workflow surface consumes `config.envVars` | + +## Workflow-Level Codex Overrides + +For Codex, these workflow YAML fields override Archon config for that workflow: + +- `modelReasoningEffort` +- `webSearchMode` +- `additionalDirectories` + +Runtime precedence for those fields is: + +1. workflow YAML +2. `assistants.codex.*` in Archon config +3. 
SDK defaults + +That means: + +- use Archon config for shared defaults across many Codex workflows +- use workflow YAML when one workflow needs a different execution profile +- do not expect node-level versions of those fields + +## Editing Guidance + +When modifying config: + +- preserve unrelated keys +- keep repo config focused on non-default behavior +- use repo config for project-specific overrides instead of widening the global + config unnecessarily +- do not confuse `assistant` with `defaultAssistant` + - `assistant` is repo-level + - `defaultAssistant` is global + +## Environment Variable Overrides + +These override config files: + +| Env Var | Overrides | +| --- | --- | +| `DEFAULT_AI_ASSISTANT` | assistant preference | +| `BOT_DISPLAY_NAME` | `botName` | +| `TELEGRAM_STREAMING_MODE` | `streaming.telegram` | +| `DISCORD_STREAMING_MODE` | `streaming.discord` | +| `SLACK_STREAMING_MODE` | `streaming.slack` | +| `MAX_CONCURRENT_CONVERSATIONS` | `concurrency.maxConversations` | +| `ARCHON_HOME` | Archon base path | + +## Operational Notes For Codex + +- Codex MCP configuration is not controlled by Archon workflow `mcp:` node + fields +- Codex skill discovery is not controlled by Archon workflow `skills:` node + fields +- if the user wants those behaviors, route them through Codex config and the + Codex capability crosswalk instead of pretending repo config creates node + parity + +## Validation + +After a config change, verify the expected behavior with readback: + +```bash +archon workflow list --json +``` + +For repo-specific changes, read the effective repo config again and confirm the +intended keys are present with the expected values. 
diff --git a/.agents/skills/archon/references/interactive-workflows.md b/.agents/skills/archon/references/interactive-workflows.md new file mode 100644 index 0000000000..3d0c97b3a1 --- /dev/null +++ b/.agents/skills/archon/references/interactive-workflows.md @@ -0,0 +1,100 @@ +# Interactive Workflows For Codex + +Use this guide when the workflow is interactive and the user is effectively +talking to the workflow through Codex. + +Interactive workflows in this repo include: + +- `archon-piv-loop-codex` +- `archon-interactive-prd` + +## Core Rule + +Be a transparent relay. + +- show the workflow's latest question or summary directly +- do not rewrite or "improve" the workflow's wording +- pass the user's answer back as directly as possible +- keep operating the run until it pauses again or reaches a terminal state + +## Basic Loop + +1. Launch the workflow and capture: + - run ID + - workflow name + - working path +2. Verify the launched run with `archon workflow status --json`. +3. When the run becomes `paused`, read the latest workflow output. +4. Relay that output directly to the user. +5. When the user answers, resume with `archon workflow approve` or + `archon workflow reject`. +6. Immediately re-check `archon workflow status --json`. +7. Repeat until the run reaches `paused`, `completed`, or `failed`. + +## Commands + +```bash +archon workflow status --json +archon workflow approve "" +archon workflow reject "" +``` + +## When Paused + +When the workflow is paused: + +- read the latest assistant output from the run log +- show it directly +- wait for the user +- pass their response through verbatim unless a safety or formatting issue + requires intervention + +Treat the paused fingerprint as: + +- `approval.nodeId` +- `approval.iteration` +- `approval.message` + +If the workflow pauses again with a new fingerprint, that is a new human +checkpoint even if the wording looks similar. + +Do not replace the workflow's structured questions with your own summary. 
+ +If the paused node is reviewing a mutable artifact, reopen the current artifact +from disk before you speak for the workflow. For example, a plan-review pause +should use the latest saved plan rather than a stale earlier read. + +## When Still Running + +Long research or implementation nodes can stay `running` for a while without +needing user input. + +- keep checking status on the monitoring cadence +- do not treat "still running" by itself as a problem +- if activity stops for the stall window, flag a possible stall and say what + evidence stopped moving + +Important nuance: + +- interactive-loop approval metadata can remain present while the run is + `running` +- that does not mean the workflow is paused again +- only treat the loop as back when the run status itself is `paused` + +## Where To Read The Latest Output + +Use the per-run JSONL when status alone is not enough: + +```bash +find "${ARCHON_HOME:-$HOME/.archon}/workspaces" -name ".jsonl" 2>/dev/null +tail -n 40 "" +``` + +Read `log-debugging.md` when you need the full trace. + +## Surface Boundaries + +- `archon workflow run ...` is the direct CLI surface for this interaction model +- `archon chat ...` is not a persistent multi-turn workflow conversation +- web foreground workflows can resume from natural-language replies in the same thread +- CLI `workflow approve` and `workflow reject` resume immediately after recording the decision diff --git a/.agents/skills/archon/references/log-debugging.md b/.agents/skills/archon/references/log-debugging.md new file mode 100644 index 0000000000..95c2cf5a07 --- /dev/null +++ b/.agents/skills/archon/references/log-debugging.md @@ -0,0 +1,154 @@ +# Archon Log Debugging For Codex + +Use this guide when the main job is understanding what Archon just did during a +workflow run, why it failed, why it paused, or where the useful evidence lives. + +## Three Evidence Layers + +Archon exposes overlapping but non-interchangeable evidence surfaces. + +### 1. 
Status and run details
+
+Use this first for the current high-level truth.
+
+- `archon workflow status --json`
+- `archon workflow status --verbose`
+- web or API run details when available
+
+Best for:
+
+- run ID
+- current status
+- `last_activity_at`
+- working path
+- approval context
+
+### 2. Per-run workflow JSONL
+
+Use this when status is ambiguous or when you need the full workflow trace.
+
+Default location:
+
+```text
+${ARCHON_HOME:-$HOME/.archon}/workspaces/<project>/<run-id>/logs/<run-id>.jsonl
+```
+
+Best for:
+
+- assistant output
+- tool calls
+- node boundaries
+- validation events
+- workflow pause or failure context
+
+### 3. Runtime process logs
+
+Use this when the issue looks like Archon runtime behavior rather than workflow
+logic.
+
+Examples:
+
+```bash
+LOG_LEVEL=debug archon workflow status --json
+LOG_LEVEL=debug archon workflow run "<workflow-name>"
+```
+
+Best for:
+
+- database errors
+- config loading failures
+- adapter or API problems
+- unexpected process behavior
+
+## Triage Order
+
+1. `archon workflow status --json`
+2. `archon workflow status --verbose` or the web/API run details
+3. per-run JSONL
+4.
runtime logs with `LOG_LEVEL=debug` + +## Finding The Run + +For active runs: + +```bash +archon workflow status +archon workflow status --verbose +archon workflow status --json +``` + +If you already know the run ID: + +```bash +find "${ARCHON_HOME:-$HOME/.archon}/workspaces" -name ".jsonl" 2>/dev/null +``` + +## Reading The JSONL + +Set a shell variable first: + +```bash +LOG_FILE="${ARCHON_HOME:-$HOME/.archon}/workspaces///logs/.jsonl" +``` + +Common reads: + +```bash +tail -n 40 "$LOG_FILE" +rg '"type":"workflow_error"|"type":"node_error"' "$LOG_FILE" +rg '"type":"assistant"' "$LOG_FILE" | tail -n 5 +rg '"type":"validation"' "$LOG_FILE" +``` + +## Common Event Families + +Representative JSONL event types include: + +- `workflow_start` +- `workflow_complete` +- `workflow_error` +- `assistant` +- `tool` +- `validation` +- `node_start` +- `node_complete` +- `node_skipped` +- `node_error` + +Use them as breadcrumbs rather than assuming the UI event names will match +exactly. + +## Filtering Patterns + +Assistant messages: + +```bash +rg '"type":"assistant"' "$LOG_FILE" +``` + +Tool calls: + +```bash +rg '"type":"tool"' "$LOG_FILE" +``` + +Skipped nodes: + +```bash +rg '"type":"node_skipped"' "$LOG_FILE" +``` + +If `jq` is available: + +```bash +jq -r 'select(.type=="assistant") | .content' "$LOG_FILE" | tail -n 1 +jq -c 'select(.type=="node_error") | {ts, step, error}' "$LOG_FILE" +``` + +## Interpretation Rules + +- status and UI/API surfaces are intentionally lean +- the JSONL trace is the authoritative assistant and tool history for one run +- current pause state should still come from `archon workflow status --json` +- use runtime logs only when the issue looks like Archon itself rather than a + workflow node decision diff --git a/.agents/skills/archon/references/monitoring.md b/.agents/skills/archon/references/monitoring.md new file mode 100644 index 0000000000..f6110f9695 --- /dev/null +++ b/.agents/skills/archon/references/monitoring.md @@ -0,0 +1,129 @@ +# 
Monitoring Archon Workflows From Codex + +Use this guide when the user wants ongoing updates about a live Archon workflow. + +## First Check + +Start with: + +```bash +archon workflow status --json +``` + +This command currently includes `last_activity_at`, which makes it usable as the +first stall-detection surface. + +Treat this command as the source of truth for current workflow state. Do not +infer current pause state from old terminal output alone. + +## Default Cadence + +Use this cadence during active live monitoring: + +- first check shortly after launch +- then about every 30 seconds while the user is actively waiting + +Why not every 15 seconds? + +- the web app already has a 15 second client-side fallback poll +- CLI monitoring is heavier because each check is a full Archon CLI invocation + with database access + +If the user is not actively waiting, reduce noise and check less often. + +## Evidence Order + +1. `archon workflow status --json` +2. web or API run details if available +3. per-run JSONL when status is ambiguous, paused, failed, or possibly stalled +4. runtime logs only when the problem looks like Archon itself + +## Progress States + +### Healthy running + +Report only meaningful changes such as: + +- current workflow changed +- status changed +- a new approval gate appeared +- artifacts appeared +- the run clearly moved to a new phase or node family + +Do not spam the user with identical "still running" updates. + +### Paused + +Treat `paused` as action-required. + +- open the latest workflow output +- relay it directly +- wait for the user response + +Track the paused fingerprint: + +- `approval.nodeId` +- `approval.iteration` +- `approval.message` + +If any of those change on a later `paused` check, the loop has returned with a +new checkpoint. 
+ +Important nuance: + +- interactive-loop approval metadata can persist after approval while the run is + back in `running` +- do not treat persisted `metadata.approval` as proof of a fresh pause +- current `status` wins + +### After approve or reject + +After every approval, rejection, or manual resume: + +1. re-run `archon workflow status --json` +2. continue checking until the run reaches: + - `paused` + - `completed` + - `failed` + +Recording approval is not the end of the operator loop. The next required state +change must be observed. + +### Possible stall + +Default heuristic: + +- run status is still `running` +- `last_activity_at` has not advanced for at least 5 minutes +- the JSONL tail shows no new assistant, tool, or node activity in the same + 5 minute window + +Report this as a possible stall, not a confirmed failure. + +### Terminal + +When the run becomes `completed`, `failed`, or `cancelled`: + +- report the terminal status +- include the most relevant evidence +- stop polling + +## Optional Heartbeat Automation + +If the user explicitly wants unattended follow-up and the current Codex surface +supports thread heartbeat automations, prefer a thread-attached heartbeat that: + +- watches a specific run ID +- posts only on meaningful change +- flags a possible stall using the heuristic above +- stops once the run reaches a terminal state + +Suggested reporting triggers: + +- status transition +- approval gate reached +- terminal result +- possible stall + +If heartbeat automation is unavailable on the current Codex surface, keep the +monitoring in-session instead of pretending the automation exists. 
diff --git a/.agents/skills/archon/references/repo-init.md b/.agents/skills/archon/references/repo-init.md new file mode 100644 index 0000000000..081d17b392 --- /dev/null +++ b/.agents/skills/archon/references/repo-init.md @@ -0,0 +1,90 @@ +# Initializing Archon In A Repository For Codex + +Use this when the task is to add `.archon/` to a repository so Codex can create +or customize Archon commands and workflows there. + +## Directory Structure + +Create this in the repository root: + +```text +.archon/ +├── commands/ +├── workflows/ +├── mcp/ # optional; mainly relevant for Claude node-local MCP config +└── config.yaml # optional +``` + +Minimum setup: + +```bash +mkdir -p .archon/commands .archon/workflows +``` + +## Minimal Repo Config + +Create `.archon/config.yaml` only when the repo needs non-default behavior: + +```yaml +assistant: codex + +worktree: + baseBranch: main + copyFiles: + - .env + - .env.local + +defaults: + loadDefaultCommands: true + loadDefaultWorkflows: true +``` + +Notes: + +- `assistant: codex` makes this repo prefer Codex under Archon +- `worktree.copyFiles` is only needed when worktrees need copied local files +- bundled defaults do not need to be copied into the repo to be available + +## Bundled Default Behavior + +Archon ships bundled workflows and commands. Repo-local files override bundled +files with the same name. + +- `archon workflow list` shows discovered workflows +- repo `.archon/workflows/*` overrides bundled workflows with the same name +- repo `.archon/commands/*` overrides bundled commands with the same name + +## Optional MCP Directory + +Keep `.archon/mcp/` optional in Codex-first guidance. + +Why: + +- Archon supports node-local `mcp:` for Claude workflows +- Codex does not use `mcp:` as a node-local parity surface +- Codex MCP configuration belongs in Codex config rather than in workflow YAML + +## Global Config Reminder + +Global config lives at `~/.archon/config.yaml`. 
+ +If the goal is a Codex-first Archon environment more broadly, that file can use: + +```yaml +defaultAssistant: codex +assistants: + codex: + model: gpt-5.4 + modelReasoningEffort: medium + webSearchMode: live +``` + +## Verification + +After initialization: + +```bash +archon workflow list --json +``` + +The repo should now expose bundled workflows plus any repo-local custom ones. diff --git a/.agents/skills/archon/references/variables.md b/.agents/skills/archon/references/variables.md new file mode 100644 index 0000000000..f7e8d737b4 --- /dev/null +++ b/.agents/skills/archon/references/variables.md @@ -0,0 +1,70 @@ +# Variable Substitution Reference For Codex + +Variables are placeholders in command files and workflow prompts. Archon +replaces them at execution time. + +## Variable Table + +| Variable | Scope | Description | +| --- | --- | --- | +| `$ARGUMENTS` | all modes | The original user message passed to the workflow | +| `$USER_MESSAGE` | all modes | Alias for `$ARGUMENTS` | +| `$WORKFLOW_ID` | all modes | Unique workflow run ID | +| `$ARTIFACTS_DIR` | all modes | Pre-created artifact directory for the current run | +| `$BASE_BRANCH` | all modes | Base branch name, auto-detected or configured via `worktree.baseBranch` | +| `$DOCS_DIR` | all modes | Repo docs directory, from `docs.path` or default `docs/` | +| `$CONTEXT` | all modes | GitHub issue or PR context when the platform provides it | +| `$EXTERNAL_CONTEXT` | all modes | Alias for `$CONTEXT` | +| `$ISSUE_CONTEXT` | all modes | Alias for `$CONTEXT` | +| `$LOOP_USER_INPUT` | interactive loop resumes | User feedback injected on the first resumed iteration, empty otherwise | +| `$REJECTION_REASON` | approval `on_reject` prompts | Reviewer feedback captured when an approval node rejects and re-prompts | +| `$nodeId.output` | DAG only | Full output from a completed upstream node | +| `$nodeId.output.field` | DAG only | JSON field access on structured output from an upstream node | + +## Where Variables 
Are Substituted + +- command files in `.archon/commands/*.md` +- inline `prompt:` fields +- `loop.prompt:` fields +- approval `on_reject.prompt` fields +- `bash:` scripts in DAG nodes + +In `bash:` nodes, `$nodeId.output` values are automatically shell-quoted before +injection. + +## Substitution Order + +1. standard workflow variables such as `$WORKFLOW_ID`, `$ARGUMENTS`, + `$ARTIFACTS_DIR`, `$BASE_BRANCH`, `$DOCS_DIR`, and `$CONTEXT` +2. node output references such as `$nodeId.output` and `$nodeId.output.field` + +## Structured Output Notes + +`$nodeId.output.field` only works when the upstream node produced structured +output through `output_format:`. + +For Codex, `output_format:` is a real supported workflow surface. It maps to the +Codex client's structured-output path rather than being a Claude-only feature. + +## Context Auto-Append + +If a prompt template does not mention `$CONTEXT`, `$EXTERNAL_CONTEXT`, or +`$ISSUE_CONTEXT` anywhere but Archon has external context available, Archon may +append that context automatically after a separator. + +## Literal Dollar Signs + +Use `\\$` to produce a literal `$` without substitution. + +## Unknown References + +Unknown node references resolve to an empty string with a warning in the logs. +Do not depend on missing-node references as control flow. + +## Interactive Workflow Notes + +- `$LOOP_USER_INPUT` is only populated when an interactive loop resumes after an + approval round-trip +- `$REJECTION_REASON` is only populated for an approval node's `on_reject` + branch +- outside those contexts, both variables resolve to an empty string diff --git a/.agents/skills/archon/references/workflow-dag.md b/.agents/skills/archon/references/workflow-dag.md new file mode 100644 index 0000000000..3035a55be4 --- /dev/null +++ b/.agents/skills/archon/references/workflow-dag.md @@ -0,0 +1,230 @@ +# Workflow Authoring For Codex + +Archon workflows use a DAG format: nodes with explicit dependencies. 
This is a +shared Archon surface for both Claude and Codex, but some node fields behave +differently by provider. + +## Schema + +```yaml +name: my-workflow +description: What this workflow does + +provider: codex # optional; 'claude' or 'codex' +model: gpt-5.4 # optional model override + +nodes: + - id: first-node + command: my-command + - id: second-node + prompt: "Use the output: $first-node.output" + depends_on: [first-node] +``` + +## Seven Node Types + +Each node must define exactly one of: + +- `command` +- `prompt` +- `bash` +- `script` +- `loop` +- `approval` +- `cancel` + +### Command Node + +```yaml +- id: investigate + command: investigate-issue +``` + +### Prompt Node + +```yaml +- id: classify + prompt: "Classify this issue: $ARGUMENTS" +``` + +### Bash Node + +```yaml +- id: fetch-data + bash: "gh issue view 42 --json title,body" + timeout: 15000 +``` + +### Script Node + +```yaml +- id: summarize + script: scripts/summarize_issue.py + runtime: uv + deps: + - pyyaml +``` + +### Loop Node + +```yaml +- id: implement + loop: + prompt: "Implement the next task. When complete: DONE" + until: DONE + max_iterations: 10 + fresh_context: true + until_bash: "bun run test" +``` + +### Approval Node + +```yaml +- id: approve-plan + approval: + message: "Approve the plan draft?" 
+ capture_response: true + on_reject: + prompt: "Revise the plan using this feedback: $REJECTION_REASON" +``` + +### Cancel Node + +```yaml +- id: stop-run + cancel: "Human rejected the proposal" +``` + +## Shared Node Fields + +| Field | Description | +| --- | --- | +| `id` | unique node identifier | +| `depends_on` | upstream node IDs | +| `when` | condition expression | +| `trigger_rule` | join semantics for dependencies | +| `context` | `fresh` or `shared` assistant-session behavior | +| `idle_timeout` | per-node or per-iteration idle timeout | + +## Provider-Aware Node Fields + +These fields are shared and meaningful for Codex: + +| Field | Codex status | Notes | +| --- | --- | --- | +| `provider` | supported | workflow-level or node-level | +| `model` | supported | workflow-level and node-level, including loop nodes | +| `output_format` | supported | structured output works on Codex | +| `retry` | supported except loop nodes | loop-node retry is still a hard error | + +These fields are not Codex per-node parity features: + +| Field | Codex status | Notes | +| --- | --- | --- | +| `hooks` | ignored | Claude-only node control | +| `mcp` | ignored per-node | Codex MCP is global, not node-local | +| `skills` | ignored per-node | Codex skill discovery is global or repo-level | +| `allowed_tools` | ignored | Claude-only node control | +| `denied_tools` | ignored | Claude-only node control | + +## Workflow-Level Codex Fields + +These are workflow-level controls, not node-level controls: + +| Field | Codex status | Notes | +| --- | --- | --- | +| `interactive` | supported | workflow-level switch for approval delivery and interactive loop behavior | +| `modelReasoningEffort` | supported | workflow-level override, with `assistants.codex.modelReasoningEffort` as fallback | +| `webSearchMode` | supported | workflow-level override, with `assistants.codex.webSearchMode` as fallback | +| `additionalDirectories` | supported | workflow-level override, with 
`assistants.codex.additionalDirectories` as fallback | + +Precedence for these workflow-level Codex tuning fields is: + +1. workflow YAML +2. `assistants.codex.*` from Archon config +3. SDK defaults + +## Conditions + +Use `when:` for simple routing: + +```yaml +- id: investigate + command: investigate-bug + depends_on: [classify] + when: "$classify.output.issue_type == 'bug'" +``` + +Supported operators in workflow conditions remain the same regardless of +provider. + +## Structured Output + +Structured output is a real Codex-safe feature: + +```yaml +- id: classify + prompt: "Classify this issue" + output_format: + type: object + properties: + issue_type: + type: string + enum: [bug, feature] + required: [issue_type] +``` + +This enables downstream references such as `$classify.output.issue_type`. + +## Loop Notes + +Loop nodes support: + +- `loop.prompt` +- `until` +- `max_iterations` +- `fresh_context` +- `interactive` +- `gate_message` +- `until_bash` + +Do not treat loop nodes as a place for advanced per-node Codex controls. Fields +such as `hooks`, `mcp`, `skills`, tool restrictions, and retry either do not +apply or are ignored. + +## Approval And Cancel Notes + +- `approval` pauses the workflow for human input +- `approval.on_reject.prompt` can use `$REJECTION_REASON` +- `approval.capture_response` preserves the reviewer response for downstream use +- `cancel` ends the workflow intentionally with a human-readable reason + +## Resume On Failure + +```bash +archon workflow run my-workflow --resume +``` + +Completed nodes are skipped on resume. + +## Validation + +Before treating a workflow as done, validate it: + +```bash +archon validate workflows +``` + +This checks YAML structure, dependency references, command existence, and +provider compatibility warnings. + +## Authoring Rule For Codex + +If a workflow depends on per-node hooks, per-node MCP, per-node skills, or +per-node tool restrictions, do not present it as Codex-safe. 
Use a Codex +variant or document the degraded behavior explicitly. + +## Example + +See `examples/dag-workflow.yaml` for a Codex-safe reference workflow that keeps +to shared or explicitly supported Codex surfaces. diff --git a/.archon/commands/defaults/archon-assist-codex.md b/.archon/commands/defaults/archon-assist-codex.md new file mode 100644 index 0000000000..82688d0181 --- /dev/null +++ b/.archon/commands/defaults/archon-assist-codex.md @@ -0,0 +1,89 @@ +--- +description: General Codex assistance - questions, debugging, one-off tasks, exploration +argument-hint: +--- + +# Codex Assist Mode + +**Request**: $ARGUMENTS + +--- + +You are helping with a request that did not match a more specific Codex-safe +workflow. + +This lane is the fallback, not the default bootstrap for all Codex usage. + +## Instructions + +1. **Understand the request** - Identify whether this is a question, debugging + task, repo exploration, a one-off change, or a CI/problem investigation. + - If the request is substantial multi-file implementation work, guided + development, interactive refinement, or any task that clearly wants a + human-in-the-loop build/review cycle, stop and route to + `archon-piv-loop-codex` instead of continuing in assist mode. + - If the user is explicitly asking to run a specific Archon workflow, honor + that direct workflow route rather than staying in assist mode. +2. **Ground yourself in the repo** - Search the codebase, read the relevant + files, and understand the current implementation before acting. +3. **Read repo guidance explicitly when needed** + - Read `AGENTS.md` if it exists. + - Read `CLAUDE.md` if it exists and the task depends on repo conventions, + architecture guidance, or workflow rules stored there. + - Do not assume `CLAUDE.md` was automatically loaded by Codex. +4. **Use Codex capabilities directly** - Read and edit files, run commands, + inspect git state, and validate relevant changes. 
+ - If you are going to write files in assist mode, prove the assigned + worktree first with `pwd`, `git rev-parse --show-toplevel`, and + `git branch --show-current`. + - If you claim file changes, prove they landed in the current worktree with + `git status --short` or `git diff --name-only` before closing out. + - If the repo is clean after claimed edits, treat that as a workflow/path + mismatch and report it explicitly instead of claiming success. +5. **Call out routing gaps** - If this should have been a narrower Codex + workflow, mention: + "Note: Using archon-assist-codex. Consider creating or using a more specific + Codex workflow for this use case." + +## Workflow Log Debugging + +When the request is mainly about a failed, paused, or confusing workflow run: + +1. **Check the active surface first** + - Terminal or server output for Archon runtime logs + - Web UI run details or `archon workflow status --verbose` for current run + state +2. **Open the raw per-run JSONL when you need the full trace** + - Default path: + `~/.archon/workspaces///logs/.jsonl` + - If `ARCHON_HOME` is set, use that base directory instead of `~/.archon` +3. **Increase verbosity when current output is too thin** + - `archon --verbose workflow run "..."` + - `LOG_LEVEL=debug ` for Archon process logs +4. **Use the detailed reference for repeated log analysis** + - Read `.claude/skills/archon/references/log-debugging.md` + +## Guardrails + +- Prefer small, reversible changes. +- Use project-defined validation commands when relevant. +- Report validation failures honestly. +- Do not present assist mode as the required entrypoint for Archon. If a direct + workflow lane fits, use it. +- Do not rely on Claude-only workflow-node features such as `skills`, `hooks`, + `mcp`, `allowed_tools`, or `denied_tools`. +- If the user explicitly wants the Claude-oriented assist lane instead, say so + and route them to `archon-assist`. 
+
+## Capabilities
+
+You have full Codex capabilities as configured by Archon:
+- Read and write files
+- Run commands
+- Search the codebase
+- Make code changes
+- Answer questions
+
+## Request
+
+$ARGUMENTS
diff --git a/.archon/scripts/detect-project.ts b/.archon/scripts/detect-project.ts
new file mode 100644
index 0000000000..a02f775bb9
--- /dev/null
+++ b/.archon/scripts/detect-project.ts
@@ -0,0 +1,229 @@
+#!/usr/bin/env bun
+
+import { spawnSync } from 'node:child_process';
+import { existsSync, readFileSync, readdirSync } from 'node:fs';
+import { dirname, isAbsolute, join } from 'node:path';
+
+interface DetectionResult { // detected toolchain; empty string means "no command found"
+  projectType: string;
+  installCmd: string;
+  validateCmd: string;
+  typecheckCmd: string;
+  lintCmd: string;
+  testCmd: string;
+  formatCmd: string;
+}
+
+function fileExists(path: string): boolean { // existence check relative to cwd
+  return existsSync(path);
+}
+
+function readText(path: string): string | null { // null on any read failure
+  try {
+    return readFileSync(path, 'utf8');
+  } catch {
+    return null;
+  }
+}
+
+function packageHasScript(name: string): boolean { // does package.json declare this script?
+  const raw = readText('package.json');
+  if (raw === null) return false;
+
+  try {
+    const parsed = JSON.parse(raw) as { scripts?: Record<string, string> };
+    return typeof parsed.scripts?.[name] === 'string';
+  } catch {
+    return raw.includes(`"${name}"`); // malformed JSON: fall back to a substring probe
+  }
+}
+
+function makefileHasTarget(name: string): boolean { // line-anchored "target:" probe
+  const raw = readText('Makefile');
+  if (raw === null) return false;
+  return new RegExp(`^${name}:`, 'm').test(raw); // callers pass literal names; no regex metachars
+}
+
+function hasPythonTestSignal(): boolean { // any tests/test_*.py present?
+  if (!fileExists('tests')) return false;
+
+  try {
+    return readdirSync('tests').some(entry => entry.startsWith('test_') && entry.endsWith('.py'));
+  } catch {
+    return false;
+  }
+}
+
+function resolveSourceRepo(): string { // main checkout root, even when cwd is a linked worktree
+  const result = spawnSync('git', ['rev-parse', '--git-common-dir'], { encoding: 'utf8' });
+  if (result.status !== 0) return '';
+
+  const gitCommon = result.stdout.trim();
+  if (gitCommon.length === 0) return '';
+
+  const absCommon =
isAbsolute(gitCommon) ? gitCommon : join(process.cwd(), gitCommon);
+  return dirname(absCommon); // parent of the common .git dir is the source repo root
+}
+
+function commandExists(command: string): boolean { // PATH probe via `which`; assumes unix-like host
+  const result = spawnSync('which', [command], { stdio: 'ignore' });
+  return result.status === 0;
+}
+
+function detectProject(): DetectionResult { // first matching ecosystem wins; branch order is intentional
+  const result: DetectionResult = {
+    projectType: 'unknown',
+    installCmd: '',
+    validateCmd: '',
+    typecheckCmd: '',
+    lintCmd: '',
+    testCmd: '',
+    formatCmd: '',
+  };
+
+  if (fileExists('bun.lock') || fileExists('bun.lockb')) { // bun checked before generic node
+    result.projectType = 'bun';
+    result.installCmd = 'bun install --frozen-lockfile';
+    if (packageHasScript('validate')) result.validateCmd = 'bun run validate';
+    if (packageHasScript('type-check')) result.typecheckCmd = 'bun run type-check';
+    if (packageHasScript('lint')) result.lintCmd = 'bun run lint';
+    if (packageHasScript('test')) result.testCmd = 'bun run test';
+    if (packageHasScript('format:check')) result.formatCmd = 'bun run format:check';
+    return finalize(result);
+  }
+
+  if (
+    fileExists('pyproject.toml') ||
+    fileExists('requirements.txt') ||
+    fileExists('setup.py') ||
+    hasPythonTestSignal()
+  ) {
+    result.projectType = 'python';
+
+    const sourceRepo = resolveSourceRepo(); // lets a worktree reuse the main checkout's venv
+    let venvBin = '';
+    if (fileExists('.venv') && fileExists('.venv/bin/python')) {
+      venvBin = '.venv/bin';
+    } else if (sourceRepo && fileExists(join(sourceRepo, '.venv/bin/python'))) {
+      venvBin = join(sourceRepo, '.venv/bin');
+    }
+
+    if (venvBin) { // only venv-local tools are used; no globally installed tooling assumed
+      if (fileExists(join(venvBin, 'pytest'))) result.testCmd = `${venvBin}/pytest tests/`;
+      if (fileExists(join(venvBin, 'ruff'))) {
+        result.lintCmd = `${venvBin}/ruff check .`;
+        result.formatCmd = `${venvBin}/ruff format --check .`;
+      }
+      if (fileExists(join(venvBin, 'mypy'))) result.typecheckCmd = `${venvBin}/mypy .`;
+      if (fileExists(join(venvBin, 'pyright'))) { // pyright is appended after mypy when both exist
+        result.typecheckCmd = result.typecheckCmd
+          ? `${result.typecheckCmd} && ${venvBin}/pyright`
+          : `${venvBin}/pyright`;
+      }
+    }
+
+    if (fileExists('pyproject.toml')) { // install strategy follows the lockfile present
+      if (fileExists('uv.lock')) {
+        result.installCmd = 'uv sync';
+      } else if (fileExists('poetry.lock')) {
+        result.installCmd = 'poetry install';
+      } else if (venvBin) {
+        result.installCmd = `${venvBin}/python -m pip install -e .`;
+      }
+    } else if (fileExists('requirements.txt') && venvBin) {
+      result.installCmd = `${venvBin}/python -m pip install -r requirements.txt`;
+    }
+
+    return finalize(result);
+  }
+
+  if (fileExists('package.json')) {
+    result.projectType = 'node';
+    let run = 'npm run'; // script-runner prefix depends on the lockfile present
+    if (fileExists('pnpm-lock.yaml')) {
+      run = 'pnpm';
+      result.installCmd = 'pnpm install --frozen-lockfile';
+    } else if (fileExists('yarn.lock')) {
+      run = 'yarn';
+      result.installCmd = 'yarn install --frozen-lockfile';
+    } else {
+      result.installCmd = 'npm ci';
+    }
+
+    if (packageHasScript('validate')) result.validateCmd = `${run} validate`;
+    if (packageHasScript('type-check')) result.typecheckCmd = `${run} type-check`;
+    if (packageHasScript('lint')) result.lintCmd = `${run} lint`;
+    if (packageHasScript('test')) result.testCmd = `${run} test`;
+    if (packageHasScript('format:check')) result.formatCmd = `${run} format:check`;
+    return finalize(result);
+  }
+
+  if (fileExists('go.mod')) {
+    result.projectType = 'go';
+    result.installCmd = 'go mod download';
+    result.testCmd = 'go test ./...';
+    result.typecheckCmd = 'go vet ./...';
+    if (commandExists('golangci-lint')) result.lintCmd = 'golangci-lint run'; // optional external linter
+    result.formatCmd = 'gofmt -l .';
+    return finalize(result);
+  }
+
+  if (fileExists('Cargo.toml')) {
+    result.projectType = 'rust';
+    result.installCmd = 'cargo fetch';
+    result.testCmd = 'cargo test';
+    result.typecheckCmd = 'cargo check';
+    result.lintCmd = 'cargo clippy -- -D warnings';
+    result.formatCmd = 'cargo fmt -- --check';
+    return finalize(result);
+  }
+
+  if (fileExists('Makefile')) { // last resort: bare Makefile targets
+    result.projectType = 'makefile';
+    if
(makefileHasTarget('test')) result.testCmd = 'make test';
+    if (makefileHasTarget('lint')) result.lintCmd = 'make lint';
+    if (makefileHasTarget('check')) result.validateCmd = 'make check';
+  }
+
+  return finalize(result);
+}
+
+function finalize(result: DetectionResult): DetectionResult { // derive validateCmd when none was detected
+  if (result.validateCmd.length === 0) {
+    const parts = [
+      result.typecheckCmd,
+      result.lintCmd,
+      result.testCmd,
+      result.formatCmd,
+    ].filter(part => part.length > 0);
+    result.validateCmd = parts.join(' && '); // chain whichever individual checks exist
+  }
+
+  return result;
+}
+
+function emit(result: DetectionResult): void { // prints KEY=VALUE lines for the workflow to parse
+  console.log('=== PROJECT DETECTION ===');
+  console.log(`PROJECT_TYPE=${result.projectType}`);
+  console.log(`INSTALL_CMD=${result.installCmd}`);
+  console.log(`VALIDATE_CMD=${result.validateCmd}`);
+  console.log(`TYPECHECK_CMD=${result.typecheckCmd}`);
+  console.log(`LINT_CMD=${result.lintCmd}`);
+  console.log(`TEST_CMD=${result.testCmd}`);
+  console.log(`FORMAT_CMD=${result.formatCmd}`);
+  console.log('=== END DETECTION ===');
+
+  if ( // no validator of any kind was found: warn, do not fail
+    result.validateCmd.length === 0 &&
+    result.testCmd.length === 0 &&
+    result.typecheckCmd.length === 0 &&
+    result.lintCmd.length === 0
+  ) {
+    console.log('');
+    console.log('NOTE: No automated validators detected in this project.');
+    console.log('The implement loop will proceed without automated validation gates.');
+    console.log('Human review is required before merging any changes.');
+  }
+}
+
+emit(detectProject());
diff --git a/.archon/scripts/tsconfig.json b/.archon/scripts/tsconfig.json
new file mode 100644
index 0000000000..433ecdf6b4
--- /dev/null
+++ b/.archon/scripts/tsconfig.json
@@ -0,0 +1,7 @@
+{
+  "extends": "../../tsconfig.json",
+  "compilerOptions": {
+    "noEmit": true
+  },
+  "include": ["./**/*.ts"]
+}
diff --git a/.archon/workflows/defaults/archon-assist-codex.yaml b/.archon/workflows/defaults/archon-assist-codex.yaml
new file mode 100644
index 0000000000..e2d20ce172
--- /dev/null
+++
b/.archon/workflows/defaults/archon-assist-codex.yaml @@ -0,0 +1,18 @@ +name: archon-assist-codex +description: | + Use when: No other Codex-safe workflow matches the request, or the user wants general help through Codex. + Triggers: "codex assist", "assist codex", "archon assist codex", "codex archon", + "use archon codex", "general help codex", "codex workflow help". + Handles: Questions, debugging, exploration, one-off tasks, explanations, CI failures, general help. + Capability: Full Codex agent session with file, shell, git, and network access as configured by Archon. + This is the fallback Codex lane, not the default bootstrap for all Archon work. + NOT for: Claude-tuned assist mode (use archon-assist), direct workflow requests that + already match a narrower Codex-safe workflow, or guided Codex PIV work + (use archon-piv-loop-codex). + Note: Will inform user when Codex assist mode is used for tracking. + +provider: codex + +nodes: + - id: assist + command: archon-assist-codex diff --git a/.archon/workflows/defaults/archon-piv-loop-codex.README.md b/.archon/workflows/defaults/archon-piv-loop-codex.README.md new file mode 100644 index 0000000000..21e3969dbc --- /dev/null +++ b/.archon/workflows/defaults/archon-piv-loop-codex.README.md @@ -0,0 +1,188 @@ +--- +title: archon-piv-loop-codex execution notes +workflow: archon-piv-loop-codex +doc_type: workflow-reference +updated: 2026-04-12 +--- + +# archon-piv-loop-codex + +This is a companion note for [`archon-piv-loop-codex.yaml`](./archon-piv-loop-codex.yaml). + +It answers one specific operational question: when the workflow keeps model context, +when it starts a fresh Codex thread, and where human approval pauses/resumes happen. + +## Short answer + +The workflow is **not** one single context window from start to finish. 
+ +It is one workflow run, but it executes as a series of nodes: + +- some nodes are **non-AI** (`script` / `bash`) and invoke no model +- some nodes are **fresh Codex sessions** +- some nodes are **interactive loops** that keep their own loop session across + iterations +- the `implement` loop is intentionally **fresh every iteration** + +## Source of truth + +The behavior below is grounded in: + +- [`archon-piv-loop-codex.yaml`](./archon-piv-loop-codex.yaml) +- [`packages/workflows/src/dag-executor.ts`](../../../packages/workflows/src/dag-executor.ts) +- [`packages/workflows/src/schemas/loop.ts`](../../../packages/workflows/src/schemas/loop.ts) +- [`packages/core/src/clients/codex.ts`](../../../packages/core/src/clients/codex.ts) + +## Context model + +Two different things matter: + +1. **Workflow run** + One Archon workflow record spanning the whole PIV process. +2. **Codex session/thread** + The actual model conversation context used for a node or loop iteration. + +The workflow run is continuous. The Codex thread is not. 
+ +## Phase-by-phase session behavior + +| Phase | Node | Model invocation | Context behavior | +|------|------|------------------|------------------| +| Explore | `explore` | Codex loop | Fresh on iteration 1, then reuses the loop session across approval/feedback rounds | +| Detect | `detect-project` | None | `script` node, no model | +| Plan | `create-plan` | Codex prompt node | Fresh session because `context: fresh` | +| Plan refine | `refine-plan` | Codex loop | Fresh on iteration 1, then reuses the loop session across review rounds | +| Implement setup | `implement-setup` | None | `bash` node, no model | +| Implement | `implement` | Codex loop | Fresh session every iteration because `fresh_context: true` | +| Code review | `code-review` | Codex prompt node | Fresh session because `context: fresh` | +| Fix feedback | `fix-feedback` | Codex loop | Fresh on iteration 1, then reuses the loop session across feedback rounds | +| Finalize | `finalize` | Codex prompt node | Fresh session because `context: fresh` | + +## Where context is reset + +These are the explicit reset points in the YAML: + +- `create-plan` sets `context: fresh` +- `implement` sets `loop.fresh_context: true` +- `code-review` sets `context: fresh` +- `finalize` sets `context: fresh` + +These are the implicit loop reset rules enforced by the executor: + +- every loop starts with a fresh session on **iteration 1** +- later loop iterations reuse the loop's saved session unless + `loop.fresh_context: true` + +That means: + +- `explore`, `refine-plan`, and `fix-feedback` keep loop-local context after the + first turn +- `implement` does not; each task iteration is a fresh Codex thread + +## Flow + +```mermaid +flowchart TD + A["explore loop + fresh on first turn + then reuse loop session"] --> B["detect-project + script node + no model"] + B --> C["create-plan + fresh Codex session"] + C --> D["refine-plan loop + fresh on first turn + then reuse loop session"] + D --> E["implement-setup + bash node + 
no model"] + E --> F["implement loop + fresh Codex session every iteration"] + F --> G["code-review + fresh Codex session"] + G --> H["fix-feedback loop + fresh on first turn + then reuse loop session"] + H --> I["finalize + fresh Codex session"] + + A -. "/workflow approve <feedback>" .-> A + D -. "/workflow approve <feedback>" .-> D + H -. "/workflow approve <feedback>" .-> H +``` + +## What the executor actually does + +### Prompt and command nodes + +For normal AI nodes, the DAG executor decides whether to reuse a prior session: + +- if the node is in a parallel layer, it is fresh +- if the node has `context: fresh`, it is fresh +- otherwise it can inherit the last sequential session + +In this workflow, the prompt nodes that matter are already marked fresh where a +reset is desired, so they do not inherit prior prompt-node context. + +### Loop nodes + +Loop nodes are handled on a separate execution path. They do **not** use the +same sequential-session logic as normal prompt nodes. + +Instead: + +- iteration 1 is always fresh +- later iterations reuse the loop's `currentSessionId` +- unless `fresh_context: true`, in which case every iteration is fresh + +This is why `implement` behaves differently from `explore`, `refine-plan`, and +`fix-feedback`. + +### Interactive pauses + +When an interactive loop does not emit its completion signal: + +1. Archon pauses the workflow run +2. stores loop metadata including the current loop `sessionId` +3. waits for `/workflow approve <feedback>` +4. resumes the loop on the next iteration with that feedback in + `$LOOP_USER_INPUT` + +This is a workflow pause/resume, not a whole-workflow context reset. + +## Important nuance on resume + +There are two resume cases: + +1. **Resume inside an interactive loop** + The loop can continue with the saved loop `sessionId`, unless that loop is + configured to force fresh iterations. +2. 
**Resume a prior DAG run more generally** + Archon can skip already-completed nodes, but the executor explicitly warns + that prior-node AI session context is not restored automatically. + +So "resume workflow" does not mean "restore one giant conversation across all +phases." + +## Why implement is intentionally fresh + +The implement loop prompt explicitly says the agent is in a **fresh session** +with no memory of previous iterations and must re-read the plan, progress, git +state, and files from disk. + +That makes task execution more deterministic: + +- one task per iteration +- less context drift across tasks +- each task grounded in repo state and artifacts, not remembered chat state + +## Practical takeaway + +If you are reasoning about this workflow operationally: + +- treat **disk artifacts and repo state** as the durable memory +- treat **loop-local session reuse** as available only inside `explore`, + `refine-plan`, and `fix-feedback` +- treat **implement** as stateless between task iterations except for what it + re-reads from disk +- treat `create-plan`, `code-review`, and `finalize` as clean-session nodes diff --git a/.archon/workflows/defaults/archon-piv-loop-codex.yaml b/.archon/workflows/defaults/archon-piv-loop-codex.yaml new file mode 100644 index 0000000000..6ed4779e48 --- /dev/null +++ b/.archon/workflows/defaults/archon-piv-loop-codex.yaml @@ -0,0 +1,1071 @@ +# Execution notes: see `.archon/workflows/defaults/archon-piv-loop-codex.README.md` +# for workflow flow, session reuse, and context reset behavior. +name: archon-piv-loop-codex +description: | + Use when: User wants guided Plan-Implement-Validate development with human-in-the-loop, + using Codex/GPT models instead of Claude. + Triggers: "piv codex", "codex piv", "piv loop codex", "gpt piv", "piv with gpt", + "guided development codex", "structured development codex". + NOT for: Claude-based PIV loops (use archon-piv-loop instead). 
+ NOT for: Autonomous implementation without planning (use archon-feature-development). + NOT for: PRD creation (use archon-interactive-prd). + NOT for: Ralph story-based implementation (use archon-ralph-dag). + + Codex/GPT variant of archon-piv-loop — same 4-phase Plan-Implement-Validate methodology, + with prompt refinements tuned for Codex's behavioral tendencies: + * Explicit numbered SIGNAL EMISSION CONTRACTs in place of Claude-style CRITICAL framing + * Hard stop discipline for the implement loop (Phase 4.5 STOP, "SELECT EXACTLY ONE") + * Per-file staging instead of `git add -A` to prevent task-scope leaks + * Explicit negative guardrails on the implement loop's COMPLETE signal + * Tightened refine-plan approval branch (no tool use on approval) + + Interactive PIV loop workflow — the foundational AI coding methodology: + 1. EXPLORE: Iterative conversation with human to understand the problem (arbitrary rounds) + 2. PLAN: Create structured plan -> iterative review & revision (arbitrary rounds) + 3. IMPLEMENT: Autonomous task-by-task implementation from plan (Ralph loop, hard stop per task) + 4. VALIDATE: Automated code review -> iterative human feedback & fixes (arbitrary rounds) + + The PIV loop comes AFTER a PRD exists. Each PIV loop focuses on ONE granular feature or bug fix. + Input: A description of what to build, a path to an existing plan, or a GitHub issue number. + +provider: codex +interactive: true + +nodes: + # ═══════════════════════════════════════════════════════════════ + # PHASE 1: EXPLORE — Iterative exploration with human + # Understand the idea, explore the codebase, converge on approach + # Loops until the user says they're ready to create the plan. + # ═══════════════════════════════════════════════════════════════ + + - id: explore + loop: + prompt: | + # PIV Loop — Exploration + + You are a senior engineering partner in an iterative exploration session. + Your goal: DEEPLY UNDERSTAND what to build before any code is written. 
+ + **User's request**: $ARGUMENTS + **User's latest input** (empty on first iteration): "$LOOP_USER_INPUT" + + --- + + ## If this is the FIRST iteration (no user input yet): + + **Important**: Do Steps 1-4 in order. Do not jump to questions until you have + shown the "## What I Understand" / "## What Already Exists" / "## Initial + Architecture Thoughts" sections first. + + ### Step 1: Parse the Input + + Determine what the user provided: + + **If it's a file path** (ends in `.md`, `.plan.md`, or `.prd.md`): + - Read the file + - If it's an existing plan → summarize it and ask if they want to refine or proceed + - If it's a PRD → identify the specific phase/feature to focus on + + **If it's a GitHub issue** (`#123` format): + - Fetch it: `gh issue view {number} --json title,body,labels,comments` + - Summarize the issue context + + **If it's free text**: + - This is a feature idea or bug description. Use it directly. + + ### Step 2: Explore the Codebase + + Before asking questions, DO YOUR HOMEWORK: + + 1. **Read CLAUDE.md** — understand project conventions, architecture, and constraints + 2. **Search for related code** — find existing implementations similar to what the user wants + 3. **Read key files** — understand the current state of code the user wants to change + 4. 
**Check recent git history** — `git log --oneline -20` for recent changes in the area + + ### Step 3: Present Your Understanding + + ``` + ## What I Understand + + You want to: {restated understanding in 2-3 sentences} + + ## What Already Exists + + - {file:line} — {what it does and how it relates} + - {file:line} — {what it does and how it relates} + - {pattern/component} — {how it could be extended or reused} + + ## Initial Architecture Thoughts + + Based on what exists, I'm thinking: + - {approach 1 — extend existing X} + - {approach 2 — if approach 1 doesn't work} + - {key architectural decision that needs your input} + ``` + + ### Step 4: Ask Targeted Questions + + Ask 4-6 questions focused on DECISIONS, not information gathering: + - Scope boundaries, architecture preferences, tech decisions + - Constraints, existing code extension vs fresh build, testing expectations + - Reference actual code you found — don't ask generic questions + + --- + + ## If the user has provided input (subsequent iterations): + + ### Step 1: Process Their Response + + Read their answers carefully. Identify: + - Decisions they've made + - Areas they want you to explore further + - Questions they asked YOU back (answer these with evidence!) + + ### Step 2: Do Targeted Research + + Based on their response: + - If they mentioned specific technologies → research best practices + - If they pointed you to specific code → read it thoroughly + - If they asked you to explore an area → do a thorough investigation + - If they made architecture decisions → validate against the codebase + + ### Step 3: Present Updated Understanding + + Show what you learned, answer their questions with file:line references, + and present your refined architecture recommendation. + + ### Step 4: Converge or Continue + + **If there are still important open questions:** + Ask 2-4 focused questions about remaining ambiguities. 
+ + **If the picture is clear and you have enough to create a plan:** + Present a final implementation summary: + + ``` + ## Implementation Summary + + ### What We're Building + {Clear, specific description} + + ### Scope Boundary + - IN: {what's included} + - OUT: {what's explicitly excluded} + + ### Architecture + - {key decisions} + + ### Files That Will Change + - `{file}` — {what changes and why} + + ### Success Criteria + - [ ] {specific, testable criterion} + - [ ] All validation passes + + ### Key Risks + - {risk — and mitigation} + ``` + + Then tell the user: "I have a clear picture. Say **ready** and I'll create + the structured implementation plan, or share any final thoughts." + + **SIGNAL EMISSION CONTRACT** — emit `PLAN_READY` only when + ALL of these are true: + 1. The user's LATEST message (not any earlier one) contains an explicit + approval phrase from this list: "ready", "create the plan", "let's go", + "proceed", "I'm done" + 2. The user's message does NOT contain a question, a new request, or + additional feedback to explore + 3. You have addressed everything the user asked in previous turns + + If ANY condition is false, DO NOT emit the tag. Continue the conversation + instead. When you reference this contract in your own output, write "the + PLAN_READY signal" (no angle brackets) so you do not trigger it accidentally. + until: PLAN_READY + max_iterations: 15 + interactive: true + gate_message: | + Answer the questions above, ask me to explore specific areas, + or say "ready" when you're satisfied with the exploration. + + # ═══════════════════════════════════════════════════════════════ + # PHASE 1b: DETECT — Discover project validation commands + # Runs once, output referenced by every downstream node so the + # workflow is portable across bun / npm / python / go / rust / etc. 
+ # ═══════════════════════════════════════════════════════════════ + + - id: detect-project + depends_on: [explore] + script: detect-project + runtime: bun + + # ═══════════════════════════════════════════════════════════════ + # PHASE 2: PLAN — Create the structured implementation plan + # ═══════════════════════════════════════════════════════════════ + + - id: create-plan + depends_on: [detect-project] + context: fresh + prompt: | + # PIV Loop — Create Structured Plan + + You are creating a structured implementation plan from a completed exploration phase. + This plan will be the SOLE GUIDE for the implementation agent — it must be complete, + specific, and actionable. + + **Original request**: $ARGUMENTS + **Final exploration summary**: $explore.output + + **Project detection output** (used ONLY to fill in the informational Validation Commands section of the plan): + $detect-project.output + + Parse the `PROJECT_TYPE=...`, `VALIDATE_CMD=...`, `TYPECHECK_CMD=...`, `LINT_CMD=...`, + `TEST_CMD=...`, `FORMAT_CMD=...` lines from the detection output above. These are + **informational** — copy them verbatim into the plan's `## Validation Commands` + section so the plan documents what detect-project found. + + **CRITICAL — DO NOT create a plan task whose purpose is running repo-wide + validators.** The implement loop uses each task's own per-task `**Validate:**` + field (task-scoped). Repo-wide validators (`TYPECHECK_CMD`, `LINT_CMD`, `TEST_CMD`, + `FORMAT_CMD`, `VALIDATE_CMD`) are executed once by the `code-review` node after all + implement tasks complete — never as part of the implement loop's Task List. + + Each `### Task N:` entry in the Task List must be EITHER: + - One CREATE or UPDATE change with a scoped `**Validate:**` field (a diff + readback, a targeted test command, or a file existence check), OR + - One verification-only task (like a pre-flight file-absence check) with a + `**Validate:**` field that reads current state without making changes. 
+ + **Never** a task whose `**Details:**` or `**Validate:**` field runs the repo-wide + validator suite. That pattern creates a deadlock on any repo with pre-existing + validator failures. Pre-existing repo-wide issues are the `code-review` node's + concern, not the implement loop's. + + --- + + ## Step 1: Read the Codebase (Again) + + Before writing the plan, verify your understanding is current: + + 1. **Read CLAUDE.md** — capture all relevant conventions + 2. **Read every file you plan to change** — note exact current state + 3. **Read example test files** — understand testing patterns + 4. **Check for any recent changes** — `git log --oneline -10` + + ## Step 2: Determine Plan Location + + Generate a kebab-case slug from the feature name. + Save to `.claude/archon/plans/{slug}.plan.md`. + + ```bash + mkdir -p .claude/archon/plans + ``` + + ## Step 3: Write the Plan + + Use this template. Fill EVERY section with specific, verified information. + + ```markdown + # Feature: {Title} + + ## Summary + {1-2 sentences: what changes and why} + + ## Mission + {The core goal in one clear statement} + + ## Success Criteria + - [ ] {Specific, testable criterion for this task} + - [ ] No regressions introduced in files this task touches + - [ ] Code-review (which runs automatically after all implement tasks) surfaces + no NEW validator failures attributable to this change (pre-existing repo-wide + failures are out of scope for this task) + + ## Scope + ### In Scope + - {What we ARE building} + ### Out of Scope + - {What we are NOT building — and why} + + ## Codebase Context + ### Key Files + | File | Role | Action | + |------|------|--------| + | `{path}` | {what it does} | CREATE / UPDATE | + + ### Patterns to Follow + {Actual code snippets from the codebase to mirror} + + ## Architecture + - {Decision 1 — with rationale} + - {Decision 2 — with rationale} + + ## Task List + Execute in order. Each task is atomic and independently verifiable. 
+ + ### Task 1: {ACTION} `{file path}` + **Action**: CREATE / UPDATE + **Details**: {Exact changes — specific enough for an agent with no context} + **Pattern**: Follow `{source file}:{lines}` + **Validate**: `{command to verify this task}` + + ## Testing Strategy + | Test File | Test Cases | Validates | + |-----------|-----------|-----------| + | `{path}` | {cases} | {what it validates} | + + ## Validation Commands + Fill these from the `detect-project` output above. If a command is empty, write + `(none available — human verification only)` instead of inventing one. + + 1. Type check: `{TYPECHECK_CMD from detect-project, or "none available"}` + 2. Lint: `{LINT_CMD, or "none available"}` + 3. Tests: `{TEST_CMD, or "none available"}` + 4. Format check: `{FORMAT_CMD, or "none available"}` + 5. Full validation: `{VALIDATE_CMD, or "none available"}` + 6. Project type: `{PROJECT_TYPE}` + + ## Risks + | Risk | Impact | Mitigation | + |------|--------|------------| + | {risk} | {HIGH/MED/LOW} | {specific mitigation} | + ``` + + ## Step 4: Verify the Plan + + 1. Check every file path referenced — verify they exist + 2. Check every pattern cited — verify the code matches + 3. Check task ordering — ensure dependencies are respected + 4. Check completeness — could an agent with NO context implement this? + + ## Step 5: Report + + ``` + ## Plan Created + + **File**: `.claude/archon/plans/{slug}.plan.md` + **Tasks**: {count} + **Files to change**: {count} + + Key decisions: + - {decision 1} + - {decision 2} + + Please review the plan and provide feedback. + ``` + + # ═══════════════════════════════════════════════════════════════ + # PHASE 2b: PLAN — Iterative plan refinement + # Review and revise the plan as many times as needed. + # ═══════════════════════════════════════════════════════════════ + + - id: refine-plan + depends_on: [create-plan] + loop: + prompt: | + # PIV Loop — Plan Refinement + + The user is reviewing the implementation plan and providing feedback. 
+ + **User's feedback** (empty on first iteration): "$LOOP_USER_INPUT" + + --- + + ## Step 1: Find and Read the Plan + + ```bash + ls -t .claude/archon/plans/*.plan.md 2>/dev/null | head -1 + ``` + + Read the entire plan file. Also read CLAUDE.md for conventions. + + ## Step 2: Process Feedback + + **If there is no user feedback yet** (first iteration, `$LOOP_USER_INPUT` is empty): + - Read the plan carefully + - Present a summary of the plan's key decisions and task list + - Ask the user to review and provide feedback + - Do NOT emit the completion signal on the first iteration + + **If the user EXPLICITLY approved** (said "approved", "looks good", "let's go", etc.): + - Do NOT open, read, or write the plan file. + - Do NOT run any tools. + - Output exactly this sentence: "Plan approved. Proceeding to implementation." + - Then emit `PLAN_APPROVED` on its own line. + + **If the user provided specific feedback:** + - Parse each piece of feedback + - Edit the plan file directly: + - Add/remove/modify tasks as requested + - Update success criteria if needed + - Adjust testing strategy if needed + - Re-verify file paths and patterns after changes + + **SIGNAL EMISSION CONTRACT** — emit `PLAN_APPROVED` only when + ALL of these are true: + 1. The user's LATEST message contains an explicit approval phrase: + "approved", "looks good", "ship it", "let's go", "proceed" + 2. The message does NOT contain questions, requested changes, or new feedback + 3. This is NOT the first iteration (empty `$LOOP_USER_INPUT`) + + If ANY condition is false, DO NOT emit the tag. When referencing this contract + in your output, write "the PLAN_APPROVED signal" (no angle brackets). + + ## Step 3: Show Changes + + ``` + ## Plan Revised + + Changes made: + - {change 1} + - {change 2} + + Updated stats: + - Tasks: {count} + - Files to change: {count} + + Review the updated plan and provide more feedback, or say "approved" to proceed. 
+ ``` + until: PLAN_APPROVED + max_iterations: 10 + interactive: true + gate_message: | + Review the plan document. Provide specific feedback on what to change, + or say "approved" to begin implementation. + + # ═══════════════════════════════════════════════════════════════ + # PHASE 3: IMPLEMENT — Setup + # Read the plan, prepare the environment + # ═══════════════════════════════════════════════════════════════ + + - id: implement-setup + depends_on: [refine-plan, detect-project] + bash: | + set -e + + PLAN_FILE=$(ls -t .claude/archon/plans/*.plan.md 2>/dev/null | head -1) + + if [ -z "$PLAN_FILE" ]; then + echo "ERROR: No plan file found in .claude/archon/plans/" + exit 1 + fi + + # Dependency install is portable — mirrors the detection node's logic. + # Failures here are non-fatal; the implement loop will surface them. + if [ -f "bun.lock" ] || [ -f "bun.lockb" ]; then + echo "Installing bun dependencies..." + bun install --frozen-lockfile 2>&1 | tail -3 || echo "NOTE: bun install failed; continuing" + elif [ -f "package-lock.json" ]; then + npm ci 2>&1 | tail -3 || echo "NOTE: npm ci failed; continuing" + elif [ -f "yarn.lock" ]; then + yarn install --frozen-lockfile 2>&1 | tail -3 || echo "NOTE: yarn install failed; continuing" + elif [ -f "pnpm-lock.yaml" ]; then + pnpm install --frozen-lockfile 2>&1 | tail -3 || echo "NOTE: pnpm install failed; continuing" + elif [ -f "uv.lock" ]; then + uv sync 2>&1 | tail -3 || echo "NOTE: uv sync failed; continuing" + elif [ -f "poetry.lock" ]; then + poetry install 2>&1 | tail -3 || echo "NOTE: poetry install failed; continuing" + elif [ -f "go.mod" ]; then + go mod download 2>&1 | tail -3 || echo "NOTE: go mod download failed; continuing" + elif [ -f "Cargo.toml" ]; then + cargo fetch 2>&1 | tail -3 || echo "NOTE: cargo fetch failed; continuing" + else + echo "NOTE: no recognized lockfile — skipping dependency install step" + fi + + echo "BRANCH=$(git branch --show-current)" + echo "GIT_ROOT=$(git rev-parse 
--show-toplevel)" + echo "PLAN_FILE=$PLAN_FILE" + + echo "=== PLAN_START ===" + cat "$PLAN_FILE" + echo "" + echo "=== PLAN_END ===" + + TASK_COUNT=$(grep -c "^### Task [0-9]" "$PLAN_FILE" || true) + echo "TASK_COUNT=${TASK_COUNT:-0}" + + # ═══════════════════════════════════════════════════════════════ + # PHASE 3b: IMPLEMENT — Task-by-Task Loop (Ralph pattern, hard stop per task) + # Fresh context each iteration. Reads plan from disk. + # EXACTLY ONE task per iteration. Validates before committing. + # ═══════════════════════════════════════════════════════════════ + + - id: implement + depends_on: [implement-setup] + idle_timeout: 600000 + loop: + prompt: | + # PIV Loop — Implementation Agent + + You are an autonomous coding agent in a FRESH session — no memory of previous iterations. + Your job: Read the plan from disk, implement EXACTLY ONE task, run the task's + own task-scoped verification, commit, update tracking, exit. **Repo-wide + validation is `code-review`'s responsibility — not yours.** You do not run + repo-wide validators at any phase of this loop. + + **Golden Rule 1**: If the task's OWN verification (Phase 3) fails, fix the + task-specific issue before committing. Never commit broken code. But NEVER + touch pre-existing repo-wide issues — those are out of your scope. + **Golden Rule 2**: One task per iteration. The loop engine starts a fresh iteration for the next task. + + --- + + ## Phase 0: CONTEXT — Load State + + The setup node produced this context: + + $implement-setup.output + + **Project detection** (repo-wide validators — INFORMATIONAL ONLY, do not run): + $detect-project.output + + **User's original request**: $USER_MESSAGE + + The `VALIDATE_CMD`, `TYPECHECK_CMD`, `LINT_CMD`, `TEST_CMD`, `FORMAT_CMD` values + from detect-project are **informational only**. The implement loop does **NOT** + run them at any phase. They run later in the `code-review` node. 
The implement + loop uses only the TASK-SCOPED `**Validate:**` field from each task in the plan. + + If you find yourself about to run `ruff check .`, `pytest tests/`, + `bun run validate`, or any similar repo-wide command — STOP. That is not your + job. Your job is the single task from the plan, with the task's own per-task + `**Validate:**` check. + + --- + + ### 0.1 Parse Plan File + + Extract the `PLAN_FILE=...` line from the context above. + + ### 0.2 Read Current State (from disk — not from context above) + + The context above is a snapshot from before the loop started. Previous iterations + may have changed things. **You MUST re-read from disk:** + + 1. **Read the plan file** — your implementation guide + 2. **Read progress tracking** — check if `$ARTIFACTS_DIR/progress.txt` exists + 3. **Read CLAUDE.md** — project conventions and constraints + + ### 0.3 Check Git State + + ```bash + git log --oneline -10 + git status + ``` + + Record the current HEAD hash — you will compare against this at the end of the iteration. + + --- + + ## Phase 1: SELECT EXACTLY ONE — Pick The Single Next Task + + From the plan file, identify tasks by `### Task N:` headers. + Cross-reference with commits from previous iterations and progress tracking. + + **STRICT ORDERING**: Always select the LOWEST-numbered incomplete task. Never + skip to a later task, even if it seems more actionable or if the earlier task + is "just a verification". Verification tasks are load-bearing — they are + numbered first precisely because they must run first and their evidence must + be captured before the state changes. + + If Task N is already completed (per `$ARTIFACTS_DIR/progress.txt`), move to + Task N+1. If Task N is incomplete, work on Task N — never Task N+1 or later. + + **If ALL tasks are complete** → Skip to Phase 5 (Completion). 
+ + ### Announce Selection + + ``` + -- Task Selected ------------------------------------------------ + Task: {N} — {task title} + Action: {CREATE / UPDATE} + File: {file path} + ----------------------------------------------------------------- + ``` + + --- + + ## Phase 2: IMPLEMENT — Execute ONLY This Task + + 1. Read the file you're about to change (if it exists) + 2. Read the pattern file referenced in the plan + 3. Make changes following the plan EXACTLY — for Task {N} ONLY + + Do NOT read, edit, or prepare files for Task N+1 in this iteration. + + **DO NOT run repo-wide validators** (`TYPECHECK_CMD`, `LINT_CMD`, `TEST_CMD`, + `FORMAT_CMD`) during Phase 2. Those run once in `code-review` after the + implement loop finishes — not here. Task-scoped verification happens in Phase 3 + using the task's own `**Validate:**` field from the plan. + + --- + + ## Phase 3: VALIDATE — Verify the Task (task-scoped, NOT repo-wide) + + Run ONLY the task's own `**Validate:**` command from the plan file. This is a + TASK-SCOPED check (a diff readback, a targeted test command, a file existence + check). It is **NOT** the repo-wide validator suite. + + **DO NOT run `TYPECHECK_CMD`, `LINT_CMD`, `TEST_CMD`, or `FORMAT_CMD` in this + phase.** Those are repo-wide commands from the `detect-project` output. They + check whether the WHOLE REPO is healthy — a different question from "did my + single task land correctly". Pre-existing repo-wide failures are NOT this + task's responsibility; they are handled by the `code-review` node later, which + runs the repo-wide suite once (not per task) and may make fix commits for + issues it surfaces. 
+ If the task's `**Validate:**` command is missing or empty in the plan, fall + back to a minimal task-scoped check based on the task's Action: + - `CREATE` / `UPDATE`: `git diff --stat <file>` plus `cat <file>` + (confirm the diff shape and content match the intent) + - Verification-only task: no action — proceed directly to Phase 4 + + If the task's `**Validate:**` command fails: + 1. The failure is task-specific (not repo-wide). Fix the task-specific issue + and re-run, up to 3 attempts. + 2. If still unfixable, record the block in `$ARTIFACTS_DIR/progress.txt` and + do NOT commit. + + **Never fail the iteration because of pre-existing repo-wide issues that were + already broken before your task started.** Phase 4.5 HARD RULE 1 forbids you + from fixing those — they are legitimately out of your scope. The repo-wide + health check happens once, later, in the `code-review` node. + + --- + + ## Phase 4: COMMIT — Save Changes + + Before staging, verify the git diff contains ONLY files from the ONE task you just worked on: + + ```bash + git status --short + git diff --stat + ``` + + If you see files outside the scope of the current task (e.g. changes you made while + exploring or testing for Task N+1), revert those extra changes with + `git checkout -- <path>` BEFORE staging. Do NOT use `git add -A`. Stage explicitly + by file path: + + ```bash + git add <file> # only the files from THIS task + git diff --cached --stat # confirm the staged set matches the task scope + git commit -m "$(cat <<'EOF' + {type}: {task description} + + PIV Task {N}: {brief details} + EOF + )" + ``` + + Track progress in `$ARTIFACTS_DIR/progress.txt` (outside the repo — zero + staging risk, pre-created by the executor, survives resume): + ``` + ## Task {N}: {title} — COMPLETED + Date: {ISO date} + Files: {list} + Commit: {short hash} + --- + ``` + + **DO NOT** stage or commit `$ARTIFACTS_DIR/progress.txt`. It lives outside + the repo entirely and is not a git-tracked file. 
It is local scratch state
+ for the loop only.
+
+ ---
+
+ ## Phase 4.5: STOP — Exit After One Task
+
+ "One task" means literally ONE numbered `### Task N:` entry from the plan file.
+ Not "one logical change." Not "one feature unit." Not "everything that naturally
+ goes together." ONE numbered entry.
+
+ **HARD RULES**:
+ 1. Count your commits in this iteration via `git log HEAD ^<start-hash>` (the
+ hash you recorded in Phase 0.3). If count > 1, iteration discipline was
+ violated. **Do NOT rewrite history automatically.** Report the violation in
+ the Phase 4.5 status block, state how many commits were created, and end the
+ iteration immediately so a human or later cleanup pass can reconcile it.
+ 2. Verification tasks (like "VERIFY PRE-FLIGHT") count as numbered tasks and
+ need their own iteration, even if they produce no code change.
+ 3. If Task N is a verification-only task, your "commit" for that iteration is
+ writing to `$ARTIFACTS_DIR/progress.txt` and exiting — no git commit required.
+ 4. After committing (or recording verification), end the iteration with the
+ Phase 4.5 status block below. Do NOT read, edit, stage, or validate files
+ for any other task in this iteration.
+
+ Archon monitors durable progress across iterations using the current git HEAD
+ plus `$ARTIFACTS_DIR/progress.txt`. If multiple iterations finish without a
+ new commit or a new completed-task entry, the loop will stop as stuck so a
+ human can inspect the blocked task directly.
+
+ If tasks remain, end the iteration by reporting status in this exact format:
+
+ ```
+ Task {N} committed: {short hash}
+ Files touched: {list}
+ Remaining tasks: {count}
+ Next iteration will pick up Task {N+1}.
+ ```
+
+ Do NOT emit the completion tag here. Do NOT read or start any other task's files.
+
+ ---
+
+ ## Phase 5: COMPLETE — Check All Tasks
+
+ **SIGNAL EMISSION CONTRACT** — emit `COMPLETE` only when
+ BOTH of these are true:
+ 1. 
Every `### Task N:` entry in the plan file has a matching "COMPLETED" + entry in `$ARTIFACTS_DIR/progress.txt` + 2. `git log` on the current branch shows a commit for each code-affecting + task (verification-only tasks have no commit; see Phase 4.5 Rule 3) + + **Phase 5 does NOT run repo-wide validation.** Repo-wide validators + (`TYPECHECK_CMD`, `LINT_CMD`, `TEST_CMD`, `FORMAT_CMD`, `VALIDATE_CMD`) are the + `code-review` node's responsibility — it runs them once after the implement + loop finishes. Pre-existing repo-wide failures are not this loop's problem. + The implement loop's job is "did I complete my assigned tasks?", not "is the + whole repo healthy?". + + If BOTH conditions hold: + 1. Push: `git push -u origin HEAD` + 2. Emit the completion tag on its own line. + + If either condition is false, tasks remain. Report status using the Phase 4.5 + format and end the iteration normally. The loop engine will start a fresh + iteration. When referencing this contract in your output, write "the COMPLETE + signal" (no angle brackets). + until: COMPLETE + max_iterations: 15 + fresh_context: true + progress_file: "$ARTIFACTS_DIR/progress.txt" + stuck_after_no_progress_iterations: 3 + + # ═══════════════════════════════════════════════════════════════ + # PHASE 4: VALIDATE — Automated code review + # Review all changes against the plan + # ═══════════════════════════════════════════════════════════════ + + - id: code-review + depends_on: [implement, detect-project] + context: fresh + prompt: | + # PIV Loop — Automated Code Review + + The implementation phase is complete. Review ALL changes against the plan. + + **Implementation output**: $implement.output + + **Project detection** (validation commands for this repo): + $detect-project.output + + Parse `VALIDATE_CMD`, `TYPECHECK_CMD`, `LINT_CMD`, `TEST_CMD`, `FORMAT_CMD` from + the detect-project output above. Use them in Step 4 below. 
+ + --- + + ## Step 1: Find and Read the Plan + + ```bash + ls -t .claude/archon/plans/*.plan.md 2>/dev/null | head -1 + ``` + + ## Step 2: Review All Changes + + ```bash + git log --oneline --no-merges $(git merge-base HEAD $BASE_BRANCH)..HEAD + git diff $BASE_BRANCH..HEAD --stat + git diff $BASE_BRANCH..HEAD + ``` + + ## Step 3: Check Against Plan + + For EACH task: was it implemented correctly? Do success criteria hold? + For EACH file: check quality, security, patterns, CLAUDE.md compliance. + + ## Step 4: Run Validation + + Run `VALIDATE_CMD` from the detect-project output. If `VALIDATE_CMD` is empty, + run each non-empty command from `TYPECHECK_CMD`, `LINT_CMD`, `TEST_CMD`, + `FORMAT_CMD` individually. If all five are empty, note "No automated validators + available for this project; relying on code review + human verification" and + skip this step. + + ## Step 5: Fix Obvious Issues (SCOPED to files this branch changed) + + Get the list of files this branch touched: + ```bash + git diff --name-only $BASE_BRANCH..HEAD + ``` + + Fix ONLY issues that appear in those files. Do NOT fix issues elsewhere in the + repo — those are pre-existing and out of scope for this branch's review. They + go in the "Pre-existing repo findings" section of Step 6's report instead. + + To distinguish changed-file issues from pre-existing ones: + - Use a changed-file scoped validator invocation ONLY when the command syntax + clearly supports it. + - Otherwise run the full validator and filter its output to findings that point + at files from `git diff --name-only $BASE_BRANCH..HEAD`. + - Never invent a scoped form of a repo command by appending file paths to + commands like `npm run lint`, `cargo test`, `go test ./...`, or similar + wrappers unless the command itself clearly accepts those extra args. 
+
+ - For each issue in a changed file: fix it, stage only that file, add it to
+ a single scoped commit
+
+ Stage explicitly by file path and commit:
+ ```bash
+ git add <file> # NOT git add -A
+ git diff --cached --stat # confirm only changed-file fixes are staged
+ git commit -m "fix: address review findings in this branch's changes" 2>/dev/null || true
+ ```
+
+ If none of the changed files have issues, skip this step and proceed to Step 6.
+
+ **Never fix pre-existing repo-wide issues in Step 5.** Document them in Step 6's
+ "Pre-existing repo findings" section instead. They are out of scope for this
+ branch — the user can address them in a separate PR if desired.
+
+ ## Step 6: Present Review
+
+ ```
+ ## Code Review Complete
+
+ ### Implementation Status
+ | Task | Status | Notes |
+ |------|--------|-------|
+ | {task} | DONE / PARTIAL / MISSING | {notes} |
+
+ ### Validation Results (scoped to files changed by this branch)
+ - Type-check: PASS / FAIL / SKIPPED
+ - Lint: PASS / FAIL / SKIPPED
+ - Tests: PASS / FAIL / SKIPPED
+ - Format: PASS / FAIL / SKIPPED
+
+ ### Code Quality Findings (in files changed by this branch)
+ {Issues found in files this branch touched, or "No issues found."}
+
+ ### Pre-existing Repo Findings (OUT OF SCOPE — not fixed)
+ {List pre-existing issues in files NOT changed by this branch. These are
+ documented for user awareness but were NOT fixed as part of this review —
+ they are out of scope for the current branch. The user can address them in
+ a separate PR if desired. Example: "ruff check reported 12 failures in
+ .claude/scripts/*.py that pre-date this branch; not fixed."}
+
+ ### Recommendation
+ {READY FOR REVIEW / NEEDS FIXES}
+ ```
+
+ # ═══════════════════════════════════════════════════════════════
+ # PHASE 4b: VALIDATE — Iterative human feedback & fixes
+ # The user tests the implementation and provides feedback.
+ # Loops until the user approves.
+ # ═══════════════════════════════════════════════════════════════ + + - id: fix-feedback + depends_on: [code-review, detect-project] + loop: + prompt: | + # PIV Loop — Address Validation Feedback + + The human has reviewed the implementation and provided feedback. + + **Human's feedback** (empty on first iteration): "$LOOP_USER_INPUT" + + **Project detection** (validation commands for this repo): + $detect-project.output + + Parse `VALIDATE_CMD` etc. from the detect-project output for Step 3 below. + + --- + + ## Step 1: Read Context + + ```bash + ls -t .claude/archon/plans/*.plan.md 2>/dev/null | head -1 + ``` + + Read the plan file and CLAUDE.md for conventions. + + ## Step 2: Process Feedback + + **If there is no user feedback yet** (first iteration, `$LOOP_USER_INPUT` is empty): + - Present the code review results and ask the user to test the implementation + - Do NOT emit the completion signal on the first iteration + + **If the user EXPLICITLY approved** (said "approved", "looks good", "ship it", etc.): + - Output: "Implementation approved!" + - Then emit `VALIDATED` on its own line. + + **SIGNAL EMISSION CONTRACT** — emit `VALIDATED` only when + ALL of these are true: + 1. The user's LATEST message contains an explicit approval phrase: + "approved", "looks good", "ship it", "let's go", "proceed" + 2. The message does NOT contain new feedback or requested fixes + 3. This is NOT the first iteration (empty `$LOOP_USER_INPUT`) + + If ANY condition is false, DO NOT emit the tag. When referencing this contract + in your output, write "the VALIDATED signal" (no angle brackets). + + **If the user provided specific feedback:** + 1. Read the relevant files + 2. Understand each issue + 3. Make the fixes + 4. Type-check after each change + + ## Step 3: Validation (conditional — only when fixes were made) + + **If this iteration is an approval iteration** (Step 2 detected an explicit + approval and you are emitting the VALIDATED signal): SKIP this step entirely. 
+
+ Do not run validators. Do not make commits. Proceed directly to Step 5 for
+ the final report and signal emission.
+
+ **If this iteration made fixes in response to user feedback**: validate ONLY
+ the files you changed in this iteration, not the repo-wide suite.
+ - Use a file-scoped validator invocation only when the command syntax clearly
+ supports it.
+ - Otherwise run the full validator and filter the findings to the files you
+ just changed.
+ - Never invent a file-scoped form of `npm run lint`, `cargo test`,
+ `go test ./...`, or any similar wrapper by simply appending paths unless the
+ command itself clearly accepts those extra args.
+ Do NOT run `ruff check .` or equivalent repo-wide commands as a release gate
+ here — the goal is "did my fixes work?", not "is the whole repo healthy?".
+
+ If scoped validation fails, attempt one more fix pass on the same file(s).
+ If still failing, report to the user in Step 5 and do NOT emit VALIDATED.
+ Pre-existing repo-wide failures unrelated to your fixes are out of scope —
+ never block VALIDATED on them.
+
+ ## Step 4: Commit Fixes
+
+ If this iteration is an approval-only iteration, SKIP this step entirely.
+ Commit only when you actually made code changes in response to feedback.
+
+ Stage explicitly by file path and confirm the staged set before committing:
+
+ ```bash
+ git add <file> # only the files changed in THIS feedback pass
+ git diff --cached --stat
+ git commit -m "$(cat <<'EOF'
+ fix: address review feedback
+
+ Changes:
+ - {fix 1}
+ - {fix 2}
+ EOF
+ )"
+ ```
+
+ ## Step 5: Report
+
+ ```
+ ## Feedback Addressed
+
+ Changes made:
+ - {fix 1}
+ - {fix 2}
+
+ Validation: {PASS / FAIL with details}
+
+ Review again, or say "approved" to finalize.
+ ```
+ until: VALIDATED
+ max_iterations: 10
+ interactive: true
+ gate_message: |
+ Test the implementation yourself and review the code changes.
+ Provide specific feedback on what needs fixing, or say "approved" to finalize.
+ + # ═══════════════════════════════════════════════════════════════ + # PHASE 5: FINALIZE — Push, create PR, generate summary + # ═══════════════════════════════════════════════════════════════ + + - id: finalize + depends_on: [fix-feedback, implement-setup] + context: fresh + prompt: | + # PIV Loop — Finalize + + The implementation has been approved. Push changes and create a PR. + + **Plan file** (the exact one `create-plan` wrote and `implement` worked on — + use THIS as the source of truth for PR title and body. Do NOT search the repo + for other plan files): + + Parse the `PLAN_FILE=...` line from the implement-setup context below. If for + any reason that line is missing, re-derive with: + `ls -t .claude/archon/plans/*.plan.md | head -1` + + Do NOT read plan files under `docs/plans/` or any other location — those + belong to other features and will pollute the PR title/body. + + **Implementation setup context** (contains `PLAN_FILE=...`): + $implement-setup.output + + --- + + ## Step 1: Push Changes + + ```bash + git push -u origin HEAD 2>&1 || true + ``` + + ## Step 2: Generate Summary + + ```bash + git log --oneline --no-merges $(git merge-base HEAD $BASE_BRANCH)..HEAD + git diff --stat $(git merge-base HEAD $BASE_BRANCH)..HEAD + ``` + + Read the plan file from the `PLAN_FILE` path you parsed above (NOT any plan in + `docs/plans/`). The feature name for the PR title comes from the + `# Feature: {Title}` heading of THIS plan file only. + + Read progress tracking from `$ARTIFACTS_DIR/progress.txt` for task completion + context. 
+ + ## Step 3: Create PR (if not already created) + + ```bash + gh pr view HEAD --json url 2>/dev/null || echo "NO_PR" + ``` + + If no PR exists: + + ```bash + cat .github/pull_request_template.md 2>/dev/null || echo "NO_TEMPLATE" + ``` + + Create with `gh pr create --draft --base $BASE_BRANCH`: + - Title from the plan's feature name + - Body summarizing the implementation + - Use a HEREDOC for the body + + ## Step 4: Output Summary + + ``` + =============================================================== + PIV LOOP — COMPLETE + =============================================================== + + Feature: {from plan} + Plan: {plan file path} + Branch: {branch name} + PR: {url} + + -- Tasks Completed ----------------------------------------------- + {list from progress tracking} + + -- Commits ------------------------------------------------------- + {git log output} + + -- Files Changed ------------------------------------------------- + {git diff --stat output} + + -- Validation ---------------------------------------------------- + All checks passed. + =============================================================== + ``` diff --git a/.archon/workflows/defaults/archon-piv-loop.yaml b/.archon/workflows/defaults/archon-piv-loop.yaml index 7227900c2f..e32a1a41f2 100644 --- a/.archon/workflows/defaults/archon-piv-loop.yaml +++ b/.archon/workflows/defaults/archon-piv-loop.yaml @@ -520,6 +520,11 @@ nodes: --- ``` + Archon monitors durable progress across iterations using the current git HEAD + plus `.claude/archon/plans/progress.txt`. If multiple iterations finish + without a new commit or a new completed-task entry, the loop will stop as + stuck so a human can inspect the blocked task directly. 
+ --- ## Phase 5: COMPLETE — Check All Tasks @@ -533,6 +538,8 @@ nodes: until: COMPLETE max_iterations: 15 fresh_context: true + progress_file: ".claude/archon/plans/progress.txt" + stuck_after_no_progress_iterations: 3 # ═══════════════════════════════════════════════════════════════ # PHASE 4: VALIDATE — Automated code review diff --git a/.claude/skills/archon/SKILL.md b/.claude/skills/archon/SKILL.md index f36e7391b8..43080fa128 100644 --- a/.claude/skills/archon/SKILL.md +++ b/.claude/skills/archon/SKILL.md @@ -41,6 +41,7 @@ Determine the user's intent and dispatch to the appropriate guide: | **Create a command file** | Read `references/authoring-commands.md` | | **Variable substitution reference** | Read `references/variables.md` | | **CLI command reference** | Read `references/cli-commands.md` | +| **Debug workflow logs / inspect a run** | Read `references/log-debugging.md` — log locations, layers, filtering, interpretation | | **Run an interactive workflow** | Read `references/interactive-workflows.md` — transparent relay protocol | | **Run a workflow (default)** | Continue with "Running Workflows" below | diff --git a/.claude/skills/archon/references/log-debugging.md b/.claude/skills/archon/references/log-debugging.md new file mode 100644 index 0000000000..51d66feff4 --- /dev/null +++ b/.claude/skills/archon/references/log-debugging.md @@ -0,0 +1,293 @@ +# Archon Log Debugging Reference + +Use this guide when the main job is understanding what Archon just did during a +workflow run, why it failed, why it paused, or where the useful evidence lives. + +## Three Log Layers + +Archon exposes three different evidence surfaces. They overlap, but they are +not interchangeable. + +### 1. Runtime process logs + +Use these when you need to debug Archon itself: startup, config loading, +database errors, adapter issues, API route failures, or unexpected process +behavior. 
+
+ - Output goes to the current terminal or process log sink
+ - Verbosity is controlled by `LOG_LEVEL`
+ - `archon --verbose ...` sets the CLI logger to `debug`
+
+ Examples:
+
+ ```bash
+ LOG_LEVEL=debug archon workflow list
+ LOG_LEVEL=debug archon workflow run archon-assist "help me debug this run"
+ LOG_LEVEL=debug bun run dev
+ ```
+
+ ### 2. Per-run workflow JSONL logs
+
+ Use these when you need the raw workflow trace for one run: assistant messages,
+ tool calls, node boundaries, validation events, and workflow-level failures.
+
+ Default location:
+
+ ```text
+ ~/.archon/workspaces/<workspace>/<run-id>/logs/<run-id>.jsonl
+ ```
+
+ If `ARCHON_HOME` is set, replace `~/.archon` with that directory.
+
+ ### 3. Web UI and API run details
+
+ Use these when you want a quick run summary, node progress, artifacts, and the
+ conversation view without opening the raw JSONL file.
+
+ - Web UI run details show node state, logs, and artifacts
+ - `GET /api/workflows/runs/:runId` returns the run plus lean DB events
+ - `archon workflow status --verbose` gives a CLI summary of active runs
+
+ Important: the UI/API event stream is intentionally lean. It does not replace
+ the raw JSONL file when you need the full assistant or tool trace.
+ +## What Each Layer Contains + +### Runtime process logs + +Best for: + +- startup and shutdown failures +- SQLite or PostgreSQL connection errors +- API route errors +- adapter or orchestration errors +- configuration problems + +### Workflow JSONL logs + +Best for: + +- a single run's assistant output +- tool inputs for that run +- node-by-node flow +- validation pass/fail details +- interactive workflow pause output + +The raw JSONL logger writes these event types: + +- `workflow_start` +- `workflow_complete` +- `workflow_error` +- `assistant` +- `tool` +- `validation` +- `node_start` +- `node_complete` +- `node_skipped` +- `node_error` + +### UI/API events + +Best for: + +- current node status +- elapsed time and progress +- artifacts +- recent workflow state in the app + +Expect the naming to differ slightly from the JSONL file. The UI/API layer is +built from `remote_agent_workflow_events` and persisted messages, so event names +such as `node_started` or `tool_called` may appear there instead of the raw +JSONL names. + +## Quick Triage Order + +Use this sequence unless you already know the failing layer: + +1. Get the run ID and current status. +2. Look at the UI run details or `archon workflow status --verbose`. +3. Open the per-run JSONL file for the full trace. +4. Turn on `LOG_LEVEL=debug` or `--verbose` only if the current evidence is too + thin. +5. Return to process logs if the failure looks like Archon runtime behavior + rather than workflow logic. 
+
+ ## Finding the Run
+
+ For active runs:
+
+ ```bash
+ archon workflow status
+ archon workflow status --verbose
+ archon workflow status --json
+ archon workflow status --json --verbose
+ ```
+
+ If you already have the run ID, locate the file directly:
+
+ ```bash
+ find "${ARCHON_HOME:-$HOME/.archon}/workspaces" -name "<run-id>.jsonl" 2>/dev/null
+ ```
+
+ ## Reading the JSONL File
+
+ Set a shell variable first:
+
+ ```bash
+ LOG_FILE="${ARCHON_HOME:-$HOME/.archon}/workspaces/<workspace>/<run-id>/logs/<run-id>.jsonl"
+ ```
+
+ Show the last lines:
+
+ ```bash
+ tail -n 40 "$LOG_FILE"
+ ```
+
+ Search for failures:
+
+ ```bash
+ rg '"type":"workflow_error"|"type":"node_error"' "$LOG_FILE"
+ ```
+
+ Search for one node:
+
+ ```bash
+ rg '"step":"implement"' "$LOG_FILE"
+ ```
+
+ Search for validations:
+
+ ```bash
+ rg '"type":"validation"' "$LOG_FILE"
+ ```
+
+ ## Filtering Patterns
+
+ ### With `rg`
+
+ Assistant messages:
+
+ ```bash
+ rg '"type":"assistant"' "$LOG_FILE"
+ ```
+
+ Tool calls:
+
+ ```bash
+ rg '"type":"tool"' "$LOG_FILE"
+ ```
+
+ Skipped nodes:
+
+ ```bash
+ rg '"type":"node_skipped"' "$LOG_FILE"
+ ```
+
+ ### With `jq` if installed
+
+ Latest assistant message:
+
+ ```bash
+ jq -r 'select(.type=="assistant") | .content' "$LOG_FILE" | tail -n 1
+ ```
+
+ Node errors with timestamps:
+
+ ```bash
+ jq -c 'select(.type=="node_error") | {ts, step, error}' "$LOG_FILE"
+ ```
+
+ Validation results:
+
+ ```bash
+ jq -c 'select(.type=="validation") | {ts, step, check, result, error}' "$LOG_FILE"
+ ```
+
+ ## How To Interpret Common Events
+
+ ### `workflow_start`
+
+ The run was created and the workflow began. This is the anchor for the rest of
+ the file.
+
+ ### `node_start` and `node_complete`
+
+ The workflow crossed a node boundary. These tell you which step ran, in what
+ order, and where time was spent.
+
+ ### `node_skipped`
+
+ This usually means a `when:` condition or trigger rule prevented the node from
+ running. It is not necessarily a failure.
+
+ ### `node_error`
+
+ The node failed. Start here for step-local failures.
+ +### `validation` + +A named check ran and produced `pass`, `fail`, `warn`, or `unknown`. + +### `assistant` + +This is the workflow agent's textual output for the run. In interactive +workflows, this is the content you relay back to the user. + +### `tool` + +A raw tool invocation was recorded in the JSONL trace. Use this when you need +to see what the workflow attempted, not just the summarized UI status. + +## Interactive Workflow Note + +For interactive workflows, the important readback pattern is: + +1. get the run ID +2. open the JSONL file +3. extract the last `assistant` event +4. relay its `content` directly + +That is the canonical way to surface pause output from the raw log. + +## UI Versus Raw File + +Use the UI or API when: + +- you need quick node status +- you want artifacts and high-level progress +- you are navigating several runs quickly + +Use the raw JSONL file when: + +- you need the exact assistant text +- you need the raw tool trace +- UI summaries feel incomplete +- you are investigating a single run deeply + +## Common Failure Patterns + +`workflow appears active but progress is unclear`: +Open the JSONL file and check the most recent `assistant`, `tool`, and +`node_*` events. + +`UI shows state but not enough context`: +Use the raw JSONL for the detailed trace. + +`run failed but nothing obvious appears in JSONL`: +Check Archon runtime logs with `LOG_LEVEL=debug`; the problem may be outside the +workflow trace itself. + +`interactive workflow is paused and you need the exact wording`: +Extract the last `assistant` event from the JSONL file. + +## Minimal Operator Checklist + +When debugging a run for someone else, report: + +1. run ID +2. workflow name +3. current status +4. failing node or last completed node +5. most recent assistant output +6. most relevant error or validation event +7. 
whether the problem looks like workflow logic or Archon runtime behavior diff --git a/.gitignore b/.gitignore index a2f33c5d5c..2f314ac2f3 100644 --- a/.gitignore +++ b/.gitignore @@ -44,9 +44,12 @@ e2e-screenshots/ # Archon logs and artifacts (generated at runtime) .archon/logs/ .archon/artifacts/ +artifacts/ # Agent artifacts (generated, local only) -.agents/ +.agents/* +!.agents/skills/ +!.agents/skills/** .agents/rca-reports/ .agents/plans/ .agents/pr-reviews diff --git a/CLAUDE.md b/CLAUDE.md index 0e902537dd..e54050544a 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -47,6 +47,38 @@ - Workspaces automatically sync with origin before worktree creation (ensures latest code) - **NEVER run `git clean -fd`** - it permanently deletes untracked files (use `git checkout .` instead) +**Fork & Upstream Integration (local clone only)** + +This working copy is a fork of `coleam00/Archon`. Remotes are set up fork-first so local customizations survive upstream releases without merge chaos: + +- `origin` → `https://github.com/matzls/Archon.git` (your fork, push access) +- `upstream` → `https://github.com/coleam00/Archon.git` (Cole's repo, read-only) +- `dev` tracks `upstream/dev` — keep it a clean mirror of upstream and never commit directly to it + +**Where different customizations belong:** + +1. **Personal config** — already outside git: `~/.archon/.env`, `~/.archon/config.yaml`, `~/.archon/archon.db`, per-target-repo `.archon/commands/` and `.archon/workflows/`. Upstream never touches these. +2. **Broadly useful code changes** — open a PR from a feature branch on the fork to `coleam00/Archon:dev`. If merged, zero ongoing maintenance. +3. **Personal code changes** — live on a feature branch on the fork; rebase on `upstream/dev` when pulling new releases. 
+ +**Integrating upstream releases:** + +```bash +git fetch upstream +git checkout dev +git merge --ff-only upstream/dev # dev stays a clean mirror of upstream +git checkout feature/my-branch +git rebase dev # replay local commits on top of new dev +git push --force-with-lease origin feature/my-branch +``` + +**Contributing back upstream:** + +```bash +git push -u origin feature/my-branch +gh pr create --repo coleam00/Archon --base dev --head matzls:feature/my-branch +``` + ## Engineering Principles These are implementation constraints, not slogans. Apply them by default. diff --git a/docs/design/codex-first-workflow-surface-strategy.md b/docs/design/codex-first-workflow-surface-strategy.md new file mode 100644 index 0000000000..c051d3f446 --- /dev/null +++ b/docs/design/codex-first-workflow-surface-strategy.md @@ -0,0 +1,440 @@ +--- +title: Codex-First Workflow Surface Strategy +status: draft +created: 2026-04-13 +updated: 2026-04-13 +--- + +# Design Doc: Codex-First Workflow Surface Strategy + +## 1. Purpose + +Define what this fork should mean by "Codex-first" for local Archon usage without +degrading or muddying the original/cloud/Claude implementation surface. + +This document is not an implementation PRD. It defines: +- the target Codex workflow surface for this fork +- which current workflows are strong enough to keep +- which current workflows should be removed or deferred +- what counts as real Codex parity versus misleading pseudo-parity +- the adaptation rules future Codex workflows must satisfy before they are + shipped as defaults + +This document is the design and policy anchor for follow-on implementation PRDs +and plans. + +## 2. Problem Statement + +This fork now has a meaningful Codex surface, but it is uneven. + +Some Codex workflows are genuinely adapted and useful, especially +`archon-piv-loop-codex`. Others currently imply more maturity than they +actually have, especially the provisional `archon-feature-development-codex` +workflow. 
In addition, some behavior differs between repo-local source usage and +bundled/binary usage because bundled defaults are hardcoded in +`packages/workflows/src/defaults/bundled-defaults.ts`. + +The risk is not only missing functionality. The larger risk is false +confidence: +- a workflow appears Codex-supported but is only a rename or provider patch +- a repo-local workflow exists but is not actually bundled or shipped +- a workflow appears parity-complete while relying on Claude-oriented + assumptions +- documentation overstates what Codex can currently do inside Archon's workflow + runtime + +If this fork is going to be run mainly through Codex, the Codex defaults must +be intentionally curated, clearly documented, and held to a real quality bar. + +## 3. Goals + +### Primary Goals + +- Make this fork clearly and honestly Codex-first for local usage. +- Preserve original/cloud/Claude behavior unless there is a correctness or + shared-runtime parity reason to change it. +- Keep only genuinely Codex-adapted workflows in the default Codex surface. +- Remove or defer thin pseudo-parity workflows. +- Define a repeatable checklist for adapting future workflows to Codex. +- Separate repo-local experiments from truly shipped default assets. + +### Secondary Goals + +- Improve operator clarity for which workflow to use under Codex. +- Reduce bundle drift between source checkout behavior and bundled/binary + behavior. +- Provide a basis for future Codex-specific implementation and workflow-builder + work. + +## 4. Non-Goals + +- Rewriting the Claude/default workflow surface to match Codex. +- Forcing one-to-one migration of Claude-oriented workflow-node features where + Codex support is absent or materially different. +- Claiming parity based on possible SDK analogues that Archon does not yet + expose or validate. +- Implementing all Codex parity improvements in one slice. +- Replacing the current workflow-builder immediately. + +## 5. 
Current Repo-Grounded State + +### Strong Codex Surface + +- `.archon/workflows/defaults/archon-assist-codex.yaml` +- `.archon/workflows/defaults/archon-piv-loop-codex.yaml` +- `.archon/commands/defaults/archon-assist-codex.md` +- `.agents/skills/archon/SKILL.md` +- `packages/core/src/clients/codex.ts` + +### Weak Or Misleading Codex Surface + +- the provisional `archon-feature-development-codex` workflow +- `.archon/workflows/defaults/archon-workflow-builder.yaml` when interpreted as + Codex-safe +- bundled defaults in `packages/workflows/src/defaults/bundled-defaults.ts`, + which currently lag repo-local Codex assets + +### Current Runtime Constraint Surface + +- Codex client behavior is implemented in `packages/core/src/clients/codex.ts` +- Claude client behavior is implemented in `packages/core/src/clients/claude.ts` +- workflow-level validation and provider-specific restrictions are enforced in + `packages/workflows/src/validator.ts` +- workflow dependency support is described in + `packages/workflows/src/deps.ts` +- default bundled asset behavior is defined in + `packages/workflows/src/defaults/bundled-defaults.ts` +- current orchestrator routing remains assist-centric in + `packages/core/src/orchestrator/prompt-builder.ts` + +## 6. Design Principles + +### 6.1 Honest Capability Boundaries + +A workflow is not "Codex-supported" just because it has `provider: codex` or +because a rough SDK analogue may exist. It is Codex-supported only when the +actual Archon workflow surface, validation rules, runtime behavior, and +operator guidance all line up. + +### 6.2 Codex Defaults Must Be Curated + +Codex-specific defaults in this fork should be few, intentional, and high +quality. A thin or misleading workflow is worse than a missing one. 
+ +### 6.3 Preserve Shared Runtime Where Reasonable + +Shared runtime code should stay shared unless: +- correctness requires a change +- parity requires a shared abstraction improvement +- a Codex-specific branch is unavoidable and contained + +### 6.4 No Fake Parity + +If a workflow cannot cleanly support Codex yet, it should remain: +- Claude-only +- repo-local experimental +- deferred for redesign + +It should not be promoted into the default Codex surface prematurely. + +### 6.5 Source And Bundle Must Not Disagree On Shipped Defaults + +A workflow that is intended as a real default must exist consistently in: +- repo-local defaults +- bundled defaults +- discovery tests +- metadata surfaces where relevant + +## 7. Decision Summary + +| Surface | Decision | Status | +| --- | --- | --- | +| `archon-piv-loop-codex` | Keep as the reference-quality Codex workflow | Keep | +| `archon-assist-codex` | Keep as the general Codex assist lane | Keep | +| `archon-feature-development-codex` | Remove from the default surface for now; rebuild later only if it becomes a real Codex-native workflow | Remove / rebuild later | +| `archon-workflow-builder` | Leave shared/original workflow alone for now; do not treat it as Codex-safe | Defer | +| `archon-workflow-builder-codex` | Design as a separate future workflow, not a patch on the current builder | Future work | +| Codex capability crosswalk doc | Create as supporting reference documentation | Planned | +| Bundled-vs-repo default parity rules | Tighten and test | Planned | + +## 8. Workflow-Specific Decisions + +### 8.1 `archon-piv-loop-codex` + +`.archon/workflows/defaults/archon-piv-loop-codex.yaml` is currently the +strongest Codex-native workflow in the repo. 
+ +Why it stays: +- it is meaningfully adapted for Codex behavior rather than just renamed +- it has explicit loop discipline and operator guidance +- it is already treated as part of the Codex surface +- it is useful as the quality benchmark for future Codex workflows + +Design role: +- reference implementation +- quality bar for future Codex workflow adaptation +- baseline operator experience target + +### 8.2 `archon-assist-codex` + +`.archon/workflows/defaults/archon-assist-codex.yaml` remains the default Codex +assist workflow. + +Why it stays: +- it serves a real routing purpose +- it already has a Codex-specific command surface via + `.archon/commands/defaults/archon-assist-codex.md` +- it is useful as the general entry lane for Codex users + +Constraint: +- it should not become the catch-all substitute for every Codex workflow need +- more specialized Codex workflows should not be forced through assist-centric + routing forever + +### 8.3 `archon-feature-development-codex` + +The provisional `archon-feature-development-codex` workflow should be removed +from the default surface in its current form. + +Why it should be removed: +- it is currently too thin to justify first-class default status +- it does not yet show the same level of Codex-specific adaptation as the PIV + loop +- it creates the impression of feature-development parity without earning it +- it is currently repo-local only rather than a real shipped default + +Future path: +- rebuild from scratch later if the fork needs a real Codex-native + feature-development lane +- reintroduce only after prompt quality, operator guidance, runtime fit, and + bundling/testing all meet the Codex default bar + +### 8.4 `archon-workflow-builder` + +`.archon/workflows/defaults/archon-workflow-builder.yaml` should remain +untouched for now and should not be presented as Codex-safe. 
+ +Why: +- it is shared/original behavior +- it currently carries Claude-oriented assumptions +- forcing mixed-provider pseudo-parity here would create confusion and risk + +Future path: +- create a dedicated `archon-workflow-builder-codex` only when there is a clear + Codex-safe design +- optimize it specifically for Codex-supported workflow authoring, validation, + and operator use + +## 9. Shipped Asset Policy + +This fork needs a stricter distinction between four classes of workflow +surface: + +| Class | Meaning | Allowed Visibility | +| --- | --- | --- | +| Shipped default | Supported, bundled, tested, operator-ready | CLI, UI, docs, bundle | +| Repo-local experimental | Present in repo for development or evaluation, not yet shipped | repo only | +| Deferred / Claude-only | Intentionally not for Codex yet | docs only | +| Misleading pseudo-parity | Looks supported but is not actually ready | not allowed | + +A workflow must satisfy all of the following before it is considered a shipped +Codex default: +- genuinely Codex-adapted prompt and operator behavior +- valid against Codex workflow constraints +- bundled in shipped defaults if intended as a default +- covered by basic discovery and asset-parity tests +- described honestly in docs and metadata +- not dependent on unsupported Claude-only fields or assumptions + +## 10. Codex Workflow Adaptation Checklist + +Any future Codex-specific workflow must pass this checklist before being added +to the default surface. + +### 10.1 Routing And Identity + +- Is the provider explicit? +- Is the workflow name honest and specific? +- Does routing send the user to this workflow for the right class of task? +- Is the workflow distinguishable from assist-only routing? + +### 10.2 Prompt And Operator Quality + +- Is the prompt written for Codex behavior rather than copied from Claude? +- Are stop/continue semantics explicit? +- Are validation and iteration expectations scoped and concrete? 
+- Is the operator guidance at least parity quality with the Claude/default + equivalent? + +### 10.3 Runtime Fit + +- Does it avoid unsupported or ignored Codex workflow fields? +- Does it avoid fake support for node capabilities that Archon does not expose + on Codex? +- Are tool, sandbox, network, and reasoning assumptions aligned with actual + Codex runtime behavior? + +### 10.4 Shipped Asset Completeness + +- Is the workflow present in repo defaults if intended? +- Is it present in bundled defaults if intended? +- Is any required command or supporting doc present and discoverable? +- Are repo-local-only docs clearly treated as repo-local support material, not + shipped runtime assets? + +### 10.5 Testing And Observability + +- Is there at least one test or validation assertion proving it is + discoverable? +- If bundled, is bundled inclusion tested? +- Are key routing assumptions covered? +- Does the runtime produce enough operator-visible evidence to debug failures? + +### 10.6 Parity Honesty + +- Is parity real, degraded, or intentionally absent? +- If degraded, is the limitation documented clearly? +- If a feature is unsupported, is the workflow redesigned instead of awkwardly + translated? + +## 11. Capability Crosswalk Policy + +Future parity work must distinguish between: +- currently implemented Archon Codex support +- possible Codex SDK analogue +- unsupported in current Archon workflow/runtime surface +- intentionally left Claude-only + +This fork should not claim parity based on theoretical analogue alone. 
+ +### Current Crosswalk Policy + +- reasoning-effort style controls may map cleanly if already exposed through + Codex runtime wiring +- sandbox and network controls may map cleanly if already exposed and validated +- system-prompt or instruction-lane parity must be proven in Archon's actual + Codex integration before being claimed +- hooks, per-node controls, MCP shape, and similar workflow-node capabilities + must be treated cautiously and documented as unsupported until verified + +Supporting document planned: +- a focused reference doc comparing Claude-oriented runtime/workflow fields + against Codex runtime/workflow equivalents or gaps + +That reference should drive future implementation decisions, not speculative +assumptions. + +## 12. Repo-Local vs Bundled Rules + +The fork must clearly separate source-checkout convenience from bundled product +truth. + +```mermaid +flowchart LR + A["Repo defaults"] --> B["Source discovery"] + C["Bundled imports"] --> D["Binary defaults"] + B --> E["Local source truth"] + D --> F["Shipped product truth"] +``` + +Rules: +- if a workflow is meant to be a true default, repo discovery and bundled + imports must agree +- if a workflow is experimental, it must not be described as a real shipped + default +- bundled asset tests must fail when intended defaults drift from the bundle + set +- repo-local supporting docs may remain repo-only if they are not needed in + bundled runtime behavior + +Implication: +- repo-only README material such as + `.archon/workflows/defaults/archon-piv-loop-codex.README.md` is acceptable + when it is clearly operator support documentation rather than a bundled + runtime dependency + +## 13. 
Routing Target State + +The desired routing model for this fork is: + +- `archon-assist-codex` for general Codex assistance +- `archon-piv-loop-codex` for iterative Codex-native implementation and + validation loops where that lane fits +- no `archon-feature-development-codex` until a real Codex-native + implementation exists +- no claim that `archon-workflow-builder` is Codex-safe +- future specialized Codex workflows added only after passing the adaptation + checklist + +This keeps the visible Codex surface smaller, clearer, and more trustworthy. + +## 14. Planned Follow-On Documents + +This design doc should be followed by small, scoped implementation documents +rather than one large execution plan. + +### 14.1 Supporting Reference Doc + +Codex vs Claude workflow/runtime capability crosswalk: +- actual implemented Archon support +- likely analogues worth investigating +- unsupported fields +- redesign-required areas + +### 14.2 Implementation PRD A + +Codex workflow surface cleanup: +- align visible default surfaces and metadata to the actual Codex lane set +- tighten bundle/default parity expectations +- remove thin pseudo-parity from default routing + +### 14.3 Implementation PRD B + +Codex-native feature-development workflow v2: +- design from scratch +- define prompt structure, iteration contract, validation scope, and operator + guidance +- benchmark against `archon-piv-loop-codex` + +### 14.4 Implementation PRD C + +Codex workflow-builder variant: +- separate workflow +- explicitly Codex-safe authoring and guidance contract +- no mixed-provider ambiguity + +## 15. Recommended Implementation Sequence + +1. Tighten shipped-vs-experimental asset rules and bundled parity checks. +2. Write the Codex-vs-Claude capability crosswalk reference. +3. Decide whether a Codex-native feature-development lane is actually needed. +4. If needed, design and implement it from scratch. +5. Only after that, design a Codex-safe workflow-builder variant. 
+ +This sequence prioritizes trustworthiness and clarity before expansion. + +## 16. Risks + +| Risk | Why it matters | Mitigation | +| --- | --- | --- | +| Over-claiming Codex parity | Creates operator confusion and brittle workflows | Keep unsupported areas explicit | +| Bundle drift | Source behavior differs from shipped behavior | Add asset parity rules and tests | +| Over-expanding Codex defaults too early | Increases maintenance and pseudo-parity risk | Keep the default set intentionally small | +| Shared-runtime churn | Could destabilize original/cloud behavior | Prefer contained, justified changes only | +| Rebuilding too soon without a capability crosswalk | Risks repeating thin adaptation | Write the crosswalk doc first | + +## 17. Final Position + +This fork should be Codex-first by being intentionally narrower and more +truthful, not by mirroring every Claude/default surface immediately. + +Near-term Codex-first means: +- keep the strong Codex workflows +- remove the thin one +- defer the builder +- document the real capability boundary +- add new Codex workflows only when they are genuinely adapted and + operator-ready + +That is the path to a first-class Codex fork without degrading the +original/cloud implementation. diff --git a/docs/prd/workflow-node-display-names.prd.md b/docs/prd/workflow-node-display-names.prd.md new file mode 100644 index 0000000000..eaa792270b --- /dev/null +++ b/docs/prd/workflow-node-display-names.prd.md @@ -0,0 +1,364 @@ +--- +title: Workflow Node Display Names +status: draft +created: 2026-04-13 +updated: 2026-04-13 +--- + +# PRD: Workflow Node Display Names + +## 1. Problem Statement + +**Who has this problem:** Mase as the primary operator of Archon workflows, including +when running several workflows in parallel and needing to understand them quickly. 
+Secondary user: a technically capable observer who did not author the workflow — in +practice, future-Mase reviewing a completed run after some delay, or another observer +reviewing progress without deep knowledge of the workflow internals. They understand the +high-level goal but cannot be expected to decode raw YAML internals from the graph. + +**What problem they face:** When opening the workflow execution graph today, node labels +are too generic or internal. For non-command nodes — loop, script, approval, bash, +prompt — the label shown is either the raw `node.id` (the machine identifier from the +YAML) or a hardcoded type string like "Prompt" or "Shell". Neither tells the observer +what the node *does* in this workflow. The pain is immediate: you can see nodes +executing but cannot tell what is happening or what each step's purpose is. + +**Why it cannot be solved today by naming YAML better:** The display problem is not +purely about author discipline. Even with a descriptive `id`, the execution surface +does not reliably surface useful human-readable names for non-command nodes. Command +nodes get reasonable labels (the command name), but every other node type falls back +to the raw id or a hardcoded generic string. There is a structural gap: no schema +field exists to carry a human intent label distinct from the machine id, and no +inference logic exists to derive one from node content. + +**Why now:** Archon workflows are in active daily use and this surfaced immediately as +a usability problem. Quick comprehension across multiple runs without reverse-engineering +node ids matters from the first day of use. + +--- + +## 2. Evidence + +- **Verified in code:** `dag-executor.ts` emits `nodeName: node.command ?? node.id` + for command/prompt nodes and `node.id` for all other types (bash, script, loop, + approval, cancel). Raw node id is the fallback for the majority of node types. 
+- **Verified in code:** `WorkflowCanvas.tsx` `resolveNodeLabel()` (line 25–29) only + handles `'command'`, `'prompt'`, `'bash'`; returns hardcoded `'Prompt'` or `'Shell'` + for non-command types. Loop, script, approval, cancel are not handled. +- **Verified in code:** `ExecutionDagNode.tsx` (line 59) renders `data.label` directly. + For execution nodes, label is the same `data.label` field as the builder. Loop nodes + only get a type badge `'LOOP'` with no descriptive label. +- **Verified in schema:** `packages/workflows/src/schemas/dag-node.ts` — no + `display_name` field exists in any of the 7 node type schemas (CommandNode, + PromptNode, BashNode, ScriptNode, LoopNode, ApprovalNode, CancelNode). +- **Verified in DB:** `migrations/012_workflow_events.sql` — `step_name` column stores + `node.id`; no separate display label column in the events table. +- **Verified in events:** `event-emitter.ts` `NodeStartedEvent`, `NodeCompletedEvent`, + `NodeFailedEvent`, `NodeSkippedEvent` all carry `nodeName` field; its value is + populated by `dag-executor.ts` using the logic above. + +--- + +## 3. Proposed Solution + +Add an optional `display_name` field to the DAG node schema so workflow authors can +attach a human-readable label to any node. Wire that field through the execution event +pipeline and through the web UI graph components so both live and historical graph views +show the label. When `display_name` is absent, apply a resolution chain that infers a +meaningful label from available node content (phase 2) rather than falling back to the +raw id. + +This extends existing primitives: the schema, event emitter, and graph components +already have the structural slots needed. No new tables, no new API endpoints, and no +changes to the `step_name` DB contract are required for the MVP. + +--- + +## 4. 
Key Hypothesis + +If workflow authors can optionally provide a `display_name` on any node, and the graph +view shows that name as the primary label, then operators and observers will be able to +understand what each node does without inspecting raw YAML or decoding internal ids. + +The hypothesis is testable: after the change, open a workflow graph and ask whether +each node's label explains its purpose without additional context. + +--- + +## 5. What We're NOT Building + +- **Workflow Builder canvas changes.** The builder (`WorkflowBuilderPage`, `WorkflowCanvas.tsx`, `DagNodeComponent.tsx`) is phase 2. Phase 1 must not change builder rendering or editing behavior. +- **Non-graph execution surfaces.** The currently-executing banner, progress list, and log-derived step labels are phase 2. Phase 1 only changes labels on the execution graph node cards. +- **Database / event-contract changes.** `step_name` in `remote_agent_workflow_events` + remains `node.id`. We do not add a `display_name` column to the events table. The + display label is resolved at read/render time from the workflow definition, not stored + in event history. +- **Sophisticated NLP inference.** Stripping boilerplate like "You are ..." or + summarizing multi-paragraph prompts is out of scope for v1. Simple truncation only. +- **Retroactive relabeling of old runs.** Historical runs will benefit from inference + fallbacks if the workflow YAML is still available, but there is no backfill job. +- **Per-platform display_name variants.** One label per node; no locale or platform + override concept. +- **Approval / cancel node detailed labels** beyond what the display_name field or + simple inference provides. + +--- + +## 6. Success Metrics + +**Primary (qualitative, operator-assessed):** +- When opening a workflow execution graph, every node has a label that explains its + purpose without requiring the operator to inspect raw YAML or node ids. 
+- For workflows where `display_name` is set, no raw ids or generic hardcoded strings + appear as primary labels. +- For workflows without `display_name`, the inferred label (phase 2) is more meaningful + than the current fallback. + +**Observable signal (phase 1):** +- 0 nodes in a display_name-annotated workflow show a raw `node.id` as their primary + label in the execution graph. + +**Observable signal (phase 2):** +- For prompt and loop nodes, the inferred label visibly reflects the intent from the + first line of the prompt content (truncated to 80 chars), not "Prompt" or the node id. + +--- + +## 7. Open Questions + +| # | Question | Current Answer | +|---|----------|----------------| +| 1 | What truncation length for inferred labels? | 80 characters; adjust in a follow-on if needed. | +| 2 | Should boilerplate stripping be applied (e.g. "You are ...")? | No. Over-engineering for v1; skip. | +| 3 | How should loop nodes be labeled by inference? | Use the first 80 chars of `node.loop.prompt` (the inner prompt text). | +| 4 | How should script nodes be labeled by inference? | Use the script filename from `node.script` if it references a file, else first non-blank line. | +| 5 | Does `display_name` need to appear in Workflow Builder canvas? | Out of scope for this slice; follow-on. | +| 6 | Should `display_name` be stored in DB events for the observer path? | No — resolve at render time from the definition. Keeps DB contract clean. | + +--- + +## 8. Users & Context + +**Primary user:** Mase as Archon workflow operator. Runs workflows daily, sometimes +several in parallel. Needs quick comprehension of what is happening in any graph view, +including mid-run and post-run review after a delay. + +**Secondary user:** Technically capable observer who did not author the workflow. +Understands the high-level goal. Should not need to know raw node ids or internal YAML +structure to read the graph. 
+ +**JTBD:** +> When I run an Archon workflow and open the graph view, I want to clearly understand +> what each node in the graph is doing or what its job is, so I can understand what the +> agent did or is doing — without decoding internal identifiers. + +**Non-users / out of scope for this slice:** +- Non-technical stakeholders who need a narrative summary (not a graph) +- Workflow authors who want to edit display names in the builder canvas (follow-on) + +--- + +## 9. Solution Detail + +### MoSCoW Table + +| Priority | Item | Notes | +|----------|------|-------| +| **Must** | `display_name?: string` field in `dagNodeSchema` | Optional; backward-compatible | +| **Must** | Execution graph node cards show `display_name` when present | Scope limited to graph node cards | +| **Must** | Execution graph shows meaningful static fallback labels for loop, script, approval, and cancel nodes | Keep this local to execution graph in phase 1 | +| **Should** | Inference fallback for prompt nodes: first 80 chars of `node.prompt` | Phase 2 | +| **Should** | Inference fallback for loop nodes: first 80 chars of `node.loop.prompt` | Phase 2 | +| **Should** | Inference fallback for bash nodes: first non-comment line of `node.bash` | Phase 2 | +| **Should** | Inference fallback for script nodes: filename or first non-blank line | Phase 2 | +| **Could** | Tooltip showing full prompt/script on hover when label is truncated | Phase 2 or 3 | +| **Won't** | Builder canvas display_name editing | Out of scope this slice | +| **Won't** | DB event contract changes | Out of scope | +| **Won't** | Boilerplate stripping from prompts | Over-engineering for v1 | + +### MVP Definition + +Phase 1 is the minimum viable increment: +1. Add `display_name?: string` to the schema. +2. Regenerate frontend API types so the web app receives the new field. +3. Resolve labels in the execution graph from the workflow definition, using `display_name` when present. +4. 
Keep simple static fallbacks for node kinds without `display_name`: command name, `Shell`, `Prompt`, `Loop`, `Script`, `Approval`, `Cancel`. + +Phase 2 adds builder support, non-graph execution-surface updates, and optional inference so workflows without `display_name` still show more meaningful labels derived from content. + +--- + +## 10. Technical Approach + +All paths verified against the codebase at the time of writing. + +### Schema Extension +**File:** `packages/workflows/src/schemas/dag-node.ts` + +Add `display_name: z.string().optional()` to the shared `dagNodeBaseSchema` (the fields +common to all node types). This automatically makes it available on all 7 node type +schemas without touching each union branch. Because `dagNodeSchema` is a discriminated +union built on per-type schemas, the shared base approach is the lowest-change path. + +Currently the schema has no `display_name` field. The `id` field is the stable +machine identifier and must not be changed or overloaded. + +### Web UI — Execution Graph Label +**Primary files:** `packages/web/src/components/workflows/WorkflowDagViewer.tsx`, `packages/web/src/components/workflows/ExecutionDagNode.tsx` + +Phase 1 should be execution-graph-only. The execution graph already receives the full workflow definition via `dagNodes`, so it can resolve a display label directly from the definition without changing workflow events, DB contracts, or non-graph execution surfaces. 
+ +Recommended phase-1 behavior: +- `display_name` wins when present +- otherwise use a simple static per-type fallback +- do not attempt prompt/script inference yet +- do not change builder rendering paths in this phase + +Recommended execution-only resolver shape: +```typescript +function resolveExecutionNodeLabel(dn: DagNode): string { + if (dn.display_name) return dn.display_name; + if ('command' in dn && dn.command) return dn.command; + if ('bash' in dn && dn.bash) return 'Shell'; + if ('loop' in dn && dn.loop) return 'Loop'; + if ('script' in dn && dn.script) return 'Script'; + if ('approval' in dn && dn.approval) return 'Approval'; + if ('cancel' in dn && dn.cancel) return 'Cancel'; + return 'Prompt'; +} +``` + +`WorkflowDagViewer.tsx` can apply this resolver when building the execution node data from `dagNodes`. + +`ExecutionDagNode.tsx` should be updated only as needed so the execution graph can show correct badges/colors for any newly distinguished node kinds used in phase 1. + +### Builder Isolation +`packages/web/src/lib/dag-layout.ts` is shared by execution and builder loading. Because phase 1 must stay execution-only, avoid using a shared resolver there for this first slice. Keep builder rendering behavior unchanged until phase 2. + +### DagNodeData Interface +**File:** `packages/web/src/components/workflows/DagNodeComponent.tsx` + +If phase 1 keeps label resolution inside the execution graph path, builder-facing `DagNodeData` can stay unchanged. Only add new shared node-type values there in phase 1 if the execution implementation truly requires them. Prefer keeping this untouched until phase 2 if possible. + +### Type Regeneration +**File:** `packages/web/src/lib/api.generated.d.ts` + +After adding `display_name` to the schema and running the server, run: +```bash +bun --filter @archon/web generate:types +``` +This regenerates `api.generated.d.ts` from the OpenAPI spec so the web package sees +the new field via `DagNode` from `@/lib/api`. 
+ +### Phase 2 — Inference Helpers +**New utility function** (suggest placing in `packages/workflows/src/utils/` or +inline in `dag-executor.ts`): + +```typescript +function inferNodeLabel(node: DagNode, maxLen = 80): string { + if (node.display_name) return node.display_name; + if (node.command) return node.command; + if (node.prompt) return node.prompt.slice(0, maxLen).trimEnd(); + if (node.loop?.prompt) return node.loop.prompt.slice(0, maxLen).trimEnd(); + if (node.bash) return firstNonCommentLine(node.bash) ?? 'Shell'; + if (node.script) return firstNonBlankLine(node.script) ?? 'Script'; + if (node.approval) return node.approval.message?.slice(0, maxLen) ?? 'Approval'; + return node.id; +} +``` + +Apply in both `dag-executor.ts` (events) and the web UI label-building path. + +### No DB or API Changes Required (Phase 1) +- `remote_agent_workflow_events.step_name` remains `node.id` — no migration needed in phase 1. +- No new API endpoints needed; `display_name` rides through the existing workflow definition returned by `GET /api/workflows/:name`. +- The existing `GET /api/workflows/:name` route already returns the full workflow definition including node fields, so `display_name` will be available in the response automatically after schema extension. +- Non-graph execution surfaces may still show raw ids after phase 1 because they are driven from workflow events, not from graph-definition label resolution. That is a deliberate phase-1 tradeoff. + +--- + +## 11. 
Implementation Phases + +### Phase 1 — Explicit display_name (MVP) + +| # | Task | File(s) | Notes | +|---|------|---------|-------| +| 1.1 | Add `display_name?: string` to dagNodeBaseSchema | `packages/workflows/src/schemas/dag-node.ts` | Shared base; one change covers all types | +| 1.2 | Regenerate frontend API types | `packages/web/src/lib/api.generated.d.ts` | `bun --filter @archon/web generate:types` | +| 1.3 | Add execution-only label resolver using workflow definition | `packages/web/src/components/workflows/WorkflowDagViewer.tsx` | `display_name` first, then simple static fallback | +| 1.4 | Update execution node badges/colors only if needed for newly distinguished kinds | `packages/web/src/components/workflows/ExecutionDagNode.tsx` | Keep changes local to execution graph | +| 1.5 | Run `bun run validate` | All packages | type-check, lint, format, tests | + +**Parallel opportunities in Phase 1:** +- Schema/type regeneration can proceed ahead of the execution-graph UI update, but the slice is small enough that sequential implementation is likely cleaner. 
+ +### Phase 2 — Inference Fallbacks + +| # | Task | File(s) | Notes | +|---|------|---------|-------| +| 2.1 | Add builder compatibility for `display_name` and expanded node kinds | `packages/web/src/lib/dag-layout.ts`, `packages/web/src/components/workflows/DagNodeComponent.tsx`, builder surfaces | Shared builder/render path | +| 2.2 | Update non-graph execution surfaces to show display labels instead of raw ids | `packages/web/src/components/workflows/WorkflowExecution.tsx`, `DagNodeProgress.tsx`, `WorkflowLogs.tsx` | Currently executing banner, progress list, log labels | +| 2.3 | Decide whether to emit/persist display labels in events for better historical fidelity | executor/event/SSE/read models as needed | Optional, depends on how much post-hoc accuracy matters | +| 2.4 | Add simple inference fallback | shared helper + graph/render paths | 80-char truncation; no NLP | +| 2.5 | Optional: add truncation tooltip in ExecutionDagNode | `packages/web/src/components/workflows/ExecutionDagNode.tsx` | Show full text on hover | +| 2.6 | Run `bun run validate` | All packages | | + +--- + +## 12. 
Decisions Log + +| Decision | Rationale | +|----------|-----------| +| `display_name` is optional, not required | Backward-compatible; existing workflows continue to work unchanged | +| `step_name` in DB events stays as `node.id` | Preserves machine-stable identity for event correlation; display is a UI concern | +| Display label resolved at render time, not stored in events | Keeps DB contract clean; label can be updated by editing the workflow YAML without migrating historical data | +| Phase 1 skips inference | Reduces scope; explicit labeling is the highest-value unblocked step | +| Phase 1 is execution-graph-only | Keeps blast radius small and avoids shared builder/event paths | +| Builder canvas excluded from phase 1 | Separate creation surface; move to phase 2 with shared-rendering adjustments | +| Non-graph execution surfaces excluded from phase 1 | They depend on event-driven names and can be addressed coherently in phase 2 | +| Boilerplate stripping excluded | Over-engineering for v1; simple truncation at 80 chars is sufficient | +| `display_name` added to shared base schema, not per-type | One change covers all 7 node types; no per-type duplication | +| No new API endpoints needed | `display_name` rides through the existing workflow definition response | + +--- + +## Validation Notes + +**Validated against codebase at:** `packages/workflows/src/schemas/dag-node.ts`, +`packages/workflows/src/dag-executor.ts`, `packages/web/src/lib/dag-layout.ts`, +`packages/web/src/components/workflows/WorkflowDagViewer.tsx`, +`packages/web/src/components/workflows/ExecutionDagNode.tsx`, +`packages/web/src/components/workflows/DagNodeComponent.tsx`, +`packages/web/src/components/workflows/WorkflowCanvas.tsx`, +`packages/web/src/routes/WorkflowExecutionPage.tsx`, +`packages/workflows/src/schemas/loop.ts`, +`migrations/012_workflow_events.sql` + +**Corrections made during validation:** + +1. 
**Critical — wrong file for execution label building.** The PRD originally stated
   `WorkflowExecutionPage.tsx` constructs execution node labels. That page is a thin wrapper that only renders
   a single child component (2 lines). Current execution labels come from shared web DAG
   helpers consumed by `WorkflowDagViewer.tsx`, but phase 1 was then narrowed further to
   avoid shared builder paths and keep label resolution local to the execution graph.

2. **`resolveNodeDisplay()` gap confirmed.** The function currently falls through loop,
   script, approval, cancel nodes to the `'Prompt'` branch — verified in source. This
   is the root cause of the display problem for those node types, but because the helper is shared with builder loading, it should be handled in phase 2 unless phase 1 explicitly accepts builder impact.

3. **`loop.prompt` field name confirmed correct.** `loopNodeConfigSchema` in
   `packages/workflows/src/schemas/loop.ts` uses `prompt` as the field name. The
   inference reference `node.loop.prompt` in the PRD is accurate.

4. **`resolveNodeLabel()` in `WorkflowCanvas.tsx` is builder-only.** It is called only
   at lines 154 and 266 of `WorkflowCanvas.tsx` (drag-create paths). It is NOT used in
   the execution graph — `resolveNodeDisplay()` in `dag-layout.ts` is the execution
   path. PRD updated to clarify scope and defer builder changes.

5. **`dagNodeBaseSchema` name verified correct** at line 113 of `dag-node.ts`.

6. **`approval.message` field name verified correct** at line 249 of `dag-node.ts`.

7. **`packages/workflows/src/utils/` directory confirmed to exist** with existing
   utilities (variable-substitution, tool-formatter, idle-timeout). Phase 2 inference
   helper can be placed here.
diff --git a/eslint.config.mjs b/eslint.config.mjs index 69bf635bd5..ec06379d32 100644 --- a/eslint.config.mjs +++ b/eslint.config.mjs @@ -40,7 +40,7 @@ export default tseslint.config( // Project-specific settings { - files: ['packages/*/src/**/*.{ts,tsx}'], + files: ['packages/*/src/**/*.{ts,tsx}', '.archon/scripts/**/*.{ts,tsx}'], languageOptions: { parserOptions: { projectService: true, diff --git a/packages/cli/package.json b/packages/cli/package.json index bd8c7390bf..f7b4a4c05d 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -8,7 +8,7 @@ }, "scripts": { "cli": "bun src/cli.ts", - "test": "bun test src/commands/version.test.ts src/commands/setup.test.ts && bun test src/commands/workflow.test.ts && bun test src/commands/isolation.test.ts && bun test src/commands/chat.test.ts && bun test src/commands/serve.test.ts", + "test": "bun test src/commands/version.test.ts src/commands/setup.test.ts && bun test src/commands/workflow.test.ts && bun test src/commands/continue.test.ts && bun test src/commands/isolation.test.ts && bun test src/commands/chat.test.ts && bun test src/commands/serve.test.ts", "type-check": "bun x tsc --noEmit" }, "dependencies": { diff --git a/packages/cli/src/cli.ts b/packages/cli/src/cli.ts index d7dedf4810..4afad07651 100755 --- a/packages/cli/src/cli.ts +++ b/packages/cli/src/cli.ts @@ -123,7 +123,7 @@ Options: --quiet, -q Reduce log verbosity to warnings and errors only --verbose, -v Show debug-level output --json Output machine-readable JSON (for workflow list) - --workflow Workflow to run for 'continue' (default: archon-assist) + --workflow Workflow to run for 'continue' (default: archon-assist or archon-assist-codex by assistant) --no-context Skip context injection for 'continue' --allow-env-keys Grant env-key consent during auto-registration (bypasses the env-leak gate for this codebase; diff --git a/packages/cli/src/commands/continue.test.ts b/packages/cli/src/commands/continue.test.ts new file mode 100644 
index 0000000000..3da05480b9 --- /dev/null +++ b/packages/cli/src/commands/continue.test.ts @@ -0,0 +1,124 @@ +import { describe, test, expect, mock, beforeEach, spyOn } from 'bun:test'; +import { continueCommand } from './continue'; + +const mockWorkflowRunCommand = mock(async () => {}); +const mockFindActiveByBranchName = mock(async () => ({ + codebase_id: 'cb-1', + working_path: '/tmp/project', + branch_name: 'feature/test', +})); +const mockGetCodebase = mock(async () => ({ + id: 'cb-1', + ai_assistant_type: 'claude', +})); +const mockLoadConfig = mock(async () => ({ assistant: 'claude' })); +const mockFindLatestRunByWorkingPath = mock(async () => null); + +mock.module('./workflow', () => ({ + workflowRunCommand: mockWorkflowRunCommand, +})); + +mock.module('@archon/core/db/isolation-environments', () => ({ + findActiveByBranchName: mockFindActiveByBranchName, +})); + +mock.module('@archon/core/db/codebases', () => ({ + getCodebase: mockGetCodebase, +})); + +mock.module('@archon/core/db/workflows', () => ({ + findLatestRunByWorkingPath: mockFindLatestRunByWorkingPath, +})); + +mock.module('@archon/core', () => ({ + loadConfig: mockLoadConfig, +})); + +mock.module('@archon/git', () => ({ + execFileAsync: mock(async () => ({ stdout: '', stderr: '' })), +})); + +mock.module('@archon/paths', () => ({ + createLogger: () => ({ + fatal: mock(() => undefined), + error: mock(() => undefined), + warn: mock(() => undefined), + info: mock(() => undefined), + debug: mock(() => undefined), + trace: mock(() => undefined), + child: mock(function (this: unknown) { + return this; + }), + }), + getRunArtifactsPath: mock(() => '/tmp/artifacts'), + parseOwnerRepo: mock(() => null), +})); + +describe('continueCommand', () => { + const consoleLogSpy = spyOn(console, 'log').mockImplementation(() => {}); + + beforeEach(() => { + mockWorkflowRunCommand.mockClear(); + mockFindActiveByBranchName.mockClear(); + mockGetCodebase.mockClear(); + mockLoadConfig.mockClear(); + 
mockFindLatestRunByWorkingPath.mockClear(); + consoleLogSpy.mockClear(); + }); + + test('defaults to Claude assist workflow for Claude codebases', async () => { + mockGetCodebase.mockResolvedValueOnce({ id: 'cb-1', ai_assistant_type: 'claude' }); + + await continueCommand('feature/test', 'continue please', { noContext: true }); + + expect(mockWorkflowRunCommand).toHaveBeenCalledWith( + '/tmp/project', + 'archon-assist', + 'continue please', + expect.objectContaining({ noWorktree: true, codebaseId: 'cb-1' }) + ); + }); + + test('defaults to Codex assist workflow for Codex codebases', async () => { + mockGetCodebase.mockResolvedValueOnce({ id: 'cb-1', ai_assistant_type: 'codex' }); + + await continueCommand('feature/test', 'continue please', { noContext: true }); + + expect(mockWorkflowRunCommand).toHaveBeenCalledWith( + '/tmp/project', + 'archon-assist-codex', + 'continue please', + expect.objectContaining({ noWorktree: true, codebaseId: 'cb-1' }) + ); + }); + + test('falls back to config when codebase assistant is unavailable', async () => { + mockGetCodebase.mockResolvedValueOnce(null); + mockLoadConfig.mockResolvedValueOnce({ assistant: 'codex' }); + + await continueCommand('feature/test', 'continue please', { noContext: true }); + + expect(mockWorkflowRunCommand).toHaveBeenCalledWith( + '/tmp/project', + 'archon-assist-codex', + 'continue please', + expect.objectContaining({ noWorktree: true, codebaseId: 'cb-1' }) + ); + }); + + test('respects explicit workflow override', async () => { + mockGetCodebase.mockResolvedValueOnce({ id: 'cb-1', ai_assistant_type: 'codex' }); + + await continueCommand('feature/test', 'continue please', { + noContext: true, + workflow: 'archon-smart-pr-review', + }); + + expect(mockWorkflowRunCommand).toHaveBeenCalledWith( + '/tmp/project', + 'archon-smart-pr-review', + 'continue please', + expect.objectContaining({ noWorktree: true, codebaseId: 'cb-1' }) + ); + }); +}); diff --git a/packages/cli/src/commands/continue.ts 
b/packages/cli/src/commands/continue.ts index bec7d79c67..ce2819f0a3 100644 --- a/packages/cli/src/commands/continue.ts +++ b/packages/cli/src/commands/continue.ts @@ -5,6 +5,7 @@ import { workflowRunCommand } from './workflow'; import * as isolationDb from '@archon/core/db/isolation-environments'; import * as codebaseDb from '@archon/core/db/codebases'; import * as workflowDb from '@archon/core/db/workflows'; +import { loadConfig } from '@archon/core'; import { execFileAsync } from '@archon/git'; import { createLogger, getRunArtifactsPath, parseOwnerRepo } from '@archon/paths'; import type { WorkflowRun } from '@archon/workflows/schemas/workflow-run'; @@ -24,6 +25,7 @@ export interface ContinueOptions { } const DEFAULT_WORKFLOW = 'archon-assist'; +const DEFAULT_CODEX_WORKFLOW = 'archon-assist-codex'; /** * Continue work on an existing worktree with prior run context injected. @@ -37,8 +39,6 @@ export async function continueCommand( userMessage: string, options: ContinueOptions = {} ): Promise { - const workflowName = options.workflow ?? DEFAULT_WORKFLOW; - // 1. Resolve branch → isolation environment const env = await isolationDb.findActiveByBranchName(branch); if (!env) { @@ -48,6 +48,9 @@ export async function continueCommand( ); } + const workflowName = + options.workflow ?? (await getDefaultContinueWorkflow(env.working_path, env.codebase_id)); + // 2. Find prior run on this worktree path const priorRun = await workflowDb.findLatestRunByWorkingPath(env.working_path); @@ -86,6 +89,27 @@ export async function continueCommand( } } +async function getDefaultContinueWorkflow(cwd: string, codebaseId: string): Promise { + try { + const codebase = await codebaseDb.getCodebase(codebaseId); + if (codebase?.ai_assistant_type === 'codex') { + return DEFAULT_CODEX_WORKFLOW; + } + if (codebase?.ai_assistant_type === 'claude') { + return DEFAULT_WORKFLOW; + } + } catch { + // Fall through to config lookup. 
+ } + + try { + const config = await loadConfig(cwd); + return config.assistant === 'codex' ? DEFAULT_CODEX_WORKFLOW : DEFAULT_WORKFLOW; + } catch { + return DEFAULT_WORKFLOW; + } +} + /** * Build a markdown context preamble from git state and prior run artifacts. * Each section is independently try/caught — failures produce empty strings, never throw. diff --git a/packages/cli/src/commands/workflow.test.ts b/packages/cli/src/commands/workflow.test.ts index 7f13f8d83f..c10a4f7106 100644 --- a/packages/cli/src/commands/workflow.test.ts +++ b/packages/cli/src/commands/workflow.test.ts @@ -25,6 +25,12 @@ const mockLogger = { child: mock(() => mockLogger), }; +const mockFsAccess = mock(() => Promise.resolve()); + +mock.module('fs/promises', () => ({ + access: mockFsAccess, +})); + // Mock @archon/paths (createLogger moved here from @archon/core) mock.module('@archon/paths', () => ({ createLogger: mock(() => mockLogger), @@ -345,6 +351,8 @@ describe('workflowRunCommand', () => { consoleSpy = spyOn(console, 'log').mockImplementation(() => {}); mockLogger.warn.mockClear(); mockLogger.info.mockClear(); + mockFsAccess.mockClear(); + mockFsAccess.mockResolvedValue(undefined); }); afterEach(() => { @@ -363,6 +371,16 @@ describe('workflowRunCommand', () => { ); }); + it('fails early when Archon home is not writable for workflow state', async () => { + mockFsAccess.mockRejectedValueOnce( + Object.assign(new Error('Operation not permitted'), { code: 'EPERM' }) + ); + + await expect(workflowRunCommand('/test/path', 'assist', 'hello')).rejects.toThrow( + "requires write access to '/home/test/.archon'" + ); + }); + it('should throw error when workflow not found', async () => { const { discoverWorkflowsWithConfig } = await import('@archon/workflows/workflow-discovery'); (discoverWorkflowsWithConfig as ReturnType).mockResolvedValueOnce({ diff --git a/packages/cli/src/commands/workflow.ts b/packages/cli/src/commands/workflow.ts index 89dd5911e4..d95dad1c90 100644 --- 
a/packages/cli/src/commands/workflow.ts +++ b/packages/cli/src/commands/workflow.ts @@ -1,6 +1,9 @@ /** * Workflow command - list and run workflows */ +import { constants as fsConstants } from 'fs'; +import { access } from 'fs/promises'; +import { dirname, join } from 'path'; import { registerRepository, loadConfig, @@ -79,6 +82,56 @@ function generateConversationId(): string { return `cli-${String(timestamp)}-${random}`; } +async function assertArchonStateWritable(commandName: string): Promise { + if (process.env.DATABASE_URL) { + return; + } + + const archonHome = getArchonHome(); + const dbPath = join(archonHome, 'archon.db'); + const homeAccessTarget = dirname(archonHome); + + try { + await access(archonHome, fsConstants.W_OK); + } catch (error) { + const err = error as NodeJS.ErrnoException; + if (err.code === 'ENOENT') { + try { + await access(homeAccessTarget, fsConstants.W_OK); + return; + } catch (parentError) { + const parentErr = parentError as NodeJS.ErrnoException; + throw new Error( + `Archon CLI '${commandName}' requires write access to '${homeAccessTarget}' ` + + `so it can create '${archonHome}' and its SQLite state.\n` + + `Current failure: ${parentErr.message}\n` + + `Fix: rerun outside the outer workspace sandbox or grant write access to '${homeAccessTarget}'.` + ); + } + } + throw new Error( + `Archon CLI '${commandName}' requires write access to '${archonHome}' ` + + `because local workflow state uses SQLite at '${dbPath}' when DATABASE_URL is unset.\n` + + `Current failure: ${err.message}\n` + + `Fix: rerun outside the outer workspace sandbox or grant write access to '${archonHome}'.` + ); + } + + try { + await access(dbPath, fsConstants.W_OK); + } catch (error) { + const err = error as NodeJS.ErrnoException; + if (err.code === 'ENOENT') { + return; + } + throw new Error( + `Archon CLI '${commandName}' requires write access to '${dbPath}'.\n` + + `Current failure: ${err.message}\n` + + `Fix: rerun outside the outer workspace sandbox or grant 
write access to '${archonHome}'.` + ); + } +} + /** Render a workflow event to stderr as a progress line. Called only when --quiet is not set. */ function renderWorkflowEvent(event: WorkflowEmitterEvent, verbose: boolean): void { switch (event.type) { @@ -210,6 +263,8 @@ export async function workflowRunCommand( userMessage: string, options: WorkflowRunOptions = {} ): Promise { + await assertArchonStateWritable('workflow run'); + const { workflows: workflowEntries, errors } = await loadWorkflows(cwd); if (workflowEntries.length === 0 && errors.length === 0) { @@ -279,9 +334,10 @@ export async function workflowRunCommand( conversation = await conversationDb.getOrCreateConversation('cli', conversationId); } catch (error) { const err = error as Error; - throw new Error( - `Failed to access database: ${err.message}\nHint: Check that DATABASE_URL is set and the database is running.` - ); + const readOnlyHint = err.message.toLowerCase().includes('readonly') + ? `\nHint: Archon needs write access to '${getArchonHome()}' (SQLite state lives there when DATABASE_URL is unset).` + : '\nHint: Check that DATABASE_URL is set and the database is running.'; + throw new Error(`Failed to access database: ${err.message}${readOnlyHint}`); } // Try to find a codebase for this directory @@ -806,6 +862,7 @@ export async function workflowStatusCommand(json?: boolean, verbose?: boolean): * findResumableRun picks up the prior failed run and skips completed nodes. */ export async function workflowResumeCommand(runId: string): Promise { + await assertArchonStateWritable('workflow resume'); const run = await resumeWorkflowOp(runId); if (!run.working_path) { throw new Error( @@ -839,6 +896,7 @@ export async function workflowResumeCommand(runId: string): Promise { * Abandon a workflow run by ID (marks it as cancelled). 
*/ export async function workflowAbandonCommand(runId: string): Promise { + await assertArchonStateWritable('workflow abandon'); const run = await abandonWorkflow(runId); console.log(`Abandoned workflow run: ${runId}`); console.log(`Workflow: ${run.workflow_name}`); @@ -849,6 +907,7 @@ export async function workflowAbandonCommand(runId: string): Promise { * Writes the approval events and transitions to 'failed' for auto-resume. */ export async function workflowApproveCommand(runId: string, comment?: string): Promise { + await assertArchonStateWritable('workflow approve'); const result = await approveWorkflow(runId, comment); // CLI auto-resumes after approval (unlike chat, which defers to next user message) @@ -905,6 +964,7 @@ export async function workflowApproveCommand(runId: string, comment?: string): P * Reject a paused workflow run by ID (marks it as cancelled). */ export async function workflowRejectCommand(runId: string, reason?: string): Promise { + await assertArchonStateWritable('workflow reject'); const result = await rejectWorkflow(runId, reason); if (result.cancelled) { @@ -965,6 +1025,7 @@ export async function workflowRejectCommand(runId: string, reason?: string): Pro * Delete terminal workflow runs older than the given number of days. 
*/ export async function workflowCleanupCommand(days: number): Promise { + await assertArchonStateWritable('workflow cleanup'); try { const { count } = await workflowDb.deleteOldWorkflowRuns(days); if (count === 0) { diff --git a/packages/core/src/db/workflows.test.ts b/packages/core/src/db/workflows.test.ts index bbbfa6ccf4..28d0548db6 100644 --- a/packages/core/src/db/workflows.test.ts +++ b/packages/core/src/db/workflows.test.ts @@ -332,6 +332,22 @@ describe('workflows database', () => { expect(params).toContain(JSON.stringify({ error: 'Timeout exceeded' })); }); + test('merges optional metadata into failure payload', async () => { + mockQuery.mockResolvedValueOnce(createQueryResult([], 1)); + + await failWorkflowRun('workflow-run-123', 'Timeout exceeded', { + node_counts: { completed: 1, failed: 1, skipped: 0, total: 2 }, + }); + + const [, params] = mockQuery.mock.calls[0] as [string, unknown[]]; + expect(params).toContain( + JSON.stringify({ + error: 'Timeout exceeded', + node_counts: { completed: 1, failed: 1, skipped: 0, total: 2 }, + }) + ); + }); + test('throws when rowCount is 0', async () => { mockQuery.mockResolvedValueOnce(createQueryResult([], 0)); @@ -618,7 +634,7 @@ describe('workflows database', () => { }); describe('failOrphanedRuns', () => { - test('transitions all running runs to failed with completed_at and returns count', async () => { + test('transitions non-CLI running runs to failed with completed_at and returns count', async () => { mockQuery.mockResolvedValueOnce(createQueryResult([], 2)); const result = await failOrphanedRuns(); @@ -628,6 +644,7 @@ describe('workflows database', () => { expect(query).toContain("status = 'failed'"); expect(query).toContain('completed_at = NOW()'); expect(query).toContain("status = 'running'"); + expect(query).toContain("platform_type != 'cli'"); expect(params).toContain(JSON.stringify({ failure_reason: 'server_restart' })); }); diff --git a/packages/core/src/db/workflows.ts 
b/packages/core/src/db/workflows.ts index 0abfb0474d..2b9d5011ff 100644 --- a/packages/core/src/db/workflows.ts +++ b/packages/core/src/db/workflows.ts @@ -471,7 +471,11 @@ export async function completeWorkflowRun( } } -export async function failWorkflowRun(id: string, error: string): Promise { +export async function failWorkflowRun( + id: string, + error: string, + metadata?: Record +): Promise { const dialect = getDialect(); let result: Awaited>; try { @@ -479,7 +483,7 @@ export async function failWorkflowRun(id: string, error: string): Promise `UPDATE remote_agent_workflow_runs SET status = 'failed', completed_at = ${dialect.now()}, metadata = ${dialect.jsonMerge('metadata', 2)} WHERE id = $1 AND status = 'running'`, - [id, JSON.stringify({ error })] + [id, JSON.stringify({ error, ...(metadata ?? {}) })] ); } catch (dbError) { const err = dbError as Error; @@ -828,6 +832,8 @@ export async function updateWorkflowActivity(id: string): Promise { /** * Transition all 'running' workflow runs to 'failed'. * Called on server startup to mark runs orphaned by process termination. + * Excludes CLI-owned runs because they may still be executing in a separate + * process while sharing the same database with the server. * The next invocation of the same workflow at the same path will auto-resume * from completed nodes via findResumableRun. */ @@ -839,7 +845,10 @@ export async function failOrphanedRuns(): Promise<{ count: number }> { SET status = 'failed', completed_at = ${dialect.now()}, metadata = ${dialect.jsonMerge('metadata', 1)} - WHERE status = 'running'`, + WHERE status = 'running' + AND conversation_id IN ( + SELECT id FROM remote_agent_conversations WHERE platform_type != 'cli' + )`, [JSON.stringify({ failure_reason: 'server_restart' })] ); const count = result.rowCount ?? 
0; diff --git a/packages/core/src/orchestrator/orchestrator-agent.test.ts b/packages/core/src/orchestrator/orchestrator-agent.test.ts index 70080cc01a..a89ae47da8 100644 --- a/packages/core/src/orchestrator/orchestrator-agent.test.ts +++ b/packages/core/src/orchestrator/orchestrator-agent.test.ts @@ -125,6 +125,15 @@ mock.module('../db/workflow-events', () => ({ createWorkflowEvent: mockCreateWorkflowEvent, })); +// Mock db/messages so handleMessage persistence hooks (for non-web platforms) +// don't try to open a real DB connection. addMessage is the only function we +// exercise in these tests. +const mockAddMessage = mock(() => Promise.resolve({} as unknown)); +mock.module('../db/messages', () => ({ + addMessage: mockAddMessage, + listMessages: mock(() => Promise.resolve([])), +})); + mock.module('../config/config-loader', () => ({ loadConfig: mock(() => Promise.resolve({})), })); @@ -1407,3 +1416,151 @@ describe('discoverAllWorkflows — merge repo workflows over global', () => { expect(mockDiscoverWorkflowsWithConfig).toHaveBeenCalledTimes(2); }); }); + +// ─── Telegram user-message persistence ──────────────────────────────────────── + +/** + * These tests cover the `platform.getPlatformType() === 'telegram'` persistence + * gate added to handleMessage. They verify that: + * 1. natural-language telegram messages are persisted with role='user' + * 2. deterministic slash commands skip persistence (stay ephemeral) + * 3. web conversations do NOT trigger the centralized path (web's existing + * PersistenceBuffer still owns the web flow) + * + * Assistant-message persistence hooks (inside handleStreamMode / handleBatchMode + * and in the top-level catch) are not covered here — they require mocking + * sendQuery to yield actual content, which is out of scope for this test batch. + * Track as a follow-up. 
+ */ +function makeTelegramPlatform(): IPlatformAdapter { + return { + sendMessage: mock(() => Promise.resolve()), + ensureThread: mock((id: string) => Promise.resolve(id)), + getStreamingMode: mock(() => 'stream' as const), + getPlatformType: mock(() => 'telegram'), + start: mock(() => Promise.resolve()), + stop: mock(() => {}), + }; +} + +describe('telegram user-message persistence', () => { + beforeEach(() => { + mockAddMessage.mockClear(); + mockGetOrCreateConversation.mockReset(); + mockGetCodebase.mockReset(); + mockListCodebases.mockReset(); + mockDiscoverWorkflowsWithConfig.mockReset(); + mockHandleCommand.mockReset(); + mockParseCommand.mockReset(); + + mockGetOrCreateConversation.mockImplementation(() => Promise.resolve(null)); + mockGetCodebase.mockImplementation(() => Promise.resolve(null)); + mockListCodebases.mockImplementation(() => Promise.resolve([])); + mockDiscoverWorkflowsWithConfig.mockImplementation(() => + Promise.resolve({ workflows: [], errors: [] }) + ); + mockHandleCommand.mockImplementation(() => + Promise.resolve({ success: true, message: 'ok', workflow: undefined }) + ); + mockParseCommand.mockImplementation((text: string) => { + const matches = text.match(/"[^"]+"|'[^']+'|\S+/g) ?? 
[]; + if (matches.length === 0 || !matches[0] || !matches[0].startsWith('/')) { + return { command: '', args: [] }; + } + + return { + command: matches[0].slice(1), + args: matches.slice(1).map(arg => { + if ( + (arg.startsWith('"') && arg.endsWith('"')) || + (arg.startsWith("'") && arg.endsWith("'")) + ) { + return arg.slice(1, -1); + } + return arg; + }), + }; + }); + }); + + test('natural-language telegram message is persisted as user turn', async () => { + const conversation = makeConversation({ + id: 'telegram-conv-db-id', + platform_type: 'telegram', + platform_conversation_id: '8579582275', + title: null, + }); + mockGetOrCreateConversation.mockReturnValueOnce(Promise.resolve(conversation)); + + const platform = makeTelegramPlatform(); + await handleMessage(platform, '8579582275', 'What does the orchestrator do?'); + + // user message persisted exactly once + expect(mockAddMessage).toHaveBeenCalled(); + const userCalls = mockAddMessage.mock.calls.filter(c => c[1] === 'user'); + expect(userCalls).toHaveLength(1); + expect(userCalls[0]?.[0]).toBe('telegram-conv-db-id'); + expect(userCalls[0]?.[2]).toBe('What does the orchestrator do?'); + expect(userCalls[0]?.[3]).toEqual({ platformType: 'telegram' }); + }); + + test('deterministic slash command (/help) skips persistence', async () => { + const conversation = makeConversation({ + id: 'telegram-conv-db-id', + platform_type: 'telegram', + platform_conversation_id: '8579582275', + title: null, + }); + mockGetOrCreateConversation.mockReturnValueOnce(Promise.resolve(conversation)); + mockHandleCommand.mockReturnValueOnce( + Promise.resolve({ success: true, message: 'help text', workflow: undefined }) + ); + + const platform = makeTelegramPlatform(); + await handleMessage(platform, '8579582275', '/help'); + + // /help must not persist anything — neither user nor assistant row + const userCalls = mockAddMessage.mock.calls.filter(c => c[1] === 'user'); + const assistantCalls = mockAddMessage.mock.calls.filter(c => 
c[1] === 'assistant'); + expect(userCalls).toHaveLength(0); + expect(assistantCalls).toHaveLength(0); + }); + + test('slash-prefixed AI prompt (/etc/hosts) is persisted as user turn', async () => { + const conversation = makeConversation({ + id: 'telegram-conv-db-id', + platform_type: 'telegram', + platform_conversation_id: '8579582275', + title: null, + }); + mockGetOrCreateConversation.mockReturnValueOnce(Promise.resolve(conversation)); + const platform = makeTelegramPlatform(); + await handleMessage(platform, '8579582275', '/etc/hosts'); + + const userCalls = mockAddMessage.mock.calls.filter(c => c[1] === 'user'); + expect(userCalls).toHaveLength(1); + expect(userCalls[0]?.[0]).toBe('telegram-conv-db-id'); + expect(userCalls[0]?.[2]).toBe('/etc/hosts'); + expect(userCalls[0]?.[3]).toEqual({ platformType: 'telegram' }); + expect(mockHandleCommand).not.toHaveBeenCalled(); + }); + + test('web platform does not trigger centralized persistence path', async () => { + const conversation = makeConversation({ + id: 'web-conv-db-id', + platform_type: 'web', + platform_conversation_id: 'web-test-1', + title: 'Web Test', + }); + mockGetOrCreateConversation.mockReturnValueOnce(Promise.resolve(conversation)); + + // Default makePlatform() returns platform_type='web' + const platform = makePlatform(); + await handleMessage(platform, 'web-test-1', 'Hello from the web UI'); + + // Centralized path is gated to telegram only — web is handled by its own + // PersistenceBuffer at the server layer, not by this code path. 
+ const userCalls = mockAddMessage.mock.calls.filter(c => c[1] === 'user'); + expect(userCalls).toHaveLength(0); + }); +}); diff --git a/packages/core/src/orchestrator/orchestrator-agent.ts b/packages/core/src/orchestrator/orchestrator-agent.ts index 97d989f47c..430b71f719 100644 --- a/packages/core/src/orchestrator/orchestrator-agent.ts +++ b/packages/core/src/orchestrator/orchestrator-agent.ts @@ -20,6 +20,7 @@ import { ConversationNotFoundError } from '../types'; import * as db from '../db/conversations'; import * as codebaseDb from '../db/codebases'; import * as sessionDb from '../db/sessions'; +import * as messageDb from '../db/messages'; import * as commandHandler from '../handlers/command-handler'; import { formatToolCall } from '@archon/workflows/utils/tool-formatter'; import { classifyAndFormatError } from '../utils/error-formatter'; @@ -61,6 +62,18 @@ function getLog(): ReturnType { const MAX_BATCH_ASSISTANT_CHUNKS = 20; /** Max total chunks (assistant + tool) to keep in batch mode */ const MAX_BATCH_TOTAL_CHUNKS = 200; +const DETERMINISTIC_COMMANDS = new Set([ + 'help', + 'status', + 'reset', + 'workflow', + 'register-project', + 'update-project', + 'remove-project', + 'commands', + 'init', + 'worktree', +]); // ─── Types ────────────────────────────────────────────────────────────────── @@ -98,6 +111,15 @@ function findCodebaseByName( }); } +function isDeterministicCommandMessage(message: string): boolean { + if (!message.startsWith('/')) { + return false; + } + + const { command } = commandHandler.parseCommand(message); + return DETERMINISTIC_COMMANDS.has(command); +} + /** * Parse orchestrator commands from AI response text. * Scans for /invoke-workflow and /register-project patterns. @@ -458,8 +480,13 @@ function buildFullPrompt( : undefined; const systemPrompt = scopedCodebase - ? buildProjectScopedPrompt(scopedCodebase, codebases, workflows) - : buildOrchestratorPrompt(codebases, workflows); + ? 
buildProjectScopedPrompt( + scopedCodebase, + codebases, + workflows, + scopedCodebase.ai_assistant_type || conversation.ai_assistant_type + ) + : buildOrchestratorPrompt(codebases, workflows, conversation.ai_assistant_type); const contextSuffix = issueContext ? '\n\n---\n\n## Additional Context\n\n' + issueContext : ''; @@ -502,11 +529,15 @@ export async function handleMessage( ): Promise { const { issueContext, threadContext, parentConversationId, isolationHints, attachedFiles } = context ?? {}; + // Hoisted so the top-level catch block can persist error messages for + // non-web platforms (telegram, etc.) — see the catch handler at the end + // of handleMessage for the rationale. + let conversation: Conversation | undefined; try { getLog().debug({ conversationId }, 'orchestrator_message_received'); // 1. Get/create conversation and inherit thread context - let conversation = await db.getOrCreateConversation( + conversation = await db.getOrCreateConversation( platform.getPlatformType(), conversationId, undefined, @@ -529,6 +560,27 @@ export async function handleMessage( ); } + // 1d. Persist inbound user message for non-web chat platforms (currently + // telegram only; broaden once slack/discord/github webhook replay is + // audited). Web's PersistenceBuffer and the HTTP routes already own the + // web path. Deterministic slash commands (/help, /status, etc.) skip + // this by design — they're ephemeral utility chatter, not conversation + // content. Gated before the approval-routing block on purpose so that + // natural-language approval responses ARE captured. 
+ if (platform.getPlatformType() === 'telegram' && !isDeterministicCommandMessage(message)) { + try { + await messageDb.addMessage(conversation.id, 'user', message, { + platformType: 'telegram', + }); + } catch (persistErr) { + getLog().error( + { err: toError(persistErr), conversationId: conversation.id }, + 'telegram_user_message_persistence_failed' + ); + // Swallow — persistence failure must not break the user-facing reply. + } + } + // Natural-language approval routing — if a workflow is paused in this // conversation, treat any non-slash message as the approval response. if (!message.startsWith('/')) { @@ -647,20 +699,8 @@ export async function handleMessage( // 2. Check for deterministic commands if (message.startsWith('/')) { const { command } = commandHandler.parseCommand(message); - const deterministicCommands = [ - 'help', - 'status', - 'reset', - 'workflow', - 'register-project', - 'update-project', - 'remove-project', - 'commands', - 'init', - 'worktree', - ]; - - if (deterministicCommands.includes(command)) { + + if (DETERMINISTIC_COMMANDS.has(command)) { if (command === 'register-project') { getLog().debug({ command, conversationId }, 'deterministic_command'); const result = await handleRegisterProject(message, platform, conversationId); @@ -809,6 +849,24 @@ export async function handleMessage( } catch (sendError) { getLog().error({ err: toError(sendError), conversationId }, 'error_notification_failed'); } + // Persist the error response as the assistant turn for non-web chat + // platforms. Without this, the Web UI view of a telegram conversation + // would show an orphan user row with no assistant counterpart whenever + // the orchestrator throws. Skipped silently if the conversation lookup + // failed before reaching getOrCreateConversation. 
+ if (conversation && platform.getPlatformType() === 'telegram') { + try { + await messageDb.addMessage(conversation.id, 'assistant', userMessage, { + platformType: 'telegram', + error: true, + }); + } catch (persistErr) { + getLog().error( + { err: toError(persistErr), conversationId: conversation.id }, + 'telegram_error_message_persistence_failed' + ); + } + } } } @@ -925,7 +983,22 @@ async function handleStreamMode( return; } - // Text was already streamed — nothing more to send + // Text was already streamed — nothing more to send. + // Persist the assistant turn for non-web chat platforms (telegram today). + // Gated AFTER the retract branches above so retracted text is never saved, + // matching the semantics MessagePersistence.retractLastSegment uses on web. + if (platform.getPlatformType() === 'telegram' && fullResponse.trim().length > 0) { + try { + await messageDb.addMessage(conversation.id, 'assistant', fullResponse, { + platformType: 'telegram', + }); + } catch (persistErr) { + getLog().error( + { err: toError(persistErr), conversationId: conversation.id }, + 'telegram_assistant_message_persistence_failed' + ); + } + } } // ─── Batch Mode ───────────────────────────────────────────────────────────── @@ -1061,6 +1134,23 @@ async function handleBatchMode( // No orchestrator commands — send the clean response getLog().debug({ messageLength: finalMessage.length }, 'sending_final_message'); await platform.sendMessage(conversationId, finalMessage); + + // Persist the assistant turn for non-web chat platforms (telegram today). + // Placed after the successful send so we don't record messages the user + // never actually saw. Same retract semantics as stream mode — the two + // early returns above (workflowInvocation, projectRegistration) skip this. 
+ if (platform.getPlatformType() === 'telegram' && finalMessage.trim().length > 0) { + try { + await messageDb.addMessage(conversation.id, 'assistant', finalMessage, { + platformType: 'telegram', + }); + } catch (persistErr) { + getLog().error( + { err: toError(persistErr), conversationId: conversation.id }, + 'telegram_assistant_message_persistence_failed' + ); + } + } } // ─── Orchestrator Command Handlers ────────────────────────────────────────── diff --git a/packages/core/src/orchestrator/orchestrator.test.ts b/packages/core/src/orchestrator/orchestrator.test.ts index d5e81038da..86770ed4b5 100644 --- a/packages/core/src/orchestrator/orchestrator.test.ts +++ b/packages/core/src/orchestrator/orchestrator.test.ts @@ -619,7 +619,11 @@ describe('orchestrator-agent handleMessage', () => { await handleMessage(platform, 'chat-456', 'help me'); expect(mockListCodebases).toHaveBeenCalled(); - expect(mockBuildOrchestratorPrompt).toHaveBeenCalledWith([mockCodebase], expect.any(Array)); + expect(mockBuildOrchestratorPrompt).toHaveBeenCalledWith( + [mockCodebase], + expect.any(Array), + 'claude' + ); }); test('builds project-scoped prompt when conversation has codebase_id', async () => { @@ -636,7 +640,8 @@ describe('orchestrator-agent handleMessage', () => { expect(mockBuildProjectScopedPrompt).toHaveBeenCalledWith( mockCodebase, [mockCodebase], - expect.any(Array) + expect.any(Array), + 'claude' ); }); diff --git a/packages/core/src/orchestrator/prompt-builder.test.ts b/packages/core/src/orchestrator/prompt-builder.test.ts index 7a734950b1..6d7cccbd23 100644 --- a/packages/core/src/orchestrator/prompt-builder.test.ts +++ b/packages/core/src/orchestrator/prompt-builder.test.ts @@ -1,5 +1,5 @@ import { describe, test, expect } from 'bun:test'; -import { buildRoutingRulesWithProject } from './prompt-builder'; +import { buildRoutingRulesWithProject, getAssistWorkflowName } from './prompt-builder'; describe('buildRoutingRulesWithProject', () => { test('routing rules include 
--prompt in invocation format', () => { @@ -30,4 +30,22 @@ describe('buildRoutingRulesWithProject', () => { expect(rules).toContain('NO knowledge of the conversation history'); }); + + test('uses Codex assist workflow when assistant type is codex', () => { + const rules = buildRoutingRulesWithProject('my-project', 'codex'); + + expect(rules).toContain('**archon-assist-codex**'); + expect(rules).toContain('/invoke-workflow archon-assist-codex --project my-project'); + }); +}); + +describe('getAssistWorkflowName', () => { + test('returns Codex assist workflow for codex assistant', () => { + expect(getAssistWorkflowName('codex')).toBe('archon-assist-codex'); + }); + + test('returns Claude assist workflow by default', () => { + expect(getAssistWorkflowName('claude')).toBe('archon-assist'); + expect(getAssistWorkflowName()).toBe('archon-assist'); + }); }); diff --git a/packages/core/src/orchestrator/prompt-builder.ts b/packages/core/src/orchestrator/prompt-builder.ts index d5f307db5b..b8482240c4 100644 --- a/packages/core/src/orchestrator/prompt-builder.ts +++ b/packages/core/src/orchestrator/prompt-builder.ts @@ -6,6 +6,10 @@ import type { Codebase } from '../types'; import type { WorkflowDefinition } from '@archon/workflows/schemas/workflow'; +export function getAssistWorkflowName(assistantType?: string): string { + return assistantType === 'codex' ? 'archon-assist-codex' : 'archon-assist'; +} + /** * Format a single project for the orchestrator prompt. */ @@ -40,18 +44,19 @@ export function formatWorkflowSection(workflows: readonly WorkflowDefinition[]): /** * Build the routing rules section of the prompt. */ -export function buildRoutingRules(): string { - return buildRoutingRulesWithProject(); +export function buildRoutingRules(assistantType?: string): string { + return buildRoutingRulesWithProject(undefined, assistantType); } /** * Build the routing rules section, optionally scoped to a specific project. 
* When projectName is provided, rule #4 defaults to that project instead of asking. */ -export function buildRoutingRulesWithProject(projectName?: string): string { +export function buildRoutingRulesWithProject(projectName?: string, assistantType?: string): string { const rule4 = projectName ? `4. If ambiguous which project → use **${projectName}** (the active project)` : '4. If ambiguous which project → ask the user'; + const assistWorkflow = getAssistWorkflowName(assistantType); return `## Routing Rules @@ -77,15 +82,15 @@ Rules: Routing behavior: - If the user clearly wants work done (e.g., "create a plan for X", "implement Y", "fix Z") → include a brief explanation of what you're doing, then invoke the workflow. -- If the user is asking a question or it's unclear whether they want a workflow → answer their question directly. You may suggest a workflow by name (e.g., "I can run the **archon-assist** workflow for this if you'd like"), but do NOT include /invoke-workflow in your response. +- If the user is asking a question or it's unclear whether they want a workflow → answer their question directly. You may suggest a workflow by name (e.g., "I can run the **${assistWorkflow}** workflow for this if you'd like"), but do NOT include /invoke-workflow in your response. Example (clear intent): I'll analyze the orchestrator module architecture for you. -/invoke-workflow archon-assist --project my-project --prompt "Analyze the orchestrator module architecture: explain how it routes messages, manages sessions, and dispatches workflows to AI clients" +/invoke-workflow ${assistWorkflow} --project my-project --prompt "Analyze the orchestrator module architecture: explain how it routes messages, manages sessions, and dispatches workflows to AI clients" Example (ambiguous — answer directly): User: "What do you think about adding dark mode?" -Response: "Adding dark mode would involve... [answer the question]. 
If you'd like me to create a plan for this, I can run the **archon-idea-to-pr** workflow." +Response: "Adding dark mode would involve... [answer the question]. If you'd like me to create a plan for this, I can run the **archon-idea-to-pr** workflow." ## Project Setup @@ -113,7 +118,8 @@ IMPORTANT: Always clone into ~/.archon/workspaces/{owner}/{repo}/source unless t */ export function buildOrchestratorPrompt( codebases: readonly Codebase[], - workflows: readonly WorkflowDefinition[] + workflows: readonly WorkflowDefinition[], + assistantType?: string ): string { let prompt = `# Archon Orchestrator @@ -138,7 +144,7 @@ You can answer questions directly or invoke workflows for structured development prompt += '## Available Workflows\n\n'; prompt += formatWorkflowSection(workflows); - prompt += buildRoutingRules(); + prompt += buildRoutingRules(assistantType); return prompt; } @@ -151,7 +157,8 @@ You can answer questions directly or invoke workflows for structured development export function buildProjectScopedPrompt( scopedCodebase: Codebase, allCodebases: readonly Codebase[], - workflows: readonly WorkflowDefinition[] + workflows: readonly WorkflowDefinition[], + assistantType?: string ): string { const otherCodebases = allCodebases.filter(c => c.id !== scopedCodebase.id); @@ -179,7 +186,7 @@ ${formatProjectSection(scopedCodebase)} prompt += '## Available Workflows\n\n'; prompt += formatWorkflowSection(workflows); - prompt += buildRoutingRulesWithProject(scopedCodebase.name); + prompt += buildRoutingRulesWithProject(scopedCodebase.name, assistantType); return prompt; } diff --git a/packages/docs-web/src/content/docs/book/essential-workflows.md b/packages/docs-web/src/content/docs/book/essential-workflows.md index 7d2c3bc43e..9fe4462dc0 100644 --- a/packages/docs-web/src/content/docs/book/essential-workflows.md +++ b/packages/docs-web/src/content/docs/book/essential-workflows.md @@ -21,6 +21,7 @@ What do you want to do? 
 │
 ├── Ask a question or explore the codebase
-│   └── archon-assist
+│   ├── archon-assist
+│   └── archon-assist-codex
 │
 ├── Fix a bug from a GitHub issue
 │   └── archon-fix-github-issue
@@ -60,6 +61,17 @@ The starting point for anything that doesn't fit elsewhere. It runs a single ful
 archon workflow run archon-assist "What does the orchestrator do?"
 archon workflow run archon-assist "Why are tests failing in the auth module?"
 archon workflow run archon-assist "Explain the isolation system to me"
+```
+
+#### `archon-assist-codex`
+
+Codex-tuned catch-all for the same class of work when you want the assist lane
+to stay on the Codex side of the house.
+
+```bash
+archon workflow run archon-assist-codex "What does the orchestrator do?"
+archon workflow run archon-assist-codex "Why are tests failing in the auth module?"
+archon workflow run archon-assist-codex "Explain the isolation system to me"
 ```
 
 **What it produces**: A direct answer. No PR, no artifacts — just the AI working through your question with full access to your code.
@@ -209,6 +221,7 @@ archon workflow run archon-resolve-conflicts "Resolve conflicts on PR #94"
 | Workflow | Use When | Creates PR? | Uses Isolation? |
 |----------|----------|-------------|-----------------|
 | `archon-assist` | Questions, exploration, debugging | No | No |
+| `archon-assist-codex` | Questions, exploration, debugging (Codex-tuned) | No | No |
 | `archon-fix-github-issue` | Fix a GitHub issue (smart routing) | Yes (draft) | Yes |
 | `archon-idea-to-pr` | Feature from description | Yes | Yes |
 | `archon-plan-to-pr` | Execute an existing plan | Yes | Yes |
@@ -229,7 +242,7 @@ To see all workflows available in your current directory:
 archon workflow list
 ```
 
-The list shows both Archon's bundled defaults and any custom workflows in your repo's `.archon/workflows/` directory. Custom workflows override bundled ones by name — if you create a workflow named `archon-assist`, it replaces the built-in.
+The list shows both Archon's bundled defaults and any custom workflows in your repo's `.archon/workflows/` directory. Custom workflows override bundled ones by name — if you create a workflow named `archon-assist`, it replaces the built-in. Codex-tuned variants use the `-codex` suffix, for example `archon-assist-codex`. Ready to build your own? In [Chapter 7: Creating Your First Workflow →](/book/first-workflow/), you'll build one from scratch — incrementally, version by version, until you've got a mini version of `archon-idea-to-pr`. diff --git a/packages/docs-web/src/content/docs/book/first-five-minutes.md b/packages/docs-web/src/content/docs/book/first-five-minutes.md index aec29aa212..3e53d0a4f9 100644 --- a/packages/docs-web/src/content/docs/book/first-five-minutes.md +++ b/packages/docs-web/src/content/docs/book/first-five-minutes.md @@ -54,13 +54,14 @@ Navigate to any git repository on your machine, then run: cd /path/to/your/project archon workflow run archon-assist "What's the entry point for this application?" +archon workflow run archon-assist-codex "What's the entry point for this application?" ``` Archon will analyze your codebase and answer the question with full context. You'll see it thinking through your files in real time, streamed to your terminal. **You just ran your first Archon workflow.** It's a single-step workflow — one command, one AI call, one answer. Simple, but useful. -> **Tip:** `archon-assist` works for any question. "How does auth work?", "Where is the database configured?", "What does this function do?" — it's your always-available codebase expert. +> **Tip:** `archon-assist` is the default catch-all, and `archon-assist-codex` is the Codex-tuned variant. Use the Codex lane when you want general help to stay on Codex-specific prompts and workflow names. 
--- diff --git a/packages/docs-web/src/content/docs/book/quick-reference.md b/packages/docs-web/src/content/docs/book/quick-reference.md index ede87c0dab..9ed1bcfbd7 100644 --- a/packages/docs-web/src/content/docs/book/quick-reference.md +++ b/packages/docs-web/src/content/docs/book/quick-reference.md @@ -269,7 +269,8 @@ defaults: |-------|-------------|-----| | `Workflow "X" not found` | YAML file not discovered | Check file is in `.archon/workflows/` and `archon workflow list` shows it | | `Command "X" not found` | Command file missing | Check `.archon/commands/X.md` exists and `archon validate commands X` passes | -| `Routing unclear — falling back to archon-assist` | No workflow matched the input | Use an explicit workflow name: `archon workflow run my-workflow "..."` | +| `Routing unclear — falling back to archon-assist` | No workflow matched the input on Claude/default flows | Use an explicit workflow name: `archon workflow run my-workflow "..."` | +| `Routing unclear — falling back to archon-assist-codex` | No workflow matched the input on Codex-oriented flows | Use an explicit workflow name: `archon workflow run my-workflow "..."` | | `Worktree already exists for branch X` | Prior run left a worktree | Run `archon complete X` or `archon isolation cleanup` | | `Not a git repository` | Running outside a repo | `cd` into a git repo first — workflow and isolation commands require one | | `Model X is not valid for provider Y` | Provider/model mismatch | Use Claude models (`sonnet`, `opus`, `haiku`) with `provider: claude`; use other models with `provider: codex` | @@ -303,6 +304,7 @@ archon workflow run my-workflow --no-worktree "..." 
**Test a command directly** before embedding it in a workflow: ```bash archon workflow run archon-assist "/command-invoke my-command some-arg" +archon workflow run archon-assist-codex "/command-invoke my-command some-arg" ``` ### Getting Help diff --git a/packages/docs-web/src/content/docs/getting-started/overview.md b/packages/docs-web/src/content/docs/getting-started/overview.md index f1d58ae402..65982b649d 100644 --- a/packages/docs-web/src/content/docs/getting-started/overview.md +++ b/packages/docs-web/src/content/docs/getting-started/overview.md @@ -264,6 +264,9 @@ archon workflow list # Ask a question about the codebase archon workflow run archon-assist "How does the auth module work?" +# Codex-tuned catch-all +archon workflow run archon-assist-codex "How does the auth module work?" + # Plan a feature on an isolated branch archon workflow run archon-feature-development --branch feat/dark-mode "Add dark mode" @@ -339,6 +342,7 @@ archon complete --force # skip uncommitted-changes check | Workflow | What It Does | |----------|-------------| | `archon-assist` | General Q&A, debugging, exploration, CI failures — catch-all | +| `archon-assist-codex` | Codex-tuned catch-all for general Q&A, debugging, exploration, and CI failures | | `archon-fix-github-issue` | Investigate, root cause analysis, implement fix, validate, PR | | `archon-idea-to-pr` | Feature idea, plan, implement, validate, PR, parallel reviews, self-fix | | `archon-plan-to-pr` | Execute existing plan, implement, validate, PR, review | @@ -356,11 +360,12 @@ archon complete --force # skip uncommitted-changes check | `archon-remotion-generate` | Generate or modify Remotion video compositions with AI | | `archon-interactive-prd` | Create a PRD through guided conversation | | `archon-piv-loop` | Guided Plan-Implement-Validate development with human-in-the-loop | +| `archon-piv-loop-codex` | Guided Plan-Implement-Validate development using Codex/GPT models | | `archon-adversarial-dev` | Build a complete 
application from scratch using adversarial development | These bundled workflows work for most projects. To customize, copy one from `.archon/workflows/defaults/` into `.archon/workflows/` and modify it — same-named files override the defaults. -> **Auto-selection:** You don't need to remember workflow names. Just describe what you want — the router reads all workflow descriptions and picks the best match. For example, "fix issue #42" routes to `archon-fix-github-issue`, while "review this PR" routes to `archon-smart-pr-review`. If nothing matches clearly, it falls back to `archon-assist`. +> **Auto-selection:** You don't need to remember workflow names. Just describe what you want — the router reads all workflow descriptions and picks the best match. For example, "fix issue #42" routes to `archon-fix-github-issue`, while "review this PR" routes to `archon-smart-pr-review`. If nothing matches clearly, it falls back to `archon-assist` for Claude and `archon-assist-codex` for Codex-oriented flows. --- diff --git a/packages/docs-web/src/content/docs/guides/authoring-workflows.md b/packages/docs-web/src/content/docs/guides/authoring-workflows.md index 6481aefac7..3ab2efc064 100644 --- a/packages/docs-web/src/content/docs/guides/authoring-workflows.md +++ b/packages/docs-web/src/content/docs/guides/authoring-workflows.md @@ -483,7 +483,7 @@ When a `nodes:` (DAG) workflow fails (including due to a server restart), the ne 3. Completed nodes are skipped; only failed and not-yet-run nodes are executed. 4. You receive a platform message like: `Resuming workflow — skipping 3 already-completed node(s).` -**Server restart**: If a server restart leaves runs in `running` status, they are automatically marked as `failed` on the next startup (with `metadata.failure_reason = 'server_restart'`). The next invocation of the same workflow at the same path auto-resumes from completed nodes. 
+**Server restart**: If a server restart leaves server-owned runs in `running` status, they are automatically marked as `failed` on the next startup (with `metadata.failure_reason = 'server_restart'`). CLI-owned runs are excluded because they can continue executing outside the server process while sharing the same database. The next invocation of the same workflow at the same path auto-resumes from completed nodes. **Known limitation**: AI session context from prior nodes is not restored. If a downstream node relies on in-context knowledge from a prior run's session (rather than artifacts), it may need to re-read those artifacts explicitly. diff --git a/packages/docs-web/src/content/docs/guides/index.md b/packages/docs-web/src/content/docs/guides/index.md index 0d53209fb6..4acf1eaafe 100644 --- a/packages/docs-web/src/content/docs/guides/index.md +++ b/packages/docs-web/src/content/docs/guides/index.md @@ -34,6 +34,7 @@ Archon ships with ready-to-use workflows that cover common coding tasks. You do | Workflow | What It Does | |----------|-------------| | `archon-assist` | General Q&A, debugging, exploration -- the catch-all | +| `archon-assist-codex` | Codex-tuned catch-all for general Q&A, debugging, and exploration | | `archon-fix-github-issue` | Investigate, root cause, implement fix, validate, PR | | `archon-smart-pr-review` | Complexity-adaptive PR review | | `archon-comprehensive-pr-review` | Multi-agent PR review (5 parallel reviewers) | diff --git a/packages/docs-web/src/content/docs/guides/loop-nodes.md b/packages/docs-web/src/content/docs/guides/loop-nodes.md index 0e9e3eebc3..f22d7cda94 100644 --- a/packages/docs-web/src/content/docs/guides/loop-nodes.md +++ b/packages/docs-web/src/content/docs/guides/loop-nodes.md @@ -70,6 +70,9 @@ the executor checks for workflow cancellation. max_iterations: 10 # Required. Hard limit — node fails if exceeded. fresh_context: true # Optional. Default: false. until_bash: "..." # Optional. 
Bash script checked after each iteration. + progress_file: "..." # Optional. Progress tracker used for durable-progress checks. + stuck_after_no_progress_iterations: 3 # Optional. Fail early after repeated + # iterations with no new durable progress. interactive: true # Optional. Default: false. Pause after each non-completing # iteration for user input via /workflow approve. gate_message: "..." # Required when interactive: true. Message shown to the @@ -149,6 +152,21 @@ build success. The bash script supports the same variable substitution as `prompt` (`$ARTIFACTS_DIR`, `$nodeId.output`, etc.). Note: `$nodeId.output` values are shell-escaped when substituted into `until_bash`. +### `progress_file` and `stuck_after_no_progress_iterations` + +Use these together when a loop should stop early if iterations are spinning +without durable state changes. + +- `progress_file` points at the task tracker the loop updates as work completes +- `stuck_after_no_progress_iterations` sets how many consecutive non-completing + iterations Archon tolerates when neither of these changed: + - the current git `HEAD` + - the completed-task count in `progress_file` + +This is useful for stateless implementation loops that re-read work from disk on +each iteration. Instead of burning all `max_iterations`, the loop fails early and +surfaces a stuck-task condition for human review. + ## Patterns ### Stateless agent (Ralph pattern) diff --git a/packages/docs-web/src/content/docs/reference/assistant-architecture.md b/packages/docs-web/src/content/docs/reference/assistant-architecture.md new file mode 100644 index 0000000000..5ba1647cb9 --- /dev/null +++ b/packages/docs-web/src/content/docs/reference/assistant-architecture.md @@ -0,0 +1,453 @@ +--- +title: Assistant Architecture +description: How Archon selects Claude or Codex across host skills, conversations, workflows, and nodes, including fork-specific differences and Codex limitations. 
+category: reference +audience: [developer] +status: current +sidebar: + order: 2 +--- + +This document explains the full assistant-selection stack in Archon. + +It exists to answer questions like: + +- What does "Codex-driven repo" actually mean? +- How is that different from workflow `provider: codex`? +- What is the difference between an Archon skill and workflow-node `skills:`? +- Which parts are standard upstream Archon, and which parts are specific to this fork? +- Which nodes can run on Codex today, and what breaks or degrades? + +## Executive Summary + +Archon has multiple independent selection layers: + +1. **Host harness layer**: Claude Code or Codex is the outer tool you are using to invoke Archon. +2. **Conversation/orchestrator layer**: Archon stores an assistant type on codebases and conversations. That decides whether the top-level Archon conversation runs through the Claude client or the Codex client. +3. **Workflow default provider layer**: When a workflow runs, Archon resolves a default provider for the workflow from workflow YAML or merged config. +4. **Per-node provider/model layer**: Individual AI nodes can override provider and model again. + +Those layers are related, but they are not the same setting. + +## Mental Model + +```mermaid +flowchart TD + U["User request"] --> H["Host harness\nClaude Code or Codex"] + H --> S["Archon host skill\noptional routing layer"] + S --> O["Archon orchestrator conversation"] + O -->|"direct answer"| C["Conversation assistant client\nClaudeClient or CodexClient"] + O -->|"invoke workflow"| W["Workflow executor"] + W --> WD["Workflow default provider"] + WD --> N["Per-node provider/model override"] + N --> R["Actual runtime for the node"] + + NS["workflow node skills:"] -. Claude-only node feature .-> N +``` + +The critical point is that the workflow executor can choose a provider that is different from the top-level conversation assistant. 
+ +## Layer 1: Host Harness + +This is the outer coding agent you are currently running: + +- Claude Code +- Codex + +At this layer, Archon can be exposed through a **host skill** that teaches the outer agent how to call Archon workflows and how to route requests. + +In this repo today: + +- Claude-oriented host skill: `.claude/skills/archon/` +- Codex-oriented host skill: `.agents/skills/archon/` + +This layer is about **how the outer assistant learns to use Archon**. It does **not** decide how workflow nodes run once Archon is executing them. + +## Layer 2: Conversation And Orchestrator Assistant + +Archon stores `ai_assistant_type` on codebases and conversations in the database. That value determines which top-level assistant client powers the Archon conversation. + +What it affects: + +- direct chat answers from Archon +- top-level orchestration and routing +- which default "assist" workflow the orchestrator suggests when routing is unclear + +What it does not automatically affect: + +- the provider used by every workflow node + +### How conversation assistant type is chosen + +At conversation creation time, Archon uses this order: + +1. existing conversation value if conversation already exists +2. parent conversation assistant type when inheriting context +3. codebase `ai_assistant_type` from the database when a codebase is attached +4. `DEFAULT_AI_ASSISTANT` env var +5. 
built-in default `claude` + +Important implementation detail: + +- the codebase `ai_assistant_type` is currently set when the repo is registered +- registration auto-detects `codex` if a `.codex/` folder exists, otherwise `claude` if a `.claude/` folder exists +- repo `.archon/config.yaml` does **not** currently write back into `remote_agent_codebases.ai_assistant_type` + +So "this repo is Codex-driven in the database" means: + +- the **top-level Archon conversation** for that codebase will use Codex by default +- it does **not** guarantee that every workflow node runs on Codex + +## Layer 3: Workflow Default Provider + +When a workflow starts, Archon resolves a default provider for the workflow separately from the conversation assistant. + +Resolution order: + +1. workflow `provider` +2. infer provider from workflow `model` +3. merged config `assistant` +4. built-in default `claude` + +The merged config order is: + +1. built-in defaults +2. `~/.archon/config.yaml` +3. repo `.archon/config.yaml` +4. environment variables + +That means a workflow can run on: + +- Claude even when the top-level conversation is Codex +- Codex even when the top-level conversation is Claude + +This is the main reason the system can feel confusing if you think there is only one "assistant" switch. + +## Layer 4: Per-Node Provider And Model + +For AI nodes, Archon resolves provider and model again at node execution time. + +Current resolution order for a command or prompt node: + +1. node `provider` +2. infer provider from node `model` +3. workflow default provider + +Model resolution: + +1. node `model` +2. workflow model if provider matches workflow provider +3. config default model for that provider + +This means a single workflow can mix: + +- mostly Claude nodes with a few Codex nodes +- mostly Codex nodes with a few Claude nodes + +provided those nodes do not rely on features unsupported by the chosen provider. 
+ +## Two Different Meanings Of "Skills" + +This is a major source of confusion. + +### Host skill + +Examples: + +- `.claude/skills/archon/SKILL.md` +- `.agents/skills/archon/SKILL.md` + +Purpose: + +- teach the outer assistant how to use Archon +- route a request into the correct Archon workflow +- explain conventions like branch naming and workflow selection + +This is **outside** the workflow engine. + +### Workflow-node `skills:` + +Example in workflow YAML: + +```yaml +nodes: + - id: review + prompt: "Review the implementation" + skills: + - code-review +``` + +Purpose: + +- preload domain-specific knowledge into a workflow node +- currently implemented using Claude Agent SDK agent definitions + +This is a **workflow node feature**, not a host-routing feature. + +Current behavior: + +- supported for Claude nodes +- ignored with warnings for Codex nodes + +## Node Types And Assistant Relevance + +Not every node type depends on Claude or Codex. + +| Node type | Uses AI provider? | Notes | +| --- | --- | --- | +| `command` | Yes | Named markdown prompt file loaded, then executed by Claude or Codex | +| `prompt` | Yes | Inline AI prompt executed by Claude or Codex | +| `loop` | Yes | Iterative AI execution; special runtime path | +| `bash` | No | Shell only | +| `script` | No | `bun` or `uv` runtime only | +| `approval` | No AI execution of its own | Human gate | +| `cancel` | No | Terminates workflow | + +So the real Codex/Claude compatibility question applies primarily to `command`, `prompt`, and `loop` nodes. + +## Codex Compatibility By Node Type + +### Command and prompt nodes + +These are the best candidates for Codex. + +They can run on Codex if they rely only on: + +- prompt text +- repository access +- shell/file/git/network capabilities provided by Codex +- `output_format` + +They should **not** rely on Claude-only node features listed later in this document. 
+ +### Loop nodes + +Current code supports loop-node provider/model selection, but the docs still say otherwise. + +Actual current behavior: + +- loop nodes do resolve `provider` and `model` +- loop nodes can therefore run on Codex +- loop nodes still do **not** support most of the richer Claude-only node features + +This is a load-bearing doc/code mismatch. Treat the implementation as authoritative until docs are corrected. + +### Bash/script/approval/cancel nodes + +These are provider-agnostic. They can exist in a Codex-oriented workflow because they do not call either AI assistant directly. + +## Exact Codex Limitations For Workflow Nodes + +### Supported on Codex nodes + +Supported today: + +- `provider: codex` +- `model: ` +- `output_format` +- config-level Codex defaults: + - `assistants.codex.model` + - `assistants.codex.modelReasoningEffort` + - `assistants.codex.webSearchMode` + - `assistants.codex.additionalDirectories` + +### Ignored with warnings on Codex command/prompt nodes + +These features are currently Claude-only and are ignored on Codex nodes: + +- `skills` +- `hooks` +- `mcp` +- `allowed_tools` +- `denied_tools` +- Claude advanced options: + - `effort` + - `thinking` + - `maxBudgetUsd` + - `systemPrompt` + - `fallbackModel` + - `betas` + - `sandbox` + +### Loop-node limitations + +Loop nodes have a separate limitation set. 
+ +Current implementation: + +- `provider` and `model` do work for loop nodes +- these still do **not** apply to loop iterations: + - `hooks` + - `mcp` + - `skills` + - `allowed_tools` + - `denied_tools` + - `output_format` + +### Workflow-level Codex tuning + +Workflow YAML now supports these Codex tuning fields as real runtime inputs: + +- `modelReasoningEffort` +- `webSearchMode` +- `additionalDirectories` + +Practical effect: + +- `model:` on a workflow is effective +- workflow-level `modelReasoningEffort`, `webSearchMode`, and `additionalDirectories` override Archon config for that workflow +- if the workflow omits them, execution falls back to `assistants.codex.*` + +Current precedence is: + +1. workflow YAML +2. `assistants.codex.*` in Archon config +3. SDK defaults + +## When Codex Can Realistically Replace Claude For A Node + +A node is a good candidate for Codex when all of these are true: + +1. it is a `command`, `prompt`, or simple `loop` node +2. it does not depend on `skills`, `hooks`, `mcp`, or tool restriction fields +3. it does not depend on Claude-only advanced options +4. the prompt is generic and does not assume Claude-specific behavior +5. 
the required tools are available through Codex's own runtime setup + +A node is **not** a good candidate for Codex when it depends on: + +- Claude skill injection +- Claude hook behavior +- per-node MCP wiring +- Claude-specific system-prompt or thinking controls + +## Upstream vs This Fork + +### Standard upstream implementation + +Upstream Archon already supports the broad architecture: + +- Claude and Codex are both first-class assistant providers +- workflow YAML supports `provider` and `model` +- workflow nodes can select provider and model +- config supports both Claude and Codex defaults +- conversation/orchestrator assistant selection exists + +Upstream public docs also already describe: + +- Codex as an AI assistant +- per-node `provider` / `model` +- workflow-level Codex settings + +### Fork-specific additions in this repo + +This fork adds a more explicit Codex-facing routing surface. + +Verified additions in this checkout: + +1. **Codex-specific assist workflow** + - `.archon/workflows/defaults/archon-assist-codex.yaml` + - `.archon/commands/defaults/archon-assist-codex.md` + +2. **Codex-specific host skill** + - `.agents/skills/archon/SKILL.md` + +3. **Docs updated to mention Codex-specific assist lane** + - `archon-assist-codex` appears in the local docs and workflow catalog + +### Fork-specific caveats + +There are also fork-local inconsistencies worth knowing: + +1. **The setup wizard still installs the Claude skill surface** + - it copies `.claude/skills/archon/` + - it does not install `.agents/skills/archon/` + +2. **The Codex host skill references `archon-piv-loop-codex`** + - no matching workflow file exists in this checkout today + +3. **Repo-local workflow default is not pinned here** + - this repo's `.archon/config.yaml` does not set `assistant:` + - so workflow default provider for this repo depends on global config or environment unless a workflow sets its own provider + +4. 
**Docs still overstate some Codex workflow capabilities** + - workflow-level Codex YAML options are parsed but not fully consumed by the executor + +5. **Docs still understate some loop-node capabilities** + - docs say loop-node `provider` / `model` are ignored + - current code resolves them + +## Practical Precedence Tables + +### Conversation/orchestrator assistant + +| Order | Source | +| --- | --- | +| 1 | existing conversation | +| 2 | parent conversation | +| 3 | codebase `ai_assistant_type` from DB | +| 4 | `DEFAULT_AI_ASSISTANT` | +| 5 | built-in default `claude` | + +### Workflow default provider + +| Order | Source | +| --- | --- | +| 1 | workflow `provider` | +| 2 | inferred from workflow `model` | +| 3 | merged config `assistant` | +| 4 | built-in default `claude` | + +### Per-node provider + +| Order | Source | +| --- | --- | +| 1 | node `provider` | +| 2 | inferred from node `model` | +| 3 | workflow default provider | + +## Recommended Migration Strategy For Claude-To-Codex Workflow Changes + +When converting a workflow or node from Claude to Codex: + +1. start with `command` or `prompt` nodes +2. remove or replace `skills`, `hooks`, `mcp`, and tool restriction fields +3. keep the prompt generic +4. test one node at a time before converting the entire workflow +5. prefer a `-codex` workflow variant when behavior meaningfully diverges +6. keep Claude as the provider for nodes that truly depend on Claude-only features + +This usually leads to a mixed-provider workflow rather than an all-or-nothing migration. + +## Decision Checklist + +Use this checklist before changing a node to Codex: + +- Is this node AI-driven at all? +- Does it use `skills`, `hooks`, `mcp`, `allowed_tools`, or `denied_tools`? +- Does it rely on Claude-only advanced fields? +- Is the desired behavior actually controlled by the conversation assistant, the workflow default, or the node override? 
+- If the repo is "Codex-driven" only in the database, do we also want the workflow YAML or repo config to reflect that? + +If you answer those questions first, the assistant-selection model becomes much less ambiguous. + +## Verified Source Anchors + +The implementation details in this document were verified against: + +- `packages/core/src/db/conversations.ts` +- `packages/core/src/orchestrator/orchestrator-agent.ts` +- `packages/core/src/config/config-loader.ts` +- `packages/core/src/handlers/clone.ts` +- `packages/workflows/src/executor.ts` +- `packages/workflows/src/dag-executor.ts` +- `.agents/skills/archon/SKILL.md` +- `.archon/workflows/defaults/archon-assist-codex.yaml` +- `.archon/commands/defaults/archon-assist-codex.md` + +And compared against upstream/public docs: + +- `https://archon.diy/getting-started/ai-assistants/` +- `https://archon.diy/guides/authoring-workflows/` +- `https://archon.diy/guides/skills/` +- `https://archon.diy/book/essential-workflows/` diff --git a/packages/docs-web/src/content/docs/reference/cli.md b/packages/docs-web/src/content/docs/reference/cli.md index 33f6436884..17c90c827d 100644 --- a/packages/docs-web/src/content/docs/reference/cli.md +++ b/packages/docs-web/src/content/docs/reference/cli.md @@ -146,6 +146,10 @@ Workflow names are resolved using a 4-tier fallback hierarchy. This applies cons 3. **Suffix match** - `assist` matches `archon-assist` (looks for `-assist` suffix) 4. **Substring match** - `smart` matches `archon-smart-pr-review` +Codex-specific workflow names use the `-codex` suffix. For example: +- `assist-codex` matches `archon-assist-codex` +- `piv-loop-codex` matches `archon-piv-loop-codex` + If multiple workflows match at the same tier, an error lists the candidates: ``` Ambiguous workflow 'review'. 
Did you mean: diff --git a/packages/docs-web/src/content/docs/reference/index.md b/packages/docs-web/src/content/docs/reference/index.md index f20bf30943..5621c89454 100644 --- a/packages/docs-web/src/content/docs/reference/index.md +++ b/packages/docs-web/src/content/docs/reference/index.md @@ -12,6 +12,7 @@ Technical reference documentation for Archon internals. ## Reference Docs - **[Architecture](/reference/architecture/)** -- System overview, interfaces, data flow, extension guides +- **[Assistant Architecture](/reference/assistant-architecture/)** -- Claude vs Codex selection across host skills, conversations, workflows, and nodes - **[Archon Directories](/reference/archon-directories/)** -- Directory structure, path resolution, configuration system - **[CLI Reference](/reference/cli/)** -- All CLI commands, flags, and usage examples - **[Commands Reference](/reference/commands/)** -- Slash commands available in all platform adapters diff --git a/packages/docs-web/src/content/docs/reference/troubleshooting.md b/packages/docs-web/src/content/docs/reference/troubleshooting.md index 50805c7911..457b49515c 100644 --- a/packages/docs-web/src/content/docs/reference/troubleshooting.md +++ b/packages/docs-web/src/content/docs/reference/troubleshooting.md @@ -63,6 +63,12 @@ curl http://localhost:3090/health/db SQLite requires no setup. The database is created automatically at `~/.archon/archon.db`. If you see errors, check that the `~/.archon/` directory exists and is writable. +For workflow-mutating CLI commands (`workflow run`, `workflow resume`, `workflow approve`, +`workflow reject`, `workflow cleanup`), Archon must be able to write the SQLite +state under `~/.archon/`. If you run Archon from an outer workspace sandbox, grant +write access to `~/.archon/` or rerun the CLI outside that sandbox. Changing the +inner Codex sandbox settings does not fix a parent-process write restriction. 
+ **For remote PostgreSQL:** ```bash # Verify DATABASE_URL diff --git a/packages/paths/src/archon-paths.ts b/packages/paths/src/archon-paths.ts index ca8ea73774..81de9f9cd0 100644 --- a/packages/paths/src/archon-paths.ts +++ b/packages/paths/src/archon-paths.ts @@ -198,6 +198,13 @@ export function getDefaultWorkflowsPath(): string { return join(getAppArchonBasePath(), 'workflows', 'defaults'); } +/** + * Get the path to the app's bundled default scripts directory + */ +export function getDefaultScriptsPath(): string { + return join(getAppArchonBasePath(), 'scripts'); +} + /** * Returns the path to the cached web UI distribution for a given version. * Example: ~/.archon/web-dist/v0.3.2/ diff --git a/packages/paths/src/index.ts b/packages/paths/src/index.ts index 99a254f4ca..279bde3818 100644 --- a/packages/paths/src/index.ts +++ b/packages/paths/src/index.ts @@ -11,6 +11,7 @@ export { getAppArchonBasePath, getDefaultCommandsPath, getDefaultWorkflowsPath, + getDefaultScriptsPath, logArchonPaths, validateAppDefaultsPaths, parseOwnerRepo, diff --git a/packages/server/src/routes/api.health.test.ts b/packages/server/src/routes/api.health.test.ts index 6cf895464e..fffd65a337 100644 --- a/packages/server/src/routes/api.health.test.ts +++ b/packages/server/src/routes/api.health.test.ts @@ -87,6 +87,7 @@ mock.module('@archon/workflows/defaults', () => ({ BUNDLED_WORKFLOWS: {}, BUNDLED_COMMANDS: { 'archon-assist': '# archon-assist command', + 'archon-assist-codex': '# archon-assist-codex command', plan: '# plan command', implement: '# implement command', }, @@ -433,7 +434,7 @@ describe('GET /api/commands', () => { const body = (await response.json()) as { commands: Array<{ name: string; source: string }> }; expect(Array.isArray(body.commands)).toBe(true); - // BUNDLED_COMMANDS mock has 3 entries + // BUNDLED_COMMANDS mock includes the bundled defaults for this test const bundledCommands = body.commands.filter(c => c.source === 'bundled'); 
expect(bundledCommands.length).toBeGreaterThan(0); }); @@ -449,6 +450,17 @@ describe('GET /api/commands', () => { expect(archonAssist?.source).toBe('bundled'); }); + test('includes archon-assist-codex as bundled command', async () => { + const app = makeApp(); + const response = await app.request('/api/commands'); + expect(response.status).toBe(200); + + const body = (await response.json()) as { commands: Array<{ name: string; source: string }> }; + const archonAssistCodex = body.commands.find(c => c.name === 'archon-assist-codex'); + expect(archonAssistCodex).toBeDefined(); + expect(archonAssistCodex?.source).toBe('bundled'); + }); + test('includes plan and implement as bundled commands', async () => { const app = makeApp(); const response = await app.request('/api/commands'); diff --git a/packages/server/src/routes/api.workflows.test.ts b/packages/server/src/routes/api.workflows.test.ts index e50b252640..6356985558 100644 --- a/packages/server/src/routes/api.workflows.test.ts +++ b/packages/server/src/routes/api.workflows.test.ts @@ -21,10 +21,20 @@ const mockDiscoverWorkflows = mock(async (_cwd: string) => ({ })); // Default: returns a valid workflow. Use mockReturnValueOnce in tests that need a parse failure. -const mockParseWorkflow = mock((_content: string, _filename: string) => ({ - workflow: makeTestWorkflow({ name: 'test', description: 'Test workflow' }), - error: null, -})); +const mockParseWorkflow = mock((content: string, _filename: string) => { + const nameMatch = /^name:\s*(.+)$/m.exec(content); + const descriptionMatch = /^description:\s*(.+)$/m.exec(content); + const providerMatch = /^provider:\s*(.+)$/m.exec(content); + + return { + workflow: makeTestWorkflow({ + name: nameMatch?.[1] ?? 'test', + description: descriptionMatch?.[1] ?? 'Test workflow', + ...(providerMatch?.[1] ? 
{ provider: providerMatch[1] } : {}), + }), + error: null, + }; +}); mock.module('@archon/core', () => ({ handleMessage: mock(async () => {}), @@ -74,9 +84,12 @@ mock.module('@archon/workflows/command-validation', () => ({ mock.module('@archon/workflows/defaults', () => ({ BUNDLED_WORKFLOWS: { 'archon-assist': 'name: archon-assist\ndescription: Archon Assist\nnodes: []', + 'archon-assist-codex': + 'name: archon-assist-codex\ndescription: Archon Assist Codex\nprovider: codex\nnodes: []', }, BUNDLED_COMMANDS: { 'archon-assist': '# archon-assist command', + 'archon-assist-codex': '# archon-assist-codex command', }, isBinaryBuild: mock(() => false), })); @@ -224,6 +237,24 @@ describe('GET /api/workflows/:name', () => { expect(body.workflow).toBeDefined(); }); + test('returns bundled Codex workflow with source:bundled', async () => { + const app = createTestApp(); + registerApiRoutes(app, {} as WebAdapter, {} as ConversationLockManager); + + mockListCodebases.mockImplementationOnce(async () => []); + + const response = await app.request('/api/workflows/archon-assist-codex'); + expect(response.status).toBe(200); + const body = (await response.json()) as { + source: string; + filename: string; + workflow: { provider?: string }; + }; + expect(body.source).toBe('bundled'); + expect(body.filename).toBe('archon-assist-codex.yaml'); + expect(body.workflow.provider).toBe('codex'); + }); + test('returns project workflow with source:project when file exists on disk', async () => { const testDir = join(tmpdir(), `wf-get-test-${Date.now()}`); const workflowDir = join(testDir, '.archon', 'workflows'); diff --git a/packages/web/src/components/chat/ChatInterface.tsx b/packages/web/src/components/chat/ChatInterface.tsx index fca7698390..581d8005ef 100644 --- a/packages/web/src/components/chat/ChatInterface.tsx +++ b/packages/web/src/components/chat/ChatInterface.tsx @@ -37,6 +37,18 @@ import { import { useProject } from '@/contexts/ProjectContext'; import { ensureUtc } from 
'@/lib/format'; +/** + * Human-readable reply hints for conversations whose input is disabled because + * the conversation originated on another platform. Web UI input remains + * disabled for these platforms — users reply from the originating app. + */ +const PLATFORM_REPLY_HINTS: Record = { + telegram: 'This conversation is running in Telegram — reply from the Telegram app.', + slack: 'This conversation is running in Slack — reply there.', + discord: 'This conversation is running in Discord — reply there.', + github: 'This conversation is running in a GitHub issue — reply there.', +}; + function mapMessageRow(row: MessageResponse): ChatMessage { let meta: { toolCalls?: { @@ -787,7 +799,8 @@ export function ChatInterface({ conversationId }: ChatInterfaceProps): React.Rea } disabledReason={ currentConv != null && currentConv.platform_type !== 'web' - ? 'Continuing chats from other platforms in the Web UI is coming soon' + ? (PLATFORM_REPLY_HINTS[currentConv.platform_type] ?? + 'This conversation is running on another platform — reply from there.') : undefined } /> diff --git a/packages/web/src/lib/workflow-metadata.test.ts b/packages/web/src/lib/workflow-metadata.test.ts index 18af743267..8fe693fe97 100644 --- a/packages/web/src/lib/workflow-metadata.test.ts +++ b/packages/web/src/lib/workflow-metadata.test.ts @@ -135,6 +135,10 @@ describe('getWorkflowDisplayName', () => { test('handles single-word names', () => { expect(getWorkflowDisplayName('archon-assist')).toBe('Assist'); }); + + test('handles Codex suffixed workflow names', () => { + expect(getWorkflowDisplayName('archon-assist-codex')).toBe('Assist Codex'); + }); }); describe('getWorkflowCategory', () => { @@ -172,6 +176,7 @@ describe('getWorkflowCategory', () => { 'Development' ); expect(getWorkflowCategory('archon-assist', 'General help')).toBe('Development'); + expect(getWorkflowCategory('archon-assist-codex', 'General Codex help')).toBe('Development'); expect(getWorkflowCategory('archon-idea-to-pr', 
'From idea to PR')).toBe('Development'); }); }); diff --git a/packages/workflows/src/dag-executor.test.ts b/packages/workflows/src/dag-executor.test.ts index 150ea4eeb7..5d49cc1040 100644 --- a/packages/workflows/src/dag-executor.test.ts +++ b/packages/workflows/src/dag-executor.test.ts @@ -23,6 +23,8 @@ mock.module('@archon/paths', () => ({ return paths; }, getDefaultCommandsPath: () => '/nonexistent/defaults', + getDefaultScriptsPath: () => '/Users/mase/Codebase/Personal-Projects/Archon/.archon/scripts', + BUNDLED_IS_BINARY: false, })); // --- Imports (after mocks) --- @@ -1512,6 +1514,111 @@ describe('executeDagWorkflow -- output_format structured output', () => { .filter(msg => typeof msg === 'string' && msg.includes('did not return structured output')); expect(warningMessages).toHaveLength(0); }); + + it('uses workflow-level Codex tuning instead of config defaults for normal nodes', async () => { + mockGetAssistantClientDag.mockImplementation(() => ({ + sendQuery: mockSendQueryDag, + getType: () => 'codex', + })); + mockSendQueryDag.mockImplementation(function* () { + yield { type: 'assistant', content: 'workflow scoped codex settings' }; + yield { type: 'result', sessionId: 'codex-sid-3' }; + }); + + const mockDeps = createMockDeps(); + const platform = createMockPlatform(); + const workflowRun = makeWorkflowRun('codex-workflow-options-run'); + const config: WorkflowConfig = { + ...minimalConfig, + assistant: 'codex', + assistants: { + ...minimalConfig.assistants, + codex: { + modelReasoningEffort: 'low', + webSearchMode: 'disabled', + additionalDirectories: ['/config/default'], + }, + }, + }; + + await executeDagWorkflow( + mockDeps, + platform, + 'conv-codex-workflow-options', + testDir, + { + name: 'codex-workflow-options', + modelReasoningEffort: 'xhigh', + webSearchMode: 'live', + additionalDirectories: ['/workflow/override'], + nodes: [{ id: 'classify', command: 'classify' }], + }, + workflowRun, + 'codex', + undefined, + join(testDir, 'artifacts'), + 
join(testDir, 'logs'), + 'main', + 'docs/', + config + ); + + const optionsArg = mockSendQueryDag.mock.calls[0][3] as Record; + expect(optionsArg.modelReasoningEffort).toBe('xhigh'); + expect(optionsArg.webSearchMode).toBe('live'); + expect(optionsArg.additionalDirectories).toEqual(['/workflow/override']); + }); + + it('falls back to config Codex tuning when workflow-level values are absent for normal nodes', async () => { + mockGetAssistantClientDag.mockImplementation(() => ({ + sendQuery: mockSendQueryDag, + getType: () => 'codex', + })); + mockSendQueryDag.mockImplementation(function* () { + yield { type: 'assistant', content: 'config scoped codex settings' }; + yield { type: 'result', sessionId: 'codex-sid-4' }; + }); + + const mockDeps = createMockDeps(); + const platform = createMockPlatform(); + const workflowRun = makeWorkflowRun('codex-config-options-run'); + const config: WorkflowConfig = { + ...minimalConfig, + assistant: 'codex', + assistants: { + ...minimalConfig.assistants, + codex: { + modelReasoningEffort: 'medium', + webSearchMode: 'cached', + additionalDirectories: ['/config/fallback'], + }, + }, + }; + + await executeDagWorkflow( + mockDeps, + platform, + 'conv-codex-config-options', + testDir, + { + name: 'codex-config-options', + nodes: [{ id: 'classify', command: 'classify' }], + }, + workflowRun, + 'codex', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + config + ); + + const optionsArg = mockSendQueryDag.mock.calls[0][3] as Record; + expect(optionsArg.modelReasoningEffort).toBe('medium'); + expect(optionsArg.webSearchMode).toBe('cached'); + expect(optionsArg.additionalDirectories).toEqual(['/config/fallback']); + }); }); describe('executeDagWorkflow -- when condition parse errors (fail-closed)', () => { @@ -2765,6 +2872,184 @@ describe('executeDagWorkflow -- resume with priorCompletedNodes', () => { }); }); + it('uses workflow-level Codex tuning instead of config defaults for loop nodes', async () 
=> { + mockGetAssistantClientDag.mockImplementation(() => ({ + sendQuery: mockSendQueryDag, + getType: () => 'codex', + })); + mockSendQueryDag.mockImplementation(function* () { + yield { type: 'assistant', content: 'DONE' }; + yield { type: 'result', sessionId: 'loop-codex-sid-1' }; + }); + + const mockDeps = createMockDeps(); + const platform = createMockPlatform(); + const workflowRun = makeWorkflowRun('codex-loop-workflow-options-run'); + const config: WorkflowConfig = { + ...minimalConfig, + assistant: 'codex', + assistants: { + ...minimalConfig.assistants, + codex: { + modelReasoningEffort: 'low', + webSearchMode: 'disabled', + additionalDirectories: ['/config/loop-default'], + }, + }, + }; + + await executeDagWorkflow( + mockDeps, + platform, + 'conv-codex-loop-workflow-options', + testDir, + { + name: 'codex-loop-workflow-options', + modelReasoningEffort: 'high', + webSearchMode: 'live', + additionalDirectories: ['/workflow/loop-override'], + nodes: [ + { + id: 'my-loop', + loop: { + prompt: 'Do a task. 
When done, output DONE.', + until: 'DONE', + max_iterations: 1, + }, + }, + ], + }, + workflowRun, + 'codex', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + config + ); + + const optionsArg = mockSendQueryDag.mock.calls[0][3] as Record; + expect(optionsArg.modelReasoningEffort).toBe('high'); + expect(optionsArg.webSearchMode).toBe('live'); + expect(optionsArg.additionalDirectories).toEqual(['/workflow/loop-override']); + }); + + it('preserves node-level loop provider and model when workflow-level Codex tuning is present', async () => { + mockGetAssistantClientDag.mockImplementation(provider => ({ + sendQuery: mockSendQueryDag, + getType: () => provider, + })); + mockSendQueryDag.mockImplementation(function* () { + yield { type: 'assistant', content: 'DONE' }; + yield { type: 'result', sessionId: 'loop-mixed-provider-sid-1' }; + }); + + const mockDeps = createMockDeps(); + const platform = createMockPlatform(); + const workflowRun = makeWorkflowRun('codex-loop-mixed-provider-run'); + + await executeDagWorkflow( + mockDeps, + platform, + 'conv-codex-loop-mixed-provider', + testDir, + { + name: 'codex-loop-mixed-provider', + modelReasoningEffort: 'xhigh', + webSearchMode: 'live', + additionalDirectories: ['/workflow/codex-override'], + nodes: [ + { + id: 'my-loop', + provider: 'claude', + model: 'sonnet', + loop: { + prompt: 'Do a task. 
When done, output DONE.', + until: 'DONE', + max_iterations: 1, + }, + }, + ], + }, + workflowRun, + 'codex', + 'gpt-5.3-codex', + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + { ...minimalConfig, assistant: 'codex' } + ); + + expect(mockGetAssistantClientDag.mock.calls[0]?.[0]).toBe('claude'); + const optionsArg = mockSendQueryDag.mock.calls[0][3] as Record; + expect(optionsArg.model).toBe('sonnet'); + expect(optionsArg.modelReasoningEffort).toBeUndefined(); + expect(optionsArg.webSearchMode).toBeUndefined(); + expect(optionsArg.additionalDirectories).toBeUndefined(); + }); + + it('falls back to config Codex tuning when workflow-level values are absent for loop nodes', async () => { + mockGetAssistantClientDag.mockImplementation(() => ({ + sendQuery: mockSendQueryDag, + getType: () => 'codex', + })); + mockSendQueryDag.mockImplementation(function* () { + yield { type: 'assistant', content: 'DONE' }; + yield { type: 'result', sessionId: 'loop-codex-sid-2' }; + }); + + const mockDeps = createMockDeps(); + const platform = createMockPlatform(); + const workflowRun = makeWorkflowRun('codex-loop-config-options-run'); + const config: WorkflowConfig = { + ...minimalConfig, + assistant: 'codex', + assistants: { + ...minimalConfig.assistants, + codex: { + modelReasoningEffort: 'minimal', + webSearchMode: 'cached', + additionalDirectories: ['/config/loop-fallback'], + }, + }, + }; + + await executeDagWorkflow( + mockDeps, + platform, + 'conv-codex-loop-config-options', + testDir, + { + name: 'codex-loop-config-options', + nodes: [ + { + id: 'my-loop', + loop: { + prompt: 'Do a task. 
When done, output DONE.', + until: 'DONE', + max_iterations: 1, + }, + }, + ], + }, + workflowRun, + 'codex', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + config + ); + + const optionsArg = mockSendQueryDag.mock.calls[0][3] as Record; + expect(optionsArg.modelReasoningEffort).toBe('minimal'); + expect(optionsArg.webSearchMode).toBe('cached'); + expect(optionsArg.additionalDirectories).toEqual(['/config/loop-fallback']); + }); + it('completes after multiple iterations', async () => { let callCount = 0; mockSendQueryDag.mockImplementation(function* () { @@ -2860,6 +3145,138 @@ describe('executeDagWorkflow -- resume with priorCompletedNodes', () => { ).toBe(1); }); + it('fails early when no durable progress is made across consecutive iterations', async () => { + mockSendQueryDag.mockImplementation(function* () { + yield { type: 'assistant', content: 'Still working...' }; + yield { type: 'result', sessionId: 'loop-session' }; + }); + + const mockDeps = createMockDeps(); + const platform = createMockPlatform(); + const workflowRun = makeWorkflowRun(); + + await executeDagWorkflow( + mockDeps, + platform, + 'conv-dag', + testDir, + { + name: 'dag-loop-stuck', + nodes: [ + { + id: 'my-loop', + loop: { + prompt: 'Do task.', + until: 'COMPLETE', + max_iterations: 10, + progress_file: '$ARTIFACTS_DIR/progress.txt', + stuck_after_no_progress_iterations: 2, + }, + }, + ], + }, + workflowRun, + 'claude', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + minimalConfig + ); + + expect(mockSendQueryDag.mock.calls.length).toBe(3); + const failCalls = ( + mockDeps.store.failWorkflowRun as Mock<(id: string, error: string) => Promise> + ).mock.calls; + expect(failCalls.length).toBe(1); + const platformMessages = ( + platform.sendMessage as Mock< + ( + conversationId: string, + content: string, + metadata?: Record + ) => Promise + > + ).mock.calls.map(call => String(call[1])); + 
expect(platformMessages.some(message => message.includes('no durable progress'))).toBe(true); + }); + + it('resets the no-progress streak when the progress file advances', async () => { + let callCount = 0; + mockSendQueryDag.mockImplementation(async function* () { + callCount++; + const artifactsDir = join(testDir, 'artifacts'); + await mkdir(artifactsDir, { recursive: true }); + if (callCount === 1) { + await writeFile( + join(artifactsDir, 'progress.txt'), + '## Task 1: First task — COMPLETED\nDate: 2026-04-13\n---\n', + 'utf8' + ); + yield { type: 'assistant', content: 'Completed the first task.' }; + } else if (callCount === 2) { + await writeFile( + join(artifactsDir, 'progress.txt'), + '## Task 1: First task — COMPLETED\nDate: 2026-04-13\n---\n' + + '## Task 2: Second task — COMPLETED\nDate: 2026-04-13\n---\n', + 'utf8' + ); + yield { type: 'assistant', content: 'Completed the second task.' }; + } else { + yield { type: 'assistant', content: 'All done! COMPLETE' }; + } + yield { type: 'result', sessionId: `loop-session-${String(callCount)}` }; + }); + + const mockDeps = createMockDeps(); + const platform = createMockPlatform(); + const workflowRun = makeWorkflowRun(); + + await executeDagWorkflow( + mockDeps, + platform, + 'conv-dag', + testDir, + { + name: 'dag-loop-progress', + nodes: [ + { + id: 'my-loop', + loop: { + prompt: 'Do task.', + until: 'COMPLETE', + max_iterations: 5, + progress_file: '$ARTIFACTS_DIR/progress.txt', + stuck_after_no_progress_iterations: 2, + }, + }, + ], + }, + workflowRun, + 'claude', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + minimalConfig + ); + + expect(mockSendQueryDag.mock.calls.length).toBe(3); + expect( + (mockDeps.store.failWorkflowRun as Mock<(id: string, error: string) => Promise>).mock + .calls.length + ).toBe(0); + expect( + ( + mockDeps.store.completeWorkflowRun as Mock< + (id: string, metadata?: Record) => Promise + > + ).mock.calls.length + ).toBe(1); + }); + 
it('loop node output available to downstream nodes via $nodeId.output', async () => { let loopCallCount = 0; mockSendQueryDag.mockImplementation(function* (prompt: string) { @@ -5006,6 +5423,48 @@ describe('executeDagWorkflow -- script nodes', () => { expect(mockSendQueryDag.mock.calls.length).toBe(0); }); + it('named bun script executes from Archon default scripts when repo script is absent', async () => { + const mockDeps = createMockDeps(); + const platform = createMockPlatform(); + const workflowRun = makeWorkflowRun('script-default-run-id', { + workflow_name: 'script-default-test', + conversation_id: 'conv-default-script', + user_message: 'default script test', + }); + + await writeFile(join(testDir, 'package.json'), JSON.stringify({ name: 'default-script-test' })); + + const commandsDir = join(testDir, '.archon', 'commands'); + await mkdir(commandsDir, { recursive: true }); + await writeFile(join(commandsDir, 'report-detect.md'), 'Detection output:\n$detect.output'); + + const nodes: DagNode[] = [ + { id: 'detect', script: 'detect-project', runtime: 'bun' }, + { id: 'report', command: 'report-detect', depends_on: ['detect'] }, + ]; + + await executeDagWorkflow( + mockDeps, + platform, + 'conv-default-script', + testDir, + { name: 'default-script-test', nodes }, + workflowRun, + 'claude', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + minimalConfig + ); + + expect(mockSendQueryDag.mock.calls.length).toBe(1); + const prompt = mockSendQueryDag.mock.calls[0][0] as string; + expect(prompt).toContain('PROJECT_TYPE=node'); + expect(prompt).toContain('INSTALL_CMD=npm ci'); + }); + it('non-zero exit code results in failed state', async () => { const mockDeps = createMockDeps(); const platform = createMockPlatform(); @@ -5043,6 +5502,54 @@ describe('executeDagWorkflow -- script nodes', () => { expect(failMsg).toBeDefined(); }); + it('marks workflow failed when some nodes succeed and a later node fails', async () => { + const 
mockDeps = createMockDeps(); + const platform = createMockPlatform(); + const workflowRun = makeWorkflowRun('script-partial-fail-run-id', { + workflow_name: 'script-partial-fail-test', + conversation_id: 'conv-partial-fail', + user_message: 'partial fail test', + }); + + const nodes: DagNode[] = [ + { id: 'ok', script: 'console.log("ok")', runtime: 'bun' }, + { id: 'boom', script: 'process.exit(1)', runtime: 'bun', depends_on: ['ok'] }, + ]; + + await executeDagWorkflow( + mockDeps, + platform, + 'conv-partial-fail', + testDir, + { name: 'script-partial-fail-test', nodes }, + workflowRun, + 'claude', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + minimalConfig + ); + + expect(mockDeps.store.completeWorkflowRun as ReturnType).not.toHaveBeenCalled(); + const failCalls = ( + mockDeps.store.failWorkflowRun as Mock< + (id: string, error: string, metadata?: Record) => Promise + > + ).mock.calls; + expect(failCalls.length).toBe(1); + expect(failCalls[0][1]).toContain('failed after partial execution'); + expect(failCalls[0][2]).toEqual( + expect.objectContaining({ + node_counts: { completed: 1, failed: 1, skipped: 0, total: 2 }, + }) + ); + expect((failCalls[0][2] as Record).failed_nodes).toEqual( + expect.stringContaining("'boom':") + ); + }); + it('timeout kills subprocess', async () => { const mockDeps = createMockDeps(); const platform = createMockPlatform(); diff --git a/packages/workflows/src/dag-executor.ts b/packages/workflows/src/dag-executor.ts index facfbd1068..bba13586df 100644 --- a/packages/workflows/src/dag-executor.ts +++ b/packages/workflows/src/dag-executor.ts @@ -5,10 +5,11 @@ * Independent nodes within the same layer run concurrently via Promise.allSettled. * Captures all assistant output regardless of streaming mode for $node_id.output substitution. 
*/ -import { readFile } from 'fs/promises'; -import { resolve, isAbsolute } from 'path'; +import { readFile, mkdtemp, writeFile, rm } from 'fs/promises'; +import { resolve, isAbsolute, join } from 'path'; +import { tmpdir } from 'os'; import { execFileAsync } from '@archon/git'; -import { discoverScripts } from './script-discovery'; +import { resolveNamedScript } from './script-discovery'; import type { WorkflowAssistantOptions, IWorkflowPlatform, @@ -75,13 +76,16 @@ function getLog(): ReturnType { return cachedLog; } -/** Workflow-level Claude SDK options — per-node overrides take precedence via ?? */ +/** Workflow-level execution options. Per-node `provider`/`model` overrides still take precedence. */ interface WorkflowLevelOptions { effort?: EffortLevel; thinking?: ThinkingConfig; fallbackModel?: string; betas?: string[]; sandbox?: SandboxSettings; + modelReasoningEffort?: WorkflowAssistantOptions['modelReasoningEffort']; + webSearchMode?: WorkflowAssistantOptions['webSearchMode']; + additionalDirectories?: WorkflowAssistantOptions['additionalDirectories']; } /** Internal node execution result — extends NodeOutput with cost data for aggregation. 
*/ @@ -101,10 +105,48 @@ interface SendMessageContext { nodeName?: string; } +type WorkflowCodexExecutionOptions = Pick< + WorkflowAssistantOptions, + 'modelReasoningEffort' | 'webSearchMode' | 'additionalDirectories' +>; + +interface BundledScriptExecution { + cmd: string; + args: string[]; + cleanup?: () => Promise; +} + /** Default DAG node retry for TRANSIENT errors */ const DEFAULT_NODE_MAX_RETRIES = 2; const DEFAULT_NODE_RETRY_DELAY_MS = 3000; +async function buildBundledScriptExecution( + scriptName: string, + runtime: 'bun' | 'uv', + content: string, + nodeDeps: string[] +): Promise { + if (runtime === 'uv') { + const withFlags = nodeDeps.flatMap(dep => ['--with', dep]); + return { + cmd: 'uv', + args: ['run', ...withFlags, 'python', '-c', content], + }; + } + + const tempDir = await mkdtemp(join(tmpdir(), `archon-bundled-script-${scriptName}-`)); + const scriptPath = join(tempDir, `${scriptName}.ts`); + await writeFile(scriptPath, content, 'utf8'); + + return { + cmd: 'bun', + args: ['run', scriptPath], + cleanup: async (): Promise => { + await rm(tempDir, { recursive: true, force: true }); + }, + }; +} + /** * Get effective retry config for a DAG node. */ @@ -254,6 +296,24 @@ export function buildSDKHooksFromYAML(nodeHooks: WorkflowNodeHooks): SDKHooksMap return sdkHooks; } +function resolveWorkflowCodexOptions( + workflowLevelOptions: WorkflowLevelOptions, + config: WorkflowConfig +): WorkflowCodexExecutionOptions | undefined { + const modelReasoningEffort = + workflowLevelOptions.modelReasoningEffort ?? config.assistants.codex.modelReasoningEffort; + const webSearchMode = workflowLevelOptions.webSearchMode ?? config.assistants.codex.webSearchMode; + const additionalDirectories = + workflowLevelOptions.additionalDirectories ?? 
config.assistants.codex.additionalDirectories; + + const resolved: WorkflowCodexExecutionOptions = {}; + if (modelReasoningEffort !== undefined) resolved.modelReasoningEffort = modelReasoningEffort; + if (webSearchMode !== undefined) resolved.webSearchMode = webSearchMode; + if (additionalDirectories !== undefined) resolved.additionalDirectories = additionalDirectories; + + return Object.keys(resolved).length > 0 ? resolved : undefined; +} + /** * Load MCP server config from a JSON file and expand environment variables. * Format: Record matching the SDK's expected shape. @@ -488,14 +548,15 @@ async function resolveNodeProviderAndModel( let options: WorkflowAssistantOptions | undefined; if (provider === 'codex') { options = { - model, - modelReasoningEffort: config.assistants.codex.modelReasoningEffort, - webSearchMode: config.assistants.codex.webSearchMode, - additionalDirectories: config.assistants.codex.additionalDirectories, + ...(model ? { model } : {}), + ...(resolveWorkflowCodexOptions(workflowLevelOptions, config) ?? {}), }; if (node.output_format) { options.outputFormat = { type: 'json_schema', schema: node.output_format }; } + if (Object.keys(options).length === 0) { + options = undefined; + } } else { const claudeOptions: WorkflowAssistantOptions = {}; if (model) claudeOptions.model = model; @@ -1511,6 +1572,7 @@ async function executeScriptNode( // Build the command and args based on runtime and inline vs named let cmd = ''; let args: string[] = []; + let cleanupBundledScript: (() => Promise) | undefined; const nodeDeps = node.deps ?? 
[]; @@ -1527,13 +1589,11 @@ async function executeScriptNode( args = ['run', ...withFlags, 'python', '-c', finalScript]; } } else { - // Named script — look up in .archon/scripts/ directory - const scriptsDir = resolve(cwd, '.archon', 'scripts'); - const scripts = await discoverScripts(scriptsDir); - const scriptDef = scripts.get(finalScript); + // Named script — look up in repo scripts first, then Archon defaults + const scriptDef = await resolveNamedScript(cwd, finalScript); if (!scriptDef) { - const errorMsg = `Script node '${node.id}': named script '${finalScript}' not found in .archon/scripts/`; + const errorMsg = `Script node '${node.id}': named script '${finalScript}' not found in .archon/scripts/ or Archon defaults`; getLog().error({ nodeId: node.id, scriptName: finalScript }, 'script_not_found'); await safeSendMessage(platform, conversationId, errorMsg, nodeContext); await logNodeError(logDir, workflowRun.id, node.id, errorMsg); @@ -1562,21 +1622,48 @@ async function executeScriptNode( return { state: 'failed', output: '', error: errorMsg }; } - // Use scriptDef.runtime (canonical source) instead of re-deriving from extension - if (scriptDef.runtime === 'uv') { - cmd = 'uv'; - const withFlags = nodeDeps.flatMap(dep => ['--with', dep]); - args = ['run', ...withFlags, scriptDef.path]; + if ('bundled' in scriptDef && scriptDef.bundled) { + const bundledExecution = await buildBundledScriptExecution( + scriptDef.name, + scriptDef.runtime, + scriptDef.content, + nodeDeps + ); + cmd = bundledExecution.cmd; + args = bundledExecution.args; + cleanupBundledScript = bundledExecution.cleanup; } else { - cmd = 'bun'; - args = ['run', scriptDef.path]; + // Use scriptDef.runtime (canonical source) instead of re-deriving from extension + if (scriptDef.runtime === 'uv') { + cmd = 'uv'; + const withFlags = nodeDeps.flatMap(dep => ['--with', dep]); + args = ['run', ...withFlags, scriptDef.path]; + } else { + cmd = 'bun'; + args = ['run', scriptDef.path]; + } } } - const { 
stdout, stderr } = await execFileAsync(cmd, args, { - cwd, - timeout, - }); + let stdout = ''; + let stderr = ''; + try { + const result = await execFileAsync(cmd, args, { + cwd, + timeout, + }); + stdout = result.stdout; + stderr = result.stderr; + } finally { + if (cleanupBundledScript) { + await cleanupBundledScript().catch((error: Error) => { + getLog().warn( + { err: error, nodeId: node.id, scriptName: finalScript }, + 'bundled_script_cleanup_failed' + ); + }); + } + } // Trim trailing newline from stdout (common shell behavior) const output = stdout.replace(/\n$/, ''); @@ -1663,22 +1750,17 @@ async function executeScriptNode( } /** - * Build WorkflowAssistantOptions from resolved provider, model, and config. + * Build WorkflowAssistantOptions from resolved provider, model, workflow-level options, and config. * Caller is responsible for resolving per-node overrides before passing model. */ function buildLoopNodeOptions( provider: 'claude' | 'codex', model: string | undefined, + workflowLevelOptions: WorkflowLevelOptions, config: WorkflowConfig ): WorkflowAssistantOptions | undefined { const codexOptions = - provider === 'codex' - ? { - modelReasoningEffort: config.assistants.codex.modelReasoningEffort, - webSearchMode: config.assistants.codex.webSearchMode, - additionalDirectories: config.assistants.codex.additionalDirectories, - } - : undefined; + provider === 'codex' ? resolveWorkflowCodexOptions(workflowLevelOptions, config) : undefined; const claudeOptions = provider === 'claude' && config.assistants.claude.settingSources @@ -1689,6 +1771,103 @@ function buildLoopNodeOptions( return { ...(model ? 
{ model } : {}), ...codexOptions, ...claudeOptions }; } +interface LoopProgressSnapshot { + gitHead?: string; + completedTaskCount?: number; +} + +async function resolveLoopProgressFile( + progressFile: string, + workflowRun: WorkflowRun, + artifactsDir: string, + baseBranch: string, + docsDir: string, + issueContext: string | undefined +): Promise { + const { prompt: substitutedPath } = substituteWorkflowVariables( + progressFile, + workflowRun.id, + workflowRun.user_message, + artifactsDir, + baseBranch, + docsDir, + issueContext + ); + return substitutedPath; +} + +function countCompletedTasks(progressText: string): number { + const matches = progressText.match(/^## Task \d+: .* — COMPLETED$/gm); + return matches?.length ?? 0; +} + +async function captureLoopProgressSnapshot( + cwd: string, + workflowRun: WorkflowRun, + loop: LoopNode['loop'], + artifactsDir: string, + baseBranch: string, + docsDir: string, + issueContext: string | undefined +): Promise { + let gitHead: string | undefined; + try { + const result = await execFileAsync('git', ['rev-parse', 'HEAD'], { cwd }); + gitHead = result.stdout.trim() || undefined; + } catch { + gitHead = undefined; + } + + let completedTaskCount: number | undefined; + if (loop.progress_file) { + try { + const resolvedPath = await resolveLoopProgressFile( + loop.progress_file, + workflowRun, + artifactsDir, + baseBranch, + docsDir, + issueContext + ); + const progressPath = isAbsolute(resolvedPath) ? 
resolvedPath : resolve(cwd, resolvedPath); + const progressText = await readFile(progressPath, 'utf8').catch( + (error: Error & { code?: string }) => { + if (error.code === 'ENOENT') return ''; + throw error; + } + ); + completedTaskCount = countCompletedTasks(progressText); + } catch { + completedTaskCount = undefined; + } + } + + return { gitHead, completedTaskCount }; +} + +function didLoopProgressAdvance( + previous: LoopProgressSnapshot, + current: LoopProgressSnapshot +): boolean { + if ( + previous.gitHead !== undefined && + current.gitHead !== undefined && + previous.gitHead !== current.gitHead + ) { + return true; + } + + if ( + previous.completedTaskCount !== undefined && + current.completedTaskCount !== undefined && + current.completedTaskCount > previous.completedTaskCount + ) { + return true; + } + + return false; +} + /** * Execute a loop node — runs prompt repeatedly until completion signal or max iterations. * @@ -1711,6 +1890,7 @@ async function executeLoopNode( baseBranch: string, docsDir: string, nodeOutputs: Map, + workflowLevelOptions: WorkflowLevelOptions, config: WorkflowConfig, issueContext?: string ): Promise { @@ -1745,7 +1925,14 @@ async function executeLoopNode( let loopTotalCostUsd: number | undefined; let loopFinalStopReason: string | undefined; let loopTotalNumTurns: number | undefined; - const resolvedOptions = buildLoopNodeOptions(workflowProvider, workflowModel, config); + let previousProgressSnapshot: LoopProgressSnapshot | undefined; + let noProgressStreak = 0; + const resolvedOptions = buildLoopNodeOptions( + workflowProvider, + workflowModel, + workflowLevelOptions, + config + ); // Helper to log event store errors consistently const logEventStoreError = (err: Error, iteration: number): void => { @@ -2112,6 +2299,57 @@ async function executeLoopNode( }; } + if (loop.stuck_after_no_progress_iterations !== undefined) { + const currentProgressSnapshot = await captureLoopProgressSnapshot( + cwd, + workflowRun, + loop, + 
artifactsDir, + baseBranch, + docsDir, + issueContext + ); + + if ( + previousProgressSnapshot && + !didLoopProgressAdvance(previousProgressSnapshot, currentProgressSnapshot) + ) { + noProgressStreak += 1; + } else { + noProgressStreak = 0; + } + previousProgressSnapshot = currentProgressSnapshot; + + if (noProgressStreak >= loop.stuck_after_no_progress_iterations) { + const progressSummary = [ + currentProgressSnapshot.gitHead + ? `HEAD=${currentProgressSnapshot.gitHead.slice(0, 7)}` + : null, + currentProgressSnapshot.completedTaskCount !== undefined + ? `completed_tasks=${String(currentProgressSnapshot.completedTaskCount)}` + : null, + ] + .filter(Boolean) + .join(', '); + const errorMsg = + `Loop node '${node.id}' made no durable progress for ${String(noProgressStreak)} consecutive iteration` + + `${noProgressStreak === 1 ? '' : 's'}. ` + + 'Stop and inspect the current task before retrying.' + + (progressSummary ? ` Snapshot: ${progressSummary}` : ''); + getLog().warn( + { nodeId: node.id, iteration: i, noProgressStreak, progressSummary }, + 'loop_node.no_progress_streak_reached' + ); + await safeSendMessage(platform, conversationId, errorMsg, msgContext); + return { + state: 'failed', + output: lastIterationOutput, + error: errorMsg, + costUsd: loopTotalCostUsd, + }; + } + } + // Interactive loop gate — pause after every iteration where the AI did NOT emit the // completion signal. The user reviews the AI's output and provides feedback or approval. // On approval, the AI will emit the signal in the next iteration, exiting above. 
@@ -2382,6 +2620,9 @@ export async function executeDagWorkflow( fallbackModel: workflow.fallbackModel, betas: workflow.betas, sandbox: workflow.sandbox, + modelReasoningEffort: workflow.modelReasoningEffort, + webSearchMode: workflow.webSearchMode, + additionalDirectories: workflow.additionalDirectories, }; const layers = buildTopologicalLayers(workflow.nodes); const nodeOutputs = new Map(); @@ -2642,6 +2883,7 @@ export async function executeDagWorkflow( baseBranch, docsDir, nodeOutputs, + workflowLevelOptions, config, issueContext ); @@ -2939,11 +3181,11 @@ export async function executeDagWorkflow( const failMsg = `DAG workflow '${workflow.name}' completed with no successful nodes. ` + 'Check node conditions, trigger rules, and upstream failures.'; - // Note: nodeCounts not stored for failed runs — failWorkflowRun only stores { error }. - // Frontend guards with isValidNodeCounts so missing node_counts is safe. - await deps.store.failWorkflowRun(workflowRun.id, failMsg).catch((dbErr: Error) => { - getLog().error({ err: dbErr, workflowRunId: workflowRun.id }, 'dag_db_fail_failed'); - }); + await deps.store + .failWorkflowRun(workflowRun.id, failMsg, { node_counts: nodeCounts }) + .catch((dbErr: Error) => { + getLog().error({ err: dbErr, workflowRunId: workflowRun.id }, 'dag_db_fail_failed'); + }); await logWorkflowError(logDir, workflowRun.id, failMsg).catch((logErr: Error) => { getLog().error( { err: logErr, workflowRunId: workflowRun.id }, @@ -2957,6 +3199,18 @@ export async function executeDagWorkflow( workflowName: workflow.name, error: failMsg, }); + deps.store + .createWorkflowEvent({ + workflow_run_id: workflowRun.id, + event_type: 'workflow_failed', + data: { error: failMsg, node_counts: nodeCounts }, + }) + .catch((err: Error) => { + getLog().error( + { err, workflowRunId: workflowRun.id, eventType: 'workflow_failed' }, + 'workflow_event_persist_failed' + ); + }); emitterForFail.unregisterRun(workflowRun.id); await safeSendMessage(platform, conversationId, 
`\u274c ${failMsg}`, { workflowId: workflowRun.id, @@ -2970,12 +3224,48 @@ export async function executeDagWorkflow( .filter(([, o]) => o.state === 'failed') .map(([id, o]) => `'${id}': ${o.state === 'failed' ? o.error : 'unknown'}`) .join('; '); - await safeSendMessage( - platform, - conversationId, - `\u26a0\ufe0f Some DAG nodes failed: ${failedNodes}\nSuccessful nodes completed normally.`, - { workflowId: workflowRun.id } - ); + if (await skipIfStatusChanged('dag.skip_partial_fail_status_changed')) return; + const failMsg = + `DAG workflow '${workflow.name}' failed after partial execution. ` + + `Failed nodes: ${failedNodes}`; + await deps.store + .failWorkflowRun(workflowRun.id, failMsg, { + node_counts: nodeCounts, + failed_nodes: failedNodes, + }) + .catch((dbErr: Error) => { + getLog().error({ err: dbErr, workflowRunId: workflowRun.id }, 'dag_db_partial_fail_failed'); + }); + await logWorkflowError(logDir, workflowRun.id, failMsg).catch((logErr: Error) => { + getLog().error( + { err: logErr, workflowRunId: workflowRun.id }, + 'dag.workflow_error_log_write_failed' + ); + }); + const emitterForFail = getWorkflowEventEmitter(); + emitterForFail.emit({ + type: 'workflow_failed', + runId: workflowRun.id, + workflowName: workflow.name, + error: failMsg, + }); + deps.store + .createWorkflowEvent({ + workflow_run_id: workflowRun.id, + event_type: 'workflow_failed', + data: { error: failMsg, node_counts: nodeCounts, failed_nodes: failedNodes }, + }) + .catch((err: Error) => { + getLog().error( + { err, workflowRunId: workflowRun.id, eventType: 'workflow_failed' }, + 'workflow_event_persist_failed' + ); + }); + emitterForFail.unregisterRun(workflowRun.id); + await safeSendMessage(platform, conversationId, `\u274c ${failMsg}`, { + workflowId: workflowRun.id, + }); + return; } // Check if status was changed externally (e.g. cancelled) before marking complete. 
diff --git a/packages/workflows/src/defaults/bundled-defaults.test.ts b/packages/workflows/src/defaults/bundled-defaults.test.ts index e1e1cb5a30..d0200bad87 100644 --- a/packages/workflows/src/defaults/bundled-defaults.test.ts +++ b/packages/workflows/src/defaults/bundled-defaults.test.ts @@ -1,5 +1,10 @@ import { describe, it, expect } from 'bun:test'; -import { isBinaryBuild, BUNDLED_COMMANDS, BUNDLED_WORKFLOWS } from './bundled-defaults'; +import { + isBinaryBuild, + BUNDLED_COMMANDS, + BUNDLED_WORKFLOWS, + BUNDLED_SCRIPTS, +} from './bundled-defaults'; describe('bundled-defaults', () => { describe('isBinaryBuild', () => { @@ -16,6 +21,7 @@ describe('bundled-defaults', () => { it('should have all expected default commands', () => { const expectedCommands = [ 'archon-assist', + 'archon-assist-codex', 'archon-code-review-agent', 'archon-comment-quality-agent', 'archon-create-pr', @@ -42,7 +48,7 @@ describe('bundled-defaults', () => { expect(BUNDLED_COMMANDS).toHaveProperty(cmd); } - expect(Object.keys(BUNDLED_COMMANDS)).toHaveLength(21); + expect(Object.keys(BUNDLED_COMMANDS)).toHaveLength(22); }); it('should have non-empty content for all commands', () => { @@ -79,6 +85,7 @@ describe('bundled-defaults', () => { it('should have all expected default workflows', () => { const expectedWorkflows = [ 'archon-assist', + 'archon-assist-codex', 'archon-comprehensive-pr-review', 'archon-create-issue', 'archon-feature-development', @@ -89,6 +96,7 @@ describe('bundled-defaults', () => { 'archon-remotion-generate', 'archon-interactive-prd', 'archon-piv-loop', + 'archon-piv-loop-codex', 'archon-adversarial-dev', 'archon-workflow-builder', ]; @@ -97,7 +105,7 @@ describe('bundled-defaults', () => { expect(BUNDLED_WORKFLOWS).toHaveProperty(wf); } - expect(Object.keys(BUNDLED_WORKFLOWS)).toHaveLength(13); + expect(Object.keys(BUNDLED_WORKFLOWS)).toHaveLength(15); }); it('should have non-empty content for all workflows', () => { @@ -132,4 +140,13 @@ describe('bundled-defaults', () 
=> { } }); }); + + describe('BUNDLED_SCRIPTS', () => { + it('should include the detect-project helper used by default workflows', () => { + expect(BUNDLED_SCRIPTS).toHaveProperty('detect-project'); + expect(BUNDLED_SCRIPTS['detect-project'].runtime).toBe('bun'); + expect(BUNDLED_SCRIPTS['detect-project'].extension).toBe('.ts'); + expect(BUNDLED_SCRIPTS['detect-project'].content).toContain('PROJECT_TYPE='); + }); + }); }); diff --git a/packages/workflows/src/defaults/bundled-defaults.ts b/packages/workflows/src/defaults/bundled-defaults.ts index a921171b9e..f815900b99 100644 --- a/packages/workflows/src/defaults/bundled-defaults.ts +++ b/packages/workflows/src/defaults/bundled-defaults.ts @@ -11,10 +11,11 @@ import { BUNDLED_IS_BINARY } from '@archon/paths'; // ============================================================================= -// Default Commands (21 total) +// Default Commands (22 total) // ============================================================================= import archonAssistCmd from '../../../../.archon/commands/defaults/archon-assist.md' with { type: 'text' }; +import archonAssistCodexCmd from '../../../../.archon/commands/defaults/archon-assist-codex.md' with { type: 'text' }; import archonCodeReviewAgentCmd from '../../../../.archon/commands/defaults/archon-code-review-agent.md' with { type: 'text' }; import archonCommentQualityAgentCmd from '../../../../.archon/commands/defaults/archon-comment-quality-agent.md' with { type: 'text' }; import archonCreatePrCmd from '../../../../.archon/commands/defaults/archon-create-pr.md' with { type: 'text' }; @@ -37,10 +38,19 @@ import archonValidatePrE2eMainCmd from '../../../../.archon/commands/defaults/ar import archonValidatePrReportCmd from '../../../../.archon/commands/defaults/archon-validate-pr-report.md' with { type: 'text' }; // ============================================================================= -// Default Workflows (13 total) +// Default Scripts +// 
============================================================================= + +// @ts-expect-error Bun text import of a TypeScript source asset is valid at runtime, +// but TypeScript rejects the .ts extension in import-attribute mode. +import detectProjectScript from '../../../../.archon/scripts/detect-project.ts' with { type: 'text' }; + +// ============================================================================= +// Default Workflows (15 total) // ============================================================================= import archonAssistWf from '../../../../.archon/workflows/defaults/archon-assist.yaml' with { type: 'text' }; +import archonAssistCodexWf from '../../../../.archon/workflows/defaults/archon-assist-codex.yaml' with { type: 'text' }; import archonComprehensivePrReviewWf from '../../../../.archon/workflows/defaults/archon-comprehensive-pr-review.yaml' with { type: 'text' }; import archonCreateIssueWf from '../../../../.archon/workflows/defaults/archon-create-issue.yaml' with { type: 'text' }; import archonFeatureDevelopmentWf from '../../../../.archon/workflows/defaults/archon-feature-development.yaml' with { type: 'text' }; @@ -51,6 +61,7 @@ import archonValidatePrWf from '../../../../.archon/workflows/defaults/archon-va import archonRemotionGenerateWf from '../../../../.archon/workflows/defaults/archon-remotion-generate.yaml' with { type: 'text' }; import archonInteractivePrdWf from '../../../../.archon/workflows/defaults/archon-interactive-prd.yaml' with { type: 'text' }; import archonPivLoopWf from '../../../../.archon/workflows/defaults/archon-piv-loop.yaml' with { type: 'text' }; +import archonPivLoopCodexWf from '../../../../.archon/workflows/defaults/archon-piv-loop-codex.yaml' with { type: 'text' }; import archonAdversarialDevWf from '../../../../.archon/workflows/defaults/archon-adversarial-dev.yaml' with { type: 'text' }; import archonWorkflowBuilderWf from '../../../../.archon/workflows/defaults/archon-workflow-builder.yaml' 
with { type: 'text' }; @@ -63,6 +74,7 @@ import archonWorkflowBuilderWf from '../../../../.archon/workflows/defaults/arch */ export const BUNDLED_COMMANDS: Record = { 'archon-assist': archonAssistCmd, + 'archon-assist-codex': archonAssistCodexCmd, 'archon-code-review-agent': archonCodeReviewAgentCmd, 'archon-comment-quality-agent': archonCommentQualityAgentCmd, 'archon-create-pr': archonCreatePrCmd, @@ -85,11 +97,29 @@ export const BUNDLED_COMMANDS: Record = { 'archon-validate-pr-report': archonValidatePrReportCmd, }; +export interface BundledScriptAsset { + content: string; + runtime: 'bun' | 'uv'; + extension: '.ts' | '.js' | '.py'; +} + +/** + * Bundled default scripts - filename (without extension) -> runtime + content + */ +export const BUNDLED_SCRIPTS: Record = { + 'detect-project': { + content: detectProjectScript, + runtime: 'bun', + extension: '.ts', + }, +}; + /** * Bundled default workflows - filename (without extension) -> content */ export const BUNDLED_WORKFLOWS: Record = { 'archon-assist': archonAssistWf, + 'archon-assist-codex': archonAssistCodexWf, 'archon-comprehensive-pr-review': archonComprehensivePrReviewWf, 'archon-create-issue': archonCreateIssueWf, 'archon-feature-development': archonFeatureDevelopmentWf, @@ -100,6 +130,7 @@ export const BUNDLED_WORKFLOWS: Record = { 'archon-remotion-generate': archonRemotionGenerateWf, 'archon-interactive-prd': archonInteractivePrdWf, 'archon-piv-loop': archonPivLoopWf, + 'archon-piv-loop-codex': archonPivLoopCodexWf, 'archon-adversarial-dev': archonAdversarialDevWf, 'archon-workflow-builder': archonWorkflowBuilderWf, }; diff --git a/packages/workflows/src/loader.test.ts b/packages/workflows/src/loader.test.ts index 74b86a5977..15f2a2f5df 100644 --- a/packages/workflows/src/loader.test.ts +++ b/packages/workflows/src/loader.test.ts @@ -1622,6 +1622,8 @@ nodes: max_iterations: 10 fresh_context: true until_bash: "test -f done.txt" + progress_file: "$ARTIFACTS_DIR/progress.txt" + 
stuck_after_no_progress_iterations: 3 idle_timeout: 300000 ` ); @@ -1641,6 +1643,8 @@ nodes: expect(wf.nodes[0].loop.max_iterations).toBe(10); expect(wf.nodes[0].loop.fresh_context).toBe(true); expect(wf.nodes[0].loop.until_bash).toBe('test -f done.txt'); + expect(wf.nodes[0].loop.progress_file).toBe('$ARTIFACTS_DIR/progress.txt'); + expect(wf.nodes[0].loop.stuck_after_no_progress_iterations).toBe(3); expect(wf.nodes[0].idle_timeout).toBe(300000); } }); @@ -1671,6 +1675,8 @@ nodes: if (isLoopNode(wf.nodes[0])) { expect(wf.nodes[0].loop.fresh_context).toBe(false); expect(wf.nodes[0].loop.until_bash).toBeUndefined(); + expect(wf.nodes[0].loop.progress_file).toBeUndefined(); + expect(wf.nodes[0].loop.stuck_after_no_progress_iterations).toBeUndefined(); } }); diff --git a/packages/workflows/src/schemas/loop.ts b/packages/workflows/src/schemas/loop.ts index a21a6b2192..62a18675d0 100644 --- a/packages/workflows/src/schemas/loop.ts +++ b/packages/workflows/src/schemas/loop.ts @@ -15,6 +15,10 @@ export const loopNodeConfigSchema = z fresh_context: z.boolean().default(false), /** Optional bash script run after each iteration; exit 0 = complete. */ until_bash: z.string().optional(), + /** Optional progress file used to detect durable task completion across iterations. */ + progress_file: z.string().optional(), + /** Fail early when this many consecutive iterations make no durable progress. */ + stuck_after_no_progress_iterations: z.number().int().min(2).optional(), /** When true, pause between iterations for user input via /workflow approve. */ interactive: z.boolean().optional(), /** Message shown to user when paused (required when interactive is true). 
*/ diff --git a/packages/workflows/src/script-discovery.test.ts b/packages/workflows/src/script-discovery.test.ts index 18bc9c58ef..49fdef1b5f 100644 --- a/packages/workflows/src/script-discovery.test.ts +++ b/packages/workflows/src/script-discovery.test.ts @@ -18,7 +18,11 @@ const mockLogger = { debug: mock(() => undefined), trace: mock(() => undefined), }; -mock.module('@archon/paths', () => ({ createLogger: mock(() => mockLogger) })); +mock.module('@archon/paths', () => ({ + createLogger: mock(() => mockLogger), + getDefaultScriptsPath: () => '/app/.archon/scripts', + BUNDLED_IS_BINARY: false, +})); import { discoverScripts, getDefaultScripts } from './script-discovery'; @@ -160,10 +164,15 @@ describe('discoverScripts', () => { }); describe('getDefaultScripts', () => { - test('returns an empty Map', () => { + test('returns the bundled detect-project script', () => { const defaults = getDefaultScripts(); expect(defaults).toBeInstanceOf(Map); - expect(defaults.size).toBe(0); + expect(defaults.size).toBeGreaterThan(0); + const detectProject = defaults.get('detect-project'); + expect(detectProject).toBeDefined(); + expect(detectProject?.runtime).toBe('bun'); + expect(detectProject?.path).toBe('[bundled:detect-project]'); + expect(detectProject?.content).toContain('function detectProject()'); }); test('returns a new Map each call', () => { diff --git a/packages/workflows/src/script-discovery.ts b/packages/workflows/src/script-discovery.ts index ce74b1a3bb..69033f335d 100644 --- a/packages/workflows/src/script-discovery.ts +++ b/packages/workflows/src/script-discovery.ts @@ -5,8 +5,9 @@ * from the file extension: .ts/.js -> bun, .py -> uv. 
*/ import { readdir, stat } from 'fs/promises'; -import { join, basename, extname } from 'path'; -import { createLogger } from '@archon/paths'; +import { resolve, join, basename, extname } from 'path'; +import { createLogger, getDefaultScriptsPath } from '@archon/paths'; +import { BUNDLED_SCRIPTS, isBinaryBuild } from './defaults/bundled-defaults'; /** Normalize path separators to forward slashes for cross-platform consistency */ function normalizeSep(p: string): string { @@ -30,6 +31,15 @@ export interface ScriptDefinition { runtime: ScriptRuntime; } +/** A bundled script that exists only as embedded content. */ +export interface BundledScriptDefinition extends ScriptDefinition { + content: string; + bundled: true; +} + +/** A resolved script can come from the repo or from bundled defaults. */ +export type ResolvedScriptDefinition = ScriptDefinition | BundledScriptDefinition; + /** Supported file extensions and their runtimes */ const EXTENSION_RUNTIME_MAP: Record = { '.ts': 'bun', @@ -120,9 +130,52 @@ export async function discoverScripts(dir: string): Promise { + const defaults = new Map(); + + for (const [name, asset] of Object.entries(BUNDLED_SCRIPTS)) { + defaults.set(name, { + name, + path: `[bundled:${name}]`, + runtime: asset.runtime, + content: asset.content, + bundled: true, + }); + } + + return defaults; +} + +/** + * Discover default scripts shipped with Archon. + * + * In binary mode, scripts come from the embedded bundle. In dev mode, scripts are + * read from the app's own `.archon/scripts/` directory so default workflows can + * reference default scripts without each target repo copying them in. */ -export function getDefaultScripts(): Map { - return new Map(); +export async function discoverDefaultScripts(): Promise> { + if (isBinaryBuild()) { + return getDefaultScripts(); + } + + return discoverScripts(getDefaultScriptsPath()); +} + +/** + * Resolve a named script using repo-local scripts first, then Archon defaults. 
+ */ +export async function resolveNamedScript( + cwd: string, + scriptName: string +): Promise { + const repoScripts = await discoverScripts(resolve(cwd, '.archon', 'scripts')); + const repoScript = repoScripts.get(scriptName); + if (repoScript) { + return repoScript; + } + + const defaultScripts = await discoverDefaultScripts(); + return defaultScripts.get(scriptName) ?? null; } diff --git a/packages/workflows/src/store.ts b/packages/workflows/src/store.ts index 9d9a85e275..c3cb045af9 100644 --- a/packages/workflows/src/store.ts +++ b/packages/workflows/src/store.ts @@ -54,7 +54,7 @@ export interface IWorkflowStore { updateWorkflowActivity(id: string): Promise; getWorkflowRunStatus(id: string): Promise; completeWorkflowRun(id: string, metadata?: Record): Promise; - failWorkflowRun(id: string, error: string): Promise; + failWorkflowRun(id: string, error: string, metadata?: Record): Promise; pauseWorkflowRun(id: string, approvalContext: ApprovalContext): Promise; cancelWorkflowRun(id: string): Promise; diff --git a/packages/workflows/src/validator.test.ts b/packages/workflows/src/validator.test.ts index 9a8c8979ba..cb3656507c 100644 --- a/packages/workflows/src/validator.test.ts +++ b/packages/workflows/src/validator.test.ts @@ -329,6 +329,15 @@ describe('validateWorkflowResources — script nodes', () => { expect(scriptErrors).toHaveLength(0); }); + test('no error when named bun script exists in Archon defaults', async () => { + const workflow = makeWorkflow('test', [ + { id: 'step1', script: 'detect-project', runtime: 'bun' } as unknown as DagNode, + ]); + const issues = await validateWorkflowResources(workflow, tmpDir); + const scriptErrors = issues.filter(i => i.level === 'error' && i.field === 'script'); + expect(scriptErrors).toHaveLength(0); + }); + test('no error for inline bun script (no file lookup needed)', async () => { const workflow = makeWorkflow('test', [ { diff --git a/packages/workflows/src/validator.ts b/packages/workflows/src/validator.ts index 
be0011763c..9a9eef8d98 100644 --- a/packages/workflows/src/validator.ts +++ b/packages/workflows/src/validator.ts @@ -31,7 +31,7 @@ function getLog(): ReturnType { import { isScriptNode } from './schemas'; import type { WorkflowDefinition, DagNode } from './schemas'; import type { ScriptRuntime } from './script-discovery'; -import { discoverScripts } from './script-discovery'; +import { discoverDefaultScripts, discoverScripts, resolveNamedScript } from './script-discovery'; import { isInlineScript } from './executor-shared'; // ============================================================================= @@ -410,23 +410,26 @@ export async function validateWorkflowResources( if (isScriptNode(node)) { const script = node.script; - // Named script: validate file exists in .archon/scripts/ + // Named script: validate file exists in repo scripts or Archon defaults if (!isInlineScript(script)) { - const scriptsDir = resolve(cwd, '.archon', 'scripts'); - const extensions = node.runtime === 'uv' ? ['.py'] : ['.ts', '.js']; - const existsResults = await Promise.all( - extensions.map(ext => fileExists(join(scriptsDir, `${script}${ext}`))) - ); - const scriptExists = existsResults.some(Boolean); - - if (!scriptExists) { + const resolvedScript = await resolveNamedScript(cwd, script); + + if (!resolvedScript) { issues.push({ level: 'error', nodeId: node.id, field: 'script', - message: `Named script '${script}' not found in .archon/scripts/`, + message: `Named script '${script}' not found in .archon/scripts/ or Archon defaults`, hint: `Create .archon/scripts/${script}.${node.runtime === 'uv' ? 'py' : 'ts'} with your script code`, }); + } else if (resolvedScript.runtime !== node.runtime) { + issues.push({ + level: 'error', + nodeId: node.id, + field: 'runtime', + message: `Script '${script}' resolves to runtime '${resolvedScript.runtime}', but node requests '${node.runtime}'`, + hint: `Update the node runtime or use a ${node.runtime === 'uv' ? 
'.py' : '.ts'} implementation of '${script}'`, + }); } } @@ -548,13 +551,22 @@ export interface ScriptValidationResult { export async function discoverAvailableScripts( cwd: string ): Promise<{ name: string; path: string; runtime: ScriptRuntime }[]> { - const scriptsDir = resolve(cwd, '.archon', 'scripts'); try { - const scripts = await discoverScripts(scriptsDir); - return [...scripts.values()].map(s => ({ name: s.name, path: s.path, runtime: s.runtime })); + const scripts = new Map(); + + for (const script of (await discoverDefaultScripts()).values()) { + scripts.set(script.name, { name: script.name, path: script.path, runtime: script.runtime }); + } + + const repoScripts = await discoverScripts(resolve(cwd, '.archon', 'scripts')); + for (const script of repoScripts.values()) { + scripts.set(script.name, { name: script.name, path: script.path, runtime: script.runtime }); + } + + return [...scripts.values()]; } catch (error) { const err = error as Error; - getLog().warn({ err, scriptsDir }, 'script_discovery_failed'); + getLog().warn({ err, cwd }, 'script_discovery_failed'); return []; } } @@ -567,40 +579,26 @@ export async function validateScript( cwd: string ): Promise { const issues: ValidationIssue[] = []; - const scriptsDir = resolve(cwd, '.archon', 'scripts'); - - // Find the script file (any supported extension) - const allExtensions = ['.ts', '.js', '.py']; - let foundPath: string | null = null; - let detectedRuntime: ScriptRuntime | null = null; - - for (const ext of allExtensions) { - const candidate = join(scriptsDir, `${scriptName}${ext}`); - if (await fileExists(candidate)) { - foundPath = candidate; - detectedRuntime = ext === '.py' ? 
'uv' : 'bun'; - break; - } - } + const resolvedScript = await resolveNamedScript(cwd, scriptName); - if (!foundPath || !detectedRuntime) { + if (!resolvedScript) { issues.push({ level: 'error', field: 'file', - message: `Script '${scriptName}' not found in .archon/scripts/`, + message: `Script '${scriptName}' not found in .archon/scripts/ or Archon defaults`, hint: `Create .archon/scripts/${scriptName}.ts (bun) or .archon/scripts/${scriptName}.py (uv)`, }); return { scriptName, valid: false, issues }; } // Check runtime availability - const runtimeAvailable = await checkRuntimeAvailable(detectedRuntime); + const runtimeAvailable = await checkRuntimeAvailable(resolvedScript.runtime); if (!runtimeAvailable) { issues.push({ level: 'warning', field: 'runtime', - message: `Runtime '${detectedRuntime}' is not available on PATH`, - hint: RUNTIME_INSTALL_HINTS[detectedRuntime], + message: `Runtime '${resolvedScript.runtime}' is not available on PATH`, + hint: RUNTIME_INSTALL_HINTS[resolvedScript.runtime], }); }