From f3110fac0d442a58bc782e45dbedbbdd2f215385 Mon Sep 17 00:00:00 2001 From: matzls Date: Sat, 11 Apr 2026 11:03:16 +0200 Subject: [PATCH 01/21] feat(orchestrator): persist telegram user and assistant turns in DB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Chat platform adapters (Telegram, Slack, Discord) in @archon/adapters are pure transport and cannot call messageDb directly. Until now, only the Web adapter's PersistenceBuffer and the HTTP routes persisted messages, leaving telegram conversations with rows in remote_agent_conversations but zero rows in remote_agent_messages. The Web UI then rendered these conversations as empty. Add four persistence hooks along the handleMessage path (two in handleMessage itself, one each in handleStreamMode and handleBatchMode), gated strictly on platform.getPlatformType() === 'telegram' so the web path is completely untouched: 1. User message persistence after conversation creation + title generation but BEFORE the natural-language approval gate, so approval responses are captured. !message.startsWith('/') excludes every slash-prefixed message, which covers the deterministic slash commands. 2. Stream-mode assistant persistence after parseOrchestratorCommands, inside the "no retract" branch, so retracted /invoke-workflow text is never saved (matches Web's MessagePersistence.retractLastSegment semantics). 3. Batch-mode assistant persistence after platform.sendMessage succeeds, with the same retract guard. 4. Top-level catch persistence for error responses, so orphan user rows without assistant counterparts can't appear in the Web UI view. The conversation variable is hoisted out of the inner try block so the catch handler can reference it. All persistence errors are logged and swallowed — a DB hiccup must not break the user-facing Telegram reply. Gate is strict 'telegram' for MVP. Broadening to Slack/Discord/GitHub will require auditing those adapters' webhook replay behavior first. 
Known MVP limitations (will file as follow-ups): - Tool-call metadata not captured for telegram (web buffer still owns that) - Workflow dispatch progress messages from dag-executor not captured - Non-deterministic slash commands also excluded by the coarse startsWith('/') gate (acceptable — chat clients don't send ad-hoc slash commands) Co-Authored-By: Claude Opus 4.6 (1M context) --- .../src/orchestrator/orchestrator-agent.ts | 80 ++++++++++++++++++- 1 file changed, 78 insertions(+), 2 deletions(-) diff --git a/packages/core/src/orchestrator/orchestrator-agent.ts b/packages/core/src/orchestrator/orchestrator-agent.ts index 97d989f47c..07f5678c18 100644 --- a/packages/core/src/orchestrator/orchestrator-agent.ts +++ b/packages/core/src/orchestrator/orchestrator-agent.ts @@ -20,6 +20,7 @@ import { ConversationNotFoundError } from '../types'; import * as db from '../db/conversations'; import * as codebaseDb from '../db/codebases'; import * as sessionDb from '../db/sessions'; +import * as messageDb from '../db/messages'; import * as commandHandler from '../handlers/command-handler'; import { formatToolCall } from '@archon/workflows/utils/tool-formatter'; import { classifyAndFormatError } from '../utils/error-formatter'; @@ -502,11 +503,15 @@ export async function handleMessage( ): Promise { const { issueContext, threadContext, parentConversationId, isolationHints, attachedFiles } = context ?? {}; + // Hoisted so the top-level catch block can persist error messages for + // non-web platforms (telegram, etc.) — see the catch handler at the end + // of handleMessage for the rationale. + let conversation: Conversation | undefined; try { getLog().debug({ conversationId }, 'orchestrator_message_received'); // 1. 
Get/create conversation and inherit thread context - let conversation = await db.getOrCreateConversation( + conversation = await db.getOrCreateConversation( platform.getPlatformType(), conversationId, undefined, @@ -529,6 +534,27 @@ export async function handleMessage( ); } + // 1d. Persist inbound user message for non-web chat platforms (currently + // telegram only; broaden once slack/discord/github webhook replay is + // audited). Web's PersistenceBuffer and the HTTP routes already own the + // web path. Slash-prefixed messages (/help, /status, etc.) skip this by + // design — deterministic commands are ephemeral utility chatter, not conversation + // content. Placed before the approval-routing block on purpose so that + // natural-language approval responses ARE captured. + if (platform.getPlatformType() === 'telegram' && !message.startsWith('/')) { + try { + await messageDb.addMessage(conversation.id, 'user', message, { + platformType: 'telegram', + }); + } catch (persistErr) { + getLog().error( + { err: toError(persistErr), conversationId: conversation.id }, + 'telegram_user_message_persistence_failed' + ); + // Swallow — persistence failure must not break the user-facing reply. + } + } + // Natural-language approval routing — if a workflow is paused in this // conversation, treat any non-slash message as the approval response. if (!message.startsWith('/')) { @@ -809,6 +835,24 @@ export async function handleMessage( } catch (sendError) { getLog().error({ err: toError(sendError), conversationId }, 'error_notification_failed'); } + // Persist the error response as the assistant turn for non-web chat + // platforms. Without this, the Web UI view of a telegram conversation + // would show an orphan user row with no assistant counterpart whenever + // the orchestrator throws. Skipped silently when conversation is still + // undefined, i.e. the failure occurred before getOrCreateConversation returned. 
+ if (conversation && platform.getPlatformType() === 'telegram') { + try { + await messageDb.addMessage(conversation.id, 'assistant', userMessage, { + platformType: 'telegram', + error: true, + }); + } catch (persistErr) { + getLog().error( + { err: toError(persistErr), conversationId: conversation.id }, + 'telegram_error_message_persistence_failed' + ); + } + } } } @@ -925,7 +969,22 @@ async function handleStreamMode( return; } - // Text was already streamed — nothing more to send + // Text was already streamed — nothing more to send. + // Persist the assistant turn for non-web chat platforms (telegram today). + // Gated AFTER the retract branches above so retracted text is never saved, + // matching the semantics MessagePersistence.retractLastSegment uses on web. + if (platform.getPlatformType() === 'telegram' && fullResponse.trim().length > 0) { + try { + await messageDb.addMessage(conversation.id, 'assistant', fullResponse, { + platformType: 'telegram', + }); + } catch (persistErr) { + getLog().error( + { err: toError(persistErr), conversationId: conversation.id }, + 'telegram_assistant_message_persistence_failed' + ); + } + } } // ─── Batch Mode ───────────────────────────────────────────────────────────── @@ -1061,6 +1120,23 @@ async function handleBatchMode( // No orchestrator commands — send the clean response getLog().debug({ messageLength: finalMessage.length }, 'sending_final_message'); await platform.sendMessage(conversationId, finalMessage); + + // Persist the assistant turn for non-web chat platforms (telegram today). + // Placed after the successful send so we don't record messages the user + // never actually saw. Same retract semantics as stream mode — the two + // early returns above (workflowInvocation, projectRegistration) skip this. 
+ if (platform.getPlatformType() === 'telegram' && finalMessage.trim().length > 0) { + try { + await messageDb.addMessage(conversation.id, 'assistant', finalMessage, { + platformType: 'telegram', + }); + } catch (persistErr) { + getLog().error( + { err: toError(persistErr), conversationId: conversation.id }, + 'telegram_assistant_message_persistence_failed' + ); + } + } } // ─── Orchestrator Command Handlers ────────────────────────────────────────── From 792fbdb4f47bf938bd48ab972581c57d971271ed Mon Sep 17 00:00:00 2001 From: matzls Date: Sat, 11 Apr 2026 11:03:38 +0200 Subject: [PATCH 02/21] feat(web): platform-aware reply hint for cross-platform conversations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Web UI disables the message input for any conversation whose platform_type is not 'web' because Scope B (bidirectional bridging) isn't shipped yet. The old disabledReason string — "Continuing chats from other platforms in the Web UI is coming soon" — was both vague and increasingly misleading now that Telegram conversations render their full history in the Web UI. Replace the hardcoded string with a platform-keyed lookup map so each platform gets a clear "reply from the originating app" hint. The disable condition itself is unchanged; this is a pure copy change plus a small constant. Only telegram is functionally wired up (persistence hooks land in a sibling commit); slack/discord/github entries are forward-compatible and take effect as soon as persistence is broadened to those platforms. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- .../web/src/components/chat/ChatInterface.tsx | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/packages/web/src/components/chat/ChatInterface.tsx b/packages/web/src/components/chat/ChatInterface.tsx index fca7698390..581d8005ef 100644 --- a/packages/web/src/components/chat/ChatInterface.tsx +++ b/packages/web/src/components/chat/ChatInterface.tsx @@ -37,6 +37,18 @@ import { import { useProject } from '@/contexts/ProjectContext'; import { ensureUtc } from '@/lib/format'; +/** + * Human-readable reply hints for conversations whose input is disabled because + * the conversation originated on another platform. Web UI input remains + * disabled for these platforms — users reply from the originating app. + */ +const PLATFORM_REPLY_HINTS: Record<string, string> = { + telegram: 'This conversation is running in Telegram — reply from the Telegram app.', + slack: 'This conversation is running in Slack — reply there.', + discord: 'This conversation is running in Discord — reply there.', + github: 'This conversation is running in a GitHub issue — reply there.', +}; + function mapMessageRow(row: MessageResponse): ChatMessage { let meta: { toolCalls?: { @@ -787,7 +799,8 @@ export function ChatInterface({ conversationId }: ChatInterfaceProps): React.Rea } disabledReason={ currentConv != null && currentConv.platform_type !== 'web' - ? 'Continuing chats from other platforms in the Web UI is coming soon' + ? (PLATFORM_REPLY_HINTS[currentConv.platform_type] ?? 
+ 'This conversation is running on another platform — reply from there.') : undefined } /> From ac5b07ae2cf2c45dfd9a16061d4dc9c6a5d0b061 Mon Sep 17 00:00:00 2001 From: matzls Date: Sat, 11 Apr 2026 11:04:05 +0200 Subject: [PATCH 03/21] test(orchestrator): cover telegram user-message persistence path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a focused test suite for the telegram persistence gate added to handleMessage. Covers the three load-bearing cases on the user-message side of the gate: 1. Natural-language telegram messages persist exactly one row with role 'user', the DB conversation id, the raw message text, and metadata { platformType: 'telegram' }. 2. Deterministic slash commands (/help) skip persistence entirely — neither user nor assistant rows are created. 3. Web-platform conversations do NOT trigger the centralized path, so web's existing MessagePersistence buffer still owns that flow. Assistant-message persistence hooks (inside handleStreamMode, handleBatchMode, and the top-level catch) require mocking sendQuery to yield actual content, which needs a more elaborate mock setup than the existing test file provides. Tracking that as a follow-up rather than blocking the MVP on it — the user-persistence path is the primary new logic and is covered here. A new mock.module('../db/messages', ...) is added near the existing DB mocks so that orchestrator-agent.ts's new messageDb import does not try to open a real DB connection. orchestrator-agent.test.ts runs in its own bun test invocation per packages/core/package.json, so the new mock does not pollute sibling orchestrator tests. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- .../orchestrator/orchestrator-agent.test.ts | 116 ++++++++++++++++++ 1 file changed, 116 insertions(+) diff --git a/packages/core/src/orchestrator/orchestrator-agent.test.ts b/packages/core/src/orchestrator/orchestrator-agent.test.ts index 70080cc01a..1ea40c129c 100644 --- a/packages/core/src/orchestrator/orchestrator-agent.test.ts +++ b/packages/core/src/orchestrator/orchestrator-agent.test.ts @@ -125,6 +125,15 @@ mock.module('../db/workflow-events', () => ({ createWorkflowEvent: mockCreateWorkflowEvent, })); +// Mock db/messages so handleMessage persistence hooks (for non-web platforms) +// don't try to open a real DB connection. addMessage is the only function we +// exercise in these tests. +const mockAddMessage = mock(() => Promise.resolve({} as unknown)); +mock.module('../db/messages', () => ({ + addMessage: mockAddMessage, + listMessages: mock(() => Promise.resolve([])), +})); + mock.module('../config/config-loader', () => ({ loadConfig: mock(() => Promise.resolve({})), })); @@ -1407,3 +1416,110 @@ describe('discoverAllWorkflows — merge repo workflows over global', () => { expect(mockDiscoverWorkflowsWithConfig).toHaveBeenCalledTimes(2); }); }); + +// ─── Telegram user-message persistence ──────────────────────────────────────── + +/** + * These tests cover the `platform.getPlatformType() === 'telegram'` persistence + * gate added to handleMessage. They verify that: + * 1. natural-language telegram messages are persisted with role='user' + * 2. deterministic slash commands skip persistence (stay ephemeral) + * 3. web conversations do NOT trigger the centralized path (web's existing + * PersistenceBuffer still owns the web flow) + * + * Assistant-message persistence hooks (inside handleStreamMode / handleBatchMode + * and in the top-level catch) are not covered here — they require mocking + * sendQuery to yield actual content, which is out of scope for this test batch. + * Track as a follow-up. 
+ */ +function makeTelegramPlatform(): IPlatformAdapter { + return { + sendMessage: mock(() => Promise.resolve()), + ensureThread: mock((id: string) => Promise.resolve(id)), + getStreamingMode: mock(() => 'stream' as const), + getPlatformType: mock(() => 'telegram'), + start: mock(() => Promise.resolve()), + stop: mock(() => {}), + }; +} + +describe('telegram user-message persistence', () => { + beforeEach(() => { + mockAddMessage.mockClear(); + mockGetOrCreateConversation.mockReset(); + mockGetCodebase.mockReset(); + mockListCodebases.mockReset(); + mockDiscoverWorkflowsWithConfig.mockReset(); + mockParseCommand.mockReset(); + + mockGetOrCreateConversation.mockImplementation(() => Promise.resolve(null)); + mockGetCodebase.mockImplementation(() => Promise.resolve(null)); + mockListCodebases.mockImplementation(() => Promise.resolve([])); + mockDiscoverWorkflowsWithConfig.mockImplementation(() => + Promise.resolve({ workflows: [], errors: [] }) + ); + }); + + test('natural-language telegram message is persisted as user turn', async () => { + const conversation = makeConversation({ + id: 'telegram-conv-db-id', + platform_type: 'telegram', + platform_conversation_id: '8579582275', + title: null, + }); + mockGetOrCreateConversation.mockReturnValueOnce(Promise.resolve(conversation)); + + const platform = makeTelegramPlatform(); + await handleMessage(platform, '8579582275', 'What does the orchestrator do?'); + + // user message persisted exactly once + expect(mockAddMessage).toHaveBeenCalled(); + const userCalls = mockAddMessage.mock.calls.filter(c => c[1] === 'user'); + expect(userCalls).toHaveLength(1); + expect(userCalls[0]?.[0]).toBe('telegram-conv-db-id'); + expect(userCalls[0]?.[2]).toBe('What does the orchestrator do?'); + expect(userCalls[0]?.[3]).toEqual({ platformType: 'telegram' }); + }); + + test('deterministic slash command (/help) skips persistence', async () => { + const conversation = makeConversation({ + id: 'telegram-conv-db-id', + platform_type: 
'telegram', + platform_conversation_id: '8579582275', + title: null, + }); + mockGetOrCreateConversation.mockReturnValueOnce(Promise.resolve(conversation)); + mockParseCommand.mockReturnValueOnce({ command: 'help', args: [] }); + mockHandleCommand.mockReturnValueOnce( + Promise.resolve({ success: true, message: 'help text', workflow: undefined }) + ); + + const platform = makeTelegramPlatform(); + await handleMessage(platform, '8579582275', '/help'); + + // /help must not persist anything — neither user nor assistant row + const userCalls = mockAddMessage.mock.calls.filter(c => c[1] === 'user'); + const assistantCalls = mockAddMessage.mock.calls.filter(c => c[1] === 'assistant'); + expect(userCalls).toHaveLength(0); + expect(assistantCalls).toHaveLength(0); + }); + + test('web platform does not trigger centralized persistence path', async () => { + const conversation = makeConversation({ + id: 'web-conv-db-id', + platform_type: 'web', + platform_conversation_id: 'web-test-1', + title: 'Web Test', + }); + mockGetOrCreateConversation.mockReturnValueOnce(Promise.resolve(conversation)); + + // Default makePlatform() returns platform_type='web' + const platform = makePlatform(); + await handleMessage(platform, 'web-test-1', 'Hello from the web UI'); + + // Centralized path is gated to telegram only — web is handled by its own + // PersistenceBuffer at the server layer, not by this code path. + const userCalls = mockAddMessage.mock.calls.filter(c => c[1] === 'user'); + expect(userCalls).toHaveLength(0); + }); +}); From 9aaec85c716a8a5f1dcfd06a0310a5318395c795 Mon Sep 17 00:00:00 2001 From: matzls Date: Sat, 11 Apr 2026 11:04:27 +0200 Subject: [PATCH 04/21] docs(claude): document fork/upstream integration workflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This repo is a clone of coleam00/Archon and will evolve upstream as the project moves through beta. 
To keep local customizations persistent across upstream releases without merge chaos, the working copy is set up fork-first: origin → matzls/Archon (push access), upstream → coleam00/Archon (read-only). Add a "Fork & Upstream Integration" section to CLAUDE.md next to the existing Git Workflow content so future sessions have a single grounded reference for: - Which remote is which and what dev tracks - Where different customization types belong (personal config vs. upstreamable code changes vs. personal code changes) - The exact commands to integrate upstream releases (fetch + ff merge + rebase feature branches) - The exact commands to contribute back via gh pr create Intentionally short — this is routing guidance, not a git tutorial. Co-Authored-By: Claude Opus 4.6 (1M context) --- CLAUDE.md | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/CLAUDE.md b/CLAUDE.md index f38cb29a98..6bcfda764f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -47,6 +47,38 @@ - Workspaces automatically sync with origin before worktree creation (ensures latest code) - **NEVER run `git clean -fd`** - it permanently deletes untracked files (use `git checkout .` instead) +**Fork & Upstream Integration (local clone only)** + +This working copy is a fork of `coleam00/Archon`. Remotes are set up fork-first so local customizations survive upstream releases without merge chaos: + +- `origin` → `https://github.com/matzls/Archon.git` (your fork, push access) +- `upstream` → `https://github.com/coleam00/Archon.git` (Cole's repo, read-only) +- `dev` tracks `upstream/dev` — keep it a clean mirror of upstream and never commit directly to it + +**Where different customizations belong:** + +1. **Personal config** — already outside git: `~/.archon/.env`, `~/.archon/config.yaml`, `~/.archon/archon.db`, per-target-repo `.archon/commands/` and `.archon/workflows/`. Upstream never touches these. +2. 
**Broadly useful code changes** — open a PR from a feature branch on the fork to `coleam00/Archon:dev`. If merged, zero ongoing maintenance. +3. **Personal code changes** — live on a feature branch on the fork; rebase on `upstream/dev` when pulling new releases. + +**Integrating upstream releases:** + +```bash +git fetch upstream +git checkout dev +git merge --ff-only upstream/dev # dev stays a clean mirror of upstream +git checkout feature/my-branch +git rebase dev # replay local commits on top of new dev +git push --force-with-lease origin feature/my-branch +``` + +**Contributing back upstream:** + +```bash +git push -u origin feature/my-branch +gh pr create --repo coleam00/Archon --base dev --head matzls:feature/my-branch +``` + ## Engineering Principles These are implementation constraints, not slogans. Apply them by default. From 73efefd537407e671a66c59212b999767ab779da Mon Sep 17 00:00:00 2001 From: matzls Date: Sun, 12 Apr 2026 08:36:50 +0200 Subject: [PATCH 05/21] feat(workflows): add archon-piv-loop-codex Codex/GPT variant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 9-node DAG PIV loop tuned for Codex behavioral tendencies: numbered SIGNAL EMISSION CONTRACTs, task-scoped implement loop (no repo-wide validators mid-task), pre-existing failure tolerance in code-review, per-file git staging, and tightened COMPLETE signal. Validated end-to-end on my-second-brain-build (Python/Obsidian vault) with 32 pre-existing ruff violations — workflow correctly scoped fixes to branch-introduced issues only. Also add root-level artifacts/ to .gitignore (workflow runtime output). 
Co-Authored-By: Claude Opus 4.6 (1M context) --- .archon/workflows/archon-piv-loop-codex.yaml | 1186 ++++++++++++++++++ .gitignore | 1 + 2 files changed, 1187 insertions(+) create mode 100644 .archon/workflows/archon-piv-loop-codex.yaml diff --git a/.archon/workflows/archon-piv-loop-codex.yaml b/.archon/workflows/archon-piv-loop-codex.yaml new file mode 100644 index 0000000000..a360759657 --- /dev/null +++ b/.archon/workflows/archon-piv-loop-codex.yaml @@ -0,0 +1,1186 @@ +name: archon-piv-loop-codex +description: | + Use when: User wants guided Plan-Implement-Validate development with human-in-the-loop, + using Codex/GPT models instead of Claude. + Triggers: "piv codex", "codex piv", "piv loop codex", "gpt piv", "piv with gpt", + "guided development codex", "structured development codex". + NOT for: Claude-based PIV loops (use archon-piv-loop instead). + NOT for: Autonomous implementation without planning (use archon-feature-development). + NOT for: PRD creation (use archon-interactive-prd). + NOT for: Ralph story-based implementation (use archon-ralph-dag). + + Codex/GPT variant of archon-piv-loop — same 4-phase Plan-Implement-Validate methodology, + with prompt refinements tuned for Codex's behavioral tendencies: + * Explicit numbered SIGNAL EMISSION CONTRACTs in place of Claude-style CRITICAL framing + * Hard stop discipline for the implement loop (Phase 4.5 STOP, "SELECT EXACTLY ONE") + * Per-file staging instead of `git add -A` to prevent task-scope leaks + * Explicit negative guardrails on the implement loop's COMPLETE signal + * Tightened refine-plan approval branch (no tool use on approval) + + Interactive PIV loop workflow — the foundational AI coding methodology: + 1. EXPLORE: Iterative conversation with human to understand the problem (arbitrary rounds) + 2. PLAN: Create structured plan -> iterative review & revision (arbitrary rounds) + 3. IMPLEMENT: Autonomous task-by-task implementation from plan (Ralph loop, hard stop per task) + 4. 
VALIDATE: Automated code review -> iterative human feedback & fixes (arbitrary rounds) + + The PIV loop comes AFTER a PRD exists. Each PIV loop focuses on ONE granular feature or bug fix. + Input: A description of what to build, a path to an existing plan, or a GitHub issue number. + +provider: codex +interactive: true + +nodes: + # ═══════════════════════════════════════════════════════════════ + # PHASE 1: EXPLORE — Iterative exploration with human + # Understand the idea, explore the codebase, converge on approach + # Loops until the user says they're ready to create the plan. + # ═══════════════════════════════════════════════════════════════ + + - id: explore + loop: + prompt: | + # PIV Loop — Exploration + + You are a senior engineering partner in an iterative exploration session. + Your goal: DEEPLY UNDERSTAND what to build before any code is written. + + **User's request**: $ARGUMENTS + **User's latest input** (empty on first iteration): "$LOOP_USER_INPUT" + + --- + + ## If this is the FIRST iteration (no user input yet): + + **Important**: Do Steps 1-4 in order. Do not jump to questions until you have + shown the "## What I Understand" / "## What Already Exists" / "## Initial + Architecture Thoughts" sections first. + + ### Step 1: Parse the Input + + Determine what the user provided: + + **If it's a file path** (ends in `.md`, `.plan.md`, or `.prd.md`): + - Read the file + - If it's an existing plan → summarize it and ask if they want to refine or proceed + - If it's a PRD → identify the specific phase/feature to focus on + + **If it's a GitHub issue** (`#123` format): + - Fetch it: `gh issue view {number} --json title,body,labels,comments` + - Summarize the issue context + + **If it's free text**: + - This is a feature idea or bug description. Use it directly. + + ### Step 2: Explore the Codebase + + Before asking questions, DO YOUR HOMEWORK: + + 1. **Read CLAUDE.md** — understand project conventions, architecture, and constraints + 2. 
**Search for related code** — find existing implementations similar to what the user wants + 3. **Read key files** — understand the current state of code the user wants to change + 4. **Check recent git history** — `git log --oneline -20` for recent changes in the area + + ### Step 3: Present Your Understanding + + ``` + ## What I Understand + + You want to: {restated understanding in 2-3 sentences} + + ## What Already Exists + + - {file:line} — {what it does and how it relates} + - {file:line} — {what it does and how it relates} + - {pattern/component} — {how it could be extended or reused} + + ## Initial Architecture Thoughts + + Based on what exists, I'm thinking: + - {approach 1 — extend existing X} + - {approach 2 — if approach 1 doesn't work} + - {key architectural decision that needs your input} + ``` + + ### Step 4: Ask Targeted Questions + + Ask 4-6 questions focused on DECISIONS, not information gathering: + - Scope boundaries, architecture preferences, tech decisions + - Constraints, existing code extension vs fresh build, testing expectations + - Reference actual code you found — don't ask generic questions + + --- + + ## If the user has provided input (subsequent iterations): + + ### Step 1: Process Their Response + + Read their answers carefully. Identify: + - Decisions they've made + - Areas they want you to explore further + - Questions they asked YOU back (answer these with evidence!) + + ### Step 2: Do Targeted Research + + Based on their response: + - If they mentioned specific technologies → research best practices + - If they pointed you to specific code → read it thoroughly + - If they asked you to explore an area → do a thorough investigation + - If they made architecture decisions → validate against the codebase + + ### Step 3: Present Updated Understanding + + Show what you learned, answer their questions with file:line references, + and present your refined architecture recommendation. 
+ + ### Step 4: Converge or Continue + + **If there are still important open questions:** + Ask 2-4 focused questions about remaining ambiguities. + + **If the picture is clear and you have enough to create a plan:** + Present a final implementation summary: + + ``` + ## Implementation Summary + + ### What We're Building + {Clear, specific description} + + ### Scope Boundary + - IN: {what's included} + - OUT: {what's explicitly excluded} + + ### Architecture + - {key decisions} + + ### Files That Will Change + - `{file}` — {what changes and why} + + ### Success Criteria + - [ ] {specific, testable criterion} + - [ ] All validation passes + + ### Key Risks + - {risk — and mitigation} + ``` + + Then tell the user: "I have a clear picture. Say **ready** and I'll create + the structured implementation plan, or share any final thoughts." + + **SIGNAL EMISSION CONTRACT** — emit `PLAN_READY` only when + ALL of these are true: + 1. The user's LATEST message (not any earlier one) contains an explicit + approval phrase from this list: "ready", "create the plan", "let's go", + "proceed", "I'm done" + 2. The user's message does NOT contain a question, a new request, or + additional feedback to explore + 3. You have addressed everything the user asked in previous turns + + If ANY condition is false, DO NOT emit the tag. Continue the conversation + instead. When you reference this contract in your own output, write "the + PLAN_READY signal" (no angle brackets) so you do not trigger it accidentally. + until: PLAN_READY + max_iterations: 15 + interactive: true + gate_message: | + Answer the questions above, ask me to explore specific areas, + or say "ready" when you're satisfied with the exploration. + + # ═══════════════════════════════════════════════════════════════ + # PHASE 1b: DETECT — Discover project validation commands + # Runs once, output referenced by every downstream node so the + # workflow is portable across bun / npm / python / go / rust / etc. 
+ # ═══════════════════════════════════════════════════════════════ + + - id: detect-project + depends_on: [explore] + bash: | + set -e + + PROJECT_TYPE="unknown" + INSTALL_CMD="" + VALIDATE_CMD="" + TYPECHECK_CMD="" + LINT_CMD="" + TEST_CMD="" + FORMAT_CMD="" + + # -------- Bun / TypeScript -------- + if [ -f "bun.lock" ] || [ -f "bun.lockb" ]; then + PROJECT_TYPE="bun" + INSTALL_CMD="bun install --frozen-lockfile" + grep -q '"validate"' package.json 2>/dev/null && VALIDATE_CMD="bun run validate" + grep -q '"type-check"' package.json 2>/dev/null && TYPECHECK_CMD="bun run type-check" + grep -q '"lint"' package.json 2>/dev/null && LINT_CMD="bun run lint" + grep -q '"test"' package.json 2>/dev/null && TEST_CMD="bun run test" + grep -q '"format:check"' package.json 2>/dev/null && FORMAT_CMD="bun run format:check" + + # -------- Python (via repo signals + source-repo venv lookup) -------- + # Triggers on any Python signal (pyproject, requirements, setup.py, or tests/). + # In a git worktree, .venv is NOT copied from the source repo (git worktree add + # only copies tracked files), so we look up the source repo via git's + # --git-common-dir and reference its .venv/ directly from the worktree. + elif [ -f "pyproject.toml" ] || [ -f "requirements.txt" ] || \ + [ -f "setup.py" ] || compgen -G "tests/test_*.py" > /dev/null 2>&1; then + PROJECT_TYPE="python" + + # Resolve source repo root (the main checkout, not the worktree). + # git rev-parse --git-common-dir returns the main .git dir (even from a worktree). + SOURCE_REPO="" + GIT_COMMON="$(git rev-parse --git-common-dir 2>/dev/null)" + if [ -n "$GIT_COMMON" ]; then + case "$GIT_COMMON" in + /*) ABS_COMMON="$GIT_COMMON" ;; + *) ABS_COMMON="$(pwd)/$GIT_COMMON" ;; + esac + SOURCE_REPO="$(dirname "$ABS_COMMON")" + fi + + # Prefer worktree-local .venv if present (unusual), else source repo .venv. 
+ VENV_BIN="" + if [ -d ".venv" ] && [ -x ".venv/bin/python" ]; then + VENV_BIN=".venv/bin" + elif [ -n "$SOURCE_REPO" ] && [ -x "$SOURCE_REPO/.venv/bin/python" ]; then + VENV_BIN="$SOURCE_REPO/.venv/bin" + fi + + # Detect installed tools via the resolved VENV_BIN (most reliable). + if [ -n "$VENV_BIN" ]; then + [ -x "$VENV_BIN/pytest" ] && TEST_CMD="$VENV_BIN/pytest tests/" + [ -x "$VENV_BIN/ruff" ] && { LINT_CMD="$VENV_BIN/ruff check ."; FORMAT_CMD="$VENV_BIN/ruff format --check ."; } + [ -x "$VENV_BIN/mypy" ] && TYPECHECK_CMD="$VENV_BIN/mypy ." + [ -x "$VENV_BIN/pyright" ] && TYPECHECK_CMD="${TYPECHECK_CMD:+$TYPECHECK_CMD && }$VENV_BIN/pyright" + fi + + # Install command based on project metadata. + if [ -f "pyproject.toml" ]; then + if [ -f "uv.lock" ]; then + INSTALL_CMD="uv sync" + elif [ -f "poetry.lock" ]; then + INSTALL_CMD="poetry install" + elif [ -n "$VENV_BIN" ]; then + INSTALL_CMD="$VENV_BIN/python -m pip install -e ." + fi + elif [ -f "requirements.txt" ] && [ -n "$VENV_BIN" ]; then + INSTALL_CMD="$VENV_BIN/python -m pip install -r requirements.txt" + fi + + # -------- Node (npm/yarn/pnpm) -------- + elif [ -f "package.json" ]; then + PROJECT_TYPE="node" + if [ -f "pnpm-lock.yaml" ]; then + RUN="pnpm" + INSTALL_CMD="pnpm install --frozen-lockfile" + elif [ -f "yarn.lock" ]; then + RUN="yarn" + INSTALL_CMD="yarn install --frozen-lockfile" + else + RUN="npm run" + INSTALL_CMD="npm ci" + fi + grep -q '"validate"' package.json 2>/dev/null && VALIDATE_CMD="$RUN validate" + grep -q '"type-check"' package.json 2>/dev/null && TYPECHECK_CMD="$RUN type-check" + grep -q '"lint"' package.json 2>/dev/null && LINT_CMD="$RUN lint" + grep -q '"test"' package.json 2>/dev/null && TEST_CMD="$RUN test" + grep -q '"format:check"' package.json 2>/dev/null && FORMAT_CMD="$RUN format:check" + + # -------- Go -------- + elif [ -f "go.mod" ]; then + PROJECT_TYPE="go" + INSTALL_CMD="go mod download" + TEST_CMD="go test ./..." + TYPECHECK_CMD="go vet ./..." 
+ command -v golangci-lint >/dev/null 2>&1 && LINT_CMD="golangci-lint run" + FORMAT_CMD='test -z "$(gofmt -l .)"' # gofmt -l exits 0 even when it lists unformatted files; test -z turns a non-empty listing into a failing gate + + # -------- Rust -------- + elif [ -f "Cargo.toml" ]; then + PROJECT_TYPE="rust" + INSTALL_CMD="cargo fetch" + TEST_CMD="cargo test" + TYPECHECK_CMD="cargo check" + LINT_CMD="cargo clippy -- -D warnings" + FORMAT_CMD="cargo fmt -- --check" + + # -------- Makefile fallback -------- + elif [ -f "Makefile" ]; then + PROJECT_TYPE="makefile" + grep -q "^test:" Makefile 2>/dev/null && TEST_CMD="make test" + grep -q "^lint:" Makefile 2>/dev/null && LINT_CMD="make lint" + grep -q "^check:" Makefile 2>/dev/null && VALIDATE_CMD="make check" + fi + + # Compose VALIDATE_CMD from available parts if not already set + if [ -z "$VALIDATE_CMD" ]; then + PARTS="" + [ -n "$TYPECHECK_CMD" ] && PARTS="${PARTS:+$PARTS && }$TYPECHECK_CMD" + [ -n "$LINT_CMD" ] && PARTS="${PARTS:+$PARTS && }$LINT_CMD" + [ -n "$TEST_CMD" ] && PARTS="${PARTS:+$PARTS && }$TEST_CMD" + [ -n "$FORMAT_CMD" ] && PARTS="${PARTS:+$PARTS && }$FORMAT_CMD" + VALIDATE_CMD="$PARTS" + fi + + echo "=== PROJECT DETECTION ===" + echo "PROJECT_TYPE=$PROJECT_TYPE" + echo "INSTALL_CMD=${INSTALL_CMD:-}" + echo "VALIDATE_CMD=${VALIDATE_CMD:-}" + echo "TYPECHECK_CMD=${TYPECHECK_CMD:-}" + echo "LINT_CMD=${LINT_CMD:-}" + echo "TEST_CMD=${TEST_CMD:-}" + echo "FORMAT_CMD=${FORMAT_CMD:-}" + echo "=== END DETECTION ===" + + if [ -z "$VALIDATE_CMD" ] && [ -z "$TEST_CMD" ] && [ -z "$TYPECHECK_CMD" ] && [ -z "$LINT_CMD" ]; then + echo "" + echo "NOTE: No automated validators detected in this project." + echo "The implement loop will proceed without automated validation gates." + echo "Human review is required before merging any changes."
+ fi + + # ═══════════════════════════════════════════════════════════════ + # PHASE 2: PLAN — Create the structured implementation plan + # ═══════════════════════════════════════════════════════════════ + + - id: create-plan + depends_on: [detect-project] + context: fresh + prompt: | + # PIV Loop — Create Structured Plan + + You are creating a structured implementation plan from a completed exploration phase. + This plan will be the SOLE GUIDE for the implementation agent — it must be complete, + specific, and actionable. + + **Original request**: $ARGUMENTS + **Final exploration summary**: $explore.output + + **Project detection output** (used ONLY to fill in the informational Validation Commands section of the plan): + $detect-project.output + + Parse the `PROJECT_TYPE=...`, `VALIDATE_CMD=...`, `TYPECHECK_CMD=...`, `LINT_CMD=...`, + `TEST_CMD=...`, `FORMAT_CMD=...` lines from the detection output above. These are + **informational** — copy them verbatim into the plan's `## Validation Commands` + section so the plan documents what detect-project found. + + **CRITICAL — DO NOT create a plan task whose purpose is running repo-wide + validators.** The implement loop uses each task's own per-task `**Validate:**` + field (task-scoped). Repo-wide validators (`TYPECHECK_CMD`, `LINT_CMD`, `TEST_CMD`, + `FORMAT_CMD`, `VALIDATE_CMD`) are executed once by the `code-review` node after all + implement tasks complete — never as part of the implement loop's Task List. + + Each `### Task N:` entry in the Task List must be EITHER: + - One CREATE or UPDATE change with a scoped `**Validate:**` field (a diff + readback, a targeted test command, or a file existence check), OR + - One verification-only task (like a pre-flight file-absence check) with a + `**Validate:**` field that reads current state without making changes. + + **Never** a task whose `**Details:**` or `**Validate:**` field runs the repo-wide + validator suite. 
That pattern creates a deadlock on any repo with pre-existing + validator failures. Pre-existing repo-wide issues are the `code-review` node's + concern, not the implement loop's. + + --- + + ## Step 1: Read the Codebase (Again) + + Before writing the plan, verify your understanding is current: + + 1. **Read CLAUDE.md** — capture all relevant conventions + 2. **Read every file you plan to change** — note exact current state + 3. **Read example test files** — understand testing patterns + 4. **Check for any recent changes** — `git log --oneline -10` + + ## Step 2: Determine Plan Location + + Generate a kebab-case slug from the feature name. + Save to `.claude/archon/plans/{slug}.plan.md`. + + ```bash + mkdir -p .claude/archon/plans + ``` + + ## Step 3: Write the Plan + + Use this template. Fill EVERY section with specific, verified information. + + ```markdown + # Feature: {Title} + + ## Summary + {1-2 sentences: what changes and why} + + ## Mission + {The core goal in one clear statement} + + ## Success Criteria + - [ ] {Specific, testable criterion for this task} + - [ ] No regressions introduced in files this task touches + - [ ] Code-review (which runs automatically after all implement tasks) surfaces + no NEW validator failures attributable to this change (pre-existing repo-wide + failures are out of scope for this task) + + ## Scope + ### In Scope + - {What we ARE building} + ### Out of Scope + - {What we are NOT building — and why} + + ## Codebase Context + ### Key Files + | File | Role | Action | + |------|------|--------| + | `{path}` | {what it does} | CREATE / UPDATE | + + ### Patterns to Follow + {Actual code snippets from the codebase to mirror} + + ## Architecture + - {Decision 1 — with rationale} + - {Decision 2 — with rationale} + + ## Task List + Execute in order. Each task is atomic and independently verifiable. 
+ + ### Task 1: {ACTION} `{file path}` + **Action**: CREATE / UPDATE + **Details**: {Exact changes — specific enough for an agent with no context} + **Pattern**: Follow `{source file}:{lines}` + **Validate**: `{command to verify this task}` + + ## Testing Strategy + | Test File | Test Cases | Validates | + |-----------|-----------|-----------| + | `{path}` | {cases} | {what it validates} | + + ## Validation Commands + Fill these from the `detect-project` output above. If a command is empty, write + `(none available — human verification only)` instead of inventing one. + + 1. Type check: `{TYPECHECK_CMD from detect-project, or "none available"}` + 2. Lint: `{LINT_CMD, or "none available"}` + 3. Tests: `{TEST_CMD, or "none available"}` + 4. Format check: `{FORMAT_CMD, or "none available"}` + 5. Full validation: `{VALIDATE_CMD, or "none available"}` + 6. Project type: `{PROJECT_TYPE}` + + ## Risks + | Risk | Impact | Mitigation | + |------|--------|------------| + | {risk} | {HIGH/MED/LOW} | {specific mitigation} | + ``` + + ## Step 4: Verify the Plan + + 1. Check every file path referenced — verify they exist + 2. Check every pattern cited — verify the code matches + 3. Check task ordering — ensure dependencies are respected + 4. Check completeness — could an agent with NO context implement this? + + ## Step 5: Report + + ``` + ## Plan Created + + **File**: `.claude/archon/plans/{slug}.plan.md` + **Tasks**: {count} + **Files to change**: {count} + + Key decisions: + - {decision 1} + - {decision 2} + + Please review the plan and provide feedback. + ``` + + # ═══════════════════════════════════════════════════════════════ + # PHASE 2b: PLAN — Iterative plan refinement + # Review and revise the plan as many times as needed. + # ═══════════════════════════════════════════════════════════════ + + - id: refine-plan + depends_on: [create-plan] + loop: + prompt: | + # PIV Loop — Plan Refinement + + The user is reviewing the implementation plan and providing feedback. 
+ + **User's feedback** (empty on first iteration): "$LOOP_USER_INPUT" + + --- + + ## Step 1: Find and Read the Plan + + ```bash + ls -t .claude/archon/plans/*.plan.md 2>/dev/null | head -1 + ``` + + Read the entire plan file. Also read CLAUDE.md for conventions. + + ## Step 2: Process Feedback + + **If there is no user feedback yet** (first iteration, `$LOOP_USER_INPUT` is empty): + - Read the plan carefully + - Present a summary of the plan's key decisions and task list + - Ask the user to review and provide feedback + - Do NOT emit the completion signal on the first iteration + + **If the user EXPLICITLY approved** (said "approved", "looks good", "let's go", etc.): + - Do NOT open, read, or write the plan file. + - Do NOT run any tools. + - Output exactly this sentence: "Plan approved. Proceeding to implementation." + - Then emit `PLAN_APPROVED` on its own line. + + **If the user provided specific feedback:** + - Parse each piece of feedback + - Edit the plan file directly: + - Add/remove/modify tasks as requested + - Update success criteria if needed + - Adjust testing strategy if needed + - Re-verify file paths and patterns after changes + + **SIGNAL EMISSION CONTRACT** — emit `PLAN_APPROVED` only when + ALL of these are true: + 1. The user's LATEST message contains an explicit approval phrase: + "approved", "looks good", "ship it", "let's go", "proceed" + 2. The message does NOT contain questions, requested changes, or new feedback + 3. This is NOT the first iteration (empty `$LOOP_USER_INPUT`) + + If ANY condition is false, DO NOT emit the tag. When referencing this contract + in your output, write "the PLAN_APPROVED signal" (no angle brackets). + + ## Step 3: Show Changes + + ``` + ## Plan Revised + + Changes made: + - {change 1} + - {change 2} + + Updated stats: + - Tasks: {count} + - Files to change: {count} + + Review the updated plan and provide more feedback, or say "approved" to proceed. 
+ ``` + until: PLAN_APPROVED + max_iterations: 10 + interactive: true + gate_message: | + Review the plan document. Provide specific feedback on what to change, + or say "approved" to begin implementation. + + # ═══════════════════════════════════════════════════════════════ + # PHASE 3: IMPLEMENT — Setup + # Read the plan, prepare the environment + # ═══════════════════════════════════════════════════════════════ + + - id: implement-setup + depends_on: [refine-plan, detect-project] + bash: | + set -e + + PLAN_FILE=$(ls -t .claude/archon/plans/*.plan.md 2>/dev/null | head -1) + + if [ -z "$PLAN_FILE" ]; then + echo "ERROR: No plan file found in .claude/archon/plans/" + exit 1 + fi + + # Dependency install is portable — mirrors the detection node's logic. + # Failures here are non-fatal; the NOTE is echoed inside the pipe because `| tail -3` hides the installer's exit status. + if [ -f "bun.lock" ] || [ -f "bun.lockb" ]; then + echo "Installing bun dependencies..." + { bun install --frozen-lockfile 2>&1 || echo "NOTE: bun install failed; continuing"; } | tail -3 + elif [ -f "package-lock.json" ]; then + { npm ci 2>&1 || echo "NOTE: npm ci failed; continuing"; } | tail -3 + elif [ -f "yarn.lock" ]; then + { yarn install --frozen-lockfile 2>&1 || echo "NOTE: yarn install failed; continuing"; } | tail -3 + elif [ -f "pnpm-lock.yaml" ]; then + { pnpm install --frozen-lockfile 2>&1 || echo "NOTE: pnpm install failed; continuing"; } | tail -3 + elif [ -f "uv.lock" ]; then + { uv sync 2>&1 || echo "NOTE: uv sync failed; continuing"; } | tail -3 + elif [ -f "poetry.lock" ]; then + { poetry install 2>&1 || echo "NOTE: poetry install failed; continuing"; } | tail -3 + elif [ -f "go.mod" ]; then + { go mod download 2>&1 || echo "NOTE: go mod download failed; continuing"; } | tail -3 + elif [ -f "Cargo.toml" ]; then + { cargo fetch 2>&1 || echo "NOTE: cargo fetch failed; continuing"; } | tail -3 + else + echo "NOTE: no recognized lockfile — skipping dependency install step" + fi + + echo "BRANCH=$(git branch --show-current)" + echo "GIT_ROOT=$(git rev-parse
--show-toplevel)" + echo "PLAN_FILE=$PLAN_FILE" + + echo "=== PLAN_START ===" + cat "$PLAN_FILE" + echo "" + echo "=== PLAN_END ===" + + TASK_COUNT=$(grep -c "^### Task [0-9]" "$PLAN_FILE" || true) + echo "TASK_COUNT=${TASK_COUNT:-0}" + + # ═══════════════════════════════════════════════════════════════ + # PHASE 3b: IMPLEMENT — Task-by-Task Loop (Ralph pattern, hard stop per task) + # Fresh context each iteration. Reads plan from disk. + # EXACTLY ONE task per iteration. Validates before committing. + # ═══════════════════════════════════════════════════════════════ + + - id: implement + depends_on: [implement-setup] + idle_timeout: 600000 + loop: + prompt: | + # PIV Loop — Implementation Agent + + You are an autonomous coding agent in a FRESH session — no memory of previous iterations. + Your job: Read the plan from disk, implement EXACTLY ONE task, run the task's + own task-scoped verification, commit, update tracking, exit. **Repo-wide + validation is `code-review`'s responsibility — not yours.** You do not run + repo-wide validators at any phase of this loop. + + **Golden Rule 1**: If the task's OWN verification (Phase 3) fails, fix the + task-specific issue before committing. Never commit broken code. But NEVER + touch pre-existing repo-wide issues — those are out of your scope. + **Golden Rule 2**: One task per iteration. The loop engine starts a fresh iteration for the next task. + + --- + + ## Phase 0: CONTEXT — Load State + + The setup node produced this context: + + $implement-setup.output + + **Project detection** (repo-wide validators — INFORMATIONAL ONLY, do not run): + $detect-project.output + + **User's original request**: $USER_MESSAGE + + The `VALIDATE_CMD`, `TYPECHECK_CMD`, `LINT_CMD`, `TEST_CMD`, `FORMAT_CMD` values + from detect-project are **informational only**. The implement loop does **NOT** + run them at any phase. They run later in the `code-review` node. 
The implement + loop uses only the TASK-SCOPED `**Validate:**` field from each task in the plan. + + If you find yourself about to run `ruff check .`, `pytest tests/`, + `bun run validate`, or any similar repo-wide command — STOP. That is not your + job. Your job is the single task from the plan, with the task's own per-task + `**Validate:**` check. + + --- + + ### 0.1 Parse Plan File + + Extract the `PLAN_FILE=...` line from the context above. + + ### 0.2 Read Current State (from disk — not from context above) + + The context above is a snapshot from before the loop started. Previous iterations + may have changed things. **You MUST re-read from disk:** + + 1. **Read the plan file** — your implementation guide + 2. **Read progress tracking** — check if `$ARTIFACTS_DIR/progress.txt` exists + 3. **Read CLAUDE.md** — project conventions and constraints + + ### 0.3 Check Git State + + ```bash + git log --oneline -10 + git status + ``` + + Record the current HEAD hash — you will compare against this at the end of the iteration. + + --- + + ## Phase 1: SELECT EXACTLY ONE — Pick The Single Next Task + + From the plan file, identify tasks by `### Task N:` headers. + Cross-reference with commits from previous iterations and progress tracking. + + **STRICT ORDERING**: Always select the LOWEST-numbered incomplete task. Never + skip to a later task, even if it seems more actionable or if the earlier task + is "just a verification". Verification tasks are load-bearing — they are + numbered first precisely because they must run first and their evidence must + be captured before the state changes. + + If Task N is already completed (per `$ARTIFACTS_DIR/progress.txt`), move to + Task N+1. If Task N is incomplete, work on Task N — never Task N+1 or later. + + **If ALL tasks are complete** → Skip to Phase 5 (Completion). 
+ + ### Announce Selection + + ``` + -- Task Selected ------------------------------------------------ + Task: {N} — {task title} + Action: {CREATE / UPDATE} + File: {file path} + ----------------------------------------------------------------- + ``` + + --- + + ## Phase 2: IMPLEMENT — Execute ONLY This Task + + 1. Read the file you're about to change (if it exists) + 2. Read the pattern file referenced in the plan + 3. Make changes following the plan EXACTLY — for Task {N} ONLY + + Do NOT read, edit, or prepare files for Task N+1 in this iteration. + + **DO NOT run repo-wide validators** (`TYPECHECK_CMD`, `LINT_CMD`, `TEST_CMD`, + `FORMAT_CMD`) during Phase 2. Those run once in `code-review` after the + implement loop finishes — not here. Task-scoped verification happens in Phase 3 + using the task's own `**Validate:**` field from the plan. + + --- + + ## Phase 3: VALIDATE — Verify the Task (task-scoped, NOT repo-wide) + + Run ONLY the task's own `**Validate:**` command from the plan file. This is a + TASK-SCOPED check (a diff readback, a targeted test command, a file existence + check). It is **NOT** the repo-wide validator suite. + + **DO NOT run `TYPECHECK_CMD`, `LINT_CMD`, `TEST_CMD`, or `FORMAT_CMD` in this + phase.** Those are repo-wide commands from the `detect-project` output. They + check whether the WHOLE REPO is healthy — a different question from "did my + single task land correctly". Pre-existing repo-wide failures are NOT this + task's responsibility; they are handled by the `code-review` node later, which + runs the repo-wide suite once (not per task) and may make fix commits for + issues it surfaces. 
+ + If the task's `**Validate:**` command is missing or empty in the plan, fall + back to a minimal task-scoped check based on the task's Action: + - `CREATE` / `UPDATE`: `git diff --stat <file>` plus `cat <file>` + (confirm the diff shape and content match the intent) + - Verification-only task: no action — proceed directly to Phase 4 + + If the task's `**Validate:**` command fails: + 1. The failure is task-specific (not repo-wide). Fix the task-specific issue + and re-run, up to 3 attempts. + 2. If still unfixable, record the block in `$ARTIFACTS_DIR/progress.txt` and + do NOT commit. + + **Never fail the iteration because of pre-existing repo-wide issues that were + already broken before your task started.** Golden Rule 1 forbids you + from fixing those — they are legitimately out of your scope. The repo-wide + health check happens once, later, in the `code-review` node. + + --- + + ## Phase 4: COMMIT — Save Changes + + Before staging, verify the git diff contains ONLY files from the ONE task you just worked on: + + ```bash + git status --short + git diff --stat + ``` + + If you see files outside the scope of the current task (e.g. changes you made while + exploring or testing for Task N+1), revert those extra changes with + `git checkout -- <file>` BEFORE staging. Do NOT use `git add -A`. Stage explicitly + by file path: + + ```bash + git add <file>... # only the files from THIS task + git diff --cached --stat # confirm the staged set matches the task scope + git commit -m "$(cat <<'EOF' + {type}: {task description} + + PIV Task {N}: {brief details} + EOF + )" + ``` + + Track progress in `$ARTIFACTS_DIR/progress.txt` (outside the repo — zero + staging risk, pre-created by the executor, survives resume): + ``` + ## Task {N}: {title} — COMPLETED + Date: {ISO date} + Files: {list} + Commit: {short hash} + --- + ``` + + **DO NOT** stage or commit `$ARTIFACTS_DIR/progress.txt`. It lives outside + the repo entirely and is not a git-tracked file.
It is local scratch state + for the loop only. + + --- + + ## Phase 4.5: STOP — Exit After One Task + + "One task" means literally ONE numbered `### Task N:` entry from the plan file. + Not "one logical change." Not "one feature unit." Not "everything that naturally + goes together." ONE numbered entry. + + **HARD RULES**: + 1. Count your commits in this iteration via `git log HEAD ^` (the + hash you recorded in Phase 0.3). If count > 1, you did too much — revert the + extras with `git reset --mixed ` and let the next iteration pick + up the rest. + 2. Verification tasks (like "VERIFY PRE-FLIGHT") count as numbered tasks and + need their own iteration, even if they produce no code change. + 3. If Task N is a verification-only task, your "commit" for that iteration is + writing to `$ARTIFACTS_DIR/progress.txt` and exiting — no git commit required. + 4. After committing (or recording verification), end the iteration with the + Phase 4.5 status block below. Do NOT read, edit, stage, or validate files + for any other task in this iteration. + + If tasks remain, end the iteration by reporting status in this exact format: + + ``` + Task {N} committed: {short hash} + Files touched: {list} + Remaining tasks: {count} + Next iteration will pick up Task {N+1}. + ``` + + Do NOT emit the completion tag here. Do NOT read or start any other task's files. + + --- + + ## Phase 5: COMPLETE — Check All Tasks + + **SIGNAL EMISSION CONTRACT** — emit `COMPLETE` only when + BOTH of these are true: + 1. Every `### Task N:` entry in the plan file has a matching "COMPLETED" + entry in `$ARTIFACTS_DIR/progress.txt` + 2. 
`git log` on the current branch shows a commit for each code-affecting + task (verification-only tasks have no commit; see Phase 4.5 Rule 3) + + **Phase 5 does NOT run repo-wide validation.** Repo-wide validators + (`TYPECHECK_CMD`, `LINT_CMD`, `TEST_CMD`, `FORMAT_CMD`, `VALIDATE_CMD`) are the + `code-review` node's responsibility — it runs them once after the implement + loop finishes. Pre-existing repo-wide failures are not this loop's problem. + The implement loop's job is "did I complete my assigned tasks?", not "is the + whole repo healthy?". + + If BOTH conditions hold: + 1. Push: `git push -u origin HEAD` + 2. Emit the completion tag on its own line. + + If either condition is false, tasks remain. Report status using the Phase 4.5 + format and end the iteration normally. The loop engine will start a fresh + iteration. When referencing this contract in your output, write "the COMPLETE + signal" (no angle brackets). + until: COMPLETE + max_iterations: 15 + fresh_context: true + + # ═══════════════════════════════════════════════════════════════ + # PHASE 4: VALIDATE — Automated code review + # Review all changes against the plan + # ═══════════════════════════════════════════════════════════════ + + - id: code-review + depends_on: [implement, detect-project] + context: fresh + prompt: | + # PIV Loop — Automated Code Review + + The implementation phase is complete. Review ALL changes against the plan. + + **Implementation output**: $implement.output + + **Project detection** (validation commands for this repo): + $detect-project.output + + Parse `VALIDATE_CMD`, `TYPECHECK_CMD`, `LINT_CMD`, `TEST_CMD`, `FORMAT_CMD` from + the detect-project output above. Use them in Step 4 below. 
+ + --- + + ## Step 1: Find and Read the Plan + + ```bash + ls -t .claude/archon/plans/*.plan.md 2>/dev/null | head -1 + ``` + + ## Step 2: Review All Changes + + ```bash + git log --oneline --no-merges $(git merge-base HEAD $BASE_BRANCH)..HEAD + git diff --stat $(git merge-base HEAD $BASE_BRANCH)..HEAD + git diff $(git merge-base HEAD $BASE_BRANCH)..HEAD + ``` + + ## Step 3: Check Against Plan + + For EACH task: was it implemented correctly? Do success criteria hold? + For EACH file: check quality, security, patterns, CLAUDE.md compliance. + + ## Step 4: Run Validation + + Run `VALIDATE_CMD` from the detect-project output. If `VALIDATE_CMD` is empty, + run each non-empty command from `TYPECHECK_CMD`, `LINT_CMD`, `TEST_CMD`, + `FORMAT_CMD` individually. If all five are empty, note "No automated validators + available for this project; relying on code review + human verification" and + skip this step. + + ## Step 5: Fix Obvious Issues (SCOPED to files this branch changed) + + Get the list of files this branch touched: + ```bash + git diff --name-only $(git merge-base HEAD $BASE_BRANCH)..HEAD + ``` + + Fix ONLY issues that appear in those files. Do NOT fix issues elsewhere in the + repo — those are pre-existing and out of scope for this branch's review. They + go in the "Pre-existing repo findings" section of Step 6's report instead. + + To distinguish changed-file issues from pre-existing ones: + - Run validators scoped to the changed files where possible (e.g., + `$LINT_CMD <changed-file>` if the tool supports per-file args, or run the + full validator and filter its output to only lines referencing changed files) + - For each issue in a changed file: fix it, stage only that file, add it to + a single scoped commit + + Stage explicitly by file path and commit: + ```bash + git add <changed-file>... # NOT git add -A + git diff --cached --stat # confirm only changed-file fixes are staged + git commit -m "fix: address review findings in this branch's changes" 2>/dev/null || true + ``` + + If none of the changed files have issues, skip this step and proceed to Step 6.
+ + **Never fix pre-existing repo-wide issues in Step 5.** Document them in Step 6's + "Pre-existing repo findings" section instead. They are out of scope for this + branch — the user can address them in a separate PR if desired. + + ## Step 6: Present Review + + ``` + ## Code Review Complete + + ### Implementation Status + | Task | Status | Notes | + |------|--------|-------| + | {task} | DONE / PARTIAL / MISSING | {notes} | + + ### Validation Results (scoped to files changed by this branch) + - Type-check: PASS / FAIL / SKIPPED + - Lint: PASS / FAIL / SKIPPED + - Tests: PASS / FAIL / SKIPPED + - Format: PASS / FAIL / SKIPPED + + ### Code Quality Findings (in files changed by this branch) + {Issues found in files this branch touched, or "No issues found."} + + ### Pre-existing Repo Findings (OUT OF SCOPE — not fixed) + {List pre-existing issues in files NOT changed by this branch. These are + documented for user awareness but were NOT fixed as part of this review — + they are out of scope for the current branch. The user can address them in + a separate PR if desired. Example: "ruff check reported 12 failures in + .claude/scripts/*.py that pre-date this branch; not fixed."} + + ### Recommendation + {READY FOR REVIEW / NEEDS FIXES} + ``` + + # ═══════════════════════════════════════════════════════════════ + # PHASE 4b: VALIDATE — Iterative human feedback & fixes + # The user tests the implementation and provides feedback. + # Loops until the user approves. + # ═══════════════════════════════════════════════════════════════ + + - id: fix-feedback + depends_on: [code-review, detect-project] + loop: + prompt: | + # PIV Loop — Address Validation Feedback + + The human has reviewed the implementation and provided feedback. + + **Human's feedback** (empty on first iteration): "$LOOP_USER_INPUT" + + **Project detection** (validation commands for this repo): + $detect-project.output + + Parse `VALIDATE_CMD` etc. from the detect-project output for Step 3 below. 
+ + --- + + ## Step 1: Read Context + + ```bash + ls -t .claude/archon/plans/*.plan.md 2>/dev/null | head -1 + ``` + + Read the plan file and CLAUDE.md for conventions. + + ## Step 2: Process Feedback + + **If there is no user feedback yet** (first iteration, `$LOOP_USER_INPUT` is empty): + - Present the code review results and ask the user to test the implementation + - Do NOT emit the completion signal on the first iteration + + **If the user EXPLICITLY approved** (said "approved", "looks good", "ship it", etc.): + - Output: "Implementation approved!" + - Then emit `VALIDATED` on its own line. + + **SIGNAL EMISSION CONTRACT** — emit `VALIDATED` only when + ALL of these are true: + 1. The user's LATEST message contains an explicit approval phrase: + "approved", "looks good", "ship it", "let's go", "proceed" + 2. The message does NOT contain new feedback or requested fixes + 3. This is NOT the first iteration (empty `$LOOP_USER_INPUT`) + + If ANY condition is false, DO NOT emit the tag. When referencing this contract + in your output, write "the VALIDATED signal" (no angle brackets). + + **If the user provided specific feedback:** + 1. Read the relevant files + 2. Understand each issue + 3. Make the fixes + 4. Type-check after each change + + ## Step 3: Validation (conditional — only when fixes were made) + + **If this iteration is an approval iteration** (Step 2 detected an explicit + approval and you are emitting the VALIDATED signal): SKIP this step entirely. + Do not run validators. Do not make commits. Proceed directly to Step 5 for + the final report and signal emission. + + **If this iteration made fixes in response to user feedback**: run validators + SCOPED to the files you fixed, not the repo-wide suite. Example: if you fixed + `foo.py`, run `$LINT_CMD foo.py` and `$TEST_CMD tests/test_foo.py` if those + tools accept per-file arguments. 
Do NOT run `ruff check .` or equivalent + repo-wide commands — the goal is "did my fixes work?", not "is the whole repo + healthy?". + + If scoped validation fails, attempt one more fix pass on the same file(s). + If still failing, report to the user in Step 5 and do NOT emit VALIDATED. + Pre-existing repo-wide failures unrelated to your fixes are out of scope — + never block VALIDATED on them. + + ## Step 4: Commit Fixes (only when fixes were made) + + ```bash + git add <file-you-fixed>... # explicit paths only — NOT git add -A + git commit -m "$(cat <<'EOF' + fix: address review feedback + + Changes: + - {fix 1} + - {fix 2} + EOF + )" + ``` + + ## Step 5: Report + + ``` + ## Feedback Addressed + + Changes made: + - {fix 1} + - {fix 2} + + Validation: {PASS / FAIL with details} + + Review again, or say "approved" to finalize. + ``` + until: VALIDATED + max_iterations: 10 + interactive: true + gate_message: | + Test the implementation yourself and review the code changes. + Provide specific feedback on what needs fixing, or say "approved" to finalize. + + # ═══════════════════════════════════════════════════════════════ + # PHASE 5: FINALIZE — Push, create PR, generate summary + # ═══════════════════════════════════════════════════════════════ + + - id: finalize + depends_on: [fix-feedback, implement-setup] + context: fresh + prompt: | + # PIV Loop — Finalize + + The implementation has been approved. Push changes and create a PR. + + **Plan file** (the exact one `create-plan` wrote and `implement` worked on — + use THIS as the source of truth for PR title and body. Do NOT search the repo + for other plan files): + + Parse the `PLAN_FILE=...` line from the implement-setup context below. If for + any reason that line is missing, re-derive with: + `ls -t .claude/archon/plans/*.plan.md | head -1` + + Do NOT read plan files under `docs/plans/` or any other location — those + belong to other features and will pollute the PR title/body.
+ + **Implementation setup context** (contains `PLAN_FILE=...`): + $implement-setup.output + + --- + + ## Step 1: Push Changes + + ```bash + git push -u origin HEAD 2>&1 || true + ``` + + ## Step 2: Generate Summary + + ```bash + git log --oneline --no-merges $(git merge-base HEAD $BASE_BRANCH)..HEAD + git diff --stat $(git merge-base HEAD $BASE_BRANCH)..HEAD + ``` + + Read the plan file from the `PLAN_FILE` path you parsed above (NOT any plan in + `docs/plans/`). The feature name for the PR title comes from the + `# Feature: {Title}` heading of THIS plan file only. + + Read progress tracking from `$ARTIFACTS_DIR/progress.txt` for task completion + context. + + ## Step 3: Create PR (if not already created) + + ```bash + gh pr view --json url 2>/dev/null || echo "NO_PR" + ``` + + If no PR exists: + + ```bash + cat .github/pull_request_template.md 2>/dev/null || echo "NO_TEMPLATE" + ``` + + Create with `gh pr create --draft --base $BASE_BRANCH`: + - Title from the plan's feature name + - Body summarizing the implementation + - Use a HEREDOC for the body + + ## Step 4: Output Summary + + ``` + =============================================================== + PIV LOOP — COMPLETE + =============================================================== + + Feature: {from plan} + Plan: {plan file path} + Branch: {branch name} + PR: {url} + + -- Tasks Completed ----------------------------------------------- + {list from progress tracking} + + -- Commits ------------------------------------------------------- + {git log output} + + -- Files Changed ------------------------------------------------- + {git diff --stat output} + + -- Validation ---------------------------------------------------- + {validation results as reported during review, or "not re-run during finalize"}
+ =============================================================== + ``` diff --git a/.gitignore b/.gitignore index a2f33c5d5c..af640ebf95 100644 --- a/.gitignore +++ b/.gitignore @@ -44,6 +44,7 @@ e2e-screenshots/ # Archon logs and artifacts (generated at runtime) .archon/logs/ .archon/artifacts/ +artifacts/ # Agent artifacts (generated, local only) .agents/ From 7f90ab003feb6a23bf0bea7214b6025e7a64a6ee Mon Sep 17 00:00:00 2001 From: matzls Date: Sun, 12 Apr 2026 10:23:50 +0200 Subject: [PATCH 06/21] fix(orchestrator): persist slash-prefixed telegram prompts Reuse the deterministic slash-command allowlist for Telegram user-message persistence so slash-prefixed AI prompts are stored while ephemeral commands still skip persistence. Add a regression test covering /etc/hosts and stabilize the command-parser mock in the Telegram persistence test block. Co-authored-by: Codex --- .../orchestrator/orchestrator-agent.test.ts | 43 ++++++++++++++++++- .../src/orchestrator/orchestrator-agent.ts | 39 ++++++++++------- 2 files changed, 66 insertions(+), 16 deletions(-) diff --git a/packages/core/src/orchestrator/orchestrator-agent.test.ts b/packages/core/src/orchestrator/orchestrator-agent.test.ts index 1ea40c129c..a89ae47da8 100644 --- a/packages/core/src/orchestrator/orchestrator-agent.test.ts +++ b/packages/core/src/orchestrator/orchestrator-agent.test.ts @@ -1450,6 +1450,7 @@ describe('telegram user-message persistence', () => { mockGetCodebase.mockReset(); mockListCodebases.mockReset(); mockDiscoverWorkflowsWithConfig.mockReset(); + mockHandleCommand.mockReset(); mockParseCommand.mockReset(); mockGetOrCreateConversation.mockImplementation(() => Promise.resolve(null)); @@ -1458,6 +1459,28 @@ describe('telegram user-message persistence', () => { mockDiscoverWorkflowsWithConfig.mockImplementation(() => Promise.resolve({ workflows: [], errors: [] }) ); + mockHandleCommand.mockImplementation(() => + Promise.resolve({ success: true, message: 'ok', workflow: undefined }) + ); + 
mockParseCommand.mockImplementation((text: string) => { + const matches = text.match(/"[^"]+"|'[^']+'|\S+/g) ?? []; + if (matches.length === 0 || !matches[0] || !matches[0].startsWith('/')) { + return { command: '', args: [] }; + } + + return { + command: matches[0].slice(1), + args: matches.slice(1).map(arg => { + if ( + (arg.startsWith('"') && arg.endsWith('"')) || + (arg.startsWith("'") && arg.endsWith("'")) + ) { + return arg.slice(1, -1); + } + return arg; + }), + }; + }); }); test('natural-language telegram message is persisted as user turn', async () => { @@ -1489,7 +1512,6 @@ describe('telegram user-message persistence', () => { title: null, }); mockGetOrCreateConversation.mockReturnValueOnce(Promise.resolve(conversation)); - mockParseCommand.mockReturnValueOnce({ command: 'help', args: [] }); mockHandleCommand.mockReturnValueOnce( Promise.resolve({ success: true, message: 'help text', workflow: undefined }) ); @@ -1504,6 +1526,25 @@ describe('telegram user-message persistence', () => { expect(assistantCalls).toHaveLength(0); }); + test('slash-prefixed AI prompt (/etc/hosts) is persisted as user turn', async () => { + const conversation = makeConversation({ + id: 'telegram-conv-db-id', + platform_type: 'telegram', + platform_conversation_id: '8579582275', + title: null, + }); + mockGetOrCreateConversation.mockReturnValueOnce(Promise.resolve(conversation)); + const platform = makeTelegramPlatform(); + await handleMessage(platform, '8579582275', '/etc/hosts'); + + const userCalls = mockAddMessage.mock.calls.filter(c => c[1] === 'user'); + expect(userCalls).toHaveLength(1); + expect(userCalls[0]?.[0]).toBe('telegram-conv-db-id'); + expect(userCalls[0]?.[2]).toBe('/etc/hosts'); + expect(userCalls[0]?.[3]).toEqual({ platformType: 'telegram' }); + expect(mockHandleCommand).not.toHaveBeenCalled(); + }); + test('web platform does not trigger centralized persistence path', async () => { const conversation = makeConversation({ id: 'web-conv-db-id', diff --git 
a/packages/core/src/orchestrator/orchestrator-agent.ts b/packages/core/src/orchestrator/orchestrator-agent.ts index 07f5678c18..5945e13b60 100644 --- a/packages/core/src/orchestrator/orchestrator-agent.ts +++ b/packages/core/src/orchestrator/orchestrator-agent.ts @@ -62,6 +62,18 @@ function getLog(): ReturnType { const MAX_BATCH_ASSISTANT_CHUNKS = 20; /** Max total chunks (assistant + tool) to keep in batch mode */ const MAX_BATCH_TOTAL_CHUNKS = 200; +const DETERMINISTIC_COMMANDS = new Set([ + 'help', + 'status', + 'reset', + 'workflow', + 'register-project', + 'update-project', + 'remove-project', + 'commands', + 'init', + 'worktree', +]); // ─── Types ────────────────────────────────────────────────────────────────── @@ -99,6 +111,15 @@ function findCodebaseByName( }); } +function isDeterministicCommandMessage(message: string): boolean { + if (!message.startsWith('/')) { + return false; + } + + const { command } = commandHandler.parseCommand(message); + return DETERMINISTIC_COMMANDS.has(command); +} + /** * Parse orchestrator commands from AI response text. * Scans for /invoke-workflow and /register-project patterns. @@ -541,7 +562,7 @@ export async function handleMessage( // this by design — they're ephemeral utility chatter, not conversation // content. Gated before the approval-routing block on purpose so that // natural-language approval responses ARE captured. - if (platform.getPlatformType() === 'telegram' && !message.startsWith('/')) { + if (platform.getPlatformType() === 'telegram' && !isDeterministicCommandMessage(message)) { try { await messageDb.addMessage(conversation.id, 'user', message, { platformType: 'telegram', @@ -673,20 +694,8 @@ export async function handleMessage( // 2. 
Check for deterministic commands if (message.startsWith('/')) { const { command } = commandHandler.parseCommand(message); - const deterministicCommands = [ - 'help', - 'status', - 'reset', - 'workflow', - 'register-project', - 'update-project', - 'remove-project', - 'commands', - 'init', - 'worktree', - ]; - - if (deterministicCommands.includes(command)) { + + if (DETERMINISTIC_COMMANDS.has(command)) { if (command === 'register-project') { getLog().debug({ command, conversationId }, 'deterministic_command'); const result = await handleRegisterProject(message, platform, conversationId); From 03880b727a257071d7afcb0574481b736e9df03a Mon Sep 17 00:00:00 2001 From: matzls Date: Sun, 12 Apr 2026 11:37:48 +0200 Subject: [PATCH 07/21] feat(workflows): add archon-assist-codex assist lane Context: preserve and land the Codex-specific assist workflow onto current dev while keeping the newer telegram persistence behavior already present on dev. Change: - add the bundled archon-assist-codex command and workflow defaults plus the tracked Archon skill files they depend on - default continue and orchestrator assist routing to archon-assist-codex when the assistant type is codex - extend server, web, docs, and core test coverage for the new workflow and the assistant-aware prompt-builder signatures Validation: - bun test packages/cli/src/commands/continue.test.ts - bun test packages/core/src/orchestrator/prompt-builder.test.ts - bun test packages/core/src/orchestrator/orchestrator.test.ts - bun test packages/server/src/routes/api.health.test.ts - bun test packages/server/src/routes/api.workflows.test.ts - bun test packages/web/src/lib/workflow-metadata.test.ts - bun test packages/workflows/src/defaults/bundled-defaults.test.ts - bun --filter @archon/cli type-check - bun --filter @archon/core type-check - bun --filter @archon/server type-check - bun --filter @archon/workflows type-check - bun --filter @archon/web type-check - bun run validate Codex-Session: 
019d80c8-3cb7-79b1-8443-d09a42cb5020 Codex-Rollout: sessions/2026/04/12/rollout-2026-04-12T10-21-39-019d80c8-3cb7-79b1-8443-d09a42cb5020.jsonl Co-authored-by: Codex --- .agents/skills/archon/SKILL.md | 104 +++++++++++++++ .agents/skills/archon/agents/openai.yaml | 8 ++ .../commands/defaults/archon-assist-codex.md | 54 ++++++++ .../defaults/archon-assist-codex.yaml | 15 +++ .gitignore | 4 +- packages/cli/package.json | 2 +- packages/cli/src/cli.ts | 2 +- packages/cli/src/commands/continue.test.ts | 124 ++++++++++++++++++ packages/cli/src/commands/continue.ts | 28 +++- .../src/orchestrator/orchestrator-agent.ts | 9 +- .../src/orchestrator/orchestrator.test.ts | 9 +- .../src/orchestrator/prompt-builder.test.ts | 20 ++- .../core/src/orchestrator/prompt-builder.ts | 27 ++-- .../content/docs/book/essential-workflows.md | 15 ++- .../content/docs/book/first-five-minutes.md | 3 +- .../src/content/docs/book/quick-reference.md | 4 +- .../content/docs/getting-started/overview.md | 6 +- .../docs-web/src/content/docs/guides/index.md | 1 + .../src/content/docs/reference/cli.md | 4 + packages/server/src/routes/api.health.test.ts | 14 +- .../server/src/routes/api.workflows.test.ts | 39 +++++- .../web/src/lib/workflow-metadata.test.ts | 5 + .../src/defaults/bundled-defaults.test.ts | 6 +- .../src/defaults/bundled-defaults.ts | 8 +- 24 files changed, 478 insertions(+), 33 deletions(-) create mode 100644 .agents/skills/archon/SKILL.md create mode 100644 .agents/skills/archon/agents/openai.yaml create mode 100644 .archon/commands/defaults/archon-assist-codex.md create mode 100644 .archon/workflows/defaults/archon-assist-codex.yaml create mode 100644 packages/cli/src/commands/continue.test.ts diff --git a/.agents/skills/archon/SKILL.md b/.agents/skills/archon/SKILL.md new file mode 100644 index 0000000000..016002ad51 --- /dev/null +++ b/.agents/skills/archon/SKILL.md @@ -0,0 +1,104 @@ +--- +name: archon +description: | + Use when the user wants Codex to run or monitor Archon workflows, 
or when a task + should be delegated from Codex into an Archon workflow instead of being handled + directly in the current session. + Triggers: "use archon", "run archon", "archon workflow", "archon assist", + "codex archon assist", "have archon handle this", "use archon codex". + Also use when the user wants help choosing the Codex-safe Archon workflow for a task. + NOT for: Direct local implementation when the user wants Codex to do the work here + without handing off to Archon. +--- + +# Archon For Codex + +Archon runs long-form workflows through its own CLI and workflow engine. In Codex, +this skill exists to route work into the right Archon workflow and to avoid +Claude-specific workflow names or assumptions. + +## First Step + +Check the available workflows before suggesting or running one: + +```bash +archon workflow list --json +``` + +If `archon` is unavailable, report that the Archon CLI is not installed or not on +`PATH`. Do not perform setup unless the user explicitly asks. + +## Codex Naming Convention + +Prefer Archon workflows ending in `-codex` when they exist. That suffix indicates +the workflow has been tuned or separated for Codex behavior. + +Known Codex-specific lanes in this repo: + +- `archon-assist-codex` for general Archon help, debugging, exploration, and + one-off questions +- `archon-piv-loop-codex` for guided Plan-Implement-Validate workflows with + Codex + +If the user asks for a general Archon task and a Codex-specific workflow exists, +prefer that workflow over the Claude/default variant. + +If the user explicitly names a Claude-tuned workflow, respect that request but +warn when the workflow includes Claude-only features that Codex ignores. + +## Codex Limitations In Archon + +Archon already warns when a Codex workflow node contains Claude-only features. 
+Plan around those limits instead of assuming they work: + +- node-level `skills` +- node-level `hooks` +- node-level `mcp` +- node-level `allowed_tools` +- node-level `denied_tools` + +When a workflow relies on those features, prefer a `-codex` workflow if one +exists. Otherwise tell the user the workflow may run with degraded behavior on +Codex. + +## Running Workflows + +Use explicit workflow names whenever possible. + +General Codex assist: + +```bash +archon workflow run archon-assist-codex --branch <branch-name> "<message>" +``` + +Guided Codex PIV: + +```bash +archon workflow run archon-piv-loop-codex --branch <branch-name> "<message>" +``` + +Rules: + +1. Use `--branch` unless the user explicitly wants `--no-worktree`. +2. Use descriptive branch names, for example `assist/codex-readme` or + `piv/codex-auth-refactor`. +3. For read-only questions or exploration, `--no-worktree` is acceptable. +4. Prefer one Archon workflow per command rather than combining unrelated tasks. + +## Monitoring + +Use: + +```bash +archon workflow status --json +``` + +When an interactive workflow pauses, relay the workflow's question clearly and +pass the user's answer back through the Archon approval or reject command rather +than trying to continue locally. + +## Repo Guidance + +Do not assume Codex auto-loaded `CLAUDE.md` even if a fallback filename is +configured globally. If repo conventions are load-bearing for the delegated task, +read `CLAUDE.md` explicitly before recommending or running the workflow. diff --git a/.agents/skills/archon/agents/openai.yaml b/.agents/skills/archon/agents/openai.yaml new file mode 100644 index 0000000000..aef294ba55 --- /dev/null +++ b/.agents/skills/archon/agents/openai.yaml @@ -0,0 +1,8 @@ +interface: + display_name: "Archon" + short_description: "Run Codex-safe Archon workflows from this repo."
+ default_prompt: "Use Archon to " + +policy: + allow_implicit_invocation: true + diff --git a/.archon/commands/defaults/archon-assist-codex.md b/.archon/commands/defaults/archon-assist-codex.md new file mode 100644 index 0000000000..82bf363d3c --- /dev/null +++ b/.archon/commands/defaults/archon-assist-codex.md @@ -0,0 +1,54 @@ +--- +description: General Codex assistance - questions, debugging, one-off tasks, exploration +argument-hint: +--- + +# Codex Assist Mode + +**Request**: $ARGUMENTS + +--- + +You are helping with a request that did not match a more specific Codex-safe +workflow. + +## Instructions + +1. **Understand the request** - Identify whether this is a question, debugging + task, repo exploration, a one-off change, or a CI/problem investigation. +2. **Ground yourself in the repo** - Search the codebase, read the relevant + files, and understand the current implementation before acting. +3. **Read repo guidance explicitly when needed** + - Read `AGENTS.md` if it exists. + - Read `CLAUDE.md` if it exists and the task depends on repo conventions, + architecture guidance, or workflow rules stored there. + - Do not assume `CLAUDE.md` was automatically loaded by Codex. +4. **Use Codex capabilities directly** - Read and edit files, run commands, + inspect git state, and validate relevant changes. +5. **Call out routing gaps** - If this should have been a narrower Codex + workflow, mention: + "Note: Using archon-assist-codex. Consider creating or using a more specific + Codex workflow for this use case." + +## Guardrails + +- Prefer small, reversible changes. +- Use project-defined validation commands when relevant. +- Report validation failures honestly. +- Do not rely on Claude-only workflow-node features such as `skills`, `hooks`, + `mcp`, `allowed_tools`, or `denied_tools`. +- If the user explicitly wants the Claude-oriented assist lane instead, say so + and route them to `archon-assist`. 
+ +## Capabilities + +You have full Codex capabilities as configured by Archon: +- Read and write files +- Run commands +- Search the codebase +- Make code changes +- Answer questions + +## Request + +$ARGUMENTS diff --git a/.archon/workflows/defaults/archon-assist-codex.yaml b/.archon/workflows/defaults/archon-assist-codex.yaml new file mode 100644 index 0000000000..e8d3ef59f8 --- /dev/null +++ b/.archon/workflows/defaults/archon-assist-codex.yaml @@ -0,0 +1,15 @@ +name: archon-assist-codex +description: | + Use when: No other Codex-safe workflow matches the request, or the user wants general help through Codex. + Triggers: "codex assist", "assist codex", "archon assist codex", "codex archon", + "use archon codex", "general help codex", "codex workflow help". + Handles: Questions, debugging, exploration, one-off tasks, explanations, CI failures, general help. + Capability: Full Codex agent session with file, shell, git, and network access as configured by Archon. + NOT for: Claude-tuned assist mode (use archon-assist) or guided Codex PIV work (use archon-piv-loop-codex). + Note: Will inform user when Codex assist mode is used for tracking. 
+ +provider: codex + +nodes: + - id: assist + command: archon-assist-codex diff --git a/.gitignore b/.gitignore index af640ebf95..2f314ac2f3 100644 --- a/.gitignore +++ b/.gitignore @@ -47,7 +47,9 @@ e2e-screenshots/ artifacts/ # Agent artifacts (generated, local only) -.agents/ +.agents/* +!.agents/skills/ +!.agents/skills/** .agents/rca-reports/ .agents/plans/ .agents/pr-reviews diff --git a/packages/cli/package.json b/packages/cli/package.json index 9804ad7319..01f037c327 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -8,7 +8,7 @@ }, "scripts": { "cli": "bun src/cli.ts", - "test": "bun test src/commands/version.test.ts src/commands/setup.test.ts && bun test src/commands/workflow.test.ts && bun test src/commands/isolation.test.ts && bun test src/commands/chat.test.ts && bun test src/commands/serve.test.ts", + "test": "bun test src/commands/version.test.ts src/commands/setup.test.ts && bun test src/commands/workflow.test.ts && bun test src/commands/continue.test.ts && bun test src/commands/isolation.test.ts && bun test src/commands/chat.test.ts && bun test src/commands/serve.test.ts", "type-check": "bun x tsc --noEmit" }, "dependencies": { diff --git a/packages/cli/src/cli.ts b/packages/cli/src/cli.ts index 96c0209666..f03854126f 100755 --- a/packages/cli/src/cli.ts +++ b/packages/cli/src/cli.ts @@ -120,7 +120,7 @@ Options: --quiet, -q Reduce log verbosity to warnings and errors only --verbose, -v Show debug-level output --json Output machine-readable JSON (for workflow list) - --workflow Workflow to run for 'continue' (default: archon-assist) + --workflow Workflow to run for 'continue' (default: archon-assist or archon-assist-codex by assistant) --no-context Skip context injection for 'continue' --allow-env-keys Grant env-key consent during auto-registration (bypasses the env-leak gate for this codebase; diff --git a/packages/cli/src/commands/continue.test.ts b/packages/cli/src/commands/continue.test.ts new file mode 100644 index 
0000000000..3da05480b9 --- /dev/null +++ b/packages/cli/src/commands/continue.test.ts @@ -0,0 +1,124 @@ +import { describe, test, expect, mock, beforeEach, spyOn } from 'bun:test'; +import { continueCommand } from './continue'; + +const mockWorkflowRunCommand = mock(async () => {}); +const mockFindActiveByBranchName = mock(async () => ({ + codebase_id: 'cb-1', + working_path: '/tmp/project', + branch_name: 'feature/test', +})); +const mockGetCodebase = mock(async () => ({ + id: 'cb-1', + ai_assistant_type: 'claude', +})); +const mockLoadConfig = mock(async () => ({ assistant: 'claude' })); +const mockFindLatestRunByWorkingPath = mock(async () => null); + +mock.module('./workflow', () => ({ + workflowRunCommand: mockWorkflowRunCommand, +})); + +mock.module('@archon/core/db/isolation-environments', () => ({ + findActiveByBranchName: mockFindActiveByBranchName, +})); + +mock.module('@archon/core/db/codebases', () => ({ + getCodebase: mockGetCodebase, +})); + +mock.module('@archon/core/db/workflows', () => ({ + findLatestRunByWorkingPath: mockFindLatestRunByWorkingPath, +})); + +mock.module('@archon/core', () => ({ + loadConfig: mockLoadConfig, +})); + +mock.module('@archon/git', () => ({ + execFileAsync: mock(async () => ({ stdout: '', stderr: '' })), +})); + +mock.module('@archon/paths', () => ({ + createLogger: () => ({ + fatal: mock(() => undefined), + error: mock(() => undefined), + warn: mock(() => undefined), + info: mock(() => undefined), + debug: mock(() => undefined), + trace: mock(() => undefined), + child: mock(function (this: unknown) { + return this; + }), + }), + getRunArtifactsPath: mock(() => '/tmp/artifacts'), + parseOwnerRepo: mock(() => null), +})); + +describe('continueCommand', () => { + const consoleLogSpy = spyOn(console, 'log').mockImplementation(() => {}); + + beforeEach(() => { + mockWorkflowRunCommand.mockClear(); + mockFindActiveByBranchName.mockClear(); + mockGetCodebase.mockClear(); + mockLoadConfig.mockClear(); + 
mockFindLatestRunByWorkingPath.mockClear(); + consoleLogSpy.mockClear(); + }); + + test('defaults to Claude assist workflow for Claude codebases', async () => { + mockGetCodebase.mockResolvedValueOnce({ id: 'cb-1', ai_assistant_type: 'claude' }); + + await continueCommand('feature/test', 'continue please', { noContext: true }); + + expect(mockWorkflowRunCommand).toHaveBeenCalledWith( + '/tmp/project', + 'archon-assist', + 'continue please', + expect.objectContaining({ noWorktree: true, codebaseId: 'cb-1' }) + ); + }); + + test('defaults to Codex assist workflow for Codex codebases', async () => { + mockGetCodebase.mockResolvedValueOnce({ id: 'cb-1', ai_assistant_type: 'codex' }); + + await continueCommand('feature/test', 'continue please', { noContext: true }); + + expect(mockWorkflowRunCommand).toHaveBeenCalledWith( + '/tmp/project', + 'archon-assist-codex', + 'continue please', + expect.objectContaining({ noWorktree: true, codebaseId: 'cb-1' }) + ); + }); + + test('falls back to config when codebase assistant is unavailable', async () => { + mockGetCodebase.mockResolvedValueOnce(null); + mockLoadConfig.mockResolvedValueOnce({ assistant: 'codex' }); + + await continueCommand('feature/test', 'continue please', { noContext: true }); + + expect(mockWorkflowRunCommand).toHaveBeenCalledWith( + '/tmp/project', + 'archon-assist-codex', + 'continue please', + expect.objectContaining({ noWorktree: true, codebaseId: 'cb-1' }) + ); + }); + + test('respects explicit workflow override', async () => { + mockGetCodebase.mockResolvedValueOnce({ id: 'cb-1', ai_assistant_type: 'codex' }); + + await continueCommand('feature/test', 'continue please', { + noContext: true, + workflow: 'archon-smart-pr-review', + }); + + expect(mockWorkflowRunCommand).toHaveBeenCalledWith( + '/tmp/project', + 'archon-smart-pr-review', + 'continue please', + expect.objectContaining({ noWorktree: true, codebaseId: 'cb-1' }) + ); + }); +}); diff --git a/packages/cli/src/commands/continue.ts 
b/packages/cli/src/commands/continue.ts index bec7d79c67..ce2819f0a3 100644 --- a/packages/cli/src/commands/continue.ts +++ b/packages/cli/src/commands/continue.ts @@ -5,6 +5,7 @@ import { workflowRunCommand } from './workflow'; import * as isolationDb from '@archon/core/db/isolation-environments'; import * as codebaseDb from '@archon/core/db/codebases'; import * as workflowDb from '@archon/core/db/workflows'; +import { loadConfig } from '@archon/core'; import { execFileAsync } from '@archon/git'; import { createLogger, getRunArtifactsPath, parseOwnerRepo } from '@archon/paths'; import type { WorkflowRun } from '@archon/workflows/schemas/workflow-run'; @@ -24,6 +25,7 @@ export interface ContinueOptions { } const DEFAULT_WORKFLOW = 'archon-assist'; +const DEFAULT_CODEX_WORKFLOW = 'archon-assist-codex'; /** * Continue work on an existing worktree with prior run context injected. @@ -37,8 +39,6 @@ export async function continueCommand( userMessage: string, options: ContinueOptions = {} ): Promise { - const workflowName = options.workflow ?? DEFAULT_WORKFLOW; - // 1. Resolve branch → isolation environment const env = await isolationDb.findActiveByBranchName(branch); if (!env) { @@ -48,6 +48,9 @@ export async function continueCommand( ); } + const workflowName = + options.workflow ?? (await getDefaultContinueWorkflow(env.working_path, env.codebase_id)); + // 2. Find prior run on this worktree path const priorRun = await workflowDb.findLatestRunByWorkingPath(env.working_path); @@ -86,6 +89,27 @@ export async function continueCommand( } } +async function getDefaultContinueWorkflow(cwd: string, codebaseId: string): Promise { + try { + const codebase = await codebaseDb.getCodebase(codebaseId); + if (codebase?.ai_assistant_type === 'codex') { + return DEFAULT_CODEX_WORKFLOW; + } + if (codebase?.ai_assistant_type === 'claude') { + return DEFAULT_WORKFLOW; + } + } catch { + // Fall through to config lookup. 
+ } + + try { + const config = await loadConfig(cwd); + return config.assistant === 'codex' ? DEFAULT_CODEX_WORKFLOW : DEFAULT_WORKFLOW; + } catch { + return DEFAULT_WORKFLOW; + } +} + /** * Build a markdown context preamble from git state and prior run artifacts. * Each section is independently try/caught — failures produce empty strings, never throw. diff --git a/packages/core/src/orchestrator/orchestrator-agent.ts b/packages/core/src/orchestrator/orchestrator-agent.ts index 5945e13b60..430b71f719 100644 --- a/packages/core/src/orchestrator/orchestrator-agent.ts +++ b/packages/core/src/orchestrator/orchestrator-agent.ts @@ -480,8 +480,13 @@ function buildFullPrompt( : undefined; const systemPrompt = scopedCodebase - ? buildProjectScopedPrompt(scopedCodebase, codebases, workflows) - : buildOrchestratorPrompt(codebases, workflows); + ? buildProjectScopedPrompt( + scopedCodebase, + codebases, + workflows, + scopedCodebase.ai_assistant_type || conversation.ai_assistant_type + ) + : buildOrchestratorPrompt(codebases, workflows, conversation.ai_assistant_type); const contextSuffix = issueContext ? 
'\n\n---\n\n## Additional Context\n\n' + issueContext : ''; diff --git a/packages/core/src/orchestrator/orchestrator.test.ts b/packages/core/src/orchestrator/orchestrator.test.ts index d5e81038da..86770ed4b5 100644 --- a/packages/core/src/orchestrator/orchestrator.test.ts +++ b/packages/core/src/orchestrator/orchestrator.test.ts @@ -619,7 +619,11 @@ describe('orchestrator-agent handleMessage', () => { await handleMessage(platform, 'chat-456', 'help me'); expect(mockListCodebases).toHaveBeenCalled(); - expect(mockBuildOrchestratorPrompt).toHaveBeenCalledWith([mockCodebase], expect.any(Array)); + expect(mockBuildOrchestratorPrompt).toHaveBeenCalledWith( + [mockCodebase], + expect.any(Array), + 'claude' + ); }); test('builds project-scoped prompt when conversation has codebase_id', async () => { @@ -636,7 +640,8 @@ describe('orchestrator-agent handleMessage', () => { expect(mockBuildProjectScopedPrompt).toHaveBeenCalledWith( mockCodebase, [mockCodebase], - expect.any(Array) + expect.any(Array), + 'claude' ); }); diff --git a/packages/core/src/orchestrator/prompt-builder.test.ts b/packages/core/src/orchestrator/prompt-builder.test.ts index 7a734950b1..6d7cccbd23 100644 --- a/packages/core/src/orchestrator/prompt-builder.test.ts +++ b/packages/core/src/orchestrator/prompt-builder.test.ts @@ -1,5 +1,5 @@ import { describe, test, expect } from 'bun:test'; -import { buildRoutingRulesWithProject } from './prompt-builder'; +import { buildRoutingRulesWithProject, getAssistWorkflowName } from './prompt-builder'; describe('buildRoutingRulesWithProject', () => { test('routing rules include --prompt in invocation format', () => { @@ -30,4 +30,22 @@ describe('buildRoutingRulesWithProject', () => { expect(rules).toContain('NO knowledge of the conversation history'); }); + + test('uses Codex assist workflow when assistant type is codex', () => { + const rules = buildRoutingRulesWithProject('my-project', 'codex'); + + expect(rules).toContain('**archon-assist-codex**'); + 
expect(rules).toContain('/invoke-workflow archon-assist-codex --project my-project'); + }); +}); + +describe('getAssistWorkflowName', () => { + test('returns Codex assist workflow for codex assistant', () => { + expect(getAssistWorkflowName('codex')).toBe('archon-assist-codex'); + }); + + test('returns Claude assist workflow by default', () => { + expect(getAssistWorkflowName('claude')).toBe('archon-assist'); + expect(getAssistWorkflowName()).toBe('archon-assist'); + }); }); diff --git a/packages/core/src/orchestrator/prompt-builder.ts b/packages/core/src/orchestrator/prompt-builder.ts index d5f307db5b..b8482240c4 100644 --- a/packages/core/src/orchestrator/prompt-builder.ts +++ b/packages/core/src/orchestrator/prompt-builder.ts @@ -6,6 +6,10 @@ import type { Codebase } from '../types'; import type { WorkflowDefinition } from '@archon/workflows/schemas/workflow'; +export function getAssistWorkflowName(assistantType?: string): string { + return assistantType === 'codex' ? 'archon-assist-codex' : 'archon-assist'; +} + /** * Format a single project for the orchestrator prompt. */ @@ -40,18 +44,19 @@ export function formatWorkflowSection(workflows: readonly WorkflowDefinition[]): /** * Build the routing rules section of the prompt. */ -export function buildRoutingRules(): string { - return buildRoutingRulesWithProject(); +export function buildRoutingRules(assistantType?: string): string { + return buildRoutingRulesWithProject(undefined, assistantType); } /** * Build the routing rules section, optionally scoped to a specific project. * When projectName is provided, rule #4 defaults to that project instead of asking. */ -export function buildRoutingRulesWithProject(projectName?: string): string { +export function buildRoutingRulesWithProject(projectName?: string, assistantType?: string): string { const rule4 = projectName ? `4. If ambiguous which project → use **${projectName}** (the active project)` : '4. 
If ambiguous which project → ask the user'; + const assistWorkflow = getAssistWorkflowName(assistantType); return `## Routing Rules @@ -77,15 +82,15 @@ Rules: Routing behavior: - If the user clearly wants work done (e.g., "create a plan for X", "implement Y", "fix Z") → include a brief explanation of what you're doing, then invoke the workflow. -- If the user is asking a question or it's unclear whether they want a workflow → answer their question directly. You may suggest a workflow by name (e.g., "I can run the **archon-assist** workflow for this if you'd like"), but do NOT include /invoke-workflow in your response. +- If the user is asking a question or it's unclear whether they want a workflow → answer their question directly. You may suggest a workflow by name (e.g., "I can run the **${assistWorkflow}** workflow for this if you'd like"), but do NOT include /invoke-workflow in your response. Example (clear intent): I'll analyze the orchestrator module architecture for you. -/invoke-workflow archon-assist --project my-project --prompt "Analyze the orchestrator module architecture: explain how it routes messages, manages sessions, and dispatches workflows to AI clients" +/invoke-workflow ${assistWorkflow} --project my-project --prompt "Analyze the orchestrator module architecture: explain how it routes messages, manages sessions, and dispatches workflows to AI clients" Example (ambiguous — answer directly): User: "What do you think about adding dark mode?" -Response: "Adding dark mode would involve... [answer the question]. If you'd like me to create a plan for this, I can run the **archon-idea-to-pr** workflow." +Response: "Adding dark mode would involve... [answer the question]. If you'd like me to create a plan for this, I can run the **archon-idea-to-pr** workflow." 
## Project Setup @@ -113,7 +118,8 @@ IMPORTANT: Always clone into ~/.archon/workspaces/{owner}/{repo}/source unless t */ export function buildOrchestratorPrompt( codebases: readonly Codebase[], - workflows: readonly WorkflowDefinition[] + workflows: readonly WorkflowDefinition[], + assistantType?: string ): string { let prompt = `# Archon Orchestrator @@ -138,7 +144,7 @@ You can answer questions directly or invoke workflows for structured development prompt += '## Available Workflows\n\n'; prompt += formatWorkflowSection(workflows); - prompt += buildRoutingRules(); + prompt += buildRoutingRules(assistantType); return prompt; } @@ -151,7 +157,8 @@ You can answer questions directly or invoke workflows for structured development export function buildProjectScopedPrompt( scopedCodebase: Codebase, allCodebases: readonly Codebase[], - workflows: readonly WorkflowDefinition[] + workflows: readonly WorkflowDefinition[], + assistantType?: string ): string { const otherCodebases = allCodebases.filter(c => c.id !== scopedCodebase.id); @@ -179,7 +186,7 @@ ${formatProjectSection(scopedCodebase)} prompt += '## Available Workflows\n\n'; prompt += formatWorkflowSection(workflows); - prompt += buildRoutingRulesWithProject(scopedCodebase.name); + prompt += buildRoutingRulesWithProject(scopedCodebase.name, assistantType); return prompt; } diff --git a/packages/docs-web/src/content/docs/book/essential-workflows.md b/packages/docs-web/src/content/docs/book/essential-workflows.md index 7d2c3bc43e..9fe4462dc0 100644 --- a/packages/docs-web/src/content/docs/book/essential-workflows.md +++ b/packages/docs-web/src/content/docs/book/essential-workflows.md @@ -21,6 +21,7 @@ What do you want to do? │ ├── Ask a question or explore the codebase │ └── archon-assist +│ └── archon-assist-codex │ ├── Fix a bug from a GitHub issue │ └── archon-fix-github-issue @@ -60,6 +61,17 @@ The starting point for anything that doesn't fit elsewhere. 
It runs a single ful archon workflow run archon-assist "What does the orchestrator do?" archon workflow run archon-assist "Why are tests failing in the auth module?" archon workflow run archon-assist "Explain the isolation system to me" ``` + +#### `archon-assist-codex` + +Codex-tuned catch-all for the same class of work when you want the assist lane +to stay on the Codex side of the house. + +```bash +archon workflow run archon-assist-codex "What does the orchestrator do?" +archon workflow run archon-assist-codex "Why are tests failing in the auth module?" +archon workflow run archon-assist-codex "Explain the isolation system to me" +``` **What it produces**: A direct answer. No PR, no artifacts — just the AI working through your question with full access to your code. @@ -209,6 +221,7 @@ archon workflow run archon-resolve-conflicts "Resolve conflicts on PR #94" | Workflow | Use When | Creates PR? | Uses Isolation? | |----------|----------|-------------|-----------------| | `archon-assist` | Questions, exploration, debugging | No | No | +| `archon-assist-codex` | Questions, exploration, debugging (Codex-tuned) | No | No | | `archon-fix-github-issue` | Fix a GitHub issue (smart routing) | Yes (draft) | Yes | | `archon-idea-to-pr` | Feature from description | Yes | Yes | | `archon-plan-to-pr` | Execute an existing plan | Yes | Yes | @@ -229,7 +242,7 @@ To see all workflows available in your current directory: archon workflow list ``` -The list shows both Archon's bundled defaults and any custom workflows in your repo's `.archon/workflows/` directory. Custom workflows override bundled ones by name — if you create a workflow named `archon-assist`, it replaces the built-in. +The list shows both Archon's bundled defaults and any custom workflows in your repo's `.archon/workflows/` directory. Custom workflows override bundled ones by name — if you create a workflow named `archon-assist`, it replaces the built-in.
Codex-tuned variants use the `-codex` suffix, for example `archon-assist-codex`. Ready to build your own? In [Chapter 7: Creating Your First Workflow →](/book/first-workflow/), you'll build one from scratch — incrementally, version by version, until you've got a mini version of `archon-idea-to-pr`. diff --git a/packages/docs-web/src/content/docs/book/first-five-minutes.md b/packages/docs-web/src/content/docs/book/first-five-minutes.md index aec29aa212..3e53d0a4f9 100644 --- a/packages/docs-web/src/content/docs/book/first-five-minutes.md +++ b/packages/docs-web/src/content/docs/book/first-five-minutes.md @@ -54,13 +54,14 @@ Navigate to any git repository on your machine, then run: cd /path/to/your/project archon workflow run archon-assist "What's the entry point for this application?" +archon workflow run archon-assist-codex "What's the entry point for this application?" ``` Archon will analyze your codebase and answer the question with full context. You'll see it thinking through your files in real time, streamed to your terminal. **You just ran your first Archon workflow.** It's a single-step workflow — one command, one AI call, one answer. Simple, but useful. -> **Tip:** `archon-assist` works for any question. "How does auth work?", "Where is the database configured?", "What does this function do?" — it's your always-available codebase expert. +> **Tip:** `archon-assist` is the default catch-all, and `archon-assist-codex` is the Codex-tuned variant. Use the Codex lane when you want general help to stay on Codex-specific prompts and workflow names. 
--- diff --git a/packages/docs-web/src/content/docs/book/quick-reference.md b/packages/docs-web/src/content/docs/book/quick-reference.md index ede87c0dab..9ed1bcfbd7 100644 --- a/packages/docs-web/src/content/docs/book/quick-reference.md +++ b/packages/docs-web/src/content/docs/book/quick-reference.md @@ -269,7 +269,8 @@ defaults: |-------|-------------|-----| | `Workflow "X" not found` | YAML file not discovered | Check file is in `.archon/workflows/` and `archon workflow list` shows it | | `Command "X" not found` | Command file missing | Check `.archon/commands/X.md` exists and `archon validate commands X` passes | -| `Routing unclear — falling back to archon-assist` | No workflow matched the input | Use an explicit workflow name: `archon workflow run my-workflow "..."` | +| `Routing unclear — falling back to archon-assist` | No workflow matched the input on Claude/default flows | Use an explicit workflow name: `archon workflow run my-workflow "..."` | +| `Routing unclear — falling back to archon-assist-codex` | No workflow matched the input on Codex-oriented flows | Use an explicit workflow name: `archon workflow run my-workflow "..."` | | `Worktree already exists for branch X` | Prior run left a worktree | Run `archon complete X` or `archon isolation cleanup` | | `Not a git repository` | Running outside a repo | `cd` into a git repo first — workflow and isolation commands require one | | `Model X is not valid for provider Y` | Provider/model mismatch | Use Claude models (`sonnet`, `opus`, `haiku`) with `provider: claude`; use other models with `provider: codex` | @@ -303,6 +304,7 @@ archon workflow run my-workflow --no-worktree "..." 
**Test a command directly** before embedding it in a workflow: ```bash archon workflow run archon-assist "/command-invoke my-command some-arg" +archon workflow run archon-assist-codex "/command-invoke my-command some-arg" ``` ### Getting Help diff --git a/packages/docs-web/src/content/docs/getting-started/overview.md b/packages/docs-web/src/content/docs/getting-started/overview.md index f1d58ae402..16e0034b4b 100644 --- a/packages/docs-web/src/content/docs/getting-started/overview.md +++ b/packages/docs-web/src/content/docs/getting-started/overview.md @@ -264,6 +264,9 @@ archon workflow list # Ask a question about the codebase archon workflow run archon-assist "How does the auth module work?" +# Codex-tuned catch-all +archon workflow run archon-assist-codex "How does the auth module work?" + # Plan a feature on an isolated branch archon workflow run archon-feature-development --branch feat/dark-mode "Add dark mode" @@ -339,6 +342,7 @@ archon complete --force # skip uncommitted-changes check | Workflow | What It Does | |----------|-------------| | `archon-assist` | General Q&A, debugging, exploration, CI failures — catch-all | +| `archon-assist-codex` | Codex-tuned catch-all for general Q&A, debugging, exploration, and CI failures | | `archon-fix-github-issue` | Investigate, root cause analysis, implement fix, validate, PR | | `archon-idea-to-pr` | Feature idea, plan, implement, validate, PR, parallel reviews, self-fix | | `archon-plan-to-pr` | Execute existing plan, implement, validate, PR, review | @@ -360,7 +364,7 @@ archon complete --force # skip uncommitted-changes check These bundled workflows work for most projects. To customize, copy one from `.archon/workflows/defaults/` into `.archon/workflows/` and modify it — same-named files override the defaults. -> **Auto-selection:** You don't need to remember workflow names. Just describe what you want — the router reads all workflow descriptions and picks the best match. 
For example, "fix issue #42" routes to `archon-fix-github-issue`, while "review this PR" routes to `archon-smart-pr-review`. If nothing matches clearly, it falls back to `archon-assist`. +> **Auto-selection:** You don't need to remember workflow names. Just describe what you want — the router reads all workflow descriptions and picks the best match. For example, "fix issue #42" routes to `archon-fix-github-issue`, while "review this PR" routes to `archon-smart-pr-review`. If nothing matches clearly, it falls back to `archon-assist` for Claude and `archon-assist-codex` for Codex-oriented flows. --- diff --git a/packages/docs-web/src/content/docs/guides/index.md b/packages/docs-web/src/content/docs/guides/index.md index 0d53209fb6..4acf1eaafe 100644 --- a/packages/docs-web/src/content/docs/guides/index.md +++ b/packages/docs-web/src/content/docs/guides/index.md @@ -34,6 +34,7 @@ Archon ships with ready-to-use workflows that cover common coding tasks. You do | Workflow | What It Does | |----------|-------------| | `archon-assist` | General Q&A, debugging, exploration -- the catch-all | +| `archon-assist-codex` | Codex-tuned catch-all for general Q&A, debugging, and exploration | | `archon-fix-github-issue` | Investigate, root cause, implement fix, validate, PR | | `archon-smart-pr-review` | Complexity-adaptive PR review | | `archon-comprehensive-pr-review` | Multi-agent PR review (5 parallel reviewers) | diff --git a/packages/docs-web/src/content/docs/reference/cli.md b/packages/docs-web/src/content/docs/reference/cli.md index f2821a1b8b..8ff186943a 100644 --- a/packages/docs-web/src/content/docs/reference/cli.md +++ b/packages/docs-web/src/content/docs/reference/cli.md @@ -146,6 +146,10 @@ Workflow names are resolved using a 4-tier fallback hierarchy. This applies cons 3. **Suffix match** - `assist` matches `archon-assist` (looks for `-assist` suffix) 4. 
**Substring match** - `smart` matches `archon-smart-pr-review` +Codex-specific workflow names use the `-codex` suffix. For example: +- `assist-codex` matches `archon-assist-codex` +- `piv-loop-codex` matches `archon-piv-loop-codex` + If multiple workflows match at the same tier, an error lists the candidates: ``` Ambiguous workflow 'review'. Did you mean: diff --git a/packages/server/src/routes/api.health.test.ts b/packages/server/src/routes/api.health.test.ts index 6cf895464e..fffd65a337 100644 --- a/packages/server/src/routes/api.health.test.ts +++ b/packages/server/src/routes/api.health.test.ts @@ -87,6 +87,7 @@ mock.module('@archon/workflows/defaults', () => ({ BUNDLED_WORKFLOWS: {}, BUNDLED_COMMANDS: { 'archon-assist': '# archon-assist command', + 'archon-assist-codex': '# archon-assist-codex command', plan: '# plan command', implement: '# implement command', }, @@ -433,7 +434,7 @@ describe('GET /api/commands', () => { const body = (await response.json()) as { commands: Array<{ name: string; source: string }> }; expect(Array.isArray(body.commands)).toBe(true); - // BUNDLED_COMMANDS mock has 3 entries + // BUNDLED_COMMANDS mock includes the bundled defaults for this test const bundledCommands = body.commands.filter(c => c.source === 'bundled'); expect(bundledCommands.length).toBeGreaterThan(0); }); @@ -449,6 +450,17 @@ describe('GET /api/commands', () => { expect(archonAssist?.source).toBe('bundled'); }); + test('includes archon-assist-codex as bundled command', async () => { + const app = makeApp(); + const response = await app.request('/api/commands'); + expect(response.status).toBe(200); + + const body = (await response.json()) as { commands: Array<{ name: string; source: string }> }; + const archonAssistCodex = body.commands.find(c => c.name === 'archon-assist-codex'); + expect(archonAssistCodex).toBeDefined(); + expect(archonAssistCodex?.source).toBe('bundled'); + }); + test('includes plan and implement as bundled commands', async () => { const app = 
makeApp(); const response = await app.request('/api/commands'); diff --git a/packages/server/src/routes/api.workflows.test.ts b/packages/server/src/routes/api.workflows.test.ts index e50b252640..6356985558 100644 --- a/packages/server/src/routes/api.workflows.test.ts +++ b/packages/server/src/routes/api.workflows.test.ts @@ -21,10 +21,20 @@ const mockDiscoverWorkflows = mock(async (_cwd: string) => ({ })); // Default: returns a valid workflow. Use mockReturnValueOnce in tests that need a parse failure. -const mockParseWorkflow = mock((_content: string, _filename: string) => ({ - workflow: makeTestWorkflow({ name: 'test', description: 'Test workflow' }), - error: null, -})); +const mockParseWorkflow = mock((content: string, _filename: string) => { + const nameMatch = /^name:\s*(.+)$/m.exec(content); + const descriptionMatch = /^description:\s*(.+)$/m.exec(content); + const providerMatch = /^provider:\s*(.+)$/m.exec(content); + + return { + workflow: makeTestWorkflow({ + name: nameMatch?.[1] ?? 'test', + description: descriptionMatch?.[1] ?? 'Test workflow', + ...(providerMatch?.[1] ? 
{ provider: providerMatch[1] } : {}), + }), + error: null, + }; +}); mock.module('@archon/core', () => ({ handleMessage: mock(async () => {}), @@ -74,9 +84,12 @@ mock.module('@archon/workflows/command-validation', () => ({ mock.module('@archon/workflows/defaults', () => ({ BUNDLED_WORKFLOWS: { 'archon-assist': 'name: archon-assist\ndescription: Archon Assist\nnodes: []', + 'archon-assist-codex': + 'name: archon-assist-codex\ndescription: Archon Assist Codex\nprovider: codex\nnodes: []', }, BUNDLED_COMMANDS: { 'archon-assist': '# archon-assist command', + 'archon-assist-codex': '# archon-assist-codex command', }, isBinaryBuild: mock(() => false), })); @@ -224,6 +237,24 @@ describe('GET /api/workflows/:name', () => { expect(body.workflow).toBeDefined(); }); + test('returns bundled Codex workflow with source:bundled', async () => { + const app = createTestApp(); + registerApiRoutes(app, {} as WebAdapter, {} as ConversationLockManager); + + mockListCodebases.mockImplementationOnce(async () => []); + + const response = await app.request('/api/workflows/archon-assist-codex'); + expect(response.status).toBe(200); + const body = (await response.json()) as { + source: string; + filename: string; + workflow: { provider?: string }; + }; + expect(body.source).toBe('bundled'); + expect(body.filename).toBe('archon-assist-codex.yaml'); + expect(body.workflow.provider).toBe('codex'); + }); + test('returns project workflow with source:project when file exists on disk', async () => { const testDir = join(tmpdir(), `wf-get-test-${Date.now()}`); const workflowDir = join(testDir, '.archon', 'workflows'); diff --git a/packages/web/src/lib/workflow-metadata.test.ts b/packages/web/src/lib/workflow-metadata.test.ts index 18af743267..8fe693fe97 100644 --- a/packages/web/src/lib/workflow-metadata.test.ts +++ b/packages/web/src/lib/workflow-metadata.test.ts @@ -135,6 +135,10 @@ describe('getWorkflowDisplayName', () => { test('handles single-word names', () => { 
expect(getWorkflowDisplayName('archon-assist')).toBe('Assist'); }); + + test('handles Codex suffixed workflow names', () => { + expect(getWorkflowDisplayName('archon-assist-codex')).toBe('Assist Codex'); + }); }); describe('getWorkflowCategory', () => { @@ -172,6 +176,7 @@ describe('getWorkflowCategory', () => { 'Development' ); expect(getWorkflowCategory('archon-assist', 'General help')).toBe('Development'); + expect(getWorkflowCategory('archon-assist-codex', 'General Codex help')).toBe('Development'); expect(getWorkflowCategory('archon-idea-to-pr', 'From idea to PR')).toBe('Development'); }); }); diff --git a/packages/workflows/src/defaults/bundled-defaults.test.ts b/packages/workflows/src/defaults/bundled-defaults.test.ts index e1e1cb5a30..b21513360f 100644 --- a/packages/workflows/src/defaults/bundled-defaults.test.ts +++ b/packages/workflows/src/defaults/bundled-defaults.test.ts @@ -16,6 +16,7 @@ describe('bundled-defaults', () => { it('should have all expected default commands', () => { const expectedCommands = [ 'archon-assist', + 'archon-assist-codex', 'archon-code-review-agent', 'archon-comment-quality-agent', 'archon-create-pr', @@ -42,7 +43,7 @@ describe('bundled-defaults', () => { expect(BUNDLED_COMMANDS).toHaveProperty(cmd); } - expect(Object.keys(BUNDLED_COMMANDS)).toHaveLength(21); + expect(Object.keys(BUNDLED_COMMANDS)).toHaveLength(22); }); it('should have non-empty content for all commands', () => { @@ -79,6 +80,7 @@ describe('bundled-defaults', () => { it('should have all expected default workflows', () => { const expectedWorkflows = [ 'archon-assist', + 'archon-assist-codex', 'archon-comprehensive-pr-review', 'archon-create-issue', 'archon-feature-development', @@ -97,7 +99,7 @@ describe('bundled-defaults', () => { expect(BUNDLED_WORKFLOWS).toHaveProperty(wf); } - expect(Object.keys(BUNDLED_WORKFLOWS)).toHaveLength(13); + expect(Object.keys(BUNDLED_WORKFLOWS)).toHaveLength(14); }); it('should have non-empty content for all workflows', () => { 
diff --git a/packages/workflows/src/defaults/bundled-defaults.ts b/packages/workflows/src/defaults/bundled-defaults.ts index a921171b9e..a4e9c166b4 100644 --- a/packages/workflows/src/defaults/bundled-defaults.ts +++ b/packages/workflows/src/defaults/bundled-defaults.ts @@ -11,10 +11,11 @@ import { BUNDLED_IS_BINARY } from '@archon/paths'; // ============================================================================= -// Default Commands (21 total) +// Default Commands (22 total) // ============================================================================= import archonAssistCmd from '../../../../.archon/commands/defaults/archon-assist.md' with { type: 'text' }; +import archonAssistCodexCmd from '../../../../.archon/commands/defaults/archon-assist-codex.md' with { type: 'text' }; import archonCodeReviewAgentCmd from '../../../../.archon/commands/defaults/archon-code-review-agent.md' with { type: 'text' }; import archonCommentQualityAgentCmd from '../../../../.archon/commands/defaults/archon-comment-quality-agent.md' with { type: 'text' }; import archonCreatePrCmd from '../../../../.archon/commands/defaults/archon-create-pr.md' with { type: 'text' }; @@ -37,10 +38,11 @@ import archonValidatePrE2eMainCmd from '../../../../.archon/commands/defaults/ar import archonValidatePrReportCmd from '../../../../.archon/commands/defaults/archon-validate-pr-report.md' with { type: 'text' }; // ============================================================================= -// Default Workflows (13 total) +// Default Workflows (14 total) // ============================================================================= import archonAssistWf from '../../../../.archon/workflows/defaults/archon-assist.yaml' with { type: 'text' }; +import archonAssistCodexWf from '../../../../.archon/workflows/defaults/archon-assist-codex.yaml' with { type: 'text' }; import archonComprehensivePrReviewWf from '../../../../.archon/workflows/defaults/archon-comprehensive-pr-review.yaml' with { type: 
'text' }; import archonCreateIssueWf from '../../../../.archon/workflows/defaults/archon-create-issue.yaml' with { type: 'text' }; import archonFeatureDevelopmentWf from '../../../../.archon/workflows/defaults/archon-feature-development.yaml' with { type: 'text' }; @@ -63,6 +65,7 @@ import archonWorkflowBuilderWf from '../../../../.archon/workflows/defaults/arch */ export const BUNDLED_COMMANDS: Record = { 'archon-assist': archonAssistCmd, + 'archon-assist-codex': archonAssistCodexCmd, 'archon-code-review-agent': archonCodeReviewAgentCmd, 'archon-comment-quality-agent': archonCommentQualityAgentCmd, 'archon-create-pr': archonCreatePrCmd, @@ -90,6 +93,7 @@ export const BUNDLED_COMMANDS: Record = { */ export const BUNDLED_WORKFLOWS: Record = { 'archon-assist': archonAssistWf, + 'archon-assist-codex': archonAssistCodexWf, 'archon-comprehensive-pr-review': archonComprehensivePrReviewWf, 'archon-create-issue': archonCreateIssueWf, 'archon-feature-development': archonFeatureDevelopmentWf, From cd91ff7350c12e3293e54a2c1f4680fa740e07a3 Mon Sep 17 00:00:00 2001 From: matzls Date: Sun, 12 Apr 2026 09:35:00 +0200 Subject: [PATCH 08/21] refactor(workflows): clean up codex piv loop Extract the detect-project workflow node into a reusable Bun script while preserving its stdout contract. Also tighten the Codex loop prompts so feedback fixes use per-file staging, scoped validation stays tool-accurate, and iteration violations fail without rewriting history. Extend typed ESLint coverage to .archon/scripts so the new script participates in the existing pre-commit checks. 
Co-authored-by: Codex --- .archon/scripts/detect-project.ts | 229 +++++++++++++++++++ .archon/scripts/tsconfig.json | 7 + .archon/workflows/archon-piv-loop-codex.yaml | 186 +++------------ eslint.config.mjs | 2 +- 4 files changed, 268 insertions(+), 156 deletions(-) create mode 100644 .archon/scripts/detect-project.ts create mode 100644 .archon/scripts/tsconfig.json diff --git a/.archon/scripts/detect-project.ts b/.archon/scripts/detect-project.ts new file mode 100644 index 0000000000..a02f775bb9 --- /dev/null +++ b/.archon/scripts/detect-project.ts @@ -0,0 +1,229 @@ +#!/usr/bin/env bun + +import { spawnSync } from 'node:child_process'; +import { existsSync, readFileSync, readdirSync } from 'node:fs'; +import { dirname, isAbsolute, join } from 'node:path'; + +interface DetectionResult { + projectType: string; + installCmd: string; + validateCmd: string; + typecheckCmd: string; + lintCmd: string; + testCmd: string; + formatCmd: string; +} + +function fileExists(path: string): boolean { + return existsSync(path); +} + +function readText(path: string): string | null { + try { + return readFileSync(path, 'utf8'); + } catch { + return null; + } +} + +function packageHasScript(name: string): boolean { + const raw = readText('package.json'); + if (raw === null) return false; + + try { + const parsed = JSON.parse(raw) as { scripts?: Record }; + return typeof parsed.scripts?.[name] === 'string'; + } catch { + return raw.includes(`"${name}"`); + } +} + +function makefileHasTarget(name: string): boolean { + const raw = readText('Makefile'); + if (raw === null) return false; + return new RegExp(`^${name}:`, 'm').test(raw); +} + +function hasPythonTestSignal(): boolean { + if (!fileExists('tests')) return false; + + try { + return readdirSync('tests').some(entry => entry.startsWith('test_') && entry.endsWith('.py')); + } catch { + return false; + } +} + +function resolveSourceRepo(): string { + const result = spawnSync('git', ['rev-parse', '--git-common-dir'], { encoding: 
'utf8' }); + if (result.status !== 0) return ''; + + const gitCommon = result.stdout.trim(); + if (gitCommon.length === 0) return ''; + + const absCommon = isAbsolute(gitCommon) ? gitCommon : join(process.cwd(), gitCommon); + return dirname(absCommon); +} + +function commandExists(command: string): boolean { + const result = spawnSync('which', [command], { stdio: 'ignore' }); + return result.status === 0; +} + +function detectProject(): DetectionResult { + const result: DetectionResult = { + projectType: 'unknown', + installCmd: '', + validateCmd: '', + typecheckCmd: '', + lintCmd: '', + testCmd: '', + formatCmd: '', + }; + + if (fileExists('bun.lock') || fileExists('bun.lockb')) { + result.projectType = 'bun'; + result.installCmd = 'bun install --frozen-lockfile'; + if (packageHasScript('validate')) result.validateCmd = 'bun run validate'; + if (packageHasScript('type-check')) result.typecheckCmd = 'bun run type-check'; + if (packageHasScript('lint')) result.lintCmd = 'bun run lint'; + if (packageHasScript('test')) result.testCmd = 'bun run test'; + if (packageHasScript('format:check')) result.formatCmd = 'bun run format:check'; + return finalize(result); + } + + if ( + fileExists('pyproject.toml') || + fileExists('requirements.txt') || + fileExists('setup.py') || + hasPythonTestSignal() + ) { + result.projectType = 'python'; + + const sourceRepo = resolveSourceRepo(); + let venvBin = ''; + if (fileExists('.venv') && fileExists('.venv/bin/python')) { + venvBin = '.venv/bin'; + } else if (sourceRepo && fileExists(join(sourceRepo, '.venv/bin/python'))) { + venvBin = join(sourceRepo, '.venv/bin'); + } + + if (venvBin) { + if (fileExists(join(venvBin, 'pytest'))) result.testCmd = `${venvBin}/pytest tests/`; + if (fileExists(join(venvBin, 'ruff'))) { + result.lintCmd = `${venvBin}/ruff check .`; + result.formatCmd = `${venvBin}/ruff format --check .`; + } + if (fileExists(join(venvBin, 'mypy'))) result.typecheckCmd = `${venvBin}/mypy .`; + if (fileExists(join(venvBin, 
'pyright'))) { + result.typecheckCmd = result.typecheckCmd + ? `${result.typecheckCmd} && ${venvBin}/pyright` + : `${venvBin}/pyright`; + } + } + + if (fileExists('pyproject.toml')) { + if (fileExists('uv.lock')) { + result.installCmd = 'uv sync'; + } else if (fileExists('poetry.lock')) { + result.installCmd = 'poetry install'; + } else if (venvBin) { + result.installCmd = `${venvBin}/python -m pip install -e .`; + } + } else if (fileExists('requirements.txt') && venvBin) { + result.installCmd = `${venvBin}/python -m pip install -r requirements.txt`; + } + + return finalize(result); + } + + if (fileExists('package.json')) { + result.projectType = 'node'; + let run = 'npm run'; + if (fileExists('pnpm-lock.yaml')) { + run = 'pnpm'; + result.installCmd = 'pnpm install --frozen-lockfile'; + } else if (fileExists('yarn.lock')) { + run = 'yarn'; + result.installCmd = 'yarn install --frozen-lockfile'; + } else { + result.installCmd = 'npm ci'; + } + + if (packageHasScript('validate')) result.validateCmd = `${run} validate`; + if (packageHasScript('type-check')) result.typecheckCmd = `${run} type-check`; + if (packageHasScript('lint')) result.lintCmd = `${run} lint`; + if (packageHasScript('test')) result.testCmd = `${run} test`; + if (packageHasScript('format:check')) result.formatCmd = `${run} format:check`; + return finalize(result); + } + + if (fileExists('go.mod')) { + result.projectType = 'go'; + result.installCmd = 'go mod download'; + result.testCmd = 'go test ./...'; + result.typecheckCmd = 'go vet ./...'; + if (commandExists('golangci-lint')) result.lintCmd = 'golangci-lint run'; + result.formatCmd = 'gofmt -l .'; + return finalize(result); + } + + if (fileExists('Cargo.toml')) { + result.projectType = 'rust'; + result.installCmd = 'cargo fetch'; + result.testCmd = 'cargo test'; + result.typecheckCmd = 'cargo check'; + result.lintCmd = 'cargo clippy -- -D warnings'; + result.formatCmd = 'cargo fmt -- --check'; + return finalize(result); + } + + if 
(fileExists('Makefile')) { + result.projectType = 'makefile'; + if (makefileHasTarget('test')) result.testCmd = 'make test'; + if (makefileHasTarget('lint')) result.lintCmd = 'make lint'; + if (makefileHasTarget('check')) result.validateCmd = 'make check'; + } + + return finalize(result); +} + +function finalize(result: DetectionResult): DetectionResult { + if (result.validateCmd.length === 0) { + const parts = [ + result.typecheckCmd, + result.lintCmd, + result.testCmd, + result.formatCmd, + ].filter(part => part.length > 0); + result.validateCmd = parts.join(' && '); + } + + return result; +} + +function emit(result: DetectionResult): void { + console.log('=== PROJECT DETECTION ==='); + console.log(`PROJECT_TYPE=${result.projectType}`); + console.log(`INSTALL_CMD=${result.installCmd}`); + console.log(`VALIDATE_CMD=${result.validateCmd}`); + console.log(`TYPECHECK_CMD=${result.typecheckCmd}`); + console.log(`LINT_CMD=${result.lintCmd}`); + console.log(`TEST_CMD=${result.testCmd}`); + console.log(`FORMAT_CMD=${result.formatCmd}`); + console.log('=== END DETECTION ==='); + + if ( + result.validateCmd.length === 0 && + result.testCmd.length === 0 && + result.typecheckCmd.length === 0 && + result.lintCmd.length === 0 + ) { + console.log(''); + console.log('NOTE: No automated validators detected in this project.'); + console.log('The implement loop will proceed without automated validation gates.'); + console.log('Human review is required before merging any changes.'); + } +} + +emit(detectProject()); diff --git a/.archon/scripts/tsconfig.json b/.archon/scripts/tsconfig.json new file mode 100644 index 0000000000..433ecdf6b4 --- /dev/null +++ b/.archon/scripts/tsconfig.json @@ -0,0 +1,7 @@ +{ + "extends": "../../tsconfig.json", + "compilerOptions": { + "noEmit": true + }, + "include": ["./**/*.ts"] +} diff --git a/.archon/workflows/archon-piv-loop-codex.yaml b/.archon/workflows/archon-piv-loop-codex.yaml index a360759657..844482b217 100644 --- 
a/.archon/workflows/archon-piv-loop-codex.yaml +++ b/.archon/workflows/archon-piv-loop-codex.yaml @@ -194,148 +194,8 @@ nodes: - id: detect-project depends_on: [explore] - bash: | - set -e - - PROJECT_TYPE="unknown" - INSTALL_CMD="" - VALIDATE_CMD="" - TYPECHECK_CMD="" - LINT_CMD="" - TEST_CMD="" - FORMAT_CMD="" - - # -------- Bun / TypeScript -------- - if [ -f "bun.lock" ] || [ -f "bun.lockb" ]; then - PROJECT_TYPE="bun" - INSTALL_CMD="bun install --frozen-lockfile" - grep -q '"validate"' package.json 2>/dev/null && VALIDATE_CMD="bun run validate" - grep -q '"type-check"' package.json 2>/dev/null && TYPECHECK_CMD="bun run type-check" - grep -q '"lint"' package.json 2>/dev/null && LINT_CMD="bun run lint" - grep -q '"test"' package.json 2>/dev/null && TEST_CMD="bun run test" - grep -q '"format:check"' package.json 2>/dev/null && FORMAT_CMD="bun run format:check" - - # -------- Python (via repo signals + source-repo venv lookup) -------- - # Triggers on any Python signal (pyproject, requirements, setup.py, or tests/). - # In a git worktree, .venv is NOT copied from the source repo (git worktree add - # only copies tracked files), so we look up the source repo via git's - # --git-common-dir and reference its .venv/ directly from the worktree. - elif [ -f "pyproject.toml" ] || [ -f "requirements.txt" ] || \ - [ -f "setup.py" ] || compgen -G "tests/test_*.py" > /dev/null 2>&1; then - PROJECT_TYPE="python" - - # Resolve source repo root (the main checkout, not the worktree). - # git rev-parse --git-common-dir returns the main .git dir (even from a worktree). - SOURCE_REPO="" - GIT_COMMON="$(git rev-parse --git-common-dir 2>/dev/null)" - if [ -n "$GIT_COMMON" ]; then - case "$GIT_COMMON" in - /*) ABS_COMMON="$GIT_COMMON" ;; - *) ABS_COMMON="$(pwd)/$GIT_COMMON" ;; - esac - SOURCE_REPO="$(dirname "$ABS_COMMON")" - fi - - # Prefer worktree-local .venv if present (unusual), else source repo .venv. 
- VENV_BIN="" - if [ -d ".venv" ] && [ -x ".venv/bin/python" ]; then - VENV_BIN=".venv/bin" - elif [ -n "$SOURCE_REPO" ] && [ -x "$SOURCE_REPO/.venv/bin/python" ]; then - VENV_BIN="$SOURCE_REPO/.venv/bin" - fi - - # Detect installed tools via the resolved VENV_BIN (most reliable). - if [ -n "$VENV_BIN" ]; then - [ -x "$VENV_BIN/pytest" ] && TEST_CMD="$VENV_BIN/pytest tests/" - [ -x "$VENV_BIN/ruff" ] && { LINT_CMD="$VENV_BIN/ruff check ."; FORMAT_CMD="$VENV_BIN/ruff format --check ."; } - [ -x "$VENV_BIN/mypy" ] && TYPECHECK_CMD="$VENV_BIN/mypy ." - [ -x "$VENV_BIN/pyright" ] && TYPECHECK_CMD="${TYPECHECK_CMD:+$TYPECHECK_CMD && }$VENV_BIN/pyright" - fi - - # Install command based on project metadata. - if [ -f "pyproject.toml" ]; then - if [ -f "uv.lock" ]; then - INSTALL_CMD="uv sync" - elif [ -f "poetry.lock" ]; then - INSTALL_CMD="poetry install" - elif [ -n "$VENV_BIN" ]; then - INSTALL_CMD="$VENV_BIN/python -m pip install -e ." - fi - elif [ -f "requirements.txt" ] && [ -n "$VENV_BIN" ]; then - INSTALL_CMD="$VENV_BIN/python -m pip install -r requirements.txt" - fi - - # -------- Node (npm/yarn/pnpm) -------- - elif [ -f "package.json" ]; then - PROJECT_TYPE="node" - if [ -f "pnpm-lock.yaml" ]; then - RUN="pnpm" - INSTALL_CMD="pnpm install --frozen-lockfile" - elif [ -f "yarn.lock" ]; then - RUN="yarn" - INSTALL_CMD="yarn install --frozen-lockfile" - else - RUN="npm run" - INSTALL_CMD="npm ci" - fi - grep -q '"validate"' package.json 2>/dev/null && VALIDATE_CMD="$RUN validate" - grep -q '"type-check"' package.json 2>/dev/null && TYPECHECK_CMD="$RUN type-check" - grep -q '"lint"' package.json 2>/dev/null && LINT_CMD="$RUN lint" - grep -q '"test"' package.json 2>/dev/null && TEST_CMD="$RUN test" - grep -q '"format:check"' package.json 2>/dev/null && FORMAT_CMD="$RUN format:check" - - # -------- Go -------- - elif [ -f "go.mod" ]; then - PROJECT_TYPE="go" - INSTALL_CMD="go mod download" - TEST_CMD="go test ./..." - TYPECHECK_CMD="go vet ./..." 
- command -v golangci-lint >/dev/null 2>&1 && LINT_CMD="golangci-lint run" - FORMAT_CMD="gofmt -l ." - - # -------- Rust -------- - elif [ -f "Cargo.toml" ]; then - PROJECT_TYPE="rust" - INSTALL_CMD="cargo fetch" - TEST_CMD="cargo test" - TYPECHECK_CMD="cargo check" - LINT_CMD="cargo clippy -- -D warnings" - FORMAT_CMD="cargo fmt -- --check" - - # -------- Makefile fallback -------- - elif [ -f "Makefile" ]; then - PROJECT_TYPE="makefile" - grep -q "^test:" Makefile 2>/dev/null && TEST_CMD="make test" - grep -q "^lint:" Makefile 2>/dev/null && LINT_CMD="make lint" - grep -q "^check:" Makefile 2>/dev/null && VALIDATE_CMD="make check" - fi - - # Compose VALIDATE_CMD from available parts if not already set - if [ -z "$VALIDATE_CMD" ]; then - PARTS="" - [ -n "$TYPECHECK_CMD" ] && PARTS="${PARTS:+$PARTS && }$TYPECHECK_CMD" - [ -n "$LINT_CMD" ] && PARTS="${PARTS:+$PARTS && }$LINT_CMD" - [ -n "$TEST_CMD" ] && PARTS="${PARTS:+$PARTS && }$TEST_CMD" - [ -n "$FORMAT_CMD" ] && PARTS="${PARTS:+$PARTS && }$FORMAT_CMD" - VALIDATE_CMD="$PARTS" - fi - - echo "=== PROJECT DETECTION ===" - echo "PROJECT_TYPE=$PROJECT_TYPE" - echo "INSTALL_CMD=${INSTALL_CMD:-}" - echo "VALIDATE_CMD=${VALIDATE_CMD:-}" - echo "TYPECHECK_CMD=${TYPECHECK_CMD:-}" - echo "LINT_CMD=${LINT_CMD:-}" - echo "TEST_CMD=${TEST_CMD:-}" - echo "FORMAT_CMD=${FORMAT_CMD:-}" - echo "=== END DETECTION ===" - - if [ -z "$VALIDATE_CMD" ] && [ -z "$TEST_CMD" ] && [ -z "$TYPECHECK_CMD" ] && [ -z "$LINT_CMD" ]; then - echo "" - echo "NOTE: No automated validators detected in this project." - echo "The implement loop will proceed without automated validation gates." - echo "Human review is required before merging any changes." - fi + script: detect-project + runtime: bun # ═══════════════════════════════════════════════════════════════ # PHASE 2: PLAN — Create the structured implementation plan @@ -821,9 +681,10 @@ nodes: **HARD RULES**: 1. 
Count your commits in this iteration via `git log HEAD ^` (the - hash you recorded in Phase 0.3). If count > 1, you did too much — revert the - extras with `git reset --mixed ` and let the next iteration pick - up the rest. + hash you recorded in Phase 0.3). If count > 1, iteration discipline was + violated. **Do NOT rewrite history automatically.** Report the violation in + the Phase 4.5 status block, state how many commits were created, and end the + iteration immediately so a human or later cleanup pass can reconcile it. 2. Verification tasks (like "VERIFY PRE-FLIGHT") count as numbered tasks and need their own iteration, even if they produce no code change. 3. If Task N is a verification-only task, your "commit" for that iteration is @@ -935,9 +796,13 @@ nodes: go in the "Pre-existing repo findings" section of Step 6's report instead. To distinguish changed-file issues from pre-existing ones: - - Run validators scoped to the changed files where possible (e.g., - `$LINT_CMD ` if the tool supports per-file args, or run the - full validator and filter its output to only lines referencing changed files) + - Use a changed-file scoped validator invocation ONLY when the command syntax + clearly supports it. + - Otherwise run the full validator and filter its output to findings that point + at files from `git diff --name-only $BASE_BRANCH..HEAD`. + - Never invent a scoped form of a repo command by appending file paths to + commands like `npm run lint`, `cargo test`, `go test ./...`, or similar + wrappers unless the command itself clearly accepts those extra args. - For each issue in a changed file: fix it, stage only that file, add it to a single scoped commit @@ -1048,12 +913,17 @@ nodes: Do not run validators. Do not make commits. Proceed directly to Step 5 for the final report and signal emission. - **If this iteration made fixes in response to user feedback**: run validators - SCOPED to the files you fixed, not the repo-wide suite. 
Example: if you fixed - `foo.py`, run `$LINT_CMD foo.py` and `$TEST_CMD tests/test_foo.py` if those - tools accept per-file arguments. Do NOT run `ruff check .` or equivalent - repo-wide commands — the goal is "did my fixes work?", not "is the whole repo - healthy?". + **If this iteration made fixes in response to user feedback**: validate ONLY + the files you changed in this iteration, not the repo-wide suite. + - Use a file-scoped validator invocation only when the command syntax clearly + supports it. + - Otherwise run the full validator and filter the findings to the files you + just changed. + - Never invent a file-scoped form of `npm run lint`, `cargo test`, + `go test ./...`, or any similar wrapper by simply appending paths unless the + command itself clearly accepts those extra args. + Do NOT run `ruff check .` or equivalent repo-wide commands as a release gate + here — the goal is "did my fixes work?", not "is the whole repo healthy?". If scoped validation fails, attempt one more fix pass on the same file(s). If still failing, report to the user in Step 5 and do NOT emit VALIDATED. @@ -1062,8 +932,14 @@ nodes: ## Step 4: Commit Fixes + If this iteration is an approval-only iteration, SKIP this step entirely. + Commit only when you actually made code changes in response to feedback. 
+ + Stage explicitly by file path and confirm the staged set before committing: + ```bash - git add -A + git add <changed-files> # only the files changed in THIS feedback pass + git diff --cached --stat git commit -m "$(cat <<'EOF' fix: address review feedback diff --git a/eslint.config.mjs b/eslint.config.mjs index 69bf635bd5..ec06379d32 100644 --- a/eslint.config.mjs +++ b/eslint.config.mjs @@ -40,7 +40,7 @@ export default tseslint.config( // Project-specific settings { - files: ['packages/*/src/**/*.{ts,tsx}'], + files: ['packages/*/src/**/*.{ts,tsx}', '.archon/scripts/**/*.{ts,tsx}'], languageOptions: { parserOptions: { projectService: true, From bf08fed8f0ba8a99b5b0fdcd02f46d54e9bb314e Mon Sep 17 00:00:00 2001 From: matzls Date: Sun, 12 Apr 2026 16:46:50 +0200 Subject: [PATCH 09/21] docs: add assistant architecture reference Document how assistant selection works across host skills, conversations, workflows, and nodes. Capture fork-specific Codex additions, upstream differences, and the current Codex limitations for workflow nodes. Co-authored-by: Codex --- .../docs/reference/assistant-architecture.md | 447 ++++++++++++++++++ .../src/content/docs/reference/index.md | 1 + 2 files changed, 448 insertions(+) create mode 100644 packages/docs-web/src/content/docs/reference/assistant-architecture.md diff --git a/packages/docs-web/src/content/docs/reference/assistant-architecture.md b/packages/docs-web/src/content/docs/reference/assistant-architecture.md new file mode 100644 index 0000000000..7d3096bc6a --- /dev/null +++ b/packages/docs-web/src/content/docs/reference/assistant-architecture.md @@ -0,0 +1,447 @@ +--- +title: Assistant Architecture +description: How Archon selects Claude or Codex across host skills, conversations, workflows, and nodes, including fork-specific differences and Codex limitations. +category: reference +audience: [developer] +status: current +sidebar: + order: 2 +--- + +This document explains the full assistant-selection stack in Archon.
+ +It exists to answer questions like: + +- What does "Codex-driven repo" actually mean? +- How is that different from workflow `provider: codex`? +- What is the difference between an Archon skill and workflow-node `skills:`? +- Which parts are standard upstream Archon, and which parts are specific to this fork? +- Which nodes can run on Codex today, and what breaks or degrades? + +## Executive Summary + +Archon has multiple independent selection layers: + +1. **Host harness layer**: Claude Code or Codex is the outer tool you are using to invoke Archon. +2. **Conversation/orchestrator layer**: Archon stores an assistant type on codebases and conversations. That decides whether the top-level Archon conversation runs through the Claude client or the Codex client. +3. **Workflow default provider layer**: When a workflow runs, Archon resolves a default provider for the workflow from workflow YAML or merged config. +4. **Per-node provider/model layer**: Individual AI nodes can override provider and model again. + +Those layers are related, but they are not the same setting. + +## Mental Model + +```mermaid +flowchart TD + U["User request"] --> H["Host harness\nClaude Code or Codex"] + H --> S["Archon host skill\noptional routing layer"] + S --> O["Archon orchestrator conversation"] + O -->|"direct answer"| C["Conversation assistant client\nClaudeClient or CodexClient"] + O -->|"invoke workflow"| W["Workflow executor"] + W --> WD["Workflow default provider"] + WD --> N["Per-node provider/model override"] + N --> R["Actual runtime for the node"] + + NS["workflow node skills:"] -. Claude-only node feature .-> N +``` + +The critical point is that the workflow executor can choose a provider that is different from the top-level conversation assistant. 
+ +## Layer 1: Host Harness + +This is the outer coding agent you are currently running: + +- Claude Code +- Codex + +At this layer, Archon can be exposed through a **host skill** that teaches the outer agent how to call Archon workflows and how to route requests. + +In this repo today: + +- Claude-oriented host skill: `.claude/skills/archon/` +- Codex-oriented host skill: `.agents/skills/archon/` + +This layer is about **how the outer assistant learns to use Archon**. It does **not** decide how workflow nodes run once Archon is executing them. + +## Layer 2: Conversation And Orchestrator Assistant + +Archon stores `ai_assistant_type` on codebases and conversations in the database. That value determines which top-level assistant client powers the Archon conversation. + +What it affects: + +- direct chat answers from Archon +- top-level orchestration and routing +- which default "assist" workflow the orchestrator suggests when routing is unclear + +What it does not automatically affect: + +- the provider used by every workflow node + +### How conversation assistant type is chosen + +At conversation creation time, Archon uses this order: + +1. existing conversation value if conversation already exists +2. parent conversation assistant type when inheriting context +3. codebase `ai_assistant_type` from the database when a codebase is attached +4. `DEFAULT_AI_ASSISTANT` env var +5. 
built-in default `claude` + +Important implementation detail: + +- the codebase `ai_assistant_type` is currently set when the repo is registered +- registration auto-detects `codex` if a `.codex/` folder exists, otherwise `claude` if a `.claude/` folder exists +- repo `.archon/config.yaml` does **not** currently write back into `remote_agent_codebases.ai_assistant_type` + +So "this repo is Codex-driven in the database" means: + +- the **top-level Archon conversation** for that codebase will use Codex by default +- it does **not** guarantee that every workflow node runs on Codex + +## Layer 3: Workflow Default Provider + +When a workflow starts, Archon resolves a default provider for the workflow separately from the conversation assistant. + +Resolution order: + +1. workflow `provider` +2. infer provider from workflow `model` +3. merged config `assistant` +4. built-in default `claude` + +The merged config order is: + +1. built-in defaults +2. `~/.archon/config.yaml` +3. repo `.archon/config.yaml` +4. environment variables + +That means a workflow can run on: + +- Claude even when the top-level conversation is Codex +- Codex even when the top-level conversation is Claude + +This is the main reason the system can feel confusing if you think there is only one "assistant" switch. + +## Layer 4: Per-Node Provider And Model + +For AI nodes, Archon resolves provider and model again at node execution time. + +Current resolution order for a command or prompt node: + +1. node `provider` +2. infer provider from node `model` +3. workflow default provider + +Model resolution: + +1. node `model` +2. workflow model if provider matches workflow provider +3. config default model for that provider + +This means a single workflow can mix: + +- mostly Claude nodes with a few Codex nodes +- mostly Codex nodes with a few Claude nodes + +provided those nodes do not rely on features unsupported by the chosen provider. 
+ +## Two Different Meanings Of "Skills" + +This is a major source of confusion. + +### Host skill + +Examples: + +- `.claude/skills/archon/SKILL.md` +- `.agents/skills/archon/SKILL.md` + +Purpose: + +- teach the outer assistant how to use Archon +- route a request into the correct Archon workflow +- explain conventions like branch naming and workflow selection + +This is **outside** the workflow engine. + +### Workflow-node `skills:` + +Example in workflow YAML: + +```yaml +nodes: + - id: review + prompt: "Review the implementation" + skills: + - code-review +``` + +Purpose: + +- preload domain-specific knowledge into a workflow node +- currently implemented using Claude Agent SDK agent definitions + +This is a **workflow node feature**, not a host-routing feature. + +Current behavior: + +- supported for Claude nodes +- ignored with warnings for Codex nodes + +## Node Types And Assistant Relevance + +Not every node type depends on Claude or Codex. + +| Node type | Uses AI provider? | Notes | +| --- | --- | --- | +| `command` | Yes | Named markdown prompt file loaded, then executed by Claude or Codex | +| `prompt` | Yes | Inline AI prompt executed by Claude or Codex | +| `loop` | Yes | Iterative AI execution; special runtime path | +| `bash` | No | Shell only | +| `script` | No | `bun` or `uv` runtime only | +| `approval` | No AI execution of its own | Human gate | +| `cancel` | No | Terminates workflow | + +So the real Codex/Claude compatibility question applies primarily to `command`, `prompt`, and `loop` nodes. + +## Codex Compatibility By Node Type + +### Command and prompt nodes + +These are the best candidates for Codex. + +They can run on Codex if they rely only on: + +- prompt text +- repository access +- shell/file/git/network capabilities provided by Codex +- `output_format` + +They should **not** rely on Claude-only node features listed later in this document. 
+ +### Loop nodes + +Current code supports loop-node provider/model selection, but the docs still say otherwise. + +Actual current behavior: + +- loop nodes do resolve `provider` and `model` +- loop nodes can therefore run on Codex +- loop nodes still do **not** support most of the richer Claude-only node features + +This is a load-bearing doc/code mismatch. Treat the implementation as authoritative until docs are corrected. + +### Bash/script/approval/cancel nodes + +These are provider-agnostic. They can exist in a Codex-oriented workflow because they do not call either AI assistant directly. + +## Exact Codex Limitations For Workflow Nodes + +### Supported on Codex nodes + +Supported today: + +- `provider: codex` +- `model: <codex-model>` +- `output_format` +- config-level Codex defaults: + - `assistants.codex.model` + - `assistants.codex.modelReasoningEffort` + - `assistants.codex.webSearchMode` + - `assistants.codex.additionalDirectories` + +### Ignored with warnings on Codex command/prompt nodes + +These features are currently Claude-only and are ignored on Codex nodes: + +- `skills` +- `hooks` +- `mcp` +- `allowed_tools` +- `denied_tools` +- Claude advanced options: + - `effort` + - `thinking` + - `maxBudgetUsd` + - `systemPrompt` + - `fallbackModel` + - `betas` + - `sandbox` + +### Loop-node limitations + +Loop nodes have a separate limitation set.
+ +Current implementation: + +- `provider` and `model` do work for loop nodes +- these still do **not** apply to loop iterations: + - `hooks` + - `mcp` + - `skills` + - `allowed_tools` + - `denied_tools` + - `output_format` + +### Parsed but not fully applied: workflow-level Codex options + +There is one important implementation gap: + +- workflow YAML supports `modelReasoningEffort`, `webSearchMode`, and `additionalDirectories` +- the loader parses them successfully +- the runtime executor currently pulls Codex options from merged config, not from the workflow object + +Practical effect: + +- `model:` on a workflow is effective +- workflow-level `modelReasoningEffort`, `webSearchMode`, and `additionalDirectories` are currently best treated as **documented intent, not guaranteed runtime behavior** +- if you need deterministic current behavior, set those in `assistants.codex` config + +## When Codex Can Realistically Replace Claude For A Node + +A node is a good candidate for Codex when all of these are true: + +1. it is a `command`, `prompt`, or simple `loop` node +2. it does not depend on `skills`, `hooks`, `mcp`, or tool restriction fields +3. it does not depend on Claude-only advanced options +4. the prompt is generic and does not assume Claude-specific behavior +5. 
the required tools are available through Codex's own runtime setup + +A node is **not** a good candidate for Codex when it depends on: + +- Claude skill injection +- Claude hook behavior +- per-node MCP wiring +- Claude-specific system-prompt or thinking controls + +## Upstream vs This Fork + +### Standard upstream implementation + +Upstream Archon already supports the broad architecture: + +- Claude and Codex are both first-class assistant providers +- workflow YAML supports `provider` and `model` +- workflow nodes can select provider and model +- config supports both Claude and Codex defaults +- conversation/orchestrator assistant selection exists + +Upstream public docs also already describe: + +- Codex as an AI assistant +- per-node `provider` / `model` +- workflow-level Codex settings + +### Fork-specific additions in this repo + +This fork adds a more explicit Codex-facing routing surface. + +Verified additions in this checkout: + +1. **Codex-specific assist workflow** + - `.archon/workflows/defaults/archon-assist-codex.yaml` + - `.archon/commands/defaults/archon-assist-codex.md` + +2. **Codex-specific host skill** + - `.agents/skills/archon/SKILL.md` + +3. **Docs updated to mention Codex-specific assist lane** + - `archon-assist-codex` appears in the local docs and workflow catalog + +### Fork-specific caveats + +There are also fork-local inconsistencies worth knowing: + +1. **The setup wizard still installs the Claude skill surface** + - it copies `.claude/skills/archon/` + - it does not install `.agents/skills/archon/` + +2. **The Codex host skill references `archon-piv-loop-codex`** + - no matching workflow file exists in this checkout today + +3. **Repo-local workflow default is not pinned here** + - this repo's `.archon/config.yaml` does not set `assistant:` + - so workflow default provider for this repo depends on global config or environment unless a workflow sets its own provider + +4. 
**Docs still overstate some Codex workflow capabilities** + - workflow-level Codex YAML options are parsed but not fully consumed by the executor + +5. **Docs still understate some loop-node capabilities** + - docs say loop-node `provider` / `model` are ignored + - current code resolves them + +## Practical Precedence Tables + +### Conversation/orchestrator assistant + +| Order | Source | +| --- | --- | +| 1 | existing conversation | +| 2 | parent conversation | +| 3 | codebase `ai_assistant_type` from DB | +| 4 | `DEFAULT_AI_ASSISTANT` | +| 5 | built-in default `claude` | + +### Workflow default provider + +| Order | Source | +| --- | --- | +| 1 | workflow `provider` | +| 2 | inferred from workflow `model` | +| 3 | merged config `assistant` | +| 4 | built-in default `claude` | + +### Per-node provider + +| Order | Source | +| --- | --- | +| 1 | node `provider` | +| 2 | inferred from node `model` | +| 3 | workflow default provider | + +## Recommended Migration Strategy For Claude-To-Codex Workflow Changes + +When converting a workflow or node from Claude to Codex: + +1. start with `command` or `prompt` nodes +2. remove or replace `skills`, `hooks`, `mcp`, and tool restriction fields +3. keep the prompt generic +4. test one node at a time before converting the entire workflow +5. prefer a `-codex` workflow variant when behavior meaningfully diverges +6. keep Claude as the provider for nodes that truly depend on Claude-only features + +This usually leads to a mixed-provider workflow rather than an all-or-nothing migration. + +## Decision Checklist + +Use this checklist before changing a node to Codex: + +- Is this node AI-driven at all? +- Does it use `skills`, `hooks`, `mcp`, `allowed_tools`, or `denied_tools`? +- Does it rely on Claude-only advanced fields? +- Is the desired behavior actually controlled by the conversation assistant, the workflow default, or the node override? 
+- If the repo is "Codex-driven" only in the database, do we also want the workflow YAML or repo config to reflect that? + +If you answer those questions first, the assistant-selection model becomes much less ambiguous. + +## Verified Source Anchors + +The implementation details in this document were verified against: + +- `packages/core/src/db/conversations.ts` +- `packages/core/src/orchestrator/orchestrator-agent.ts` +- `packages/core/src/config/config-loader.ts` +- `packages/core/src/handlers/clone.ts` +- `packages/workflows/src/executor.ts` +- `packages/workflows/src/dag-executor.ts` +- `.agents/skills/archon/SKILL.md` +- `.archon/workflows/defaults/archon-assist-codex.yaml` +- `.archon/commands/defaults/archon-assist-codex.md` + +And compared against upstream/public docs: + +- `https://archon.diy/getting-started/ai-assistants/` +- `https://archon.diy/guides/authoring-workflows/` +- `https://archon.diy/guides/skills/` +- `https://archon.diy/book/essential-workflows/` diff --git a/packages/docs-web/src/content/docs/reference/index.md b/packages/docs-web/src/content/docs/reference/index.md index f20bf30943..5621c89454 100644 --- a/packages/docs-web/src/content/docs/reference/index.md +++ b/packages/docs-web/src/content/docs/reference/index.md @@ -12,6 +12,7 @@ Technical reference documentation for Archon internals. 
## Reference Docs - **[Architecture](/reference/architecture/)** -- System overview, interfaces, data flow, extension guides +- **[Assistant Architecture](/reference/assistant-architecture/)** -- Claude vs Codex selection across host skills, conversations, workflows, and nodes - **[Archon Directories](/reference/archon-directories/)** -- Directory structure, path resolution, configuration system - **[CLI Reference](/reference/cli/)** -- All CLI commands, flags, and usage examples - **[Commands Reference](/reference/commands/)** -- Slash commands available in all platform adapters From 183ecb6230c05eaf7e0b7c7cc689c6a3f7327dd6 Mon Sep 17 00:00:00 2001 From: matzls Date: Sun, 12 Apr 2026 17:23:10 +0200 Subject: [PATCH 10/21] docs(workflow): add piv loop codex execution notes Document the phase-by-phase session model for archon-piv-loop-codex, including fresh-context boundaries, interactive loop resume behavior, and a flow diagram for future review. Co-authored-by: Codex --- .../workflows/archon-piv-loop-codex.README.md | 188 ++++++++++++++++++ .archon/workflows/archon-piv-loop-codex.yaml | 2 + 2 files changed, 190 insertions(+) create mode 100644 .archon/workflows/archon-piv-loop-codex.README.md diff --git a/.archon/workflows/archon-piv-loop-codex.README.md b/.archon/workflows/archon-piv-loop-codex.README.md new file mode 100644 index 0000000000..f4d4b9ff0d --- /dev/null +++ b/.archon/workflows/archon-piv-loop-codex.README.md @@ -0,0 +1,188 @@ +--- +title: archon-piv-loop-codex execution notes +workflow: archon-piv-loop-codex +doc_type: workflow-reference +updated: 2026-04-12 +--- + +# archon-piv-loop-codex + +This is a companion note for [`archon-piv-loop-codex.yaml`](./archon-piv-loop-codex.yaml). + +It answers one specific operational question: when the workflow keeps model context, +when it starts a fresh Codex thread, and where human approval pauses/resumes happen. + +## Short answer + +The workflow is **not** one single context window from start to finish. 
+ +It is one workflow run, but it executes as a series of nodes: + +- some nodes are **non-AI** (`script` / `bash`) and invoke no model +- some nodes are **fresh Codex sessions** +- some nodes are **interactive loops** that keep their own loop session across + iterations +- the `implement` loop is intentionally **fresh every iteration** + +## Source of truth + +The behavior below is grounded in: + +- [`archon-piv-loop-codex.yaml`](./archon-piv-loop-codex.yaml) +- [`packages/workflows/src/dag-executor.ts`](../../packages/workflows/src/dag-executor.ts) +- [`packages/workflows/src/schemas/loop.ts`](../../packages/workflows/src/schemas/loop.ts) +- [`packages/core/src/clients/codex.ts`](../../packages/core/src/clients/codex.ts) + +## Context model + +Two different things matter: + +1. **Workflow run** + One Archon workflow record spanning the whole PIV process. +2. **Codex session/thread** + The actual model conversation context used for a node or loop iteration. + +The workflow run is continuous. The Codex thread is not. 
+ +## Phase-by-phase session behavior + +| Phase | Node | Model invocation | Context behavior | +|------|------|------------------|------------------| +| Explore | `explore` | Codex loop | Fresh on iteration 1, then reuses the loop session across approval/feedback rounds | +| Detect | `detect-project` | None | `script` node, no model | +| Plan | `create-plan` | Codex prompt node | Fresh session because `context: fresh` | +| Plan refine | `refine-plan` | Codex loop | Fresh on iteration 1, then reuses the loop session across review rounds | +| Implement setup | `implement-setup` | None | `bash` node, no model | +| Implement | `implement` | Codex loop | Fresh session every iteration because `fresh_context: true` | +| Code review | `code-review` | Codex prompt node | Fresh session because `context: fresh` | +| Fix feedback | `fix-feedback` | Codex loop | Fresh on iteration 1, then reuses the loop session across feedback rounds | +| Finalize | `finalize` | Codex prompt node | Fresh session because `context: fresh` | + +## Where context is reset + +These are the explicit reset points in the YAML: + +- `create-plan` sets `context: fresh` +- `implement` sets `loop.fresh_context: true` +- `code-review` sets `context: fresh` +- `finalize` sets `context: fresh` + +These are the implicit loop reset rules enforced by the executor: + +- every loop starts with a fresh session on **iteration 1** +- later loop iterations reuse the loop's saved session unless + `loop.fresh_context: true` + +That means: + +- `explore`, `refine-plan`, and `fix-feedback` keep loop-local context after the + first turn +- `implement` does not; each task iteration is a fresh Codex thread + +## Flow + +```mermaid +flowchart TD + A["explore loop + fresh on first turn + then reuse loop session"] --> B["detect-project + script node + no model"] + B --> C["create-plan + fresh Codex session"] + C --> D["refine-plan loop + fresh on first turn + then reuse loop session"] + D --> E["implement-setup + bash node + 
no model"] + E --> F["implement loop + fresh Codex session every iteration"] + F --> G["code-review + fresh Codex session"] + G --> H["fix-feedback loop + fresh on first turn + then reuse loop session"] + H --> I["finalize + fresh Codex session"] + + A -. "/workflow approve " .-> A + D -. "/workflow approve " .-> D + H -. "/workflow approve " .-> H +``` + +## What the executor actually does + +### Prompt and command nodes + +For normal AI nodes, the DAG executor decides whether to reuse a prior session: + +- if the node is in a parallel layer, it is fresh +- if the node has `context: fresh`, it is fresh +- otherwise it can inherit the last sequential session + +In this workflow, the prompt nodes that matter are already marked fresh where a +reset is desired, so they do not inherit prior prompt-node context. + +### Loop nodes + +Loop nodes are handled on a separate execution path. They do **not** use the +same sequential-session logic as normal prompt nodes. + +Instead: + +- iteration 1 is always fresh +- later iterations reuse the loop's `currentSessionId` +- unless `fresh_context: true`, in which case every iteration is fresh + +This is why `implement` behaves differently from `explore`, `refine-plan`, and +`fix-feedback`. + +### Interactive pauses + +When an interactive loop does not emit its completion signal: + +1. Archon pauses the workflow run +2. stores loop metadata including the current loop `sessionId` +3. waits for `/workflow approve <run-id> <feedback>` +4. resumes the loop on the next iteration with that feedback in + `$LOOP_USER_INPUT` + +This is a workflow pause/resume, not a whole-workflow context reset. + +## Important nuance on resume + +There are two resume cases: + +1. **Resume inside an interactive loop** + The loop can continue with the saved loop `sessionId`, unless that loop is + configured to force fresh iterations. +2.
**Resume a prior DAG run more generally** + Archon can skip already-completed nodes, but the executor explicitly warns + that prior-node AI session context is not restored automatically. + +So "resume workflow" does not mean "restore one giant conversation across all +phases." + +## Why implement is intentionally fresh + +The implement loop prompt explicitly says the agent is in a **fresh session** +with no memory of previous iterations and must re-read the plan, progress, git +state, and files from disk. + +That makes task execution more deterministic: + +- one task per iteration +- less context drift across tasks +- each task grounded in repo state and artifacts, not remembered chat state + +## Practical takeaway + +If you are reasoning about this workflow operationally: + +- treat **disk artifacts and repo state** as the durable memory +- treat **loop-local session reuse** as available only inside `explore`, + `refine-plan`, and `fix-feedback` +- treat **implement** as stateless between task iterations except for what it + re-reads from disk +- treat `create-plan`, `code-review`, and `finalize` as clean-session nodes diff --git a/.archon/workflows/archon-piv-loop-codex.yaml b/.archon/workflows/archon-piv-loop-codex.yaml index 844482b217..a6c417f083 100644 --- a/.archon/workflows/archon-piv-loop-codex.yaml +++ b/.archon/workflows/archon-piv-loop-codex.yaml @@ -1,3 +1,5 @@ +# Execution notes: see `.archon/workflows/archon-piv-loop-codex.README.md` +# for workflow flow, session reuse, and context reset behavior. name: archon-piv-loop-codex description: | Use when: User wants guided Plan-Implement-Validate development with human-in-the-loop, From f04fde8382e19853bf28199f280527edd28b6c77 Mon Sep 17 00:00:00 2001 From: matzls Date: Sun, 12 Apr 2026 19:20:10 +0200 Subject: [PATCH 11/21] feat(workflows): bundle codex piv loop Promote archon-piv-loop-codex into the bundled default workflow set and update the overview docs to advertise it there. 
Also fix the execution-notes path references after the workflow moved under .archon/workflows/defaults. Co-authored-by: Codex --- .../{ => defaults}/archon-piv-loop-codex.README.md | 6 +++--- .archon/workflows/{ => defaults}/archon-piv-loop-codex.yaml | 2 +- .../docs-web/src/content/docs/getting-started/overview.md | 1 + packages/workflows/src/defaults/bundled-defaults.test.ts | 3 ++- packages/workflows/src/defaults/bundled-defaults.ts | 4 +++- 5 files changed, 10 insertions(+), 6 deletions(-) rename .archon/workflows/{ => defaults}/archon-piv-loop-codex.README.md (95%) rename .archon/workflows/{ => defaults}/archon-piv-loop-codex.yaml (99%) diff --git a/.archon/workflows/archon-piv-loop-codex.README.md b/.archon/workflows/defaults/archon-piv-loop-codex.README.md similarity index 95% rename from .archon/workflows/archon-piv-loop-codex.README.md rename to .archon/workflows/defaults/archon-piv-loop-codex.README.md index f4d4b9ff0d..21e3969dbc 100644 --- a/.archon/workflows/archon-piv-loop-codex.README.md +++ b/.archon/workflows/defaults/archon-piv-loop-codex.README.md @@ -29,9 +29,9 @@ It is one workflow run, but it executes as a series of nodes: The behavior below is grounded in: - [`archon-piv-loop-codex.yaml`](./archon-piv-loop-codex.yaml) -- [`packages/workflows/src/dag-executor.ts`](../../packages/workflows/src/dag-executor.ts) -- [`packages/workflows/src/schemas/loop.ts`](../../packages/workflows/src/schemas/loop.ts) -- [`packages/core/src/clients/codex.ts`](../../packages/core/src/clients/codex.ts) +- [`packages/workflows/src/dag-executor.ts`](../../../packages/workflows/src/dag-executor.ts) +- [`packages/workflows/src/schemas/loop.ts`](../../../packages/workflows/src/schemas/loop.ts) +- [`packages/core/src/clients/codex.ts`](../../../packages/core/src/clients/codex.ts) ## Context model diff --git a/.archon/workflows/archon-piv-loop-codex.yaml b/.archon/workflows/defaults/archon-piv-loop-codex.yaml similarity index 99% rename from 
.archon/workflows/archon-piv-loop-codex.yaml rename to .archon/workflows/defaults/archon-piv-loop-codex.yaml index a6c417f083..9b8a6952b0 100644 --- a/.archon/workflows/archon-piv-loop-codex.yaml +++ b/.archon/workflows/defaults/archon-piv-loop-codex.yaml @@ -1,4 +1,4 @@ -# Execution notes: see `.archon/workflows/archon-piv-loop-codex.README.md` +# Execution notes: see `.archon/workflows/defaults/archon-piv-loop-codex.README.md` # for workflow flow, session reuse, and context reset behavior. name: archon-piv-loop-codex description: | diff --git a/packages/docs-web/src/content/docs/getting-started/overview.md b/packages/docs-web/src/content/docs/getting-started/overview.md index 16e0034b4b..65982b649d 100644 --- a/packages/docs-web/src/content/docs/getting-started/overview.md +++ b/packages/docs-web/src/content/docs/getting-started/overview.md @@ -360,6 +360,7 @@ archon complete --force # skip uncommitted-changes check | `archon-remotion-generate` | Generate or modify Remotion video compositions with AI | | `archon-interactive-prd` | Create a PRD through guided conversation | | `archon-piv-loop` | Guided Plan-Implement-Validate development with human-in-the-loop | +| `archon-piv-loop-codex` | Guided Plan-Implement-Validate development using Codex/GPT models | | `archon-adversarial-dev` | Build a complete application from scratch using adversarial development | These bundled workflows work for most projects. To customize, copy one from `.archon/workflows/defaults/` into `.archon/workflows/` and modify it — same-named files override the defaults. 
diff --git a/packages/workflows/src/defaults/bundled-defaults.test.ts b/packages/workflows/src/defaults/bundled-defaults.test.ts index b21513360f..84826a3352 100644 --- a/packages/workflows/src/defaults/bundled-defaults.test.ts +++ b/packages/workflows/src/defaults/bundled-defaults.test.ts @@ -91,6 +91,7 @@ describe('bundled-defaults', () => { 'archon-remotion-generate', 'archon-interactive-prd', 'archon-piv-loop', + 'archon-piv-loop-codex', 'archon-adversarial-dev', 'archon-workflow-builder', ]; @@ -99,7 +100,7 @@ describe('bundled-defaults', () => { expect(BUNDLED_WORKFLOWS).toHaveProperty(wf); } - expect(Object.keys(BUNDLED_WORKFLOWS)).toHaveLength(14); + expect(Object.keys(BUNDLED_WORKFLOWS)).toHaveLength(15); }); it('should have non-empty content for all workflows', () => { diff --git a/packages/workflows/src/defaults/bundled-defaults.ts b/packages/workflows/src/defaults/bundled-defaults.ts index a4e9c166b4..3e3fdec5df 100644 --- a/packages/workflows/src/defaults/bundled-defaults.ts +++ b/packages/workflows/src/defaults/bundled-defaults.ts @@ -38,7 +38,7 @@ import archonValidatePrE2eMainCmd from '../../../../.archon/commands/defaults/ar import archonValidatePrReportCmd from '../../../../.archon/commands/defaults/archon-validate-pr-report.md' with { type: 'text' }; // ============================================================================= -// Default Workflows (14 total) +// Default Workflows (15 total) // ============================================================================= import archonAssistWf from '../../../../.archon/workflows/defaults/archon-assist.yaml' with { type: 'text' }; @@ -53,6 +53,7 @@ import archonValidatePrWf from '../../../../.archon/workflows/defaults/archon-va import archonRemotionGenerateWf from '../../../../.archon/workflows/defaults/archon-remotion-generate.yaml' with { type: 'text' }; import archonInteractivePrdWf from '../../../../.archon/workflows/defaults/archon-interactive-prd.yaml' with { type: 'text' }; import 
archonPivLoopWf from '../../../../.archon/workflows/defaults/archon-piv-loop.yaml' with { type: 'text' }; +import archonPivLoopCodexWf from '../../../../.archon/workflows/defaults/archon-piv-loop-codex.yaml' with { type: 'text' }; import archonAdversarialDevWf from '../../../../.archon/workflows/defaults/archon-adversarial-dev.yaml' with { type: 'text' }; import archonWorkflowBuilderWf from '../../../../.archon/workflows/defaults/archon-workflow-builder.yaml' with { type: 'text' }; @@ -104,6 +105,7 @@ export const BUNDLED_WORKFLOWS: Record = { 'archon-remotion-generate': archonRemotionGenerateWf, 'archon-interactive-prd': archonInteractivePrdWf, 'archon-piv-loop': archonPivLoopWf, + 'archon-piv-loop-codex': archonPivLoopCodexWf, 'archon-adversarial-dev': archonAdversarialDevWf, 'archon-workflow-builder': archonWorkflowBuilderWf, }; From 3c5ef3295a11028ec3218aebf6e6ef370d51154b Mon Sep 17 00:00:00 2001 From: matzls Date: Sun, 12 Apr 2026 19:20:26 +0200 Subject: [PATCH 12/21] fix(workflows): preserve cli-owned runs on restart Exclude CLI-backed workflow runs from the server startup orphan-failure sweep and document the distinction in the workflow authoring guide. This keeps server restarts from marking active CLI executions failed while they continue in a separate process against the same database. 
Co-authored-by: Codex --- packages/core/src/db/workflows.test.ts | 3 ++- packages/core/src/db/workflows.ts | 7 ++++++- .../src/content/docs/guides/authoring-workflows.md | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/packages/core/src/db/workflows.test.ts b/packages/core/src/db/workflows.test.ts index bbbfa6ccf4..b6feb15845 100644 --- a/packages/core/src/db/workflows.test.ts +++ b/packages/core/src/db/workflows.test.ts @@ -618,7 +618,7 @@ describe('workflows database', () => { }); describe('failOrphanedRuns', () => { - test('transitions all running runs to failed with completed_at and returns count', async () => { + test('transitions non-CLI running runs to failed with completed_at and returns count', async () => { mockQuery.mockResolvedValueOnce(createQueryResult([], 2)); const result = await failOrphanedRuns(); @@ -628,6 +628,7 @@ describe('workflows database', () => { expect(query).toContain("status = 'failed'"); expect(query).toContain('completed_at = NOW()'); expect(query).toContain("status = 'running'"); + expect(query).toContain("platform_type != 'cli'"); expect(params).toContain(JSON.stringify({ failure_reason: 'server_restart' })); }); diff --git a/packages/core/src/db/workflows.ts b/packages/core/src/db/workflows.ts index 0abfb0474d..190cd245ca 100644 --- a/packages/core/src/db/workflows.ts +++ b/packages/core/src/db/workflows.ts @@ -828,6 +828,8 @@ export async function updateWorkflowActivity(id: string): Promise { /** * Transition all 'running' workflow runs to 'failed'. * Called on server startup to mark runs orphaned by process termination. + * Excludes CLI-owned runs because they may still be executing in a separate + * process while sharing the same database with the server. * The next invocation of the same workflow at the same path will auto-resume * from completed nodes via findResumableRun. 
*/ @@ -839,7 +841,10 @@ export async function failOrphanedRuns(): Promise<{ count: number }> { SET status = 'failed', completed_at = ${dialect.now()}, metadata = ${dialect.jsonMerge('metadata', 1)} - WHERE status = 'running'`, + WHERE status = 'running' + AND conversation_id IN ( + SELECT id FROM remote_agent_conversations WHERE platform_type != 'cli' + )`, [JSON.stringify({ failure_reason: 'server_restart' })] ); const count = result.rowCount ?? 0; diff --git a/packages/docs-web/src/content/docs/guides/authoring-workflows.md b/packages/docs-web/src/content/docs/guides/authoring-workflows.md index 6481aefac7..3ab2efc064 100644 --- a/packages/docs-web/src/content/docs/guides/authoring-workflows.md +++ b/packages/docs-web/src/content/docs/guides/authoring-workflows.md @@ -483,7 +483,7 @@ When a `nodes:` (DAG) workflow fails (including due to a server restart), the ne 3. Completed nodes are skipped; only failed and not-yet-run nodes are executed. 4. You receive a platform message like: `Resuming workflow — skipping 3 already-completed node(s).` -**Server restart**: If a server restart leaves runs in `running` status, they are automatically marked as `failed` on the next startup (with `metadata.failure_reason = 'server_restart'`). The next invocation of the same workflow at the same path auto-resumes from completed nodes. +**Server restart**: If a server restart leaves server-owned runs in `running` status, they are automatically marked as `failed` on the next startup (with `metadata.failure_reason = 'server_restart'`). CLI-owned runs are excluded because they can continue executing outside the server process while sharing the same database. The next invocation of the same workflow at the same path auto-resumes from completed nodes. **Known limitation**: AI session context from prior nodes is not restored. If a downstream node relies on in-context knowledge from a prior run's session (rather than artifacts), it may need to re-read those artifacts explicitly. 
From 3488654c383b946ba34dc9b466ea75bee6af576b Mon Sep 17 00:00:00 2001 From: matzls Date: Sun, 12 Apr 2026 19:20:43 +0200 Subject: [PATCH 13/21] docs(archon): add workflow log debugging guide Add a reusable reference for repeated Archon log-debugging sessions and surface it from the Codex assist lane and the top-level Archon skill routing table. The new guide explains the three log layers, run discovery, JSONL filtering, event interpretation, and when to use UI or raw logs. Co-authored-by: Codex --- .../commands/defaults/archon-assist-codex.md | 18 ++ .claude/skills/archon/SKILL.md | 1 + .../skills/archon/references/log-debugging.md | 293 ++++++++++++++++++ 3 files changed, 312 insertions(+) create mode 100644 .claude/skills/archon/references/log-debugging.md diff --git a/.archon/commands/defaults/archon-assist-codex.md b/.archon/commands/defaults/archon-assist-codex.md index 82bf363d3c..db101917a4 100644 --- a/.archon/commands/defaults/archon-assist-codex.md +++ b/.archon/commands/defaults/archon-assist-codex.md @@ -30,6 +30,24 @@ workflow. "Note: Using archon-assist-codex. Consider creating or using a more specific Codex workflow for this use case." +## Workflow Log Debugging + +When the request is mainly about a failed, paused, or confusing workflow run: + +1. **Check the active surface first** + - Terminal or server output for Archon runtime logs + - Web UI run details or `archon workflow status --verbose` for current run + state +2. **Open the raw per-run JSONL when you need the full trace** + - Default path: + `~/.archon/workspaces/<owner>/<repo>/logs/<run-id>.jsonl` + - If `ARCHON_HOME` is set, use that base directory instead of `~/.archon` +3. **Increase verbosity when current output is too thin** + - `archon --verbose workflow run "..."` + - `LOG_LEVEL=debug <command>` for Archon process logs +4. **Use the detailed reference for repeated log analysis** + - Read `.claude/skills/archon/references/log-debugging.md` + ## Guardrails - Prefer small, reversible changes. 
diff --git a/.claude/skills/archon/SKILL.md b/.claude/skills/archon/SKILL.md index f36e7391b8..43080fa128 100644 --- a/.claude/skills/archon/SKILL.md +++ b/.claude/skills/archon/SKILL.md @@ -41,6 +41,7 @@ Determine the user's intent and dispatch to the appropriate guide: | **Create a command file** | Read `references/authoring-commands.md` | | **Variable substitution reference** | Read `references/variables.md` | | **CLI command reference** | Read `references/cli-commands.md` | +| **Debug workflow logs / inspect a run** | Read `references/log-debugging.md` — log locations, layers, filtering, interpretation | | **Run an interactive workflow** | Read `references/interactive-workflows.md` — transparent relay protocol | | **Run a workflow (default)** | Continue with "Running Workflows" below | diff --git a/.claude/skills/archon/references/log-debugging.md b/.claude/skills/archon/references/log-debugging.md new file mode 100644 index 0000000000..51d66feff4 --- /dev/null +++ b/.claude/skills/archon/references/log-debugging.md @@ -0,0 +1,293 @@ +# Archon Log Debugging Reference + +Use this guide when the main job is understanding what Archon just did during a +workflow run, why it failed, why it paused, or where the useful evidence lives. + +## Three Log Layers + +Archon exposes three different evidence surfaces. They overlap, but they are +not interchangeable. + +### 1. Runtime process logs + +Use these when you need to debug Archon itself: startup, config loading, +database errors, adapter issues, API route failures, or unexpected process +behavior. + +- Output goes to the current terminal or process log sink +- Verbosity is controlled by `LOG_LEVEL` +- `archon --verbose ...` sets the CLI logger to `debug` + +Examples: + +```bash +LOG_LEVEL=debug archon workflow list +LOG_LEVEL=debug archon workflow run archon-assist "help me debug this run" +LOG_LEVEL=debug bun run dev +``` + +### 2. 
Per-run workflow JSONL logs + +Use these when you need the raw workflow trace for one run: assistant messages, +tool calls, node boundaries, validation events, and workflow-level failures. + +Default location: + +```text +~/.archon/workspaces/<owner>/<repo>/logs/<run-id>.jsonl +``` + +If `ARCHON_HOME` is set, replace `~/.archon` with that directory. + +### 3. Web UI and API run details + +Use these when you want a quick run summary, node progress, artifacts, and the +conversation view without opening the raw JSONL file. + +- Web UI run details show node state, logs, and artifacts +- `GET /api/workflows/runs/:runId` returns the run plus lean DB events +- `archon workflow status --verbose` gives a CLI summary of active runs + +Important: the UI/API event stream is intentionally lean. It does not replace +the raw JSONL file when you need the full assistant or tool trace. + +## What Each Layer Contains + +### Runtime process logs + +Best for: + +- startup and shutdown failures +- SQLite or PostgreSQL connection errors +- API route errors +- adapter or orchestration errors +- configuration problems + +### Workflow JSONL logs + +Best for: + +- a single run's assistant output +- tool inputs for that run +- node-by-node flow +- validation pass/fail details +- interactive workflow pause output + +The raw JSONL logger writes these event types: + +- `workflow_start` +- `workflow_complete` +- `workflow_error` +- `assistant` +- `tool` +- `validation` +- `node_start` +- `node_complete` +- `node_skipped` +- `node_error` + +### UI/API events + +Best for: + +- current node status +- elapsed time and progress +- artifacts +- recent workflow state in the app + +Expect the naming to differ slightly from the JSONL file. The UI/API layer is +built from `remote_agent_workflow_events` and persisted messages, so event names +such as `node_started` or `tool_called` may appear there instead of the raw +JSONL names. + +## Quick Triage Order + +Use this sequence unless you already know the failing layer: + +1. 
Get the run ID and current status. +2. Look at the UI run details or `archon workflow status --verbose`. +3. Open the per-run JSONL file for the full trace. +4. Turn on `LOG_LEVEL=debug` or `--verbose` only if the current evidence is too + thin. +5. Return to process logs if the failure looks like Archon runtime behavior + rather than workflow logic. + +## Finding the Run + +For active runs: + +```bash +archon workflow status +archon workflow status --verbose +archon workflow status --json +archon workflow status --json --verbose +``` + +If you already have the run ID, locate the file directly: + +```bash +find "${ARCHON_HOME:-$HOME/.archon}/workspaces" -name "<run-id>.jsonl" 2>/dev/null +``` + +## Reading the JSONL File + +Set a shell variable first: + +```bash +LOG_FILE="${ARCHON_HOME:-$HOME/.archon}/workspaces/<owner>/<repo>/logs/<run-id>.jsonl" +``` + +Show the last lines: + +```bash +tail -n 40 "$LOG_FILE" +``` + +Search for failures: + +```bash +rg '"type":"workflow_error"|"type":"node_error"' "$LOG_FILE" +``` + +Search for one node: + +```bash +rg '"step":"implement"' "$LOG_FILE" +``` + +Search for validations: + +```bash +rg '"type":"validation"' "$LOG_FILE" +``` + +## Filtering Patterns + +### With `rg` + +Assistant messages: + +```bash +rg '"type":"assistant"' "$LOG_FILE" +``` + +Tool calls: + +```bash +rg '"type":"tool"' "$LOG_FILE" +``` + +Skipped nodes: + +```bash +rg '"type":"node_skipped"' "$LOG_FILE" +``` + +### With `jq` if installed + +Latest assistant message: + +```bash +jq -r 'select(.type=="assistant") | .content' "$LOG_FILE" | tail -n 1 +``` + +Node errors with timestamps: + +```bash +jq -c 'select(.type=="node_error") | {ts, step, error}' "$LOG_FILE" +``` + +Validation results: + +```bash +jq -c 'select(.type=="validation") | {ts, step, check, result, error}' "$LOG_FILE" +``` + +## How To Interpret Common Events + +### `workflow_start` + +The run was created and the workflow began. This is the anchor for the rest of +the file. 
+ +### `node_start` and `node_complete` + +The workflow crossed a node boundary. These tell you which step ran, in what +order, and where time was spent. + +### `node_skipped` + +This usually means a `when:` condition or trigger rule prevented the node from +running. It is not necessarily a failure. + +### `node_error` + +The node failed. Start here for step-local failures. + +### `validation` + +A named check ran and produced `pass`, `fail`, `warn`, or `unknown`. + +### `assistant` + +This is the workflow agent's textual output for the run. In interactive +workflows, this is the content you relay back to the user. + +### `tool` + +A raw tool invocation was recorded in the JSONL trace. Use this when you need +to see what the workflow attempted, not just the summarized UI status. + +## Interactive Workflow Note + +For interactive workflows, the important readback pattern is: + +1. get the run ID +2. open the JSONL file +3. extract the last `assistant` event +4. relay its `content` directly + +That is the canonical way to surface pause output from the raw log. + +## UI Versus Raw File + +Use the UI or API when: + +- you need quick node status +- you want artifacts and high-level progress +- you are navigating several runs quickly + +Use the raw JSONL file when: + +- you need the exact assistant text +- you need the raw tool trace +- UI summaries feel incomplete +- you are investigating a single run deeply + +## Common Failure Patterns + +`workflow appears active but progress is unclear`: +Open the JSONL file and check the most recent `assistant`, `tool`, and +`node_*` events. + +`UI shows state but not enough context`: +Use the raw JSONL for the detailed trace. + +`run failed but nothing obvious appears in JSONL`: +Check Archon runtime logs with `LOG_LEVEL=debug`; the problem may be outside the +workflow trace itself. + +`interactive workflow is paused and you need the exact wording`: +Extract the last `assistant` event from the JSONL file. 
+ +## Minimal Operator Checklist + +When debugging a run for someone else, report: + +1. run ID +2. workflow name +3. current status +4. failing node or last completed node +5. most recent assistant output +6. most relevant error or validation event +7. whether the problem looks like workflow logic or Archon runtime behavior From 5f2377ee9a0787df0a6a3595c0457db7468f5671 Mon Sep 17 00:00:00 2001 From: matzls Date: Mon, 13 Apr 2026 11:00:24 +0200 Subject: [PATCH 14/21] feat(workflows): harden bundled scripts and PIV loop runtime - bundle the detect-project helper as a default script and resolve Archon default scripts when repo-local scripts are absent - stop PIV loop nodes early when git HEAD and task-progress tracking stop advancing - fail workflow CLI commands early when ~/.archon is not writable and clarify the sandbox failure mode in docs - persist richer DAG failure metadata for partial-run diagnostics Co-authored-by: Codex --- .../defaults/archon-piv-loop-codex.yaml | 7 + .../workflows/defaults/archon-piv-loop.yaml | 7 + packages/cli/src/commands/workflow.test.ts | 18 + packages/cli/src/commands/workflow.ts | 67 +++- packages/core/src/db/workflows.test.ts | 16 + packages/core/src/db/workflows.ts | 8 +- .../src/content/docs/guides/loop-nodes.md | 18 + .../content/docs/reference/troubleshooting.md | 6 + packages/paths/src/archon-paths.ts | 7 + packages/paths/src/index.ts | 1 + packages/workflows/src/dag-executor.test.ts | 224 ++++++++++++ packages/workflows/src/dag-executor.ts | 318 ++++++++++++++++-- .../src/defaults/bundled-defaults.test.ts | 16 +- .../src/defaults/bundled-defaults.ts | 25 ++ packages/workflows/src/loader.test.ts | 6 + packages/workflows/src/schemas/loop.ts | 4 + .../workflows/src/script-discovery.test.ts | 15 +- packages/workflows/src/script-discovery.ts | 65 +++- packages/workflows/src/store.ts | 2 +- packages/workflows/src/validator.test.ts | 9 + packages/workflows/src/validator.ts | 68 ++-- 21 files changed, 826 insertions(+), 81 
deletions(-) diff --git a/.archon/workflows/defaults/archon-piv-loop-codex.yaml b/.archon/workflows/defaults/archon-piv-loop-codex.yaml index 9b8a6952b0..6ed4779e48 100644 --- a/.archon/workflows/defaults/archon-piv-loop-codex.yaml +++ b/.archon/workflows/defaults/archon-piv-loop-codex.yaml @@ -695,6 +695,11 @@ nodes: Phase 4.5 status block below. Do NOT read, edit, stage, or validate files for any other task in this iteration. + Archon monitors durable progress across iterations using the current git HEAD + plus `$ARTIFACTS_DIR/progress.txt`. If multiple iterations finish without a + new commit or a new completed-task entry, the loop will stop as stuck so a + human can inspect the blocked task directly. + If tasks remain, end the iteration by reporting status in this exact format: ``` @@ -735,6 +740,8 @@ nodes: until: COMPLETE max_iterations: 15 fresh_context: true + progress_file: "$ARTIFACTS_DIR/progress.txt" + stuck_after_no_progress_iterations: 3 # ═══════════════════════════════════════════════════════════════ # PHASE 4: VALIDATE — Automated code review diff --git a/.archon/workflows/defaults/archon-piv-loop.yaml b/.archon/workflows/defaults/archon-piv-loop.yaml index 7227900c2f..e32a1a41f2 100644 --- a/.archon/workflows/defaults/archon-piv-loop.yaml +++ b/.archon/workflows/defaults/archon-piv-loop.yaml @@ -520,6 +520,11 @@ nodes: --- ``` + Archon monitors durable progress across iterations using the current git HEAD + plus `.claude/archon/plans/progress.txt`. If multiple iterations finish + without a new commit or a new completed-task entry, the loop will stop as + stuck so a human can inspect the blocked task directly. 
+ --- ## Phase 5: COMPLETE — Check All Tasks @@ -533,6 +538,8 @@ nodes: until: COMPLETE max_iterations: 15 fresh_context: true + progress_file: ".claude/archon/plans/progress.txt" + stuck_after_no_progress_iterations: 3 # ═══════════════════════════════════════════════════════════════ # PHASE 4: VALIDATE — Automated code review diff --git a/packages/cli/src/commands/workflow.test.ts b/packages/cli/src/commands/workflow.test.ts index 7f13f8d83f..c10a4f7106 100644 --- a/packages/cli/src/commands/workflow.test.ts +++ b/packages/cli/src/commands/workflow.test.ts @@ -25,6 +25,12 @@ const mockLogger = { child: mock(() => mockLogger), }; +const mockFsAccess = mock(() => Promise.resolve()); + +mock.module('fs/promises', () => ({ + access: mockFsAccess, +})); + // Mock @archon/paths (createLogger moved here from @archon/core) mock.module('@archon/paths', () => ({ createLogger: mock(() => mockLogger), @@ -345,6 +351,8 @@ describe('workflowRunCommand', () => { consoleSpy = spyOn(console, 'log').mockImplementation(() => {}); mockLogger.warn.mockClear(); mockLogger.info.mockClear(); + mockFsAccess.mockClear(); + mockFsAccess.mockResolvedValue(undefined); }); afterEach(() => { @@ -363,6 +371,16 @@ describe('workflowRunCommand', () => { ); }); + it('fails early when Archon home is not writable for workflow state', async () => { + mockFsAccess.mockRejectedValueOnce( + Object.assign(new Error('Operation not permitted'), { code: 'EPERM' }) + ); + + await expect(workflowRunCommand('/test/path', 'assist', 'hello')).rejects.toThrow( + "requires write access to '/home/test/.archon'" + ); + }); + it('should throw error when workflow not found', async () => { const { discoverWorkflowsWithConfig } = await import('@archon/workflows/workflow-discovery'); (discoverWorkflowsWithConfig as ReturnType).mockResolvedValueOnce({ diff --git a/packages/cli/src/commands/workflow.ts b/packages/cli/src/commands/workflow.ts index 89dd5911e4..d95dad1c90 100644 --- a/packages/cli/src/commands/workflow.ts 
+++ b/packages/cli/src/commands/workflow.ts @@ -1,6 +1,9 @@ /** * Workflow command - list and run workflows */ +import { constants as fsConstants } from 'fs'; +import { access } from 'fs/promises'; +import { dirname, join } from 'path'; import { registerRepository, loadConfig, @@ -79,6 +82,56 @@ function generateConversationId(): string { return `cli-${String(timestamp)}-${random}`; } +async function assertArchonStateWritable(commandName: string): Promise { + if (process.env.DATABASE_URL) { + return; + } + + const archonHome = getArchonHome(); + const dbPath = join(archonHome, 'archon.db'); + const homeAccessTarget = dirname(archonHome); + + try { + await access(archonHome, fsConstants.W_OK); + } catch (error) { + const err = error as NodeJS.ErrnoException; + if (err.code === 'ENOENT') { + try { + await access(homeAccessTarget, fsConstants.W_OK); + return; + } catch (parentError) { + const parentErr = parentError as NodeJS.ErrnoException; + throw new Error( + `Archon CLI '${commandName}' requires write access to '${homeAccessTarget}' ` + + `so it can create '${archonHome}' and its SQLite state.\n` + + `Current failure: ${parentErr.message}\n` + + `Fix: rerun outside the outer workspace sandbox or grant write access to '${homeAccessTarget}'.` + ); + } + } + throw new Error( + `Archon CLI '${commandName}' requires write access to '${archonHome}' ` + + `because local workflow state uses SQLite at '${dbPath}' when DATABASE_URL is unset.\n` + + `Current failure: ${err.message}\n` + + `Fix: rerun outside the outer workspace sandbox or grant write access to '${archonHome}'.` + ); + } + + try { + await access(dbPath, fsConstants.W_OK); + } catch (error) { + const err = error as NodeJS.ErrnoException; + if (err.code === 'ENOENT') { + return; + } + throw new Error( + `Archon CLI '${commandName}' requires write access to '${dbPath}'.\n` + + `Current failure: ${err.message}\n` + + `Fix: rerun outside the outer workspace sandbox or grant write access to '${archonHome}'.` + ); + 
} +} + /** Render a workflow event to stderr as a progress line. Called only when --quiet is not set. */ function renderWorkflowEvent(event: WorkflowEmitterEvent, verbose: boolean): void { switch (event.type) { @@ -210,6 +263,8 @@ export async function workflowRunCommand( userMessage: string, options: WorkflowRunOptions = {} ): Promise { + await assertArchonStateWritable('workflow run'); + const { workflows: workflowEntries, errors } = await loadWorkflows(cwd); if (workflowEntries.length === 0 && errors.length === 0) { @@ -279,9 +334,10 @@ export async function workflowRunCommand( conversation = await conversationDb.getOrCreateConversation('cli', conversationId); } catch (error) { const err = error as Error; - throw new Error( - `Failed to access database: ${err.message}\nHint: Check that DATABASE_URL is set and the database is running.` - ); + const readOnlyHint = err.message.toLowerCase().includes('readonly') + ? `\nHint: Archon needs write access to '${getArchonHome()}' (SQLite state lives there when DATABASE_URL is unset).` + : '\nHint: Check that DATABASE_URL is set and the database is running.'; + throw new Error(`Failed to access database: ${err.message}${readOnlyHint}`); } // Try to find a codebase for this directory @@ -806,6 +862,7 @@ export async function workflowStatusCommand(json?: boolean, verbose?: boolean): * findResumableRun picks up the prior failed run and skips completed nodes. */ export async function workflowResumeCommand(runId: string): Promise { + await assertArchonStateWritable('workflow resume'); const run = await resumeWorkflowOp(runId); if (!run.working_path) { throw new Error( @@ -839,6 +896,7 @@ export async function workflowResumeCommand(runId: string): Promise { * Abandon a workflow run by ID (marks it as cancelled). 
*/ export async function workflowAbandonCommand(runId: string): Promise { + await assertArchonStateWritable('workflow abandon'); const run = await abandonWorkflow(runId); console.log(`Abandoned workflow run: ${runId}`); console.log(`Workflow: ${run.workflow_name}`); @@ -849,6 +907,7 @@ export async function workflowAbandonCommand(runId: string): Promise { * Writes the approval events and transitions to 'failed' for auto-resume. */ export async function workflowApproveCommand(runId: string, comment?: string): Promise { + await assertArchonStateWritable('workflow approve'); const result = await approveWorkflow(runId, comment); // CLI auto-resumes after approval (unlike chat, which defers to next user message) @@ -905,6 +964,7 @@ export async function workflowApproveCommand(runId: string, comment?: string): P * Reject a paused workflow run by ID (marks it as cancelled). */ export async function workflowRejectCommand(runId: string, reason?: string): Promise { + await assertArchonStateWritable('workflow reject'); const result = await rejectWorkflow(runId, reason); if (result.cancelled) { @@ -965,6 +1025,7 @@ export async function workflowRejectCommand(runId: string, reason?: string): Pro * Delete terminal workflow runs older than the given number of days. 
*/ export async function workflowCleanupCommand(days: number): Promise { + await assertArchonStateWritable('workflow cleanup'); try { const { count } = await workflowDb.deleteOldWorkflowRuns(days); if (count === 0) { diff --git a/packages/core/src/db/workflows.test.ts b/packages/core/src/db/workflows.test.ts index b6feb15845..28d0548db6 100644 --- a/packages/core/src/db/workflows.test.ts +++ b/packages/core/src/db/workflows.test.ts @@ -332,6 +332,22 @@ describe('workflows database', () => { expect(params).toContain(JSON.stringify({ error: 'Timeout exceeded' })); }); + test('merges optional metadata into failure payload', async () => { + mockQuery.mockResolvedValueOnce(createQueryResult([], 1)); + + await failWorkflowRun('workflow-run-123', 'Timeout exceeded', { + node_counts: { completed: 1, failed: 1, skipped: 0, total: 2 }, + }); + + const [, params] = mockQuery.mock.calls[0] as [string, unknown[]]; + expect(params).toContain( + JSON.stringify({ + error: 'Timeout exceeded', + node_counts: { completed: 1, failed: 1, skipped: 0, total: 2 }, + }) + ); + }); + test('throws when rowCount is 0', async () => { mockQuery.mockResolvedValueOnce(createQueryResult([], 0)); diff --git a/packages/core/src/db/workflows.ts b/packages/core/src/db/workflows.ts index 190cd245ca..2b9d5011ff 100644 --- a/packages/core/src/db/workflows.ts +++ b/packages/core/src/db/workflows.ts @@ -471,7 +471,11 @@ export async function completeWorkflowRun( } } -export async function failWorkflowRun(id: string, error: string): Promise { +export async function failWorkflowRun( + id: string, + error: string, + metadata?: Record +): Promise { const dialect = getDialect(); let result: Awaited>; try { @@ -479,7 +483,7 @@ export async function failWorkflowRun(id: string, error: string): Promise `UPDATE remote_agent_workflow_runs SET status = 'failed', completed_at = ${dialect.now()}, metadata = ${dialect.jsonMerge('metadata', 2)} WHERE id = $1 AND status = 'running'`, - [id, JSON.stringify({ error })] + 
[id, JSON.stringify({ error, ...(metadata ?? {}) })] ); } catch (dbError) { const err = dbError as Error; diff --git a/packages/docs-web/src/content/docs/guides/loop-nodes.md b/packages/docs-web/src/content/docs/guides/loop-nodes.md index 0e9e3eebc3..f22d7cda94 100644 --- a/packages/docs-web/src/content/docs/guides/loop-nodes.md +++ b/packages/docs-web/src/content/docs/guides/loop-nodes.md @@ -70,6 +70,9 @@ the executor checks for workflow cancellation. max_iterations: 10 # Required. Hard limit — node fails if exceeded. fresh_context: true # Optional. Default: false. until_bash: "..." # Optional. Bash script checked after each iteration. + progress_file: "..." # Optional. Progress tracker used for durable-progress checks. + stuck_after_no_progress_iterations: 3 # Optional. Fail early after repeated + # iterations with no new durable progress. interactive: true # Optional. Default: false. Pause after each non-completing # iteration for user input via /workflow approve. gate_message: "..." # Required when interactive: true. Message shown to the @@ -149,6 +152,21 @@ build success. The bash script supports the same variable substitution as `prompt` (`$ARTIFACTS_DIR`, `$nodeId.output`, etc.). Note: `$nodeId.output` values are shell-escaped when substituted into `until_bash`. +### `progress_file` and `stuck_after_no_progress_iterations` + +Use these together when a loop should stop early if iterations are spinning +without durable state changes. + +- `progress_file` points at the task tracker the loop updates as work completes +- `stuck_after_no_progress_iterations` sets how many consecutive non-completing + iterations Archon tolerates when neither of these changed: + - the current git `HEAD` + - the completed-task count in `progress_file` + +This is useful for stateless implementation loops that re-read work from disk on +each iteration. Instead of burning all `max_iterations`, the loop fails early and +surfaces a stuck-task condition for human review. 
+ ## Patterns ### Stateless agent (Ralph pattern) diff --git a/packages/docs-web/src/content/docs/reference/troubleshooting.md b/packages/docs-web/src/content/docs/reference/troubleshooting.md index 50805c7911..457b49515c 100644 --- a/packages/docs-web/src/content/docs/reference/troubleshooting.md +++ b/packages/docs-web/src/content/docs/reference/troubleshooting.md @@ -63,6 +63,12 @@ curl http://localhost:3090/health/db SQLite requires no setup. The database is created automatically at `~/.archon/archon.db`. If you see errors, check that the `~/.archon/` directory exists and is writable. +For workflow-mutating CLI commands (`workflow run`, `workflow resume`, `workflow abandon`, `workflow approve`, +`workflow reject`, `workflow cleanup`), Archon must be able to write the SQLite +state under `~/.archon/`. If you run Archon from an outer workspace sandbox, grant +write access to `~/.archon/` or rerun the CLI outside that sandbox. Changing the +inner Codex sandbox settings does not fix a parent-process write restriction. + **For remote PostgreSQL:** ```bash # Verify DATABASE_URL diff --git a/packages/paths/src/archon-paths.ts b/packages/paths/src/archon-paths.ts index ca8ea73774..81de9f9cd0 100644 --- a/packages/paths/src/archon-paths.ts +++ b/packages/paths/src/archon-paths.ts @@ -198,6 +198,13 @@ export function getDefaultWorkflowsPath(): string { return join(getAppArchonBasePath(), 'workflows', 'defaults'); } +/** + * Get the path to the app's bundled default scripts directory + */ +export function getDefaultScriptsPath(): string { + return join(getAppArchonBasePath(), 'scripts'); +} + /** * Returns the path to the cached web UI distribution for a given version. 
* Example: ~/.archon/web-dist/v0.3.2/ diff --git a/packages/paths/src/index.ts b/packages/paths/src/index.ts index 99a254f4ca..279bde3818 100644 --- a/packages/paths/src/index.ts +++ b/packages/paths/src/index.ts @@ -11,6 +11,7 @@ export { getAppArchonBasePath, getDefaultCommandsPath, getDefaultWorkflowsPath, + getDefaultScriptsPath, logArchonPaths, validateAppDefaultsPaths, parseOwnerRepo, diff --git a/packages/workflows/src/dag-executor.test.ts b/packages/workflows/src/dag-executor.test.ts index 150ea4eeb7..64490110d2 100644 --- a/packages/workflows/src/dag-executor.test.ts +++ b/packages/workflows/src/dag-executor.test.ts @@ -23,6 +23,8 @@ mock.module('@archon/paths', () => ({ return paths; }, getDefaultCommandsPath: () => '/nonexistent/defaults', + getDefaultScriptsPath: () => '/Users/mase/Codebase/Personal-Projects/Archon/.archon/scripts', + BUNDLED_IS_BINARY: false, })); // --- Imports (after mocks) --- @@ -2860,6 +2862,138 @@ describe('executeDagWorkflow -- resume with priorCompletedNodes', () => { ).toBe(1); }); + it('fails early when no durable progress is made across consecutive iterations', async () => { + mockSendQueryDag.mockImplementation(function* () { + yield { type: 'assistant', content: 'Still working...' 
}; + yield { type: 'result', sessionId: 'loop-session' }; + }); + + const mockDeps = createMockDeps(); + const platform = createMockPlatform(); + const workflowRun = makeWorkflowRun(); + + await executeDagWorkflow( + mockDeps, + platform, + 'conv-dag', + testDir, + { + name: 'dag-loop-stuck', + nodes: [ + { + id: 'my-loop', + loop: { + prompt: 'Do task.', + until: 'COMPLETE', + max_iterations: 10, + progress_file: '$ARTIFACTS_DIR/progress.txt', + stuck_after_no_progress_iterations: 2, + }, + }, + ], + }, + workflowRun, + 'claude', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + minimalConfig + ); + + expect(mockSendQueryDag.mock.calls.length).toBe(3); + const failCalls = ( + mockDeps.store.failWorkflowRun as Mock<(id: string, error: string) => Promise> + ).mock.calls; + expect(failCalls.length).toBe(1); + const platformMessages = ( + platform.sendMessage as Mock< + ( + conversationId: string, + content: string, + metadata?: Record + ) => Promise + > + ).mock.calls.map(call => String(call[1])); + expect(platformMessages.some(message => message.includes('no durable progress'))).toBe(true); + }); + + it('resets the no-progress streak when the progress file advances', async () => { + let callCount = 0; + mockSendQueryDag.mockImplementation(async function* () { + callCount++; + const artifactsDir = join(testDir, 'artifacts'); + await mkdir(artifactsDir, { recursive: true }); + if (callCount === 1) { + await writeFile( + join(artifactsDir, 'progress.txt'), + '## Task 1: First task — COMPLETED\nDate: 2026-04-13\n---\n', + 'utf8' + ); + yield { type: 'assistant', content: 'Completed the first task.' }; + } else if (callCount === 2) { + await writeFile( + join(artifactsDir, 'progress.txt'), + '## Task 1: First task — COMPLETED\nDate: 2026-04-13\n---\n' + + '## Task 2: Second task — COMPLETED\nDate: 2026-04-13\n---\n', + 'utf8' + ); + yield { type: 'assistant', content: 'Completed the second task.' 
}; + } else { + yield { type: 'assistant', content: 'All done! COMPLETE' }; + } + yield { type: 'result', sessionId: `loop-session-${String(callCount)}` }; + }); + + const mockDeps = createMockDeps(); + const platform = createMockPlatform(); + const workflowRun = makeWorkflowRun(); + + await executeDagWorkflow( + mockDeps, + platform, + 'conv-dag', + testDir, + { + name: 'dag-loop-progress', + nodes: [ + { + id: 'my-loop', + loop: { + prompt: 'Do task.', + until: 'COMPLETE', + max_iterations: 5, + progress_file: '$ARTIFACTS_DIR/progress.txt', + stuck_after_no_progress_iterations: 2, + }, + }, + ], + }, + workflowRun, + 'claude', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + minimalConfig + ); + + expect(mockSendQueryDag.mock.calls.length).toBe(3); + expect( + (mockDeps.store.failWorkflowRun as Mock<(id: string, error: string) => Promise>).mock + .calls.length + ).toBe(0); + expect( + ( + mockDeps.store.completeWorkflowRun as Mock< + (id: string, metadata?: Record) => Promise + > + ).mock.calls.length + ).toBe(1); + }); + it('loop node output available to downstream nodes via $nodeId.output', async () => { let loopCallCount = 0; mockSendQueryDag.mockImplementation(function* (prompt: string) { @@ -5006,6 +5140,48 @@ describe('executeDagWorkflow -- script nodes', () => { expect(mockSendQueryDag.mock.calls.length).toBe(0); }); + it('named bun script executes from Archon default scripts when repo script is absent', async () => { + const mockDeps = createMockDeps(); + const platform = createMockPlatform(); + const workflowRun = makeWorkflowRun('script-default-run-id', { + workflow_name: 'script-default-test', + conversation_id: 'conv-default-script', + user_message: 'default script test', + }); + + await writeFile(join(testDir, 'package.json'), JSON.stringify({ name: 'default-script-test' })); + + const commandsDir = join(testDir, '.archon', 'commands'); + await mkdir(commandsDir, { recursive: true }); + await 
writeFile(join(commandsDir, 'report-detect.md'), 'Detection output:\n$detect.output'); + + const nodes: DagNode[] = [ + { id: 'detect', script: 'detect-project', runtime: 'bun' }, + { id: 'report', command: 'report-detect', depends_on: ['detect'] }, + ]; + + await executeDagWorkflow( + mockDeps, + platform, + 'conv-default-script', + testDir, + { name: 'default-script-test', nodes }, + workflowRun, + 'claude', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + minimalConfig + ); + + expect(mockSendQueryDag.mock.calls.length).toBe(1); + const prompt = mockSendQueryDag.mock.calls[0][0] as string; + expect(prompt).toContain('PROJECT_TYPE=node'); + expect(prompt).toContain('INSTALL_CMD=npm ci'); + }); + it('non-zero exit code results in failed state', async () => { const mockDeps = createMockDeps(); const platform = createMockPlatform(); @@ -5043,6 +5219,54 @@ describe('executeDagWorkflow -- script nodes', () => { expect(failMsg).toBeDefined(); }); + it('marks workflow failed when some nodes succeed and a later node fails', async () => { + const mockDeps = createMockDeps(); + const platform = createMockPlatform(); + const workflowRun = makeWorkflowRun('script-partial-fail-run-id', { + workflow_name: 'script-partial-fail-test', + conversation_id: 'conv-partial-fail', + user_message: 'partial fail test', + }); + + const nodes: DagNode[] = [ + { id: 'ok', script: 'console.log("ok")', runtime: 'bun' }, + { id: 'boom', script: 'process.exit(1)', runtime: 'bun', depends_on: ['ok'] }, + ]; + + await executeDagWorkflow( + mockDeps, + platform, + 'conv-partial-fail', + testDir, + { name: 'script-partial-fail-test', nodes }, + workflowRun, + 'claude', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + minimalConfig + ); + + expect(mockDeps.store.completeWorkflowRun as ReturnType).not.toHaveBeenCalled(); + const failCalls = ( + mockDeps.store.failWorkflowRun as Mock< + (id: string, error: string, 
metadata?: Record) => Promise + > + ).mock.calls; + expect(failCalls.length).toBe(1); + expect(failCalls[0][1]).toContain('failed after partial execution'); + expect(failCalls[0][2]).toEqual( + expect.objectContaining({ + node_counts: { completed: 1, failed: 1, skipped: 0, total: 2 }, + }) + ); + expect((failCalls[0][2] as Record).failed_nodes).toEqual( + expect.stringContaining("'boom':") + ); + }); + it('timeout kills subprocess', async () => { const mockDeps = createMockDeps(); const platform = createMockPlatform(); diff --git a/packages/workflows/src/dag-executor.ts b/packages/workflows/src/dag-executor.ts index facfbd1068..4d6c29e053 100644 --- a/packages/workflows/src/dag-executor.ts +++ b/packages/workflows/src/dag-executor.ts @@ -5,10 +5,11 @@ * Independent nodes within the same layer run concurrently via Promise.allSettled. * Captures all assistant output regardless of streaming mode for $node_id.output substitution. */ -import { readFile } from 'fs/promises'; -import { resolve, isAbsolute } from 'path'; +import { readFile, mkdtemp, writeFile, rm } from 'fs/promises'; +import { resolve, isAbsolute, join } from 'path'; +import { tmpdir } from 'os'; import { execFileAsync } from '@archon/git'; -import { discoverScripts } from './script-discovery'; +import { resolveNamedScript } from './script-discovery'; import type { WorkflowAssistantOptions, IWorkflowPlatform, @@ -101,10 +102,43 @@ interface SendMessageContext { nodeName?: string; } +interface BundledScriptExecution { + cmd: string; + args: string[]; + cleanup?: () => Promise; +} + /** Default DAG node retry for TRANSIENT errors */ const DEFAULT_NODE_MAX_RETRIES = 2; const DEFAULT_NODE_RETRY_DELAY_MS = 3000; +async function buildBundledScriptExecution( + scriptName: string, + runtime: 'bun' | 'uv', + content: string, + nodeDeps: string[] +): Promise { + if (runtime === 'uv') { + const withFlags = nodeDeps.flatMap(dep => ['--with', dep]); + return { + cmd: 'uv', + args: ['run', ...withFlags, 'python', '-c', 
content], + }; + } + + const tempDir = await mkdtemp(join(tmpdir(), `archon-bundled-script-${scriptName}-`)); + const scriptPath = join(tempDir, `${scriptName}.ts`); + await writeFile(scriptPath, content, 'utf8'); + + return { + cmd: 'bun', + args: ['run', scriptPath], + cleanup: async (): Promise => { + await rm(tempDir, { recursive: true, force: true }); + }, + }; +} + /** * Get effective retry config for a DAG node. */ @@ -1511,6 +1545,7 @@ async function executeScriptNode( // Build the command and args based on runtime and inline vs named let cmd = ''; let args: string[] = []; + let cleanupBundledScript: (() => Promise) | undefined; const nodeDeps = node.deps ?? []; @@ -1527,13 +1562,11 @@ async function executeScriptNode( args = ['run', ...withFlags, 'python', '-c', finalScript]; } } else { - // Named script — look up in .archon/scripts/ directory - const scriptsDir = resolve(cwd, '.archon', 'scripts'); - const scripts = await discoverScripts(scriptsDir); - const scriptDef = scripts.get(finalScript); + // Named script — look up in repo scripts first, then Archon defaults + const scriptDef = await resolveNamedScript(cwd, finalScript); if (!scriptDef) { - const errorMsg = `Script node '${node.id}': named script '${finalScript}' not found in .archon/scripts/`; + const errorMsg = `Script node '${node.id}': named script '${finalScript}' not found in .archon/scripts/ or Archon defaults`; getLog().error({ nodeId: node.id, scriptName: finalScript }, 'script_not_found'); await safeSendMessage(platform, conversationId, errorMsg, nodeContext); await logNodeError(logDir, workflowRun.id, node.id, errorMsg); @@ -1562,21 +1595,48 @@ async function executeScriptNode( return { state: 'failed', output: '', error: errorMsg }; } - // Use scriptDef.runtime (canonical source) instead of re-deriving from extension - if (scriptDef.runtime === 'uv') { - cmd = 'uv'; - const withFlags = nodeDeps.flatMap(dep => ['--with', dep]); - args = ['run', ...withFlags, scriptDef.path]; + if 
('bundled' in scriptDef && scriptDef.bundled) { + const bundledExecution = await buildBundledScriptExecution( + scriptDef.name, + scriptDef.runtime, + scriptDef.content, + nodeDeps + ); + cmd = bundledExecution.cmd; + args = bundledExecution.args; + cleanupBundledScript = bundledExecution.cleanup; } else { - cmd = 'bun'; - args = ['run', scriptDef.path]; + // Use scriptDef.runtime (canonical source) instead of re-deriving from extension + if (scriptDef.runtime === 'uv') { + cmd = 'uv'; + const withFlags = nodeDeps.flatMap(dep => ['--with', dep]); + args = ['run', ...withFlags, scriptDef.path]; + } else { + cmd = 'bun'; + args = ['run', scriptDef.path]; + } } } - const { stdout, stderr } = await execFileAsync(cmd, args, { - cwd, - timeout, - }); + let stdout = ''; + let stderr = ''; + try { + const result = await execFileAsync(cmd, args, { + cwd, + timeout, + }); + stdout = result.stdout; + stderr = result.stderr; + } finally { + if (cleanupBundledScript) { + await cleanupBundledScript().catch((error: Error) => { + getLog().warn( + { err: error, nodeId: node.id, scriptName: finalScript }, + 'bundled_script_cleanup_failed' + ); + }); + } + } // Trim trailing newline from stdout (common shell behavior) const output = stdout.replace(/\n$/, ''); @@ -1689,6 +1749,103 @@ function buildLoopNodeOptions( return { ...(model ? 
{ model } : {}), ...codexOptions, ...claudeOptions }; } +interface LoopProgressSnapshot { + gitHead?: string; + completedTaskCount?: number; +} + +async function resolveLoopProgressFile( + progressFile: string, + workflowRun: WorkflowRun, + artifactsDir: string, + baseBranch: string, + docsDir: string, + issueContext: string | undefined +): Promise { + const { prompt: substitutedPath } = substituteWorkflowVariables( + progressFile, + workflowRun.id, + workflowRun.user_message, + artifactsDir, + baseBranch, + docsDir, + issueContext + ); + return substitutedPath; +} + +function countCompletedTasks(progressText: string): number { + const matches = progressText.match(/^## Task \d+: .* — COMPLETED$/gm); + return matches?.length ?? 0; +} + +async function captureLoopProgressSnapshot( + cwd: string, + workflowRun: WorkflowRun, + loop: LoopNode['loop'], + artifactsDir: string, + baseBranch: string, + docsDir: string, + issueContext: string | undefined +): Promise { + let gitHead: string | undefined; + try { + const result = await execFileAsync('git', ['rev-parse', 'HEAD'], { cwd }); + gitHead = result.stdout.trim() || undefined; + } catch { + gitHead = undefined; + } + + let completedTaskCount: number | undefined; + if (loop.progress_file) { + try { + const resolvedPath = await resolveLoopProgressFile( + loop.progress_file, + workflowRun, + artifactsDir, + baseBranch, + docsDir, + issueContext + ); + const progressPath = isAbsolute(resolvedPath) ? 
resolvedPath : resolve(cwd, resolvedPath); + const progressText = await readFile(progressPath, 'utf8').catch( + (error: Error & { code?: string }) => { + if (error.code === 'ENOENT') return ''; + throw error; + } + ); + completedTaskCount = countCompletedTasks(progressText); + } catch { + completedTaskCount = undefined; + } + } + + return { gitHead, completedTaskCount }; +} + +function didLoopProgressAdvance( + previous: LoopProgressSnapshot, + current: LoopProgressSnapshot +): boolean { + if ( + previous.gitHead !== undefined && + current.gitHead !== undefined && + previous.gitHead !== current.gitHead + ) { + return true; + } + + if ( + previous.completedTaskCount !== undefined && + current.completedTaskCount !== undefined && + current.completedTaskCount > previous.completedTaskCount + ) { + return true; + } + + return false; +} + /** * Execute a loop node — runs prompt repeatedly until completion signal or max iterations. * @@ -1745,6 +1902,8 @@ async function executeLoopNode( let loopTotalCostUsd: number | undefined; let loopFinalStopReason: string | undefined; let loopTotalNumTurns: number | undefined; + let previousProgressSnapshot: LoopProgressSnapshot | undefined; + let noProgressStreak = 0; const resolvedOptions = buildLoopNodeOptions(workflowProvider, workflowModel, config); // Helper to log event store errors consistently @@ -2112,6 +2271,57 @@ async function executeLoopNode( }; } + if (loop.stuck_after_no_progress_iterations !== undefined) { + const currentProgressSnapshot = await captureLoopProgressSnapshot( + cwd, + workflowRun, + loop, + artifactsDir, + baseBranch, + docsDir, + issueContext + ); + + if ( + previousProgressSnapshot && + !didLoopProgressAdvance(previousProgressSnapshot, currentProgressSnapshot) + ) { + noProgressStreak += 1; + } else { + noProgressStreak = 0; + } + previousProgressSnapshot = currentProgressSnapshot; + + if (noProgressStreak >= loop.stuck_after_no_progress_iterations) { + const progressSummary = [ + 
currentProgressSnapshot.gitHead + ? `HEAD=${currentProgressSnapshot.gitHead.slice(0, 7)}` + : null, + currentProgressSnapshot.completedTaskCount !== undefined + ? `completed_tasks=${String(currentProgressSnapshot.completedTaskCount)}` + : null, + ] + .filter(Boolean) + .join(', '); + const errorMsg = + `Loop node '${node.id}' made no durable progress for ${String(noProgressStreak)} consecutive iteration` + + `${noProgressStreak === 1 ? '' : 's'}. ` + + 'Stop and inspect the current task before retrying.' + + (progressSummary ? ` Snapshot: ${progressSummary}` : ''); + getLog().warn( + { nodeId: node.id, iteration: i, noProgressStreak, progressSummary }, + 'loop_node.no_progress_streak_reached' + ); + await safeSendMessage(platform, conversationId, errorMsg, msgContext); + return { + state: 'failed', + output: lastIterationOutput, + error: errorMsg, + costUsd: loopTotalCostUsd, + }; + } + } + // Interactive loop gate — pause after every iteration where the AI did NOT emit the // completion signal. The user reviews the AI's output and provides feedback or approval. // On approval, the AI will emit the signal in the next iteration, exiting above. @@ -2939,11 +3149,11 @@ export async function executeDagWorkflow( const failMsg = `DAG workflow '${workflow.name}' completed with no successful nodes. ` + 'Check node conditions, trigger rules, and upstream failures.'; - // Note: nodeCounts not stored for failed runs — failWorkflowRun only stores { error }. - // Frontend guards with isValidNodeCounts so missing node_counts is safe. 
- await deps.store.failWorkflowRun(workflowRun.id, failMsg).catch((dbErr: Error) => { - getLog().error({ err: dbErr, workflowRunId: workflowRun.id }, 'dag_db_fail_failed'); - }); + await deps.store + .failWorkflowRun(workflowRun.id, failMsg, { node_counts: nodeCounts }) + .catch((dbErr: Error) => { + getLog().error({ err: dbErr, workflowRunId: workflowRun.id }, 'dag_db_fail_failed'); + }); await logWorkflowError(logDir, workflowRun.id, failMsg).catch((logErr: Error) => { getLog().error( { err: logErr, workflowRunId: workflowRun.id }, @@ -2957,6 +3167,18 @@ export async function executeDagWorkflow( workflowName: workflow.name, error: failMsg, }); + deps.store + .createWorkflowEvent({ + workflow_run_id: workflowRun.id, + event_type: 'workflow_failed', + data: { error: failMsg, node_counts: nodeCounts }, + }) + .catch((err: Error) => { + getLog().error( + { err, workflowRunId: workflowRun.id, eventType: 'workflow_failed' }, + 'workflow_event_persist_failed' + ); + }); emitterForFail.unregisterRun(workflowRun.id); await safeSendMessage(platform, conversationId, `\u274c ${failMsg}`, { workflowId: workflowRun.id, @@ -2970,12 +3192,48 @@ export async function executeDagWorkflow( .filter(([, o]) => o.state === 'failed') .map(([id, o]) => `'${id}': ${o.state === 'failed' ? o.error : 'unknown'}`) .join('; '); - await safeSendMessage( - platform, - conversationId, - `\u26a0\ufe0f Some DAG nodes failed: ${failedNodes}\nSuccessful nodes completed normally.`, - { workflowId: workflowRun.id } - ); + if (await skipIfStatusChanged('dag.skip_partial_fail_status_changed')) return; + const failMsg = + `DAG workflow '${workflow.name}' failed after partial execution. 
` + + `Failed nodes: ${failedNodes}`; + await deps.store + .failWorkflowRun(workflowRun.id, failMsg, { + node_counts: nodeCounts, + failed_nodes: failedNodes, + }) + .catch((dbErr: Error) => { + getLog().error({ err: dbErr, workflowRunId: workflowRun.id }, 'dag_db_partial_fail_failed'); + }); + await logWorkflowError(logDir, workflowRun.id, failMsg).catch((logErr: Error) => { + getLog().error( + { err: logErr, workflowRunId: workflowRun.id }, + 'dag.workflow_error_log_write_failed' + ); + }); + const emitterForFail = getWorkflowEventEmitter(); + emitterForFail.emit({ + type: 'workflow_failed', + runId: workflowRun.id, + workflowName: workflow.name, + error: failMsg, + }); + deps.store + .createWorkflowEvent({ + workflow_run_id: workflowRun.id, + event_type: 'workflow_failed', + data: { error: failMsg, node_counts: nodeCounts, failed_nodes: failedNodes }, + }) + .catch((err: Error) => { + getLog().error( + { err, workflowRunId: workflowRun.id, eventType: 'workflow_failed' }, + 'workflow_event_persist_failed' + ); + }); + emitterForFail.unregisterRun(workflowRun.id); + await safeSendMessage(platform, conversationId, `\u274c ${failMsg}`, { + workflowId: workflowRun.id, + }); + return; } // Check if status was changed externally (e.g. cancelled) before marking complete. 
diff --git a/packages/workflows/src/defaults/bundled-defaults.test.ts b/packages/workflows/src/defaults/bundled-defaults.test.ts index 84826a3352..d0200bad87 100644 --- a/packages/workflows/src/defaults/bundled-defaults.test.ts +++ b/packages/workflows/src/defaults/bundled-defaults.test.ts @@ -1,5 +1,10 @@ import { describe, it, expect } from 'bun:test'; -import { isBinaryBuild, BUNDLED_COMMANDS, BUNDLED_WORKFLOWS } from './bundled-defaults'; +import { + isBinaryBuild, + BUNDLED_COMMANDS, + BUNDLED_WORKFLOWS, + BUNDLED_SCRIPTS, +} from './bundled-defaults'; describe('bundled-defaults', () => { describe('isBinaryBuild', () => { @@ -135,4 +140,13 @@ describe('bundled-defaults', () => { } }); }); + + describe('BUNDLED_SCRIPTS', () => { + it('should include the detect-project helper used by default workflows', () => { + expect(BUNDLED_SCRIPTS).toHaveProperty('detect-project'); + expect(BUNDLED_SCRIPTS['detect-project'].runtime).toBe('bun'); + expect(BUNDLED_SCRIPTS['detect-project'].extension).toBe('.ts'); + expect(BUNDLED_SCRIPTS['detect-project'].content).toContain('PROJECT_TYPE='); + }); + }); }); diff --git a/packages/workflows/src/defaults/bundled-defaults.ts b/packages/workflows/src/defaults/bundled-defaults.ts index 3e3fdec5df..f815900b99 100644 --- a/packages/workflows/src/defaults/bundled-defaults.ts +++ b/packages/workflows/src/defaults/bundled-defaults.ts @@ -37,6 +37,14 @@ import archonValidatePrE2eFeatureCmd from '../../../../.archon/commands/defaults import archonValidatePrE2eMainCmd from '../../../../.archon/commands/defaults/archon-validate-pr-e2e-main.md' with { type: 'text' }; import archonValidatePrReportCmd from '../../../../.archon/commands/defaults/archon-validate-pr-report.md' with { type: 'text' }; +// ============================================================================= +// Default Scripts +// ============================================================================= + +// @ts-expect-error Bun text import of a TypeScript source asset 
is valid at runtime, +// but TypeScript rejects the .ts extension in import-attribute mode. +import detectProjectScript from '../../../../.archon/scripts/detect-project.ts' with { type: 'text' }; + // ============================================================================= // Default Workflows (15 total) // ============================================================================= @@ -89,6 +97,23 @@ export const BUNDLED_COMMANDS: Record = { 'archon-validate-pr-report': archonValidatePrReportCmd, }; +export interface BundledScriptAsset { + content: string; + runtime: 'bun' | 'uv'; + extension: '.ts' | '.js' | '.py'; +} + +/** + * Bundled default scripts - filename (without extension) -> runtime + content + */ +export const BUNDLED_SCRIPTS: Record = { + 'detect-project': { + content: detectProjectScript, + runtime: 'bun', + extension: '.ts', + }, +}; + /** * Bundled default workflows - filename (without extension) -> content */ diff --git a/packages/workflows/src/loader.test.ts b/packages/workflows/src/loader.test.ts index 74b86a5977..15f2a2f5df 100644 --- a/packages/workflows/src/loader.test.ts +++ b/packages/workflows/src/loader.test.ts @@ -1622,6 +1622,8 @@ nodes: max_iterations: 10 fresh_context: true until_bash: "test -f done.txt" + progress_file: "$ARTIFACTS_DIR/progress.txt" + stuck_after_no_progress_iterations: 3 idle_timeout: 300000 ` ); @@ -1641,6 +1643,8 @@ nodes: expect(wf.nodes[0].loop.max_iterations).toBe(10); expect(wf.nodes[0].loop.fresh_context).toBe(true); expect(wf.nodes[0].loop.until_bash).toBe('test -f done.txt'); + expect(wf.nodes[0].loop.progress_file).toBe('$ARTIFACTS_DIR/progress.txt'); + expect(wf.nodes[0].loop.stuck_after_no_progress_iterations).toBe(3); expect(wf.nodes[0].idle_timeout).toBe(300000); } }); @@ -1671,6 +1675,8 @@ nodes: if (isLoopNode(wf.nodes[0])) { expect(wf.nodes[0].loop.fresh_context).toBe(false); expect(wf.nodes[0].loop.until_bash).toBeUndefined(); + expect(wf.nodes[0].loop.progress_file).toBeUndefined(); + 
expect(wf.nodes[0].loop.stuck_after_no_progress_iterations).toBeUndefined(); } }); diff --git a/packages/workflows/src/schemas/loop.ts b/packages/workflows/src/schemas/loop.ts index a21a6b2192..62a18675d0 100644 --- a/packages/workflows/src/schemas/loop.ts +++ b/packages/workflows/src/schemas/loop.ts @@ -15,6 +15,10 @@ export const loopNodeConfigSchema = z fresh_context: z.boolean().default(false), /** Optional bash script run after each iteration; exit 0 = complete. */ until_bash: z.string().optional(), + /** Optional progress file used to detect durable task completion across iterations. */ + progress_file: z.string().optional(), + /** Fail early when this many consecutive iterations make no durable progress. */ + stuck_after_no_progress_iterations: z.number().int().min(2).optional(), /** When true, pause between iterations for user input via /workflow approve. */ interactive: z.boolean().optional(), /** Message shown to user when paused (required when interactive is true). */ diff --git a/packages/workflows/src/script-discovery.test.ts b/packages/workflows/src/script-discovery.test.ts index 18bc9c58ef..49fdef1b5f 100644 --- a/packages/workflows/src/script-discovery.test.ts +++ b/packages/workflows/src/script-discovery.test.ts @@ -18,7 +18,11 @@ const mockLogger = { debug: mock(() => undefined), trace: mock(() => undefined), }; -mock.module('@archon/paths', () => ({ createLogger: mock(() => mockLogger) })); +mock.module('@archon/paths', () => ({ + createLogger: mock(() => mockLogger), + getDefaultScriptsPath: () => '/app/.archon/scripts', + BUNDLED_IS_BINARY: false, +})); import { discoverScripts, getDefaultScripts } from './script-discovery'; @@ -160,10 +164,15 @@ describe('discoverScripts', () => { }); describe('getDefaultScripts', () => { - test('returns an empty Map', () => { + test('returns the bundled detect-project script', () => { const defaults = getDefaultScripts(); expect(defaults).toBeInstanceOf(Map); - expect(defaults.size).toBe(0); + 
expect(defaults.size).toBeGreaterThan(0); + const detectProject = defaults.get('detect-project'); + expect(detectProject).toBeDefined(); + expect(detectProject?.runtime).toBe('bun'); + expect(detectProject?.path).toBe('[bundled:detect-project]'); + expect(detectProject?.content).toContain('function detectProject()'); }); test('returns a new Map each call', () => { diff --git a/packages/workflows/src/script-discovery.ts b/packages/workflows/src/script-discovery.ts index ce74b1a3bb..69033f335d 100644 --- a/packages/workflows/src/script-discovery.ts +++ b/packages/workflows/src/script-discovery.ts @@ -5,8 +5,9 @@ * from the file extension: .ts/.js -> bun, .py -> uv. */ import { readdir, stat } from 'fs/promises'; -import { join, basename, extname } from 'path'; -import { createLogger } from '@archon/paths'; +import { resolve, join, basename, extname } from 'path'; +import { createLogger, getDefaultScriptsPath } from '@archon/paths'; +import { BUNDLED_SCRIPTS, isBinaryBuild } from './defaults/bundled-defaults'; /** Normalize path separators to forward slashes for cross-platform consistency */ function normalizeSep(p: string): string { @@ -30,6 +31,15 @@ export interface ScriptDefinition { runtime: ScriptRuntime; } +/** A bundled script that exists only as embedded content. */ +export interface BundledScriptDefinition extends ScriptDefinition { + content: string; + bundled: true; +} + +/** A resolved script can come from the repo or from bundled defaults. 
*/ +export type ResolvedScriptDefinition = ScriptDefinition | BundledScriptDefinition; + /** Supported file extensions and their runtimes */ const EXTENSION_RUNTIME_MAP: Record = { '.ts': 'bun', @@ -120,9 +130,52 @@ export async function discoverScripts(dir: string): Promise { + const defaults = new Map(); + + for (const [name, asset] of Object.entries(BUNDLED_SCRIPTS)) { + defaults.set(name, { + name, + path: `[bundled:${name}]`, + runtime: asset.runtime, + content: asset.content, + bundled: true, + }); + } + + return defaults; +} + +/** + * Discover default scripts shipped with Archon. + * + * In binary mode, scripts come from the embedded bundle. In dev mode, scripts are + * read from the app's own `.archon/scripts/` directory so default workflows can + * reference default scripts without each target repo copying them in. */ -export function getDefaultScripts(): Map { - return new Map(); +export async function discoverDefaultScripts(): Promise> { + if (isBinaryBuild()) { + return getDefaultScripts(); + } + + return discoverScripts(getDefaultScriptsPath()); +} + +/** + * Resolve a named script using repo-local scripts first, then Archon defaults. + */ +export async function resolveNamedScript( + cwd: string, + scriptName: string +): Promise { + const repoScripts = await discoverScripts(resolve(cwd, '.archon', 'scripts')); + const repoScript = repoScripts.get(scriptName); + if (repoScript) { + return repoScript; + } + + const defaultScripts = await discoverDefaultScripts(); + return defaultScripts.get(scriptName) ?? 
null; } diff --git a/packages/workflows/src/store.ts b/packages/workflows/src/store.ts index 9d9a85e275..c3cb045af9 100644 --- a/packages/workflows/src/store.ts +++ b/packages/workflows/src/store.ts @@ -54,7 +54,7 @@ export interface IWorkflowStore { updateWorkflowActivity(id: string): Promise; getWorkflowRunStatus(id: string): Promise; completeWorkflowRun(id: string, metadata?: Record): Promise; - failWorkflowRun(id: string, error: string): Promise; + failWorkflowRun(id: string, error: string, metadata?: Record): Promise; pauseWorkflowRun(id: string, approvalContext: ApprovalContext): Promise; cancelWorkflowRun(id: string): Promise; diff --git a/packages/workflows/src/validator.test.ts b/packages/workflows/src/validator.test.ts index 9a8c8979ba..cb3656507c 100644 --- a/packages/workflows/src/validator.test.ts +++ b/packages/workflows/src/validator.test.ts @@ -329,6 +329,15 @@ describe('validateWorkflowResources — script nodes', () => { expect(scriptErrors).toHaveLength(0); }); + test('no error when named bun script exists in Archon defaults', async () => { + const workflow = makeWorkflow('test', [ + { id: 'step1', script: 'detect-project', runtime: 'bun' } as unknown as DagNode, + ]); + const issues = await validateWorkflowResources(workflow, tmpDir); + const scriptErrors = issues.filter(i => i.level === 'error' && i.field === 'script'); + expect(scriptErrors).toHaveLength(0); + }); + test('no error for inline bun script (no file lookup needed)', async () => { const workflow = makeWorkflow('test', [ { diff --git a/packages/workflows/src/validator.ts b/packages/workflows/src/validator.ts index be0011763c..9a9eef8d98 100644 --- a/packages/workflows/src/validator.ts +++ b/packages/workflows/src/validator.ts @@ -31,7 +31,7 @@ function getLog(): ReturnType { import { isScriptNode } from './schemas'; import type { WorkflowDefinition, DagNode } from './schemas'; import type { ScriptRuntime } from './script-discovery'; -import { discoverScripts } from 
'./script-discovery'; +import { discoverDefaultScripts, discoverScripts, resolveNamedScript } from './script-discovery'; import { isInlineScript } from './executor-shared'; // ============================================================================= @@ -410,23 +410,26 @@ export async function validateWorkflowResources( if (isScriptNode(node)) { const script = node.script; - // Named script: validate file exists in .archon/scripts/ + // Named script: validate file exists in repo scripts or Archon defaults if (!isInlineScript(script)) { - const scriptsDir = resolve(cwd, '.archon', 'scripts'); - const extensions = node.runtime === 'uv' ? ['.py'] : ['.ts', '.js']; - const existsResults = await Promise.all( - extensions.map(ext => fileExists(join(scriptsDir, `${script}${ext}`))) - ); - const scriptExists = existsResults.some(Boolean); - - if (!scriptExists) { + const resolvedScript = await resolveNamedScript(cwd, script); + + if (!resolvedScript) { issues.push({ level: 'error', nodeId: node.id, field: 'script', - message: `Named script '${script}' not found in .archon/scripts/`, + message: `Named script '${script}' not found in .archon/scripts/ or Archon defaults`, hint: `Create .archon/scripts/${script}.${node.runtime === 'uv' ? 'py' : 'ts'} with your script code`, }); + } else if (resolvedScript.runtime !== node.runtime) { + issues.push({ + level: 'error', + nodeId: node.id, + field: 'runtime', + message: `Script '${script}' resolves to runtime '${resolvedScript.runtime}', but node requests '${node.runtime}'`, + hint: `Update the node runtime or use a ${node.runtime === 'uv' ? 
'.py' : '.ts'} implementation of '${script}'`, + }); } } @@ -548,13 +551,22 @@ export interface ScriptValidationResult { export async function discoverAvailableScripts( cwd: string ): Promise<{ name: string; path: string; runtime: ScriptRuntime }[]> { - const scriptsDir = resolve(cwd, '.archon', 'scripts'); try { - const scripts = await discoverScripts(scriptsDir); - return [...scripts.values()].map(s => ({ name: s.name, path: s.path, runtime: s.runtime })); + const scripts = new Map(); + + for (const script of (await discoverDefaultScripts()).values()) { + scripts.set(script.name, { name: script.name, path: script.path, runtime: script.runtime }); + } + + const repoScripts = await discoverScripts(resolve(cwd, '.archon', 'scripts')); + for (const script of repoScripts.values()) { + scripts.set(script.name, { name: script.name, path: script.path, runtime: script.runtime }); + } + + return [...scripts.values()]; } catch (error) { const err = error as Error; - getLog().warn({ err, scriptsDir }, 'script_discovery_failed'); + getLog().warn({ err, cwd }, 'script_discovery_failed'); return []; } } @@ -567,40 +579,26 @@ export async function validateScript( cwd: string ): Promise { const issues: ValidationIssue[] = []; - const scriptsDir = resolve(cwd, '.archon', 'scripts'); - - // Find the script file (any supported extension) - const allExtensions = ['.ts', '.js', '.py']; - let foundPath: string | null = null; - let detectedRuntime: ScriptRuntime | null = null; - - for (const ext of allExtensions) { - const candidate = join(scriptsDir, `${scriptName}${ext}`); - if (await fileExists(candidate)) { - foundPath = candidate; - detectedRuntime = ext === '.py' ? 
'uv' : 'bun'; - break; - } - } + const resolvedScript = await resolveNamedScript(cwd, scriptName); - if (!foundPath || !detectedRuntime) { + if (!resolvedScript) { issues.push({ level: 'error', field: 'file', - message: `Script '${scriptName}' not found in .archon/scripts/`, + message: `Script '${scriptName}' not found in .archon/scripts/ or Archon defaults`, hint: `Create .archon/scripts/${scriptName}.ts (bun) or .archon/scripts/${scriptName}.py (uv)`, }); return { scriptName, valid: false, issues }; } // Check runtime availability - const runtimeAvailable = await checkRuntimeAvailable(detectedRuntime); + const runtimeAvailable = await checkRuntimeAvailable(resolvedScript.runtime); if (!runtimeAvailable) { issues.push({ level: 'warning', field: 'runtime', - message: `Runtime '${detectedRuntime}' is not available on PATH`, - hint: RUNTIME_INSTALL_HINTS[detectedRuntime], + message: `Runtime '${resolvedScript.runtime}' is not available on PATH`, + hint: RUNTIME_INSTALL_HINTS[resolvedScript.runtime], }); } From 58eacabc9e9aca121c5ad2c6f2555a5d539e0e25 Mon Sep 17 00:00:00 2001 From: matzls Date: Mon, 13 Apr 2026 11:35:49 +0200 Subject: [PATCH 15/21] docs(skill): expand Archon workflow monitoring guidance - add routing guidance for monitoring, interactive relays, and log debugging in the Archon skill - add focused references for workflow monitoring cadence, paused-run relay behavior, and JSONL-first debugging - keep Archon follow-up handling grounded in the run status and per-run logs Co-authored-by: Codex --- .agents/skills/archon/SKILL.md | 50 +++++++++- .../references/interactive-workflows.md | 68 +++++++++++++ .../skills/archon/references/log-debugging.md | 80 +++++++++++++++ .../skills/archon/references/monitoring.md | 97 +++++++++++++++++++ 4 files changed, 291 insertions(+), 4 deletions(-) create mode 100644 .agents/skills/archon/references/interactive-workflows.md create mode 100644 .agents/skills/archon/references/log-debugging.md create mode 100644 
.agents/skills/archon/references/monitoring.md diff --git a/.agents/skills/archon/SKILL.md b/.agents/skills/archon/SKILL.md index 016002ad51..1c76f1b086 100644 --- a/.agents/skills/archon/SKILL.md +++ b/.agents/skills/archon/SKILL.md @@ -28,6 +28,17 @@ archon workflow list --json If `archon` is unavailable, report that the Archon CLI is not installed or not on `PATH`. Do not perform setup unless the user explicitly asks. +## Routing + +Choose the smallest surface that matches the user's need: + +| Intent | Action | +| --- | --- | +| pick or run a Codex-safe workflow | continue in this file | +| monitor an active workflow | read `references/monitoring.md` | +| debug a confusing, failed, or stalled run | read `references/log-debugging.md` | +| relay an interactive workflow cleanly | read `references/interactive-workflows.md` | + ## Codex Naming Convention Prefer Archon workflows ending in `-codex` when they exist. That suffix indicates @@ -84,18 +95,49 @@ Rules: `piv/codex-auth-refactor`. 3. For read-only questions or exploration, `--no-worktree` is acceptable. 4. Prefer one Archon workflow per command rather than combining unrelated tasks. +5. Treat Archon workflows as long-running jobs. Keep the run ID, working path, + and current status available for follow-up checks instead of assuming the + launch command alone is the full observability surface. ## Monitoring -Use: +Start with: ```bash archon workflow status --json ``` -When an interactive workflow pauses, relay the workflow's question clearly and -pass the user's answer back through the Archon approval or reject command rather -than trying to continue locally. 
+Default live-monitoring cadence: + +- check once shortly after launch to confirm the run exists +- if the user is actively waiting, re-check about every 30 seconds + +Rationale: + +- the web client already has a 15 second fallback poll, but CLI monitoring is + heavier because each check is a full Archon CLI invocation with database + access + +State handling: + +- `running`: keep monitoring, surface only meaningful progress +- `paused`: read the latest workflow output and relay it transparently +- `completed`, `failed`, or `cancelled`: report the terminal result and stop polling +- `running` with unchanged `last_activity_at` plus no new JSONL activity for 5 + minutes: report a possible stall, not a confirmed failure + +When an interactive workflow pauses, do not summarize the workflow's question. +Read the latest output, relay it to the user verbatim, and pass the user's answer back through the Archon +approval or reject command rather than trying to continue locally. + +If the user explicitly wants unattended follow-up and the current Codex surface +supports thread heartbeat automations, attach one to the current thread and have +it report only meaningful changes: approval gates, terminal state changes, or a +possible stall. If automation is unavailable on the current surface, continue +with in-session polling instead. + +Read `references/monitoring.md` for the detailed monitoring contract and +`references/interactive-workflows.md` for the transparent-relay loop. ## Repo Guidance diff --git a/.agents/skills/archon/references/interactive-workflows.md b/.agents/skills/archon/references/interactive-workflows.md new file mode 100644 index 0000000000..da5cbda8e6 --- /dev/null +++ b/.agents/skills/archon/references/interactive-workflows.md @@ -0,0 +1,68 @@ +# Interactive Workflows For Codex + +Use this guide when the workflow is interactive and the user is effectively +talking to the workflow through Codex.
+ +Interactive workflows in this repo include: + +- `archon-piv-loop-codex` +- `archon-interactive-prd` + +## Core Rule + +Be a transparent relay. + +- show the workflow's latest question or summary directly +- do not rewrite or "improve" the workflow's wording +- pass the user's answer back as directly as possible + +## Basic Loop + +1. Launch the workflow and capture the run ID. +2. Monitor with `archon workflow status --json`. +3. When the run becomes `paused`, read the latest workflow output. +4. Relay that output directly to the user. +5. When the user answers, resume with `archon workflow approve` or + `archon workflow reject`. +6. Repeat until the run reaches a terminal state. + +## Commands + +```bash +archon workflow status --json +archon workflow approve <run-id> "<response>" +archon workflow reject <run-id> "<reason>" +``` + +## When Paused + +When the workflow is paused: + +- read the latest assistant output from the run log +- show it directly +- wait for the user +- pass their response through verbatim unless a safety or formatting issue + requires intervention + +Do not replace the workflow's structured questions with your own summary. + +## When Still Running + +Long research or implementation nodes can stay `running` for a while without +needing user input. + +- keep checking status on the monitoring cadence +- do not treat "still running" by itself as a problem +- if activity stops for the stall window, flag a possible stall and say what + evidence stopped moving + +## Where To Read The Latest Output + +Use the per-run JSONL when status alone is not enough: + +```bash +find "${ARCHON_HOME:-$HOME/.archon}/workspaces" -name "<run-id>.jsonl" 2>/dev/null +tail -n 40 "<log-file>" +``` + +Read `log-debugging.md` when you need the full trace.
diff --git a/.agents/skills/archon/references/log-debugging.md b/.agents/skills/archon/references/log-debugging.md new file mode 100644 index 0000000000..ae9f62269a --- /dev/null +++ b/.agents/skills/archon/references/log-debugging.md @@ -0,0 +1,80 @@ +# Archon Log Debugging For Codex + +Use this guide when the main job is understanding what Archon just did, why a +run paused, why it failed, or whether it is stalled. + +## Three Evidence Layers + +### 1. Status and run details + +Use first when you need the current high-level state. + +- `archon workflow status --json` +- `archon workflow status --verbose` +- web or API run details when available + +This is the fastest way to confirm: + +- run ID +- current status +- `last_activity_at` +- working path +- approval context + +### 2. Per-run workflow JSONL + +Use when status is ambiguous or you need the actual workflow trace. + +Default location: + +```text +${ARCHON_HOME:-$HOME/.archon}/workspaces/<owner>/<repo>/logs/<run-id>.jsonl +``` + +Best for: + +- assistant output +- tool calls +- node boundaries +- workflow pause or failure context + +Representative commands: + +```bash +find "${ARCHON_HOME:-$HOME/.archon}/workspaces" -name "<run-id>.jsonl" 2>/dev/null +tail -n 40 "$LOG_FILE" +rg '"type":"workflow_error"|"type":"node_error"' "$LOG_FILE" +rg '"type":"assistant"' "$LOG_FILE" | tail -n 5 +``` + +### 3. Runtime process logs + +Use only when the issue looks like Archon runtime behavior rather than workflow +logic. + +Examples: + +```bash +LOG_LEVEL=debug archon workflow status --json +LOG_LEVEL=debug archon workflow run <workflow-name> "<message>" +``` + +Best for: + +- database errors +- config loading failures +- adapter or API problems +- unexpected process behavior + +## Triage Order + +1. `archon workflow status --json` +2. `archon workflow status --verbose` or the web/API run details +3. per-run JSONL +4. runtime logs with `LOG_LEVEL=debug` + +## Important Note + +Status and UI/API events are intentionally lean.
They are good for current +state, but not a replacement for the JSONL trace when you need the workflow's +actual assistant or tool history. diff --git a/.agents/skills/archon/references/monitoring.md b/.agents/skills/archon/references/monitoring.md new file mode 100644 index 0000000000..a92c70afbf --- /dev/null +++ b/.agents/skills/archon/references/monitoring.md @@ -0,0 +1,97 @@ +# Monitoring Archon Workflows From Codex + +Use this guide when the user wants ongoing updates about a live Archon workflow. + +## First Check + +Start with: + +```bash +archon workflow status --json +``` + +This command currently includes `last_activity_at`, which makes it usable as the +first stall-detection surface. + +## Default Cadence + +Use this cadence during active live monitoring: + +- first check shortly after launch +- then about every 30 seconds while the user is actively waiting + +Why not every 15 seconds? + +- the web app already has a 15 second client-side fallback poll +- CLI monitoring is heavier because each check is a full Archon CLI invocation + with database access + +If the user is not actively waiting, reduce noise and check less often. + +## Evidence Order + +1. `archon workflow status --json` +2. web or API run details if available +3. per-run JSONL when status is ambiguous, paused, failed, or possibly stalled +4. runtime logs only when the problem looks like Archon itself + +## Progress States + +### Healthy running + +Report only meaningful changes such as: + +- current workflow changed +- status changed +- a new approval gate appeared +- artifacts appeared +- the run clearly moved to a new phase or node family + +Do not spam the user with identical "still running" updates. + +### Paused + +Treat `paused` as action-required. 
+ +- open the latest workflow output +- relay it directly +- wait for the user response + +### Possible stall + +Default heuristic: + +- run status is still `running` +- `last_activity_at` has not advanced for at least 5 minutes +- the JSONL tail shows no new assistant, tool, or node activity in the same + 5 minute window + +Report this as a possible stall, not a confirmed failure. + +### Terminal + +When the run becomes `completed`, `failed`, or `cancelled`: + +- report the terminal status +- include the most relevant evidence +- stop polling + +## Optional Heartbeat Automation + +If the user explicitly wants unattended follow-up and the current Codex surface +supports thread heartbeat automations, prefer a thread-attached heartbeat that: + +- watches a specific run ID +- posts only on meaningful change +- flags a possible stall using the heuristic above +- stops once the run reaches a terminal state + +Suggested reporting triggers: + +- status transition +- approval gate reached +- terminal result +- possible stall + +If heartbeat automation is unavailable on the current Codex surface, keep the +monitoring in-session instead of pretending the automation exists. From 96dc935e86d172582533bfeb579b15c0eeb40f35 Mon Sep 17 00:00:00 2001 From: matzls Date: Mon, 13 Apr 2026 16:12:33 +0200 Subject: [PATCH 16/21] docs: add workflow node display names PRD Add the generated PRD for workflow node display names to the Archon repo under docs/prd. The document keeps one PRD with a small execution-graph-only phase 1 and defers builder, non-graph execution surfaces, inference, and historical-fidelity questions to phase 2. 
Co-authored-by: Codex --- docs/prd/workflow-node-display-names.prd.md | 364 ++++++++++++++++++++ 1 file changed, 364 insertions(+) create mode 100644 docs/prd/workflow-node-display-names.prd.md diff --git a/docs/prd/workflow-node-display-names.prd.md b/docs/prd/workflow-node-display-names.prd.md new file mode 100644 index 0000000000..eaa792270b --- /dev/null +++ b/docs/prd/workflow-node-display-names.prd.md @@ -0,0 +1,364 @@ +--- +title: Workflow Node Display Names +status: draft +created: 2026-04-13 +updated: 2026-04-13 +--- + +# PRD: Workflow Node Display Names + +## 1. Problem Statement + +**Who has this problem:** Mase as the primary operator of Archon workflows, including +when running several workflows in parallel and needing to understand them quickly. +Secondary user: a technically capable observer who did not author the workflow — in +practice, future-Mase reviewing a completed run after some delay, or another observer +reviewing progress without deep knowledge of the workflow internals. They understand the +high-level goal but cannot be expected to decode raw YAML internals from the graph. + +**What problem they face:** When opening the workflow execution graph today, node labels +are too generic or internal. For non-command nodes — loop, script, approval, bash, +prompt — the label shown is either the raw `node.id` (the machine identifier from the +YAML) or a hardcoded type string like "Prompt" or "Shell". Neither tells the observer +what the node *does* in this workflow. The pain is immediate: you can see nodes +executing but cannot tell what is happening or what each step's purpose is. + +**Why it cannot be solved today by naming YAML better:** The display problem is not +purely about author discipline. Even with a descriptive `id`, the execution surface +does not reliably surface useful human-readable names for non-command nodes. 
Command +nodes get reasonable labels (the command name), but every other node type falls back +to the raw id or a hardcoded generic string. There is a structural gap: no schema +field exists to carry a human intent label distinct from the machine id, and no +inference logic exists to derive one from node content. + +**Why now:** Archon workflows are in active daily use and this surfaced immediately as +a usability problem. Quick comprehension across multiple runs without reverse-engineering +node ids matters from the first day of use. + +--- + +## 2. Evidence + +- **Verified in code:** `dag-executor.ts` emits `nodeName: node.command ?? node.id` + for command/prompt nodes and `node.id` for all other types (bash, script, loop, + approval, cancel). Raw node id is the fallback for the majority of node types. +- **Verified in code:** `WorkflowCanvas.tsx` `resolveNodeLabel()` (line 25–29) only + handles `'command'`, `'prompt'`, `'bash'`; returns hardcoded `'Prompt'` or `'Shell'` + for non-command types. Loop, script, approval, cancel are not handled. +- **Verified in code:** `ExecutionDagNode.tsx` (line 59) renders `data.label` directly. + For execution nodes, label is the same `data.label` field as the builder. Loop nodes + only get a type badge `'LOOP'` with no descriptive label. +- **Verified in schema:** `packages/workflows/src/schemas/dag-node.ts` — no + `display_name` field exists in any of the 7 node type schemas (CommandNode, + PromptNode, BashNode, ScriptNode, LoopNode, ApprovalNode, CancelNode). +- **Verified in DB:** `migrations/012_workflow_events.sql` — `step_name` column stores + `node.id`; no separate display label column in the events table. +- **Verified in events:** `event-emitter.ts` `NodeStartedEvent`, `NodeCompletedEvent`, + `NodeFailedEvent`, `NodeSkippedEvent` all carry `nodeName` field; its value is + populated by `dag-executor.ts` using the logic above. + +--- + +## 3. 
Proposed Solution + +Add an optional `display_name` field to the DAG node schema so workflow authors can +attach a human-readable label to any node. Wire that field through the execution event +pipeline and through the web UI graph components so both live and historical graph views +show the label. When `display_name` is absent, apply a resolution chain that infers a +meaningful label from available node content (phase 2) rather than falling back to the +raw id. + +This extends existing primitives: the schema, event emitter, and graph components +already have the structural slots needed. No new tables, no new API endpoints, and no +changes to the `step_name` DB contract are required for the MVP. + +--- + +## 4. Key Hypothesis + +If workflow authors can optionally provide a `display_name` on any node, and the graph +view shows that name as the primary label, then operators and observers will be able to +understand what each node does without inspecting raw YAML or decoding internal ids. + +The hypothesis is testable: after the change, open a workflow graph and ask whether +each node's label explains its purpose without additional context. + +--- + +## 5. What We're NOT Building + +- **Workflow Builder canvas changes.** The builder (`WorkflowBuilderPage`, `WorkflowCanvas.tsx`, `DagNodeComponent.tsx`) is phase 2. Phase 1 must not change builder rendering or editing behavior. +- **Non-graph execution surfaces.** The currently-executing banner, progress list, and log-derived step labels are phase 2. Phase 1 only changes labels on the execution graph node cards. +- **Database / event-contract changes.** `step_name` in `remote_agent_workflow_events` + remains `node.id`. We do not add a `display_name` column to the events table. The + display label is resolved at read/render time from the workflow definition, not stored + in event history. +- **Sophisticated NLP inference.** Stripping boilerplate like "You are ..." 
or + summarizing multi-paragraph prompts is out of scope for v1. Simple truncation only. +- **Retroactive relabeling of old runs.** Historical runs will benefit from inference + fallbacks if the workflow YAML is still available, but there is no backfill job. +- **Per-platform display_name variants.** One label per node; no locale or platform + override concept. +- **Approval / cancel node detailed labels** beyond what the display_name field or + simple inference provides. + +--- + +## 6. Success Metrics + +**Primary (qualitative, operator-assessed):** +- When opening a workflow execution graph, every node has a label that explains its + purpose without requiring the operator to inspect raw YAML or node ids. +- For workflows where `display_name` is set, no raw ids or generic hardcoded strings + appear as primary labels. +- For workflows without `display_name`, the inferred label (phase 2) is more meaningful + than the current fallback. + +**Observable signal (phase 1):** +- 0 nodes in a display_name-annotated workflow show a raw `node.id` as their primary + label in the execution graph. + +**Observable signal (phase 2):** +- For prompt and loop nodes, the inferred label visibly reflects the intent from the + first line of the prompt content (truncated to 80 chars), not "Prompt" or the node id. + +--- + +## 7. Open Questions + +| # | Question | Current Answer | +|---|----------|----------------| +| 1 | What truncation length for inferred labels? | 80 characters; adjust in a follow-on if needed. | +| 2 | Should boilerplate stripping be applied (e.g. "You are ...")? | No. Over-engineering for v1; skip. | +| 3 | How should loop nodes be labeled by inference? | Use the first 80 chars of `node.loop.prompt` (the inner prompt text). | +| 4 | How should script nodes be labeled by inference? | Use the script filename from `node.script` if it references a file, else first non-blank line. | +| 5 | Does `display_name` need to appear in Workflow Builder canvas? 
| Out of scope for this slice; follow-on. | +| 6 | Should `display_name` be stored in DB events for the observer path? | No — resolve at render time from the definition. Keeps DB contract clean. | + +--- + +## 8. Users & Context + +**Primary user:** Mase as Archon workflow operator. Runs workflows daily, sometimes +several in parallel. Needs quick comprehension of what is happening in any graph view, +including mid-run and post-run review after a delay. + +**Secondary user:** Technically capable observer who did not author the workflow. +Understands the high-level goal. Should not need to know raw node ids or internal YAML +structure to read the graph. + +**JTBD:** +> When I run an Archon workflow and open the graph view, I want to clearly understand +> what each node in the graph is doing or what its job is, so I can understand what the +> agent did or is doing — without decoding internal identifiers. + +**Non-users / out of scope for this slice:** +- Non-technical stakeholders who need a narrative summary (not a graph) +- Workflow authors who want to edit display names in the builder canvas (follow-on) + +--- + +## 9. 
Solution Detail + +### MoSCoW Table + +| Priority | Item | Notes | +|----------|------|-------| +| **Must** | `display_name?: string` field in `dagNodeSchema` | Optional; backward-compatible | +| **Must** | Execution graph node cards show `display_name` when present | Scope limited to graph node cards | +| **Must** | Execution graph shows meaningful static fallback labels for loop, script, approval, and cancel nodes | Keep this local to execution graph in phase 1 | +| **Should** | Inference fallback for prompt nodes: first 80 chars of `node.prompt` | Phase 2 | +| **Should** | Inference fallback for loop nodes: first 80 chars of `node.loop.prompt` | Phase 2 | +| **Should** | Inference fallback for bash nodes: first non-comment line of `node.bash` | Phase 2 | +| **Should** | Inference fallback for script nodes: filename or first non-blank line | Phase 2 | +| **Could** | Tooltip showing full prompt/script on hover when label is truncated | Phase 2 or 3 | +| **Won't** | Builder canvas display_name editing | Out of scope this slice | +| **Won't** | DB event contract changes | Out of scope | +| **Won't** | Boilerplate stripping from prompts | Over-engineering for v1 | + +### MVP Definition + +Phase 1 is the minimum viable increment: +1. Add `display_name?: string` to the schema. +2. Regenerate frontend API types so the web app receives the new field. +3. Resolve labels in the execution graph from the workflow definition, using `display_name` when present. +4. Keep simple static fallbacks for node kinds without `display_name`: command name, `Shell`, `Prompt`, `Loop`, `Script`, `Approval`, `Cancel`. + +Phase 2 adds builder support, non-graph execution-surface updates, and optional inference so workflows without `display_name` still show more meaningful labels derived from content. + +--- + +## 10. Technical Approach + +All paths verified against the codebase at the time of writing. 
+ +### Schema Extension +**File:** `packages/workflows/src/schemas/dag-node.ts` + +Add `display_name: z.string().optional()` to the shared `dagNodeBaseSchema` (the fields +common to all node types). This automatically makes it available on all 7 node type +schemas without touching each union branch. Because `dagNodeSchema` is a discriminated +union built on per-type schemas, the shared base approach is the lowest-change path. + +Currently the schema has no `display_name` field. The `id` field is the stable +machine identifier and must not be changed or overloaded. + +### Web UI — Execution Graph Label +**Primary files:** `packages/web/src/components/workflows/WorkflowDagViewer.tsx`, `packages/web/src/components/workflows/ExecutionDagNode.tsx` + +Phase 1 should be execution-graph-only. The execution graph already receives the full workflow definition via `dagNodes`, so it can resolve a display label directly from the definition without changing workflow events, DB contracts, or non-graph execution surfaces. + +Recommended phase-1 behavior: +- `display_name` wins when present +- otherwise use a simple static per-type fallback +- do not attempt prompt/script inference yet +- do not change builder rendering paths in this phase + +Recommended execution-only resolver shape: +```typescript +function resolveExecutionNodeLabel(dn: DagNode): string { + if (dn.display_name) return dn.display_name; + if ('command' in dn && dn.command) return dn.command; + if ('bash' in dn && dn.bash) return 'Shell'; + if ('loop' in dn && dn.loop) return 'Loop'; + if ('script' in dn && dn.script) return 'Script'; + if ('approval' in dn && dn.approval) return 'Approval'; + if ('cancel' in dn && dn.cancel) return 'Cancel'; + return 'Prompt'; +} +``` + +`WorkflowDagViewer.tsx` can apply this resolver when building the execution node data from `dagNodes`. 
+ +`ExecutionDagNode.tsx` should be updated only as needed so the execution graph can show correct badges/colors for any newly distinguished node kinds used in phase 1. + +### Builder Isolation +`packages/web/src/lib/dag-layout.ts` is shared by execution and builder loading. Because phase 1 must stay execution-only, avoid using a shared resolver there for this first slice. Keep builder rendering behavior unchanged until phase 2. + +### DagNodeData Interface +**File:** `packages/web/src/components/workflows/DagNodeComponent.tsx` + +If phase 1 keeps label resolution inside the execution graph path, builder-facing `DagNodeData` can stay unchanged. Only add new shared node-type values there in phase 1 if the execution implementation truly requires them. Prefer keeping this untouched until phase 2 if possible. + +### Type Regeneration +**File:** `packages/web/src/lib/api.generated.d.ts` + +After adding `display_name` to the schema and running the server, run: +```bash +bun --filter @archon/web generate:types +``` +This regenerates `api.generated.d.ts` from the OpenAPI spec so the web package sees +the new field via `DagNode` from `@/lib/api`. + +### Phase 2 — Inference Helpers +**New utility function** (suggest placing in `packages/workflows/src/utils/` or +inline in `dag-executor.ts`): + +```typescript +function inferNodeLabel(node: DagNode, maxLen = 80): string { + if (node.display_name) return node.display_name; + if (node.command) return node.command; + if (node.prompt) return node.prompt.slice(0, maxLen).trimEnd(); + if (node.loop?.prompt) return node.loop.prompt.slice(0, maxLen).trimEnd(); + if (node.bash) return firstNonCommentLine(node.bash) ?? 'Shell'; + if (node.script) return firstNonBlankLine(node.script) ?? 'Script'; + if (node.approval) return node.approval.message?.slice(0, maxLen) ?? 'Approval'; + return node.id; +} +``` + +Apply in both `dag-executor.ts` (events) and the web UI label-building path. 
+ +### No DB or API Changes Required (Phase 1) +- `remote_agent_workflow_events.step_name` remains `node.id` — no migration needed in phase 1. +- No new API endpoints needed; `display_name` rides through the existing workflow definition returned by `GET /api/workflows/:name`. +- The existing `GET /api/workflows/:name` route already returns the full workflow definition including node fields, so `display_name` will be available in the response automatically after schema extension. +- Non-graph execution surfaces may still show raw ids after phase 1 because they are driven from workflow events, not from graph-definition label resolution. That is a deliberate phase-1 tradeoff. + +--- + +## 11. Implementation Phases + +### Phase 1 — Explicit display_name (MVP) + +| # | Task | File(s) | Notes | +|---|------|---------|-------| +| 1.1 | Add `display_name?: string` to dagNodeBaseSchema | `packages/workflows/src/schemas/dag-node.ts` | Shared base; one change covers all types | +| 1.2 | Regenerate frontend API types | `packages/web/src/lib/api.generated.d.ts` | `bun --filter @archon/web generate:types` | +| 1.3 | Add execution-only label resolver using workflow definition | `packages/web/src/components/workflows/WorkflowDagViewer.tsx` | `display_name` first, then simple static fallback | +| 1.4 | Update execution node badges/colors only if needed for newly distinguished kinds | `packages/web/src/components/workflows/ExecutionDagNode.tsx` | Keep changes local to execution graph | +| 1.5 | Run `bun run validate` | All packages | type-check, lint, format, tests | + +**Parallel opportunities in Phase 1:** +- Schema/type regeneration can proceed ahead of the execution-graph UI update, but the slice is small enough that sequential implementation is likely cleaner. 
+ +### Phase 2 — Inference Fallbacks + +| # | Task | File(s) | Notes | +|---|------|---------|-------| +| 2.1 | Add builder compatibility for `display_name` and expanded node kinds | `packages/web/src/lib/dag-layout.ts`, `packages/web/src/components/workflows/DagNodeComponent.tsx`, builder surfaces | Shared builder/render path | +| 2.2 | Update non-graph execution surfaces to show display labels instead of raw ids | `packages/web/src/components/workflows/WorkflowExecution.tsx`, `DagNodeProgress.tsx`, `WorkflowLogs.tsx` | Currently executing banner, progress list, log labels | +| 2.3 | Decide whether to emit/persist display labels in events for better historical fidelity | executor/event/SSE/read models as needed | Optional, depends on how much post-hoc accuracy matters | +| 2.4 | Add simple inference fallback | shared helper + graph/render paths | 80-char truncation; no NLP | +| 2.5 | Optional: add truncation tooltip in ExecutionDagNode | `packages/web/src/components/workflows/ExecutionDagNode.tsx` | Show full text on hover | +| 2.6 | Run `bun run validate` | All packages | | + +--- + +## 12. 
Decisions Log + +| Decision | Rationale | +|----------|-----------| +| `display_name` is optional, not required | Backward-compatible; existing workflows continue to work unchanged | +| `step_name` in DB events stays as `node.id` | Preserves machine-stable identity for event correlation; display is a UI concern | +| Display label resolved at render time, not stored in events | Keeps DB contract clean; label can be updated by editing the workflow YAML without migrating historical data | +| Phase 1 skips inference | Reduces scope; explicit labeling is the highest-value unblocked step | +| Phase 1 is execution-graph-only | Keeps blast radius small and avoids shared builder/event paths | +| Builder canvas excluded from phase 1 | Separate creation surface; move to phase 2 with shared-rendering adjustments | +| Non-graph execution surfaces excluded from phase 1 | They depend on event-driven names and can be addressed coherently in phase 2 | +| Boilerplate stripping excluded | Over-engineering for v1; simple truncation at 80 chars is sufficient | +| `display_name` added to shared base schema, not per-type | One change covers all 7 node types; no per-type duplication | +| No new API endpoints needed | `display_name` rides through the existing workflow definition response | + +--- + +## Validation Notes + +**Validated against codebase at:** `packages/workflows/src/schemas/dag-node.ts`, +`packages/workflows/src/dag-executor.ts`, `packages/web/src/lib/dag-layout.ts`, +`packages/web/src/components/workflows/WorkflowDagViewer.tsx`, +`packages/web/src/components/workflows/ExecutionDagNode.tsx`, +`packages/web/src/components/workflows/DagNodeComponent.tsx`, +`packages/web/src/components/workflows/WorkflowCanvas.tsx`, +`packages/web/src/routes/WorkflowExecutionPage.tsx`, +`packages/workflows/src/schemas/loop.ts`, +`migrations/012_workflow_events.sql` + +**Corrections made during validation:** + +1. 
**Critical — wrong file for execution label building.** The PRD originally stated + `WorkflowExecutionPage.tsx` constructs execution node labels. That page only renders + `<WorkflowExecution />` (2 lines). Current execution labels come from shared web DAG + helpers consumed by `WorkflowDagViewer.tsx`, but phase 1 was then narrowed further to + avoid shared builder paths and keep label resolution local to the execution graph. + +2. **`resolveNodeDisplay()` gap confirmed.** The function currently falls through loop, + script, approval, cancel nodes to the `'Prompt'` branch — verified in source. This + is the root cause of the display problem for those node types, but because the helper is shared with builder loading it should be handled in phase 2 unless phase 1 explicitly accepts builder impact. + +3. **`loop.prompt` field name confirmed correct.** `loopNodeConfigSchema` in + `packages/workflows/src/schemas/loop.ts` uses `prompt` as the field name. The + inference reference `node.loop.prompt` in the PRD is accurate. + +4. **`resolveNodeLabel()` in `WorkflowCanvas.tsx` is builder-only.** It is called only + at lines 154 and 266 of `WorkflowCanvas.tsx` (drag-create paths). It is NOT used in + the execution graph — `resolveNodeDisplay()` in `dag-layout.ts` is the execution + path. PRD updated to clarify scope and defer builder changes. + +5. **`dagNodeBaseSchema` name verified correct** at line 113 of `dag-node.ts`. + +6. **`approval.message` field name verified correct** at line 249 of `dag-node.ts`. + +7. **`packages/workflows/src/utils/` directory confirmed to exist** with existing + utilities (variable-substitution, tool-formatter, idle-timeout). Phase 2 inference + helper can be placed here.
From 7cf4e236dba6fd39ed53741c888cd38064c157ad Mon Sep 17 00:00:00 2001 From: matzls Date: Mon, 13 Apr 2026 20:35:32 +0200 Subject: [PATCH 17/21] docs(design): define Codex-first workflow surface strategy Add the fork-level design doc that defines the Codex-first workflow surface, decision rules, and follow-on implementation sequence. Co-authored-by: Codex --- .../codex-first-workflow-surface-strategy.md | 440 ++++++++++++++++++ 1 file changed, 440 insertions(+) create mode 100644 docs/design/codex-first-workflow-surface-strategy.md diff --git a/docs/design/codex-first-workflow-surface-strategy.md b/docs/design/codex-first-workflow-surface-strategy.md new file mode 100644 index 0000000000..c051d3f446 --- /dev/null +++ b/docs/design/codex-first-workflow-surface-strategy.md @@ -0,0 +1,440 @@ +--- +title: Codex-First Workflow Surface Strategy +status: draft +created: 2026-04-13 +updated: 2026-04-13 +--- + +# Design Doc: Codex-First Workflow Surface Strategy + +## 1. Purpose + +Define what this fork should mean by "Codex-first" for local Archon usage without +degrading or muddying the original/cloud/Claude implementation surface. + +This document is not an implementation PRD. It defines: +- the target Codex workflow surface for this fork +- which current workflows are strong enough to keep +- which current workflows should be removed or deferred +- what counts as real Codex parity versus misleading pseudo-parity +- the adaptation rules future Codex workflows must satisfy before they are + shipped as defaults + +This document is the design and policy anchor for follow-on implementation PRDs +and plans. + +## 2. Problem Statement + +This fork now has a meaningful Codex surface, but it is uneven. + +Some Codex workflows are genuinely adapted and useful, especially +`archon-piv-loop-codex`. Others currently imply more maturity than they +actually have, especially the provisional `archon-feature-development-codex` +workflow. 
In addition, some behavior differs between repo-local source usage and +bundled/binary usage because bundled defaults are hardcoded in +`packages/workflows/src/defaults/bundled-defaults.ts`. + +The risk is not only missing functionality. The larger risk is false +confidence: +- a workflow appears Codex-supported but is only a rename or provider patch +- a repo-local workflow exists but is not actually bundled or shipped +- a workflow appears parity-complete while relying on Claude-oriented + assumptions +- documentation overstates what Codex can currently do inside Archon's workflow + runtime + +If this fork is going to be run mainly through Codex, the Codex defaults must +be intentionally curated, clearly documented, and held to a real quality bar. + +## 3. Goals + +### Primary Goals + +- Make this fork clearly and honestly Codex-first for local usage. +- Preserve original/cloud/Claude behavior unless there is a correctness or + shared-runtime parity reason to change it. +- Keep only genuinely Codex-adapted workflows in the default Codex surface. +- Remove or defer thin pseudo-parity workflows. +- Define a repeatable checklist for adapting future workflows to Codex. +- Separate repo-local experiments from truly shipped default assets. + +### Secondary Goals + +- Improve operator clarity for which workflow to use under Codex. +- Reduce bundle drift between source checkout behavior and bundled/binary + behavior. +- Provide a basis for future Codex-specific implementation and workflow-builder + work. + +## 4. Non-Goals + +- Rewriting the Claude/default workflow surface to match Codex. +- Forcing one-to-one migration of Claude-oriented workflow-node features where + Codex support is absent or materially different. +- Claiming parity based on possible SDK analogues that Archon does not yet + expose or validate. +- Implementing all Codex parity improvements in one slice. +- Replacing the current workflow-builder immediately. + +## 5. 
Current Repo-Grounded State + +### Strong Codex Surface + +- `.archon/workflows/defaults/archon-assist-codex.yaml` +- `.archon/workflows/defaults/archon-piv-loop-codex.yaml` +- `.archon/commands/defaults/archon-assist-codex.md` +- `.agents/skills/archon/SKILL.md` +- `packages/core/src/clients/codex.ts` + +### Weak Or Misleading Codex Surface + +- the provisional `archon-feature-development-codex` workflow +- `.archon/workflows/defaults/archon-workflow-builder.yaml` when interpreted as + Codex-safe +- bundled defaults in `packages/workflows/src/defaults/bundled-defaults.ts`, + which currently lag repo-local Codex assets + +### Current Runtime Constraint Surface + +- Codex client behavior is implemented in `packages/core/src/clients/codex.ts` +- Claude client behavior is implemented in `packages/core/src/clients/claude.ts` +- workflow-level validation and provider-specific restrictions are enforced in + `packages/workflows/src/validator.ts` +- workflow dependency support is described in + `packages/workflows/src/deps.ts` +- default bundled asset behavior is defined in + `packages/workflows/src/defaults/bundled-defaults.ts` +- current orchestrator routing remains assist-centric in + `packages/core/src/orchestrator/prompt-builder.ts` + +## 6. Design Principles + +### 6.1 Honest Capability Boundaries + +A workflow is not "Codex-supported" just because it has `provider: codex` or +because a rough SDK analogue may exist. It is Codex-supported only when the +actual Archon workflow surface, validation rules, runtime behavior, and +operator guidance all line up. + +### 6.2 Codex Defaults Must Be Curated + +Codex-specific defaults in this fork should be few, intentional, and high +quality. A thin or misleading workflow is worse than a missing one. 
+ +### 6.3 Preserve Shared Runtime Where Reasonable + +Shared runtime code should stay shared unless: +- correctness requires a change +- parity requires a shared abstraction improvement +- a Codex-specific branch is unavoidable and contained + +### 6.4 No Fake Parity + +If a workflow cannot cleanly support Codex yet, it should remain: +- Claude-only +- repo-local experimental +- deferred for redesign + +It should not be promoted into the default Codex surface prematurely. + +### 6.5 Source And Bundle Must Not Disagree On Shipped Defaults + +A workflow that is intended as a real default must exist consistently in: +- repo-local defaults +- bundled defaults +- discovery tests +- metadata surfaces where relevant + +## 7. Decision Summary + +| Surface | Decision | Status | +| --- | --- | --- | +| `archon-piv-loop-codex` | Keep as the reference-quality Codex workflow | Keep | +| `archon-assist-codex` | Keep as the general Codex assist lane | Keep | +| `archon-feature-development-codex` | Remove from the default surface for now; rebuild later only if it becomes a real Codex-native workflow | Remove / rebuild later | +| `archon-workflow-builder` | Leave shared/original workflow alone for now; do not treat it as Codex-safe | Defer | +| `archon-workflow-builder-codex` | Design as a separate future workflow, not a patch on the current builder | Future work | +| Codex capability crosswalk doc | Create as supporting reference documentation | Planned | +| Bundled-vs-repo default parity rules | Tighten and test | Planned | + +## 8. Workflow-Specific Decisions + +### 8.1 `archon-piv-loop-codex` + +`.archon/workflows/defaults/archon-piv-loop-codex.yaml` is currently the +strongest Codex-native workflow in the repo. 
+ +Why it stays: +- it is meaningfully adapted for Codex behavior rather than just renamed +- it has explicit loop discipline and operator guidance +- it is already treated as part of the Codex surface +- it is useful as the quality benchmark for future Codex workflows + +Design role: +- reference implementation +- quality bar for future Codex workflow adaptation +- baseline operator experience target + +### 8.2 `archon-assist-codex` + +`.archon/workflows/defaults/archon-assist-codex.yaml` remains the default Codex +assist workflow. + +Why it stays: +- it serves a real routing purpose +- it already has a Codex-specific command surface via + `.archon/commands/defaults/archon-assist-codex.md` +- it is useful as the general entry lane for Codex users + +Constraint: +- it should not become the catch-all substitute for every Codex workflow need +- more specialized Codex workflows should not be forced through assist-centric + routing forever + +### 8.3 `archon-feature-development-codex` + +The provisional `archon-feature-development-codex` workflow should be removed +from the default surface in its current form. + +Why it should be removed: +- it is currently too thin to justify first-class default status +- it does not yet show the same level of Codex-specific adaptation as the PIV + loop +- it creates the impression of feature-development parity without earning it +- it is currently repo-local only rather than a real shipped default + +Future path: +- rebuild from scratch later if the fork needs a real Codex-native + feature-development lane +- reintroduce only after prompt quality, operator guidance, runtime fit, and + bundling/testing all meet the Codex default bar + +### 8.4 `archon-workflow-builder` + +`.archon/workflows/defaults/archon-workflow-builder.yaml` should remain +untouched for now and should not be presented as Codex-safe. 
+ +Why: +- it is shared/original behavior +- it currently carries Claude-oriented assumptions +- forcing mixed-provider pseudo-parity here would create confusion and risk + +Future path: +- create a dedicated `archon-workflow-builder-codex` only when there is a clear + Codex-safe design +- optimize it specifically for Codex-supported workflow authoring, validation, + and operator use + +## 9. Shipped Asset Policy + +This fork needs a stricter distinction between four classes of workflow +surface: + +| Class | Meaning | Allowed Visibility | +| --- | --- | --- | +| Shipped default | Supported, bundled, tested, operator-ready | CLI, UI, docs, bundle | +| Repo-local experimental | Present in repo for development or evaluation, not yet shipped | repo only | +| Deferred / Claude-only | Intentionally not for Codex yet | docs only | +| Misleading pseudo-parity | Looks supported but is not actually ready | not allowed | + +A workflow must satisfy all of the following before it is considered a shipped +Codex default: +- genuinely Codex-adapted prompt and operator behavior +- valid against Codex workflow constraints +- bundled in shipped defaults if intended as a default +- covered by basic discovery and asset-parity tests +- described honestly in docs and metadata +- not dependent on unsupported Claude-only fields or assumptions + +## 10. Codex Workflow Adaptation Checklist + +Any future Codex-specific workflow must pass this checklist before being added +to the default surface. + +### 10.1 Routing And Identity + +- Is the provider explicit? +- Is the workflow name honest and specific? +- Does routing send the user to this workflow for the right class of task? +- Is the workflow distinguishable from assist-only routing? + +### 10.2 Prompt And Operator Quality + +- Is the prompt written for Codex behavior rather than copied from Claude? +- Are stop/continue semantics explicit? +- Are validation and iteration expectations scoped and concrete? 
+- Is the operator guidance at least on par with the Claude/default + equivalent? + +### 10.3 Runtime Fit + +- Does it avoid unsupported or ignored Codex workflow fields? +- Does it avoid fake support for node capabilities that Archon does not expose + on Codex? +- Are tool, sandbox, network, and reasoning assumptions aligned with actual + Codex runtime behavior? + +### 10.4 Shipped Asset Completeness + +- Is the workflow present in repo defaults if intended? +- Is it present in bundled defaults if intended? +- Is any required command or supporting doc present and discoverable? +- Are repo-local-only docs clearly treated as repo-local support material, not + shipped runtime assets? + +### 10.5 Testing And Observability + +- Is there at least one test or validation assertion proving it is + discoverable? +- If bundled, is bundled inclusion tested? +- Are key routing assumptions covered? +- Does the runtime produce enough operator-visible evidence to debug failures? + +### 10.6 Parity Honesty + +- Is parity real, degraded, or intentionally absent? +- If degraded, is the limitation documented clearly? +- If a feature is unsupported, is the workflow redesigned instead of awkwardly + translated? + +## 11. Capability Crosswalk Policy + +Future parity work must distinguish between: +- currently implemented Archon Codex support +- possible Codex SDK analogue +- unsupported in current Archon workflow/runtime surface +- intentionally left Claude-only + +This fork should not claim parity based on theoretical analogue alone. 
+ +### Current Crosswalk Policy + +- reasoning-effort style controls may map cleanly if already exposed through + Codex runtime wiring +- sandbox and network controls may map cleanly if already exposed and validated +- system-prompt or instruction-lane parity must be proven in Archon's actual + Codex integration before being claimed +- hooks, per-node controls, MCP shape, and similar workflow-node capabilities + must be treated cautiously and documented as unsupported until verified + +Supporting document planned: +- a focused reference doc comparing Claude-oriented runtime/workflow fields + against Codex runtime/workflow equivalents or gaps + +That reference should drive future implementation decisions, not speculative +assumptions. + +## 12. Repo-Local vs Bundled Rules + +The fork must clearly separate source-checkout convenience from bundled product +truth. + +```mermaid +flowchart LR + A["Repo defaults"] --> B["Source discovery"] + C["Bundled imports"] --> D["Binary defaults"] + B --> E["Local source truth"] + D --> F["Shipped product truth"] +``` + +Rules: +- if a workflow is meant to be a true default, repo discovery and bundled + imports must agree +- if a workflow is experimental, it must not be described as a real shipped + default +- bundled asset tests must fail when intended defaults drift from the bundle + set +- repo-local supporting docs may remain repo-only if they are not needed in + bundled runtime behavior + +Implication: +- repo-only README material such as + `.archon/workflows/defaults/archon-piv-loop-codex.README.md` is acceptable + when it is clearly operator support documentation rather than a bundled + runtime dependency + +## 13. 
Routing Target State + +The desired routing model for this fork is: + +- `archon-assist-codex` for general Codex assistance +- `archon-piv-loop-codex` for iterative Codex-native implementation and + validation loops where that lane fits +- no `archon-feature-development-codex` until a real Codex-native + implementation exists +- no claim that `archon-workflow-builder` is Codex-safe +- future specialized Codex workflows added only after passing the adaptation + checklist + +This keeps the visible Codex surface smaller, clearer, and more trustworthy. + +## 14. Planned Follow-On Documents + +This design doc should be followed by small, scoped implementation documents +rather than one large execution plan. + +### 14.1 Supporting Reference Doc + +Codex vs Claude workflow/runtime capability crosswalk: +- actual implemented Archon support +- likely analogues worth investigating +- unsupported fields +- redesign-required areas + +### 14.2 Implementation PRD A + +Codex workflow surface cleanup: +- align visible default surfaces and metadata to the actual Codex lane set +- tighten bundle/default parity expectations +- remove thin pseudo-parity from default routing + +### 14.3 Implementation PRD B + +Codex-native feature-development workflow v2: +- design from scratch +- define prompt structure, iteration contract, validation scope, and operator + guidance +- benchmark against `archon-piv-loop-codex` + +### 14.4 Implementation PRD C + +Codex workflow-builder variant: +- separate workflow +- explicitly Codex-safe authoring and guidance contract +- no mixed-provider ambiguity + +## 15. Recommended Implementation Sequence + +1. Tighten shipped-vs-experimental asset rules and bundled parity checks. +2. Write the Codex-vs-Claude capability crosswalk reference. +3. Decide whether a Codex-native feature-development lane is actually needed. +4. If needed, design and implement it from scratch. +5. Only after that, design a Codex-safe workflow-builder variant. 
+ +This sequence prioritizes trustworthiness and clarity before expansion. + +## 16. Risks + +| Risk | Why it matters | Mitigation | +| --- | --- | --- | +| Over-claiming Codex parity | Creates operator confusion and brittle workflows | Keep unsupported areas explicit | +| Bundle drift | Source behavior differs from shipped behavior | Add asset parity rules and tests | +| Over-expanding Codex defaults too early | Increases maintenance and pseudo-parity risk | Keep the default set intentionally small | +| Shared-runtime churn | Could destabilize original/cloud behavior | Prefer contained, justified changes only | +| Rebuilding too soon without a capability crosswalk | Risks repeating thin adaptation | Write the crosswalk doc first | + +## 17. Final Position + +This fork should be Codex-first by being intentionally narrower and more +truthful, not by mirroring every Claude/default surface immediately. + +Near-term Codex-first means: +- keep the strong Codex workflows +- remove the thin one +- defer the builder +- document the real capability boundary +- add new Codex workflows only when they are genuinely adapted and + operator-ready + +That is the path to a first-class Codex fork without degrading the +original/cloud implementation. From 84bf6ce65fad3d78a1c8cae9603a93e056325a6e Mon Sep 17 00:00:00 2001 From: matzls Date: Mon, 13 Apr 2026 20:35:42 +0200 Subject: [PATCH 18/21] docs(archon): tighten Codex assist workflow guidance Refine the Archon Codex skill and assist command so substantial implementation work routes to the Codex PIV lane, and add explicit worktree-proof/readback guardrails for assist-mode edits. 
Co-authored-by: Codex --- .agents/skills/archon/SKILL.md | 8 +++++--- .archon/commands/defaults/archon-assist-codex.md | 10 ++++++++++ .archon/workflows/defaults/archon-assist-codex.yaml | 3 ++- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/.agents/skills/archon/SKILL.md b/.agents/skills/archon/SKILL.md index 1c76f1b086..e3616deb38 100644 --- a/.agents/skills/archon/SKILL.md +++ b/.agents/skills/archon/SKILL.md @@ -93,9 +93,11 @@ Rules: 1. Use `--branch` unless the user explicitly wants `--no-worktree`. 2. Use descriptive branch names, for example `assist/codex-readme` or `piv/codex-auth-refactor`. -3. For read-only questions or exploration, `--no-worktree` is acceptable. -4. Prefer one Archon workflow per command rather than combining unrelated tasks. -5. Treat Archon workflows as long-running jobs. Keep the run ID, working path, +3. For substantial implementation work, prefer `archon-piv-loop-codex` + over `archon-assist-codex`. +4. For read-only questions or exploration, `--no-worktree` is acceptable. +5. Prefer one Archon workflow per command rather than combining unrelated tasks. +6. Treat Archon workflows as long-running jobs. Keep the run ID, working path, and current status available for follow-up checks instead of assuming the launch command alone is the full observability surface. diff --git a/.archon/commands/defaults/archon-assist-codex.md b/.archon/commands/defaults/archon-assist-codex.md index db101917a4..03976bf777 100644 --- a/.archon/commands/defaults/archon-assist-codex.md +++ b/.archon/commands/defaults/archon-assist-codex.md @@ -16,6 +16,9 @@ workflow. 1. **Understand the request** - Identify whether this is a question, debugging task, repo exploration, a one-off change, or a CI/problem investigation. + - If the request is substantial multi-file implementation work that should + end in a PR, stop and route to `archon-piv-loop-codex` instead of + continuing in assist mode. 2. 
**Ground yourself in the repo** - Search the codebase, read the relevant files, and understand the current implementation before acting. 3. **Read repo guidance explicitly when needed** @@ -25,6 +28,13 @@ workflow. - Do not assume `CLAUDE.md` was automatically loaded by Codex. 4. **Use Codex capabilities directly** - Read and edit files, run commands, inspect git state, and validate relevant changes. + - If you are going to write files in assist mode, prove the assigned + worktree first with `pwd`, `git rev-parse --show-toplevel`, and + `git branch --show-current`. + - If you claim file changes, prove they landed in the current worktree with + `git status --short` or `git diff --name-only` before closing out. + - If the repo is clean after claimed edits, treat that as a workflow/path + mismatch and report it explicitly instead of claiming success. 5. **Call out routing gaps** - If this should have been a narrower Codex workflow, mention: "Note: Using archon-assist-codex. Consider creating or using a more specific diff --git a/.archon/workflows/defaults/archon-assist-codex.yaml b/.archon/workflows/defaults/archon-assist-codex.yaml index e8d3ef59f8..dbce914c3e 100644 --- a/.archon/workflows/defaults/archon-assist-codex.yaml +++ b/.archon/workflows/defaults/archon-assist-codex.yaml @@ -5,7 +5,8 @@ description: | "use archon codex", "general help codex", "codex workflow help". Handles: Questions, debugging, exploration, one-off tasks, explanations, CI failures, general help. Capability: Full Codex agent session with file, shell, git, and network access as configured by Archon. - NOT for: Claude-tuned assist mode (use archon-assist) or guided Codex PIV work (use archon-piv-loop-codex). + NOT for: Claude-tuned assist mode (use archon-assist), or guided Codex PIV work + (use archon-piv-loop-codex). Note: Will inform user when Codex assist mode is used for tracking. 
provider: codex From 5d88abf6e4193ef69b5f510668a4f2746cd2c59e Mon Sep 17 00:00:00 2001 From: matzls Date: Tue, 14 Apr 2026 12:56:23 +0200 Subject: [PATCH 19/21] docs(archon): expand Codex skill parity guidance Add a full Codex-first operator and authoring surface for the Archon skill, including workflow monitoring, debugging, repo init, command authoring, DAG authoring, CLI references, configuration guidance, and a Codex capability crosswalk. Correct the documented Codex parity boundaries so loop model/provider overrides are described accurately and workflow-level Codex tuning fields are called out as parsed but not runtime-effective per workflow in the current executor. Validation: - git diff --check - archon workflow list --json Co-authored-by: Codex --- .agents/skills/archon/SKILL.md | 123 +++++++++- .../archon/examples/command-template.md | 55 +++++ .../skills/archon/examples/dag-workflow.yaml | 80 +++++++ .../archon/references/authoring-commands.md | 118 +++++++++ .../skills/archon/references/cli-commands.md | 158 ++++++++++++ .../references/codex-capability-crosswalk.md | 132 +++++++++++ .../skills/archon/references/configuration.md | 174 ++++++++++++++ .../references/interactive-workflows.md | 38 ++- .../skills/archon/references/log-debugging.md | 112 +++++++-- .../skills/archon/references/monitoring.md | 32 +++ .agents/skills/archon/references/repo-init.md | 90 +++++++ .agents/skills/archon/references/variables.md | 70 ++++++ .../skills/archon/references/workflow-dag.md | 224 ++++++++++++++++++ .../commands/defaults/archon-assist-codex.md | 13 +- .../defaults/archon-assist-codex.yaml | 4 +- 15 files changed, 1389 insertions(+), 34 deletions(-) create mode 100644 .agents/skills/archon/examples/command-template.md create mode 100644 .agents/skills/archon/examples/dag-workflow.yaml create mode 100644 .agents/skills/archon/references/authoring-commands.md create mode 100644 .agents/skills/archon/references/cli-commands.md create mode 100644 
.agents/skills/archon/references/codex-capability-crosswalk.md create mode 100644 .agents/skills/archon/references/configuration.md create mode 100644 .agents/skills/archon/references/repo-init.md create mode 100644 .agents/skills/archon/references/variables.md create mode 100644 .agents/skills/archon/references/workflow-dag.md diff --git a/.agents/skills/archon/SKILL.md b/.agents/skills/archon/SKILL.md index e3616deb38..628071bfcd 100644 --- a/.agents/skills/archon/SKILL.md +++ b/.agents/skills/archon/SKILL.md @@ -1,14 +1,18 @@ --- name: archon description: | - Use when the user wants Codex to run or monitor Archon workflows, or when a task - should be delegated from Codex into an Archon workflow instead of being handled + Use when the user wants Codex to run or monitor Archon workflows, initialize + Archon in a repo, create or edit Archon commands/workflows, inspect Archon CLI + behavior, or customize Archon for Codex usage rather than handling the task directly in the current session. Triggers: "use archon", "run archon", "archon workflow", "archon assist", - "codex archon assist", "have archon handle this", "use archon codex". - Also use when the user wants help choosing the Codex-safe Archon workflow for a task. + "codex archon assist", "have archon handle this", "use archon codex", + "archon init", "create an archon workflow", "create an archon command", + "archon config", "archon variables", "archon cli". + Also use when the user wants help choosing the Codex-safe Archon workflow or + authoring/customization surface for a task. NOT for: Direct local implementation when the user wants Codex to do the work here - without handing off to Archon. + without handing off to Archon or without using Archon surfaces. --- # Archon For Codex @@ -17,6 +21,19 @@ Archon runs long-form workflows through its own CLI and workflow engine. In Code this skill exists to route work into the right Archon workflow and to avoid Claude-specific workflow names or assumptions. 
+This skill is intentionally narrower than the full Archon product surface: + +- it is Codex-first +- it covers workflow operation, debugging, and Archon customization +- it does not try to duplicate setup/install or broad platform-adapter docs + +Direct workflow routing comes first. + +- If the task clearly matches a specific Codex-safe workflow, run that workflow. +- Use `archon-assist-codex` only when no narrower Codex-safe workflow fits. +- Do not route guided implementation or interactive review loops through assist + first just to "get into Archon." + ## First Step Check the available workflows before suggesting or running one: @@ -38,6 +55,13 @@ Choose the smallest surface that matches the user's need: | monitor an active workflow | read `references/monitoring.md` | | debug a confusing, failed, or stalled run | read `references/log-debugging.md` | | relay an interactive workflow cleanly | read `references/interactive-workflows.md` | +| initialize `.archon/` in a repo | read `references/repo-init.md` | +| inspect variable substitution | read `references/variables.md` | +| create or edit Archon commands | read `references/authoring-commands.md` | +| create or edit Archon workflow YAML | read `references/workflow-dag.md` | +| inspect Archon CLI surfaces | read `references/cli-commands.md` | +| inspect or modify Archon config | read `references/configuration.md` | +| inspect Codex vs Claude capability boundaries | read `references/codex-capability-crosswalk.md` | ## Codex Naming Convention @@ -47,7 +71,7 @@ the workflow has been tuned or separated for Codex behavior. Known Codex-specific lanes in this repo: - `archon-assist-codex` for general Archon help, debugging, exploration, and - one-off questions + one-off questions when no narrower Codex-safe lane fits - `archon-piv-loop-codex` for guided Plan-Implement-Validate workflows with Codex @@ -93,14 +117,80 @@ Rules: 1. Use `--branch` unless the user explicitly wants `--no-worktree`. 2. 
Use descriptive branch names, for example `assist/codex-readme` or `piv/codex-auth-refactor`. -3. For substantial implementation work, prefer `archon-piv-loop-codex` - over `archon-assist-codex`. +3. For substantial implementation work, interactive refinement, or any guided + human-in-the-loop build request, prefer `archon-piv-loop-codex` over + `archon-assist-codex`. 4. For read-only questions or exploration, `--no-worktree` is acceptable. 5. Prefer one Archon workflow per command rather than combining unrelated tasks. 6. Treat Archon workflows as long-running jobs. Keep the run ID, working path, and current status available for follow-up checks instead of assuming the launch command alone is the full observability surface. +## Interactive Operator Protocol + +Use this protocol for interactive workflows such as `archon-piv-loop-codex`. + +### Launch + +1. Run the workflow directly with `archon workflow run ...`. +2. Capture: + - workflow name + - run ID + - working path + - branch name if available +3. Immediately verify the launched run with `archon workflow status --json`. + +### State Machine + +Treat Codex as the human-facing operator for the workflow run until it reaches a +terminal state. + +| Status | Action | +| --- | --- | +| `running` | keep monitoring; report only meaningful changes | +| `paused` | fetch the latest workflow output, relay it directly, wait for the user's answer | +| `completed` | report the terminal result and stop | +| `failed` | report the failure evidence and stop | + +### Post-Transition Rule + +After every `archon workflow run`, `archon workflow approve`, `archon workflow reject`, +or `archon workflow resume`: + +1. check `archon workflow status --json` +2. continue until the run is back at one of: + - `paused` + - `completed` + - `failed` + +Do not stop after recording approval or rejection alone. The control loop is not +done until the workflow either pauses again or reaches a terminal state. 
+ +### Pause Detection Rule + +For interactive loops, treat a new human checkpoint as real only when the run is +currently `paused`. + +Track the paused fingerprint: + +- `approval.nodeId` +- `approval.iteration` +- `approval.message` + +Important nuance: + +- approval metadata can persist while the run is `running` +- do not treat `metadata.approval` by itself as proof that the loop has paused again +- workflow truth comes from the current `status`, not from stale approval metadata + +### Surface Boundaries + +- `archon workflow run ...` is the correct direct CLI surface for interactive workflows +- `archon chat ...` is single-shot orchestration, not a persistent multi-turn workflow chat +- web foreground runs can resume from natural-language replies in the same thread +- CLI `workflow approve` and `workflow reject` auto-resume the run +- `/workflow approve` is a different surface; do not assume it behaves like the CLI command + ## Monitoring Start with: @@ -132,6 +222,11 @@ When an interactive workflow pauses, do not summarize the workflow's question. Read the latest output and pass the user's answer back through the Archon approval or reject command rather than trying to continue locally. +When a paused checkpoint is tied to a mutable artifact such as a plan-review +loop, reopen the current artifact from disk before relaying any state summary. +Do not assume a previously read file path or artifact contents are still the +latest truth. + If the user explicitly wants unattended follow-up and the current Codex surface supports thread heartbeat automations, attach one to the current thread and have it report only meaningful changes: approval gates, terminal state changes, or a @@ -146,3 +241,15 @@ Read `references/monitoring.md` for the detailed monitoring contract and Do not assume Codex auto-loaded `CLAUDE.md` even if a fallback filename is configured globally. 
If repo conventions are load-bearing for the delegated task, read `CLAUDE.md` explicitly before recommending or running the workflow. + +For Archon customization requests, keep the boundary clear: + +- use the shared Archon authoring docs for commands, workflows, variables, and + repo initialization +- use `references/configuration.md` for repo and global Archon config changes +- use `references/codex-capability-crosswalk.md` whenever provider capability + differences are load-bearing +- do not imply that Claude-only per-node controls automatically become Codex + node features +- keep `archon chat` documented as single-shot orchestration rather than a + persistent workflow conversation diff --git a/.agents/skills/archon/examples/command-template.md b/.agents/skills/archon/examples/command-template.md new file mode 100644 index 0000000000..035f6c9cd8 --- /dev/null +++ b/.agents/skills/archon/examples/command-template.md @@ -0,0 +1,55 @@ +--- +description: Template for a Codex-safe Archon command +argument-hint: +--- + +# Command Name + +**Workflow ID**: $WORKFLOW_ID + +User request: $ARGUMENTS +Artifacts directory: $ARTIFACTS_DIR +Base branch: $BASE_BRANCH + +## Phase 1: Load + +Gather the context you actually need: + +- read any required files from the repository +- read prior artifacts from `$ARTIFACTS_DIR` if this command depends on earlier steps +- confirm the expected output before making changes + +### Phase 1 Checkpoint + +- [ ] request understood +- [ ] required inputs loaded +- [ ] expected output identified + +## Phase 2: Execute + +Perform the main task of this command. 
+ +Keep the prompt explicit about: + +- what to inspect +- what to change or produce +- how to validate the result + +### Phase 2 Checkpoint + +- [ ] main task completed +- [ ] relevant validation run or intentionally skipped with reason + +## Phase 3: Report + +If downstream nodes need durable output, write it into `$ARTIFACTS_DIR/output.md` +with: + +- what was done +- key findings or decisions +- blockers or follow-up notes + +### Phase 3 Checkpoint + +- [ ] durable output written when needed +- [ ] summary ready for the next step or the user diff --git a/.agents/skills/archon/examples/dag-workflow.yaml b/.agents/skills/archon/examples/dag-workflow.yaml new file mode 100644 index 0000000000..56c013b83e --- /dev/null +++ b/.agents/skills/archon/examples/dag-workflow.yaml @@ -0,0 +1,80 @@ +# Example Codex-safe Archon workflow +# +# Demonstrates: +# - bash node +# - structured output +# - conditional routing +# - command node +# - loop node +# - validation node +# +# This is a reference example. Adjust nodes, dependencies, and outputs to the +# actual task rather than copying it literally. + +name: codex-smart-issue-fix +description: | + Classify a GitHub issue, route to the right investigation path, implement the + change with Codex, and summarize validation results. +provider: codex +model: gpt-5.4 + +nodes: + - id: fetch-issue + bash: | + issue_num=$(echo "$ARGUMENTS" | grep -oE '[0-9]+' | head -1) + if [ -z "$issue_num" ]; then + echo "No issue number found in: $ARGUMENTS" + exit 1 + fi + gh issue view "$issue_num" --json title,body,labels + timeout: 15000 + + - id: classify + prompt: | + Classify this issue as bug or feature and return structured output. 
+ + $fetch-issue.output + depends_on: [fetch-issue] + output_format: + type: object + properties: + issue_type: + type: string + enum: [bug, feature] + title: + type: string + required: [issue_type, title] + + - id: investigate-bug + command: investigate-bug + depends_on: [classify] + when: "$classify.output.issue_type == 'bug'" + + - id: plan-feature + command: plan-feature + depends_on: [classify] + when: "$classify.output.issue_type == 'feature'" + + - id: implement + depends_on: [investigate-bug, plan-feature] + trigger_rule: one_success + loop: + prompt: | + Implement the next required change for: $classify.output.title + + Read the relevant artifacts from $ARTIFACTS_DIR. + When implementation and validation are complete, output: + DONE + until: DONE + max_iterations: 5 + fresh_context: false + until_bash: "bun run test" + + - id: summarize + prompt: | + Summarize the implementation and validation outcome for: + $classify.output.title + + Implementation result: + $implement.output + depends_on: [implement] diff --git a/.agents/skills/archon/references/authoring-commands.md b/.agents/skills/archon/references/authoring-commands.md new file mode 100644 index 0000000000..41bc9ed014 --- /dev/null +++ b/.agents/skills/archon/references/authoring-commands.md @@ -0,0 +1,118 @@ +# Authoring Archon Command Files For Codex + +Command files are Markdown prompt templates. They are shared Archon primitives, +not Claude-only assets. + +## File Location + +```text +.archon/commands/ +├── my-command.md +├── review-code.md +└── defaults/ + └── archon-assist-codex.md +``` + +Commands are referenced by name without the `.md` extension from workflow YAML. 
+ +## File Format + +```markdown +--- +description: One-line description of what this command does +argument-hint: <expected arguments> +--- + +# Command Title + +**Workflow ID**: $WORKFLOW_ID + +User request: $ARGUMENTS +Artifacts: $ARTIFACTS_DIR + +## Phase 1: Load + +[Gather the needed context] + +## Phase 2: Execute + +[Do the work] + +## Phase 3: Report + +[Summarize or write artifacts] +``` + +The full file content, including frontmatter, becomes the prompt. + +## Frontmatter Fields + +| Field | Required | Description | +| --- | --- | --- | +| `description` | recommended | Human-readable description used in listings | +| `argument-hint` | optional | Expected argument shape such as `<issue-number>` or `(no arguments)` | + +## Discovery And Priority + +When a workflow references `command: my-command`, Archon resolves in this order: + +1. `.archon/commands/my-command.md` +2. `.archon/commands/defaults/my-command.md` +3. bundled defaults shipped with Archon + +First match wins. + +## Variable Use + +Most common variables: + +- `$ARGUMENTS` +- `$ARTIFACTS_DIR` +- `$WORKFLOW_ID` +- `$BASE_BRANCH` + +See `variables.md` for the full reference. + +## Recommended Structure + +For non-trivial commands, keep the prompt phased: + +1. load context +2. analyze or execute +3. validate if relevant +4. report or write artifacts + +Use short checklists when they materially help the workflow stay deterministic. + +## Artifact Conventions + +If downstream nodes need the result, write it into `$ARTIFACTS_DIR` instead of +leaving it only in free-form assistant output. 
+ +Common patterns: + +- `$ARTIFACTS_DIR/plan.md` +- `$ARTIFACTS_DIR/investigation.md` +- `$ARTIFACTS_DIR/implementation.md` +- `$ARTIFACTS_DIR/validation.md` + +## Authoring Rules For Codex + +- keep commands provider-neutral unless a prompt truly depends on provider + behavior +- do not assume Claude-only node controls such as per-node hooks or skills +- do not hardcode local-only paths when `$ARTIFACTS_DIR` is the intended output + surface +- do not assume prior conversational memory when the calling node uses fresh + context + +## Anti-Patterns + +- vague instructions that do not define expected outputs +- commands that produce no durable artifact when downstream nodes need one +- prompts that assume Claude-specific tools or behavior without saying so +- monolithic prompts with no clear responsibility + +## Example + +See `examples/command-template.md` for a Codex-safe starter template. diff --git a/.agents/skills/archon/references/cli-commands.md b/.agents/skills/archon/references/cli-commands.md new file mode 100644 index 0000000000..28d0e91ada --- /dev/null +++ b/.agents/skills/archon/references/cli-commands.md @@ -0,0 +1,158 @@ +# Archon CLI Command Reference For Codex + +Use this when the user wants the real Archon CLI surface rather than a skill +summary. + +All commands except `version` and `chat` are normally run from within a git +repository. + +## Workflow Commands + +### `archon workflow list` + +```bash +archon workflow list +archon workflow list --json +``` + +Use this first when choosing a Codex-safe workflow. 
+ +### `archon workflow run <workflow-name> [message]` + +Examples: + +```bash +archon workflow run archon-assist-codex --branch assist/codex-readme "Explain the current workflow surface" +archon workflow run archon-piv-loop-codex --branch piv/codex-auth "Implement auth from the approved plan" +archon workflow run my-workflow --branch feat/dark-mode "Add dark mode" +archon workflow run quick-fix --no-worktree "Fix the typo in README" +archon workflow run my-workflow --resume +``` + +Key flags: + +| Flag | Description | +| --- | --- | +| `--branch <name>` | create or reuse a worktree branch | +| `--from <base-branch>` | choose the base branch for a new worktree | +| `--no-worktree` | run in the live checkout | +| `--resume` | resume the last failed run | +| `--cwd <path>` | override working directory | + +Important: + +- default behavior creates an isolated worktree automatically +- `--branch` and `--no-worktree` conflict +- `--resume` and `--branch` conflict + +### `archon workflow status` + +```bash +archon workflow status +archon workflow status --verbose +archon workflow status --json +``` + +Use `--json` as the source of truth for current workflow state. + +### `archon workflow approve` + +```bash +archon workflow approve <run-id> "<response>" +``` + +Use for paused workflows that need human feedback. The CLI approve path records +the response and resumes the run. + +### `archon workflow reject` + +```bash +archon workflow reject <run-id> "<reason>" +``` + +Use for paused workflows that need rejection or rework feedback. + +### `archon workflow resume` + +```bash +archon workflow resume <run-id> +``` + +Use when the run failed and should be resumed from its failure point. + +## Validation Commands + +### `archon validate workflows [name]` + +```bash +archon validate workflows +archon validate workflows my-workflow +archon validate workflows my-workflow --json +``` + +This checks workflow syntax, dependency structure, resource resolution, and +provider-compatibility warnings. 
+ +### `archon validate commands [name]` + +```bash +archon validate commands +archon validate commands my-command +``` + +Use after creating or editing command files. + +## Isolation Commands + +### `archon isolation list` + +```bash +archon isolation list +``` + +Shows active worktree environments. + +### `archon isolation cleanup` + +```bash +archon isolation cleanup +archon isolation cleanup 14 +archon isolation cleanup --merged +``` + +## Other Commands + +### `archon complete <branch>` + +```bash +archon complete feature-auth +archon complete feature-auth --force +``` + +Completes a branch lifecycle by removing the worktree and branch state. + +### `archon version` + +```bash +archon version +``` + +### `archon chat <message>` + +```bash +archon chat "What workflows are available?" +``` + +Important: + +- `archon chat` is single-shot orchestration +- it is not a persistent multi-turn workflow conversation +- interactive workflow control should stay on `archon workflow run/status/approve/reject` + +## Useful Environment Variables + +| Variable | Purpose | +| --- | --- | +| `ARCHON_HOME` | override Archon home directory | +| `LOG_LEVEL` | control Archon process log verbosity | +| `DATABASE_URL` | use PostgreSQL instead of SQLite | diff --git a/.agents/skills/archon/references/codex-capability-crosswalk.md b/.agents/skills/archon/references/codex-capability-crosswalk.md new file mode 100644 index 0000000000..3de433d8af --- /dev/null +++ b/.agents/skills/archon/references/codex-capability-crosswalk.md @@ -0,0 +1,132 @@ +# Codex Capability Crosswalk For Archon + +Use this document when the question is not "how do I write workflow YAML?" but +"what actually survives the Claude-to-Codex translation?" + +This is a capability crosswalk, not a feature-sales guide. Treat it as the +truth table for Codex-safe Archon authoring. 
+ +## Crosswalk + +| Feature | Claude in workflow YAML | Codex in workflow YAML | Codex real surface | Meaning | +| --- | --- | --- | --- | --- | +| `provider` | supported | supported | workflow or node field | real parity | +| `model` | supported | supported | workflow or node field | real parity, including loop node provider/model overrides | +| `output_format` | supported | supported | workflow YAML | real parity with structured-output caveats | +| `retry` | supported | supported | workflow YAML | real parity except loop-node retry stays invalid | +| `hooks` | supported per-node | ignored | none | no parity | +| `mcp` | supported per-node | ignored per-node | Codex global config | global-only, not equivalent | +| `skills` | supported per-node | ignored per-node | global or repo `.agents/skills/` | global/repo discovery, not equivalent | +| `allowed_tools` | supported per-node | ignored | Codex config / MCP config | global-only, not equivalent | +| `denied_tools` | supported per-node | ignored | Codex config / MCP config | global-only, not equivalent | +| `modelReasoningEffort` | not the same field | parsed but not runtime-effective per workflow | Archon assistant config | config-only truth today | +| `webSearchMode` | not the same field | parsed but not runtime-effective per workflow | Archon assistant config | config-only truth today | +| `additionalDirectories` | not the same field | parsed but not runtime-effective per workflow | Archon assistant config | config-only truth today | + +## Feature Notes + +### `provider` and `model` + +These are real workflow controls for Codex. + +- node-level `provider` and `model` overrides are honored for normal nodes +- loop nodes also resolve and pass node-level `provider` and `model` overrides + into loop execution + +That means the parity boundary is not "loops ignore model overrides." The real +boundary is in Claude-only controls such as hooks, per-node MCP, per-node +skills, and node-level tool restrictions. 
+ +### `output_format` + +This is real Codex parity, not a degraded fallback. + +Archon maps workflow `output_format:` to the Codex client's structured-output +path. Downstream field references such as `$node.output.field` are valid when +Codex returns structured output as expected. + +Operational nuance: + +- if Codex returns non-JSON output, Archon warns that field-based downstream + conditions may misbehave +- this is still a supported feature, but not a promise that every prompt will + always produce clean structured output + +### `retry` + +This remains a shared workflow feature for command, prompt, and bash nodes. +Loop-node retry is still not valid. + +### `modelReasoningEffort`, `webSearchMode`, and `additionalDirectories` + +These fields matter for Codex, but not in the same way the YAML surface may +suggest. + +- the workflow loader parses them +- current Codex execution still pulls these values from + `config.assistants.codex.*` +- do not treat workflow YAML as an isolated per-workflow tuning surface for + these fields unless the runtime is changed to honor them + +For current Codex behavior, use Archon config: + +- `assistants.codex.modelReasoningEffort` +- `assistants.codex.webSearchMode` +- `assistants.codex.additionalDirectories` + +### `hooks` + +There is no Codex node-level equivalent in Archon. If a workflow depends on +hooks for guardrails or tool interception, do not call it Codex-safe. + +### `mcp` + +Important distinction: + +- Claude: `mcp:` is a node-level workflow surface +- Codex: Archon ignores `mcp:` on a node + +For Codex, MCP belongs in Codex configuration rather than workflow YAML. That +means the workflow cannot assume one node has one MCP setup and another node has +a different one in the same fine-grained way. 
+ +### `skills` + +Important distinction: + +- Claude: `skills:` is a node-level workflow surface +- Codex: Archon ignores `skills:` on a node + +Codex skill discovery is global or repo-local, not a workflow node isolation +mechanism. + +### `allowed_tools` and `denied_tools` + +These are Claude node-level controls. Archon warns and ignores them on Codex. + +If the desired effect is tool restriction for Codex, that belongs in Codex's +own configuration surface, not in Archon workflow YAML as a per-node contract. + +## Codex Global Surfaces That Are Real + +These are real Codex-side configuration surfaces even though they are not node +parity features: + +- global or repo skill discovery under `.agents/skills/` +- Codex MCP configuration in Codex config +- Codex assistant defaults in Archon config: + - `assistants.codex.model` + - `assistants.codex.modelReasoningEffort` + - `assistants.codex.webSearchMode` + - `assistants.codex.additionalDirectories` + - `assistants.codex.codexBinaryPath` + +## Authoring Rules + +1. If a workflow depends on per-node hooks, per-node MCP, per-node skills, or + per-node tool restrictions, do not present it as Codex-safe. +2. If Codex has a global-only equivalent, document that boundary explicitly. +3. Prefer a dedicated `-codex` workflow when the original workflow depends on + Claude-oriented node controls. +4. Treat this crosswalk as code-backed contract, not as an aspirational parity + promise. diff --git a/.agents/skills/archon/references/configuration.md b/.agents/skills/archon/references/configuration.md new file mode 100644 index 0000000000..02e0e6861f --- /dev/null +++ b/.agents/skills/archon/references/configuration.md @@ -0,0 +1,174 @@ +# Archon Configuration Guide For Codex + +Use this guide when the user wants to view, explain, create, or modify Archon +configuration for Codex-first usage. 
+ +## Configuration Levels + +Archon has two config levels: + +- global config: `~/.archon/config.yaml` +- repo config: `/.archon/config.yaml` + +Precedence is: + +1. environment variables +2. repo config +3. global config +4. built-in defaults + +## When To Use Which Level + +Use repo config when the change is project-specific: + +- prefer Codex only in one repo +- set one repo's base branch +- set one repo's `copyFiles` +- disable bundled defaults for one repo +- add repo-specific Codex defaults + +Use global config when the change is user-wide: + +- prefer Codex by default across repos +- set default Codex model or reasoning effort +- set global Codex `webSearchMode` +- set global `additionalDirectories` +- set global streaming or bot preferences + +## Reading Current Config + +Global config: + +```bash +sed -n '1,220p' ~/.archon/config.yaml +``` + +Repo config: + +```bash +sed -n '1,220p' .archon/config.yaml +``` + +If a file does not exist: + +- global config is auto-created on first Archon run +- repo config is optional; Archon falls back to defaults + +## Most Important Codex Settings + +### Global config example + +```yaml +defaultAssistant: codex + +assistants: + codex: + model: gpt-5.4 + modelReasoningEffort: medium + webSearchMode: live + additionalDirectories: + - /absolute/path/to/other/repo + codexBinaryPath: /absolute/path/to/codex +``` + +### Repo config example + +```yaml +assistant: codex + +assistants: + codex: + model: gpt-5.4 + modelReasoningEffort: high + webSearchMode: live + +worktree: + baseBranch: main + copyFiles: + - .env + - .env.local + +commands: + folder: .archon/commands + +defaults: + loadDefaultCommands: true + loadDefaultWorkflows: true +``` + +## Key Fields + +### Global config fields + +| Field | Meaning | +| --- | --- | +| `defaultAssistant` | default assistant when a repo does not override it | +| `assistants.codex.model` | default Codex model | +| `assistants.codex.modelReasoningEffort` | default Codex reasoning effort | +| 
`assistants.codex.webSearchMode` | default Codex web search mode | +| `assistants.codex.additionalDirectories` | extra writable directories for Codex sessions | +| `assistants.codex.codexBinaryPath` | explicit Codex CLI path, mainly relevant in compiled Archon builds | +| `botName` | bot display name | +| `streaming.*` | platform response mode | +| `concurrency.maxConversations` | max parallel conversations | + +### Repo config fields + +| Field | Meaning | +| --- | --- | +| `assistant` | repo-level assistant override | +| `assistants.codex.*` | repo-level Codex defaults | +| `commands.folder` | extra command folder search path | +| `worktree.baseBranch` | base branch used for worktree creation and `$BASE_BRANCH` | +| `worktree.copyFiles` | ignored files copied into new worktrees | +| `defaults.loadDefaultCommands` | whether bundled commands are loaded at runtime | +| `defaults.loadDefaultWorkflows` | whether bundled workflows are loaded at runtime | +| `docs.path` | repo docs path used by workflow surfaces that care about docs | +| `env` | per-project env vars merged into workflow execution config; most relevant when a workflow surface consumes `config.envVars` | + +## Editing Guidance + +When modifying config: + +- preserve unrelated keys +- keep repo config focused on non-default behavior +- use repo config for project-specific overrides instead of widening the global + config unnecessarily +- do not confuse `assistant` with `defaultAssistant` + - `assistant` is repo-level + - `defaultAssistant` is global + +## Environment Variable Overrides + +These override config files: + +| Env Var | Overrides | +| --- | --- | +| `DEFAULT_AI_ASSISTANT` | assistant preference | +| `BOT_DISPLAY_NAME` | `botName` | +| `TELEGRAM_STREAMING_MODE` | `streaming.telegram` | +| `DISCORD_STREAMING_MODE` | `streaming.discord` | +| `SLACK_STREAMING_MODE` | `streaming.slack` | +| `MAX_CONCURRENT_CONVERSATIONS` | `concurrency.maxConversations` | +| `ARCHON_HOME` | Archon base path | + +## 
Operational Notes For Codex + +- Codex MCP configuration is not controlled by Archon workflow `mcp:` node + fields +- Codex skill discovery is not controlled by Archon workflow `skills:` node + fields +- if the user wants those behaviors, route them through Codex config and the + Codex capability crosswalk instead of pretending repo config creates node + parity + +## Validation + +After a config change, verify the expected behavior with readback: + +```bash +archon workflow list --json +``` + +For repo-specific changes, read the effective repo config again and confirm the +intended keys are present with the expected values. diff --git a/.agents/skills/archon/references/interactive-workflows.md b/.agents/skills/archon/references/interactive-workflows.md index da5cbda8e6..3d0c97b3a1 100644 --- a/.agents/skills/archon/references/interactive-workflows.md +++ b/.agents/skills/archon/references/interactive-workflows.md @@ -15,16 +15,21 @@ Be a transparent relay. - show the workflow's latest question or summary directly - do not rewrite or "improve" the workflow's wording - pass the user's answer back as directly as possible +- keep operating the run until it pauses again or reaches a terminal state ## Basic Loop -1. Launch the workflow and capture the run ID. -2. Monitor with `archon workflow status --json`. +1. Launch the workflow and capture: + - run ID + - workflow name + - working path +2. Verify the launched run with `archon workflow status --json`. 3. When the run becomes `paused`, read the latest workflow output. 4. Relay that output directly to the user. 5. When the user answers, resume with `archon workflow approve` or `archon workflow reject`. -6. Repeat until the run reaches a terminal state. +6. Immediately re-check `archon workflow status --json`. +7. Repeat until the run reaches `paused`, `completed`, or `failed`. 
## Commands @@ -44,8 +49,21 @@ When the workflow is paused: - pass their response through verbatim unless a safety or formatting issue requires intervention +Treat the paused fingerprint as: + +- `approval.nodeId` +- `approval.iteration` +- `approval.message` + +If the workflow pauses again with a new fingerprint, that is a new human +checkpoint even if the wording looks similar. + Do not replace the workflow's structured questions with your own summary. +If the paused node is reviewing a mutable artifact, reopen the current artifact +from disk before you speak for the workflow. For example, a plan-review pause +should use the latest saved plan rather than a stale earlier read. + ## When Still Running Long research or implementation nodes can stay `running` for a while without @@ -56,6 +74,13 @@ needing user input. - if activity stops for the stall window, flag a possible stall and say what evidence stopped moving +Important nuance: + +- interactive-loop approval metadata can remain present while the run is + `running` +- that does not mean the workflow is paused again +- only treat the loop as back when the run status itself is `paused` + ## Where To Read The Latest Output Use the per-run JSONL when status alone is not enough: @@ -66,3 +91,10 @@ tail -n 40 "" ``` Read `log-debugging.md` when you need the full trace. 
+ +## Surface Boundaries + +- `archon workflow run ...` is the direct CLI surface for this interaction model +- `archon chat ...` is not a persistent multi-turn workflow conversation +- web foreground workflows can resume from natural-language replies in the same thread +- CLI `workflow approve` and `workflow reject` resume immediately after recording the decision diff --git a/.agents/skills/archon/references/log-debugging.md b/.agents/skills/archon/references/log-debugging.md index ae9f62269a..95c2cf5a07 100644 --- a/.agents/skills/archon/references/log-debugging.md +++ b/.agents/skills/archon/references/log-debugging.md @@ -1,19 +1,21 @@ # Archon Log Debugging For Codex -Use this guide when the main job is understanding what Archon just did, why a -run paused, why it failed, or whether it is stalled. +Use this guide when the main job is understanding what Archon just did during a +workflow run, why it failed, why it paused, or where the useful evidence lives. ## Three Evidence Layers +Archon exposes overlapping but non-interchangeable evidence surfaces. + ### 1. Status and run details -Use first when you need the current high-level state. +Use this first for the current high-level truth. - `archon workflow status --json` - `archon workflow status --verbose` - web or API run details when available -This is the fastest way to confirm: +Best for: - run ID - current status @@ -23,7 +25,7 @@ This is the fastest way to confirm: ### 2. Per-run workflow JSONL -Use when status is ambiguous or you need the actual workflow trace. +Use this when status is ambiguous or when you need the full workflow trace. 
Default location: @@ -36,20 +38,12 @@ Best for: - assistant output - tool calls - node boundaries +- validation events - workflow pause or failure context -Representative commands: - -```bash -find "${ARCHON_HOME:-$HOME/.archon}/workspaces" -name ".jsonl" 2>/dev/null -tail -n 40 "$LOG_FILE" -rg '"type":"workflow_error"|"type":"node_error"' "$LOG_FILE" -rg '"type":"assistant"' "$LOG_FILE" | tail -n 5 -``` - ### 3. Runtime process logs -Use only when the issue looks like Archon runtime behavior rather than workflow +Use this when the issue looks like Archon runtime behavior rather than workflow logic. Examples: @@ -73,8 +67,88 @@ Best for: 3. per-run JSONL 4. runtime logs with `LOG_LEVEL=debug` -## Important Note +## Finding The Run + +For active runs: + +```bash +archon workflow status +archon workflow status --verbose +archon workflow status --json +``` + +If you already know the run ID: + +```bash +find "${ARCHON_HOME:-$HOME/.archon}/workspaces" -name ".jsonl" 2>/dev/null +``` + +## Reading The JSONL + +Set a shell variable first: + +```bash +LOG_FILE="${ARCHON_HOME:-$HOME/.archon}/workspaces///logs/.jsonl" +``` + +Common reads: + +```bash +tail -n 40 "$LOG_FILE" +rg '"type":"workflow_error"|"type":"node_error"' "$LOG_FILE" +rg '"type":"assistant"' "$LOG_FILE" | tail -n 5 +rg '"type":"validation"' "$LOG_FILE" +``` + +## Common Event Families + +Representative JSONL event types include: + +- `workflow_start` +- `workflow_complete` +- `workflow_error` +- `assistant` +- `tool` +- `validation` +- `node_start` +- `node_complete` +- `node_skipped` +- `node_error` + +Use them as breadcrumbs rather than assuming the UI event names will match +exactly. 
+ +## Filtering Patterns + +Assistant messages: + +```bash +rg '"type":"assistant"' "$LOG_FILE" +``` + +Tool calls: + +```bash +rg '"type":"tool"' "$LOG_FILE" +``` + +Skipped nodes: + +```bash +rg '"type":"node_skipped"' "$LOG_FILE" +``` + +If `jq` is available: + +```bash +jq -r 'select(.type=="assistant") | .content' "$LOG_FILE" | tail -n 1 +jq -c 'select(.type=="node_error") | {ts, step, error}' "$LOG_FILE" +``` + +## Interpretation Rules -Status and UI/API events are intentionally lean. They are good for current -state, but not a replacement for the JSONL trace when you need the workflow's -actual assistant or tool history. +- status and UI/API surfaces are intentionally lean +- the JSONL trace is the authoritative assistant and tool history for one run +- current pause state should still come from `archon workflow status --json` +- use runtime logs only when the issue looks like Archon itself rather than a + workflow node decision diff --git a/.agents/skills/archon/references/monitoring.md b/.agents/skills/archon/references/monitoring.md index a92c70afbf..f6110f9695 100644 --- a/.agents/skills/archon/references/monitoring.md +++ b/.agents/skills/archon/references/monitoring.md @@ -13,6 +13,9 @@ archon workflow status --json This command currently includes `last_activity_at`, which makes it usable as the first stall-detection surface. +Treat this command as the source of truth for current workflow state. Do not +infer current pause state from old terminal output alone. + ## Default Cadence Use this cadence during active live monitoring: @@ -57,6 +60,35 @@ Treat `paused` as action-required. - relay it directly - wait for the user response +Track the paused fingerprint: + +- `approval.nodeId` +- `approval.iteration` +- `approval.message` + +If any of those change on a later `paused` check, the loop has returned with a +new checkpoint. 
+ +Important nuance: + +- interactive-loop approval metadata can persist after approval while the run is + back in `running` +- do not treat persisted `metadata.approval` as proof of a fresh pause +- current `status` wins + +### After approve or reject + +After every approval, rejection, or manual resume: + +1. re-run `archon workflow status --json` +2. continue checking until the run reaches: + - `paused` + - `completed` + - `failed` + +Recording approval is not the end of the operator loop. The next required state +change must be observed. + ### Possible stall Default heuristic: diff --git a/.agents/skills/archon/references/repo-init.md b/.agents/skills/archon/references/repo-init.md new file mode 100644 index 0000000000..081d17b392 --- /dev/null +++ b/.agents/skills/archon/references/repo-init.md @@ -0,0 +1,90 @@ +# Initializing Archon In A Repository For Codex + +Use this when the task is to add `.archon/` to a repository so Codex can create +or customize Archon commands and workflows there. + +## Directory Structure + +Create this in the repository root: + +```text +.archon/ +├── commands/ +├── workflows/ +├── mcp/ # optional; mainly relevant for Claude node-local MCP config +└── config.yaml # optional +``` + +Minimum setup: + +```bash +mkdir -p .archon/commands .archon/workflows +``` + +## Minimal Repo Config + +Create `.archon/config.yaml` only when the repo needs non-default behavior: + +```yaml +assistant: codex + +worktree: + baseBranch: main + copyFiles: + - .env + - .env.local + +defaults: + loadDefaultCommands: true + loadDefaultWorkflows: true +``` + +Notes: + +- `assistant: codex` makes this repo prefer Codex under Archon +- `worktree.copyFiles` is only needed when worktrees need copied local files +- bundled defaults do not need to be copied into the repo to be available + +## Bundled Default Behavior + +Archon ships bundled workflows and commands. Repo-local files override bundled +files with the same name. 
+ +- `archon workflow list` shows discovered workflows +- repo `.archon/workflows/*` overrides bundled workflows with the same name +- repo `.archon/commands/*` overrides bundled commands with the same name + +## Optional MCP Directory + +Keep `.archon/mcp/` optional in Codex-first guidance. + +Why: + +- Archon supports node-local `mcp:` for Claude workflows +- Codex does not use `mcp:` as a node-local parity surface +- Codex MCP configuration belongs in Codex config rather than in workflow YAML + +## Global Config Reminder + +Global config lives at `~/.archon/config.yaml`. + +If the goal is a Codex-first Archon environment more broadly, that file can use: + +```yaml +defaultAssistant: codex +assistants: + codex: + model: gpt-5.4 + modelReasoningEffort: medium + webSearchMode: live +``` + +## Verification + +After initialization: + +```bash +archon workflow list --json +``` + +The repo should now expose bundled workflows plus any repo-local custom ones. diff --git a/.agents/skills/archon/references/variables.md b/.agents/skills/archon/references/variables.md new file mode 100644 index 0000000000..f7e8d737b4 --- /dev/null +++ b/.agents/skills/archon/references/variables.md @@ -0,0 +1,70 @@ +# Variable Substitution Reference For Codex + +Variables are placeholders in command files and workflow prompts. Archon +replaces them at execution time. 
+ +## Variable Table + +| Variable | Scope | Description | +| --- | --- | --- | +| `$ARGUMENTS` | all modes | The original user message passed to the workflow | +| `$USER_MESSAGE` | all modes | Alias for `$ARGUMENTS` | +| `$WORKFLOW_ID` | all modes | Unique workflow run ID | +| `$ARTIFACTS_DIR` | all modes | Pre-created artifact directory for the current run | +| `$BASE_BRANCH` | all modes | Base branch name, auto-detected or configured via `worktree.baseBranch` | +| `$DOCS_DIR` | all modes | Repo docs directory, from `docs.path` or default `docs/` | +| `$CONTEXT` | all modes | GitHub issue or PR context when the platform provides it | +| `$EXTERNAL_CONTEXT` | all modes | Alias for `$CONTEXT` | +| `$ISSUE_CONTEXT` | all modes | Alias for `$CONTEXT` | +| `$LOOP_USER_INPUT` | interactive loop resumes | User feedback injected on the first resumed iteration, empty otherwise | +| `$REJECTION_REASON` | approval `on_reject` prompts | Reviewer feedback captured when an approval node rejects and re-prompts | +| `$nodeId.output` | DAG only | Full output from a completed upstream node | +| `$nodeId.output.field` | DAG only | JSON field access on structured output from an upstream node | + +## Where Variables Are Substituted + +- command files in `.archon/commands/*.md` +- inline `prompt:` fields +- `loop.prompt:` fields +- approval `on_reject.prompt` fields +- `bash:` scripts in DAG nodes + +In `bash:` nodes, `$nodeId.output` values are automatically shell-quoted before +injection. + +## Substitution Order + +1. standard workflow variables such as `$WORKFLOW_ID`, `$ARGUMENTS`, + `$ARTIFACTS_DIR`, `$BASE_BRANCH`, `$DOCS_DIR`, and `$CONTEXT` +2. node output references such as `$nodeId.output` and `$nodeId.output.field` + +## Structured Output Notes + +`$nodeId.output.field` only works when the upstream node produced structured +output through `output_format:`. + +For Codex, `output_format:` is a real supported workflow surface. 
It maps to the +Codex client's structured-output path rather than being a Claude-only feature. + +## Context Auto-Append + +If a prompt template does not mention `$CONTEXT`, `$EXTERNAL_CONTEXT`, or +`$ISSUE_CONTEXT` anywhere but Archon has external context available, Archon may +append that context automatically after a separator. + +## Literal Dollar Signs + +Use `\\$` to produce a literal `$` without substitution. + +## Unknown References + +Unknown node references resolve to an empty string with a warning in the logs. +Do not depend on missing-node references as control flow. + +## Interactive Workflow Notes + +- `$LOOP_USER_INPUT` is only populated when an interactive loop resumes after an + approval round-trip +- `$REJECTION_REASON` is only populated for an approval node's `on_reject` + branch +- outside those contexts, both variables resolve to an empty string diff --git a/.agents/skills/archon/references/workflow-dag.md b/.agents/skills/archon/references/workflow-dag.md new file mode 100644 index 0000000000..07ff31944f --- /dev/null +++ b/.agents/skills/archon/references/workflow-dag.md @@ -0,0 +1,224 @@ +# Workflow Authoring For Codex + +Archon workflows use a DAG format: nodes with explicit dependencies. This is a +shared Archon surface for both Claude and Codex, but some node fields behave +differently by provider. 
+ +## Schema + +```yaml +name: my-workflow +description: What this workflow does + +provider: codex # optional; 'claude' or 'codex' +model: gpt-5.4 # optional model override + +nodes: + - id: first-node + command: my-command + - id: second-node + prompt: "Use the output: $first-node.output" + depends_on: [first-node] +``` + +## Seven Node Types + +Each node must define exactly one of: + +- `command` +- `prompt` +- `bash` +- `script` +- `loop` +- `approval` +- `cancel` + +### Command Node + +```yaml +- id: investigate + command: investigate-issue +``` + +### Prompt Node + +```yaml +- id: classify + prompt: "Classify this issue: $ARGUMENTS" +``` + +### Bash Node + +```yaml +- id: fetch-data + bash: "gh issue view 42 --json title,body" + timeout: 15000 +``` + +### Script Node + +```yaml +- id: summarize + script: scripts/summarize_issue.py + runtime: uv + deps: + - pyyaml +``` + +### Loop Node + +```yaml +- id: implement + loop: + prompt: "Implement the next task. When complete: DONE" + until: DONE + max_iterations: 10 + fresh_context: true + until_bash: "bun run test" +``` + +### Approval Node + +```yaml +- id: approve-plan + approval: + message: "Approve the plan draft?" 
+ capture_response: true + on_reject: + prompt: "Revise the plan using this feedback: $REJECTION_REASON" +``` + +### Cancel Node + +```yaml +- id: stop-run + cancel: "Human rejected the proposal" +``` + +## Shared Node Fields + +| Field | Description | +| --- | --- | +| `id` | unique node identifier | +| `depends_on` | upstream node IDs | +| `when` | condition expression | +| `trigger_rule` | join semantics for dependencies | +| `context` | `fresh` or `shared` assistant-session behavior | +| `idle_timeout` | per-node or per-iteration idle timeout | + +## Provider-Aware Node Fields + +These fields are shared and meaningful for Codex: + +| Field | Codex status | Notes | +| --- | --- | --- | +| `provider` | supported | workflow-level or node-level | +| `model` | supported | workflow-level and node-level, including loop nodes | +| `output_format` | supported | structured output works on Codex | +| `retry` | supported except loop nodes | loop-node retry is still a hard error | + +These fields are not Codex per-node parity features: + +| Field | Codex status | Notes | +| --- | --- | --- | +| `hooks` | ignored | Claude-only node control | +| `mcp` | ignored per-node | Codex MCP is global, not node-local | +| `skills` | ignored per-node | Codex skill discovery is global or repo-level | +| `allowed_tools` | ignored | Claude-only node control | +| `denied_tools` | ignored | Claude-only node control | + +## Workflow-Level Codex Fields + +These are workflow-level controls, not node-level controls: + +| Field | Codex status | Notes | +| --- | --- | --- | +| `interactive` | supported | workflow-level switch for approval delivery and interactive loop behavior | +| `modelReasoningEffort` | parsed but not runtime-effective per workflow | current execution still uses `assistants.codex.modelReasoningEffort` from Archon config | +| `webSearchMode` | parsed but not runtime-effective per workflow | current execution still uses `assistants.codex.webSearchMode` from Archon config | +| 
`additionalDirectories` | parsed but not runtime-effective per workflow | current execution still uses `assistants.codex.additionalDirectories` from Archon config | + +## Conditions + +Use `when:` for simple routing: + +```yaml +- id: investigate + command: investigate-bug + depends_on: [classify] + when: "$classify.output.issue_type == 'bug'" +``` + +Supported operators in workflow conditions remain the same regardless of +provider. + +## Structured Output + +Structured output is a real Codex-safe feature: + +```yaml +- id: classify + prompt: "Classify this issue" + output_format: + type: object + properties: + issue_type: + type: string + enum: [bug, feature] + required: [issue_type] +``` + +This enables downstream references such as `$classify.output.issue_type`. + +## Loop Notes + +Loop nodes support: + +- `loop.prompt` +- `until` +- `max_iterations` +- `fresh_context` +- `interactive` +- `gate_message` +- `until_bash` + +Do not treat loop nodes as a place for advanced per-node Codex controls. Fields +such as `hooks`, `mcp`, `skills`, and tool restrictions are ignored on Codex, +and `retry` on a loop node is a hard error rather than being ignored. + +## Approval And Cancel Notes + +- `approval` pauses the workflow for human input +- `approval.on_reject.prompt` can use `$REJECTION_REASON` +- `approval.capture_response` preserves the reviewer response for downstream use +- `cancel` ends the workflow intentionally with a human-readable reason + +## Resume On Failure + +```bash +archon workflow run my-workflow --resume +``` + +Completed nodes are skipped on resume. + +## Validation + +Before treating a workflow as done, validate it: + +```bash +archon validate workflows +``` + +This checks YAML structure, dependency references, command existence, and +provider compatibility warnings. + +## Authoring Rule For Codex + +If a workflow depends on per-node hooks, per-node MCP, per-node skills, or +per-node tool restrictions, do not present it as Codex-safe. 
Use a Codex +variant or document the degraded behavior explicitly. + +## Example + +See `examples/dag-workflow.yaml` for a Codex-safe reference workflow that keeps +to shared or explicitly supported Codex surfaces. diff --git a/.archon/commands/defaults/archon-assist-codex.md b/.archon/commands/defaults/archon-assist-codex.md index 03976bf777..82688d0181 100644 --- a/.archon/commands/defaults/archon-assist-codex.md +++ b/.archon/commands/defaults/archon-assist-codex.md @@ -12,13 +12,18 @@ argument-hint: You are helping with a request that did not match a more specific Codex-safe workflow. +This lane is the fallback, not the default bootstrap for all Codex usage. + ## Instructions 1. **Understand the request** - Identify whether this is a question, debugging task, repo exploration, a one-off change, or a CI/problem investigation. - - If the request is substantial multi-file implementation work that should - end in a PR, stop and route to `archon-piv-loop-codex` instead of - continuing in assist mode. + - If the request is substantial multi-file implementation work, guided + development, interactive refinement, or any task that clearly wants a + human-in-the-loop build/review cycle, stop and route to + `archon-piv-loop-codex` instead of continuing in assist mode. + - If the user is explicitly asking to run a specific Archon workflow, honor + that direct workflow route rather than staying in assist mode. 2. **Ground yourself in the repo** - Search the codebase, read the relevant files, and understand the current implementation before acting. 3. **Read repo guidance explicitly when needed** @@ -63,6 +68,8 @@ When the request is mainly about a failed, paused, or confusing workflow run: - Prefer small, reversible changes. - Use project-defined validation commands when relevant. - Report validation failures honestly. +- Do not present assist mode as the required entrypoint for Archon. If a direct + workflow lane fits, use it. 
- Do not rely on Claude-only workflow-node features such as `skills`, `hooks`, `mcp`, `allowed_tools`, or `denied_tools`. - If the user explicitly wants the Claude-oriented assist lane instead, say so diff --git a/.archon/workflows/defaults/archon-assist-codex.yaml b/.archon/workflows/defaults/archon-assist-codex.yaml index dbce914c3e..e2d20ce172 100644 --- a/.archon/workflows/defaults/archon-assist-codex.yaml +++ b/.archon/workflows/defaults/archon-assist-codex.yaml @@ -5,7 +5,9 @@ description: | "use archon codex", "general help codex", "codex workflow help". Handles: Questions, debugging, exploration, one-off tasks, explanations, CI failures, general help. Capability: Full Codex agent session with file, shell, git, and network access as configured by Archon. - NOT for: Claude-tuned assist mode (use archon-assist), or guided Codex PIV work + This is the fallback Codex lane, not the default bootstrap for all Archon work. + NOT for: Claude-tuned assist mode (use archon-assist), direct workflow requests that + already match a narrower Codex-safe workflow, or guided Codex PIV work (use archon-piv-loop-codex). Note: Will inform user when Codex assist mode is used for tracking. From b6c190595aa3df7591a57efaf5a8e1404418a9b6 Mon Sep 17 00:00:00 2001 From: matzls Date: Tue, 14 Apr 2026 13:53:26 +0200 Subject: [PATCH 20/21] fix(workflows): honor workflow-level codex tuning Make modelReasoningEffort, webSearchMode, and additionalDirectories effective from workflow YAML for Codex execution, with config fallback for normal and loop nodes. Add regression coverage for override, fallback, and mixed-provider loop preservation. Update Codex Archon references to match the implemented precedence. 
Co-authored-by: Codex --- .../references/codex-capability-crosswalk.md | 25 +- .../skills/archon/references/configuration.md | 26 ++ .../skills/archon/references/workflow-dag.md | 12 +- packages/workflows/src/dag-executor.test.ts | 283 ++++++++++++++++++ packages/workflows/src/dag-executor.ts | 60 +++- 5 files changed, 379 insertions(+), 27 deletions(-) diff --git a/.agents/skills/archon/references/codex-capability-crosswalk.md b/.agents/skills/archon/references/codex-capability-crosswalk.md index 3de433d8af..490e770aa5 100644 --- a/.agents/skills/archon/references/codex-capability-crosswalk.md +++ b/.agents/skills/archon/references/codex-capability-crosswalk.md @@ -19,9 +19,9 @@ truth table for Codex-safe Archon authoring. | `skills` | supported per-node | ignored per-node | global or repo `.agents/skills/` | global/repo discovery, not equivalent | | `allowed_tools` | supported per-node | ignored | Codex config / MCP config | global-only, not equivalent | | `denied_tools` | supported per-node | ignored | Codex config / MCP config | global-only, not equivalent | -| `modelReasoningEffort` | not the same field | parsed but not runtime-effective per workflow | Archon assistant config | config-only truth today | -| `webSearchMode` | not the same field | parsed but not runtime-effective per workflow | Archon assistant config | config-only truth today | -| `additionalDirectories` | not the same field | parsed but not runtime-effective per workflow | Archon assistant config | config-only truth today | +| `modelReasoningEffort` | not the same field | supported for Codex | workflow YAML or Archon config | workflow-level override with config fallback | +| `webSearchMode` | not the same field | supported for Codex | workflow YAML or Archon config | workflow-level override with config fallback | +| `additionalDirectories` | not the same field | supported for Codex | workflow YAML or Archon config | workflow-level override with config fallback | ## Feature Notes @@ -59,16 
+59,21 @@ Loop-node retry is still not valid. ### `modelReasoningEffort`, `webSearchMode`, and `additionalDirectories` -These fields matter for Codex, but not in the same way the YAML surface may -suggest. +These are real workflow-level Codex tuning fields. -- the workflow loader parses them -- current Codex execution still pulls these values from +- if the workflow sets them, execution uses the workflow value +- if the workflow omits them, execution falls back to `config.assistants.codex.*` -- do not treat workflow YAML as an isolated per-workflow tuning surface for - these fields unless the runtime is changed to honor them +- they remain workflow-level controls, not node-level controls -For current Codex behavior, use Archon config: +Current precedence: + +1. workflow YAML +2. `config.assistants.codex.*` +3. SDK defaults + +Archon config still matters as the default source when the workflow does not set +these fields: - `assistants.codex.modelReasoningEffort` - `assistants.codex.webSearchMode` diff --git a/.agents/skills/archon/references/configuration.md b/.agents/skills/archon/references/configuration.md index 02e0e6861f..c1cb594fd3 100644 --- a/.agents/skills/archon/references/configuration.md +++ b/.agents/skills/archon/references/configuration.md @@ -27,6 +27,12 @@ Use repo config when the change is project-specific: - disable bundled defaults for one repo - add repo-specific Codex defaults +Use workflow YAML when one Codex workflow should run with its own tuning: + +- set workflow-specific `modelReasoningEffort` +- set workflow-specific `webSearchMode` +- set workflow-specific `additionalDirectories` + Use global config when the change is user-wide: - prefer Codex by default across repos @@ -126,6 +132,26 @@ defaults: | `docs.path` | repo docs path used by workflow surfaces that care about docs | | `env` | per-project env vars merged into workflow execution config; most relevant when a workflow surface consumes `config.envVars` | +## Workflow-Level Codex 
Overrides + +For Codex, these workflow YAML fields override Archon config for that workflow: + +- `modelReasoningEffort` +- `webSearchMode` +- `additionalDirectories` + +Runtime precedence for those fields is: + +1. workflow YAML +2. `assistants.codex.*` in Archon config +3. SDK defaults + +That means: + +- use Archon config for shared defaults across many Codex workflows +- use workflow YAML when one workflow needs a different execution profile +- do not expect node-level versions of those fields + ## Editing Guidance When modifying config: diff --git a/.agents/skills/archon/references/workflow-dag.md b/.agents/skills/archon/references/workflow-dag.md index 07ff31944f..3035a55be4 100644 --- a/.agents/skills/archon/references/workflow-dag.md +++ b/.agents/skills/archon/references/workflow-dag.md @@ -134,9 +134,15 @@ These are workflow-level controls, not node-level controls: | Field | Codex status | Notes | | --- | --- | --- | | `interactive` | supported | workflow-level switch for approval delivery and interactive loop behavior | -| `modelReasoningEffort` | parsed but not runtime-effective per workflow | current execution still uses `assistants.codex.modelReasoningEffort` from Archon config | -| `webSearchMode` | parsed but not runtime-effective per workflow | current execution still uses `assistants.codex.webSearchMode` from Archon config | -| `additionalDirectories` | parsed but not runtime-effective per workflow | current execution still uses `assistants.codex.additionalDirectories` from Archon config | +| `modelReasoningEffort` | supported | workflow-level override, with `assistants.codex.modelReasoningEffort` as fallback | +| `webSearchMode` | supported | workflow-level override, with `assistants.codex.webSearchMode` as fallback | +| `additionalDirectories` | supported | workflow-level override, with `assistants.codex.additionalDirectories` as fallback | + +Precedence for these workflow-level Codex tuning fields is: + +1. workflow YAML +2. 
`assistants.codex.*` from Archon config +3. SDK defaults ## Conditions diff --git a/packages/workflows/src/dag-executor.test.ts b/packages/workflows/src/dag-executor.test.ts index 64490110d2..5d49cc1040 100644 --- a/packages/workflows/src/dag-executor.test.ts +++ b/packages/workflows/src/dag-executor.test.ts @@ -1514,6 +1514,111 @@ describe('executeDagWorkflow -- output_format structured output', () => { .filter(msg => typeof msg === 'string' && msg.includes('did not return structured output')); expect(warningMessages).toHaveLength(0); }); + + it('uses workflow-level Codex tuning instead of config defaults for normal nodes', async () => { + mockGetAssistantClientDag.mockImplementation(() => ({ + sendQuery: mockSendQueryDag, + getType: () => 'codex', + })); + mockSendQueryDag.mockImplementation(function* () { + yield { type: 'assistant', content: 'workflow scoped codex settings' }; + yield { type: 'result', sessionId: 'codex-sid-3' }; + }); + + const mockDeps = createMockDeps(); + const platform = createMockPlatform(); + const workflowRun = makeWorkflowRun('codex-workflow-options-run'); + const config: WorkflowConfig = { + ...minimalConfig, + assistant: 'codex', + assistants: { + ...minimalConfig.assistants, + codex: { + modelReasoningEffort: 'low', + webSearchMode: 'disabled', + additionalDirectories: ['/config/default'], + }, + }, + }; + + await executeDagWorkflow( + mockDeps, + platform, + 'conv-codex-workflow-options', + testDir, + { + name: 'codex-workflow-options', + modelReasoningEffort: 'xhigh', + webSearchMode: 'live', + additionalDirectories: ['/workflow/override'], + nodes: [{ id: 'classify', command: 'classify' }], + }, + workflowRun, + 'codex', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + config + ); + + const optionsArg = mockSendQueryDag.mock.calls[0][3] as Record; + expect(optionsArg.modelReasoningEffort).toBe('xhigh'); + expect(optionsArg.webSearchMode).toBe('live'); + 
expect(optionsArg.additionalDirectories).toEqual(['/workflow/override']); + }); + + it('falls back to config Codex tuning when workflow-level values are absent for normal nodes', async () => { + mockGetAssistantClientDag.mockImplementation(() => ({ + sendQuery: mockSendQueryDag, + getType: () => 'codex', + })); + mockSendQueryDag.mockImplementation(function* () { + yield { type: 'assistant', content: 'config scoped codex settings' }; + yield { type: 'result', sessionId: 'codex-sid-4' }; + }); + + const mockDeps = createMockDeps(); + const platform = createMockPlatform(); + const workflowRun = makeWorkflowRun('codex-config-options-run'); + const config: WorkflowConfig = { + ...minimalConfig, + assistant: 'codex', + assistants: { + ...minimalConfig.assistants, + codex: { + modelReasoningEffort: 'medium', + webSearchMode: 'cached', + additionalDirectories: ['/config/fallback'], + }, + }, + }; + + await executeDagWorkflow( + mockDeps, + platform, + 'conv-codex-config-options', + testDir, + { + name: 'codex-config-options', + nodes: [{ id: 'classify', command: 'classify' }], + }, + workflowRun, + 'codex', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + config + ); + + const optionsArg = mockSendQueryDag.mock.calls[0][3] as Record; + expect(optionsArg.modelReasoningEffort).toBe('medium'); + expect(optionsArg.webSearchMode).toBe('cached'); + expect(optionsArg.additionalDirectories).toEqual(['/config/fallback']); + }); }); describe('executeDagWorkflow -- when condition parse errors (fail-closed)', () => { @@ -2767,6 +2872,184 @@ describe('executeDagWorkflow -- resume with priorCompletedNodes', () => { }); }); + it('uses workflow-level Codex tuning instead of config defaults for loop nodes', async () => { + mockGetAssistantClientDag.mockImplementation(() => ({ + sendQuery: mockSendQueryDag, + getType: () => 'codex', + })); + mockSendQueryDag.mockImplementation(function* () { + yield { type: 'assistant', content: 'DONE' }; + yield { 
type: 'result', sessionId: 'loop-codex-sid-1' }; + }); + + const mockDeps = createMockDeps(); + const platform = createMockPlatform(); + const workflowRun = makeWorkflowRun('codex-loop-workflow-options-run'); + const config: WorkflowConfig = { + ...minimalConfig, + assistant: 'codex', + assistants: { + ...minimalConfig.assistants, + codex: { + modelReasoningEffort: 'low', + webSearchMode: 'disabled', + additionalDirectories: ['/config/loop-default'], + }, + }, + }; + + await executeDagWorkflow( + mockDeps, + platform, + 'conv-codex-loop-workflow-options', + testDir, + { + name: 'codex-loop-workflow-options', + modelReasoningEffort: 'high', + webSearchMode: 'live', + additionalDirectories: ['/workflow/loop-override'], + nodes: [ + { + id: 'my-loop', + loop: { + prompt: 'Do a task. When done, output DONE.', + until: 'DONE', + max_iterations: 1, + }, + }, + ], + }, + workflowRun, + 'codex', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + config + ); + + const optionsArg = mockSendQueryDag.mock.calls[0][3] as Record; + expect(optionsArg.modelReasoningEffort).toBe('high'); + expect(optionsArg.webSearchMode).toBe('live'); + expect(optionsArg.additionalDirectories).toEqual(['/workflow/loop-override']); + }); + + it('preserves node-level loop provider and model when workflow-level Codex tuning is present', async () => { + mockGetAssistantClientDag.mockImplementation(provider => ({ + sendQuery: mockSendQueryDag, + getType: () => provider, + })); + mockSendQueryDag.mockImplementation(function* () { + yield { type: 'assistant', content: 'DONE' }; + yield { type: 'result', sessionId: 'loop-mixed-provider-sid-1' }; + }); + + const mockDeps = createMockDeps(); + const platform = createMockPlatform(); + const workflowRun = makeWorkflowRun('codex-loop-mixed-provider-run'); + + await executeDagWorkflow( + mockDeps, + platform, + 'conv-codex-loop-mixed-provider', + testDir, + { + name: 'codex-loop-mixed-provider', + modelReasoningEffort: 
'xhigh', + webSearchMode: 'live', + additionalDirectories: ['/workflow/codex-override'], + nodes: [ + { + id: 'my-loop', + provider: 'claude', + model: 'sonnet', + loop: { + prompt: 'Do a task. When done, output DONE.', + until: 'DONE', + max_iterations: 1, + }, + }, + ], + }, + workflowRun, + 'codex', + 'gpt-5.3-codex', + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + { ...minimalConfig, assistant: 'codex' } + ); + + expect(mockGetAssistantClientDag.mock.calls[0]?.[0]).toBe('claude'); + const optionsArg = mockSendQueryDag.mock.calls[0][3] as Record; + expect(optionsArg.model).toBe('sonnet'); + expect(optionsArg.modelReasoningEffort).toBeUndefined(); + expect(optionsArg.webSearchMode).toBeUndefined(); + expect(optionsArg.additionalDirectories).toBeUndefined(); + }); + + it('falls back to config Codex tuning when workflow-level values are absent for loop nodes', async () => { + mockGetAssistantClientDag.mockImplementation(() => ({ + sendQuery: mockSendQueryDag, + getType: () => 'codex', + })); + mockSendQueryDag.mockImplementation(function* () { + yield { type: 'assistant', content: 'DONE' }; + yield { type: 'result', sessionId: 'loop-codex-sid-2' }; + }); + + const mockDeps = createMockDeps(); + const platform = createMockPlatform(); + const workflowRun = makeWorkflowRun('codex-loop-config-options-run'); + const config: WorkflowConfig = { + ...minimalConfig, + assistant: 'codex', + assistants: { + ...minimalConfig.assistants, + codex: { + modelReasoningEffort: 'minimal', + webSearchMode: 'cached', + additionalDirectories: ['/config/loop-fallback'], + }, + }, + }; + + await executeDagWorkflow( + mockDeps, + platform, + 'conv-codex-loop-config-options', + testDir, + { + name: 'codex-loop-config-options', + nodes: [ + { + id: 'my-loop', + loop: { + prompt: 'Do a task. 
When done, output DONE.', + until: 'DONE', + max_iterations: 1, + }, + }, + ], + }, + workflowRun, + 'codex', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + config + ); + + const optionsArg = mockSendQueryDag.mock.calls[0][3] as Record; + expect(optionsArg.modelReasoningEffort).toBe('minimal'); + expect(optionsArg.webSearchMode).toBe('cached'); + expect(optionsArg.additionalDirectories).toEqual(['/config/loop-fallback']); + }); + it('completes after multiple iterations', async () => { let callCount = 0; mockSendQueryDag.mockImplementation(function* () { diff --git a/packages/workflows/src/dag-executor.ts b/packages/workflows/src/dag-executor.ts index 4d6c29e053..bba13586df 100644 --- a/packages/workflows/src/dag-executor.ts +++ b/packages/workflows/src/dag-executor.ts @@ -76,13 +76,16 @@ function getLog(): ReturnType { return cachedLog; } -/** Workflow-level Claude SDK options — per-node overrides take precedence via ?? */ +/** Workflow-level execution options. Per-node `provider`/`model` overrides still take precedence. */ interface WorkflowLevelOptions { effort?: EffortLevel; thinking?: ThinkingConfig; fallbackModel?: string; betas?: string[]; sandbox?: SandboxSettings; + modelReasoningEffort?: WorkflowAssistantOptions['modelReasoningEffort']; + webSearchMode?: WorkflowAssistantOptions['webSearchMode']; + additionalDirectories?: WorkflowAssistantOptions['additionalDirectories']; } /** Internal node execution result — extends NodeOutput with cost data for aggregation. 
*/ @@ -102,6 +105,11 @@ interface SendMessageContext { nodeName?: string; } +type WorkflowCodexExecutionOptions = Pick< + WorkflowAssistantOptions, + 'modelReasoningEffort' | 'webSearchMode' | 'additionalDirectories' +>; + interface BundledScriptExecution { cmd: string; args: string[]; @@ -288,6 +296,24 @@ export function buildSDKHooksFromYAML(nodeHooks: WorkflowNodeHooks): SDKHooksMap return sdkHooks; } +function resolveWorkflowCodexOptions( + workflowLevelOptions: WorkflowLevelOptions, + config: WorkflowConfig +): WorkflowCodexExecutionOptions | undefined { + const modelReasoningEffort = + workflowLevelOptions.modelReasoningEffort ?? config.assistants.codex.modelReasoningEffort; + const webSearchMode = workflowLevelOptions.webSearchMode ?? config.assistants.codex.webSearchMode; + const additionalDirectories = + workflowLevelOptions.additionalDirectories ?? config.assistants.codex.additionalDirectories; + + const resolved: WorkflowCodexExecutionOptions = {}; + if (modelReasoningEffort !== undefined) resolved.modelReasoningEffort = modelReasoningEffort; + if (webSearchMode !== undefined) resolved.webSearchMode = webSearchMode; + if (additionalDirectories !== undefined) resolved.additionalDirectories = additionalDirectories; + + return Object.keys(resolved).length > 0 ? resolved : undefined; +} + /** * Load MCP server config from a JSON file and expand environment variables. * Format: Record matching the SDK's expected shape. @@ -522,14 +548,15 @@ async function resolveNodeProviderAndModel( let options: WorkflowAssistantOptions | undefined; if (provider === 'codex') { options = { - model, - modelReasoningEffort: config.assistants.codex.modelReasoningEffort, - webSearchMode: config.assistants.codex.webSearchMode, - additionalDirectories: config.assistants.codex.additionalDirectories, + ...(model ? { model } : {}), + ...(resolveWorkflowCodexOptions(workflowLevelOptions, config) ?? 
{}), }; if (node.output_format) { options.outputFormat = { type: 'json_schema', schema: node.output_format }; } + if (Object.keys(options).length === 0) { + options = undefined; + } } else { const claudeOptions: WorkflowAssistantOptions = {}; if (model) claudeOptions.model = model; @@ -1723,22 +1750,17 @@ async function executeScriptNode( } /** - * Build WorkflowAssistantOptions from resolved provider, model, and config. + * Build WorkflowAssistantOptions from resolved provider, model, workflow-level options, and config. * Caller is responsible for resolving per-node overrides before passing model. */ function buildLoopNodeOptions( provider: 'claude' | 'codex', model: string | undefined, + workflowLevelOptions: WorkflowLevelOptions, config: WorkflowConfig ): WorkflowAssistantOptions | undefined { const codexOptions = - provider === 'codex' - ? { - modelReasoningEffort: config.assistants.codex.modelReasoningEffort, - webSearchMode: config.assistants.codex.webSearchMode, - additionalDirectories: config.assistants.codex.additionalDirectories, - } - : undefined; + provider === 'codex' ? 
resolveWorkflowCodexOptions(workflowLevelOptions, config) : undefined; const claudeOptions = provider === 'claude' && config.assistants.claude.settingSources @@ -1868,6 +1890,7 @@ async function executeLoopNode( baseBranch: string, docsDir: string, nodeOutputs: Map, + workflowLevelOptions: WorkflowLevelOptions, config: WorkflowConfig, issueContext?: string ): Promise { @@ -1904,7 +1927,12 @@ async function executeLoopNode( let loopTotalNumTurns: number | undefined; let previousProgressSnapshot: LoopProgressSnapshot | undefined; let noProgressStreak = 0; - const resolvedOptions = buildLoopNodeOptions(workflowProvider, workflowModel, config); + const resolvedOptions = buildLoopNodeOptions( + workflowProvider, + workflowModel, + workflowLevelOptions, + config + ); // Helper to log event store errors consistently const logEventStoreError = (err: Error, iteration: number): void => { @@ -2592,6 +2620,9 @@ export async function executeDagWorkflow( fallbackModel: workflow.fallbackModel, betas: workflow.betas, sandbox: workflow.sandbox, + modelReasoningEffort: workflow.modelReasoningEffort, + webSearchMode: workflow.webSearchMode, + additionalDirectories: workflow.additionalDirectories, }; const layers = buildTopologicalLayers(workflow.nodes); const nodeOutputs = new Map(); @@ -2852,6 +2883,7 @@ export async function executeDagWorkflow( baseBranch, docsDir, nodeOutputs, + workflowLevelOptions, config, issueContext ); From 61b2d2eb1d3c59e40ab49e9089afdd00d49d4a8c Mon Sep 17 00:00:00 2001 From: matzls Date: Tue, 14 Apr 2026 13:58:33 +0200 Subject: [PATCH 21/21] docs(archon): align codex workflow tuning docs Update the assistant architecture reference to reflect that workflow-level Codex tuning fields now override Archon config with config fallback, matching the shipped runtime behavior. 
Co-authored-by: Codex --- .../docs/reference/assistant-architecture.md | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/packages/docs-web/src/content/docs/reference/assistant-architecture.md b/packages/docs-web/src/content/docs/reference/assistant-architecture.md index 7d3096bc6a..5ba1647cb9 100644 --- a/packages/docs-web/src/content/docs/reference/assistant-architecture.md +++ b/packages/docs-web/src/content/docs/reference/assistant-architecture.md @@ -284,19 +284,25 @@ Current implementation: - `denied_tools` - `output_format` -### Parsed but not fully applied: workflow-level Codex options +### Workflow-level Codex tuning -There is one important implementation gap: +Workflow YAML now supports these Codex tuning fields as real runtime inputs: -- workflow YAML supports `modelReasoningEffort`, `webSearchMode`, and `additionalDirectories` -- the loader parses them successfully -- the runtime executor currently pulls Codex options from merged config, not from the workflow object +- `modelReasoningEffort` +- `webSearchMode` +- `additionalDirectories` Practical effect: - `model:` on a workflow is effective -- workflow-level `modelReasoningEffort`, `webSearchMode`, and `additionalDirectories` are currently best treated as **documented intent, not guaranteed runtime behavior** -- if you need deterministic current behavior, set those in `assistants.codex` config +- workflow-level `modelReasoningEffort`, `webSearchMode`, and `additionalDirectories` override Archon config for that workflow +- if the workflow omits them, execution falls back to `assistants.codex.*` + +Current precedence is: + +1. workflow YAML +2. `assistants.codex.*` in Archon config +3. SDK defaults ## When Codex Can Realistically Replace Claude For A Node