diff --git a/.archon/workflows/e2e-claude-smoke.yaml b/.archon/workflows/e2e-claude-smoke.yaml new file mode 100644 index 0000000000..e4b0f776a4 --- /dev/null +++ b/.archon/workflows/e2e-claude-smoke.yaml @@ -0,0 +1,23 @@ +# E2E smoke test — Claude provider +# Verifies: provider selection, sendQuery, structured output, tool use +name: e2e-claude-smoke +description: "E2E smoke test for Claude provider. Runs a simple prompt + structured output node." +provider: claude + +nodes: + - id: simple + prompt: "What is 2+2? Answer with just the number, nothing else." + + - id: structured + prompt: "Classify this input as 'math' or 'text': '2+2=4'" + output_format: + type: object + properties: + category: + type: string + enum: ["math", "text"] + depends_on: [simple] + + - id: tool-use + prompt: "Read the file packages/providers/package.json and tell me the package name. Answer with just the name." + depends_on: [simple] diff --git a/.archon/workflows/e2e-codex-smoke.yaml b/.archon/workflows/e2e-codex-smoke.yaml new file mode 100644 index 0000000000..6650f92215 --- /dev/null +++ b/.archon/workflows/e2e-codex-smoke.yaml @@ -0,0 +1,21 @@ +# E2E smoke test — Codex provider +# Verifies: provider selection, sendQuery, structured output +name: e2e-codex-smoke +description: "E2E smoke test for Codex provider. Runs a simple prompt + structured output node." +provider: codex + +nodes: + - id: simple + prompt: "What is 2+2? Answer with just the number, nothing else." + + - id: structured + prompt: "Classify this input as 'math' or 'text': '2+2=4'. Return JSON only." + output_format: + type: object + properties: + category: + type: string + enum: ["math", "text"] + required: ["category"] + additionalProperties: false + depends_on: [simple] diff --git a/CLAUDE.md b/CLAUDE.md index 1541841583..363086969d 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -266,9 +266,16 @@ packages/ │ ├── adapters/ # CLI adapter (stdout output) │ ├── commands/ # CLI command implementations │ └── cli.ts # CLI entry point +├── providers/ # @archon/providers - AI agent providers (SDK deps live here) +│ └── src/ +│ ├── types.ts # Contract layer (IAgentProvider, SendQueryOptions, MessageChunk — ZERO SDK deps) +│ ├── factory.ts # getAgentProvider() switch (built-in: claude, codex) +│ ├── errors.ts # UnknownProviderError +│ ├── claude/ # ClaudeProvider + parseClaudeConfig + MCP/hooks/skills translation +│ ├── codex/ # CodexProvider + parseCodexConfig + binary-resolver +│ └── index.ts # Package exports ├── core/ # @archon/core - Shared business logic │ └── src/ -│ ├── providers/ # AI SDK providers (Claude, Codex) │ ├── config/ # YAML config loading │ ├── db/ # Database connection, queries │ ├── handlers/ # Command handler (slash commands) @@ -289,7 +296,7 @@ packages/ │ ├── executor.ts # Workflow execution orchestrator (executeWorkflow) │ ├── dag-executor.ts # DAG-specific execution logic │ ├── store.ts # IWorkflowStore interface (database abstraction) -│ ├── deps.ts # WorkflowDeps injection types (IWorkflowPlatform, IWorkflowAgentProvider) +│ ├── deps.ts # WorkflowDeps injection types (IWorkflowPlatform, imports from @archon/providers/types) │ ├── event-emitter.ts # Workflow observability events │ ├── logger.ts # JSONL file logger │ ├── validator.ts # Resource validation (command files, MCP configs, skill dirs) @@ -401,10 +408,11 @@ import type { DagNode, WorkflowDefinition } from '@/lib/api'; **Package Split:** - **@archon/paths**: Path resolution utilities, Pino logger factory, web dist cache path (`getWebDistDir`), CWD env stripper (`stripCwdEnv`, `strip-cwd-env-boot`) (no @archon/* deps; `pino` and `dotenv` are allowed external deps) - **@archon/git**: Git operations - worktrees, branches, repos, exec wrappers (depends only on @archon/paths) +- **@archon/providers**: AI agent providers (Claude, Codex) — owns SDK deps, `IAgentProvider` interface, `sendQuery()` contract, and provider-specific option translation. `@archon/providers/types` is the contract subpath (zero SDK deps, zero runtime side effects) that `@archon/workflows` imports from. Providers receive raw `nodeConfig` + `assistantConfig` and translate to SDK-specific options internally. - **@archon/isolation**: Worktree isolation types, providers, resolver, error classifiers (depends only on @archon/git + @archon/paths) -- **@archon/workflows**: Workflow engine - loader, router, executor, DAG, logger, bundled defaults (depends only on @archon/git + @archon/paths + @hono/zod-openapi + zod; DB/AI/config injected via `WorkflowDeps`) +- **@archon/workflows**: Workflow engine - loader, router, executor, DAG, logger, bundled defaults (depends only on @archon/git + @archon/paths + @archon/providers/types + @hono/zod-openapi + zod; DB/AI/config injected via `WorkflowDeps`) - **@archon/cli**: Command-line interface for running workflows and starting the web UI server (depends on @archon/server + @archon/adapters for the serve command) -- **@archon/core**: Business logic, database, orchestration, AI providers (provides `createWorkflowStore()` adapter bridging core DB → `IWorkflowStore`) +- **@archon/core**: Business logic, database, orchestration (depends on @archon/providers for AI; provides `createWorkflowStore()` adapter bridging core DB → `IWorkflowStore`) - **@archon/adapters**: Platform adapters for Slack, Telegram, GitHub, Discord (depends on @archon/core) - **@archon/server**: OpenAPIHono HTTP server (Zod + OpenAPI spec generation via `@hono/zod-openapi`), Web adapter (SSE), API routes, Web UI static serving (depends on @archon/adapters) - **@archon/web**: React frontend (Vite + Tailwind v4 + shadcn/ui + Zustand), SSE streaming to server. `WorkflowRunStatus`, `WorkflowDefinition`, and `DagNode` are all derived from `src/lib/api.generated.d.ts` (generated from the OpenAPI spec via `bun generate:types`; never import from `@archon/workflows`) @@ -440,7 +448,7 @@ import type { DagNode, WorkflowDefinition } from '@/lib/api'; - Session management: Create new or resume existing - Stream AI responses to platform -**4. AI Agent Providers** (`packages/core/src/providers/`) +**4. AI Agent Providers** (`packages/providers/src/`) - Implement `IAgentProvider` interface - **ClaudeProvider**: `@anthropic-ai/claude-agent-sdk` - **CodexProvider**: `@openai/codex-sdk` diff --git a/Dockerfile b/Dockerfile index da4783e019..139b3efaf7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -24,6 +24,7 @@ COPY packages/docs-web/package.json ./packages/docs-web/ COPY packages/git/package.json ./packages/git/ COPY packages/isolation/package.json ./packages/isolation/ COPY packages/paths/package.json ./packages/paths/ +COPY packages/providers/package.json ./packages/providers/ COPY packages/server/package.json ./packages/server/ COPY packages/web/package.json ./packages/web/ COPY packages/workflows/package.json ./packages/workflows/ @@ -130,6 +131,7 @@ COPY packages/docs-web/package.json ./packages/docs-web/ COPY packages/git/package.json ./packages/git/ COPY packages/isolation/package.json ./packages/isolation/ COPY packages/paths/package.json ./packages/paths/ +COPY packages/providers/package.json ./packages/providers/ COPY packages/server/package.json ./packages/server/ COPY packages/web/package.json ./packages/web/ COPY packages/workflows/package.json ./packages/workflows/ @@ -144,6 +146,7 @@ COPY packages/core/ ./packages/core/ COPY packages/git/ ./packages/git/ COPY packages/isolation/ ./packages/isolation/ COPY packages/paths/ ./packages/paths/ +COPY packages/providers/ ./packages/providers/ COPY packages/server/ ./packages/server/ COPY packages/workflows/ ./packages/workflows/ diff --git a/bun.lock b/bun.lock index 04517f4fbf..356a76ed8d 100644 --- a/bun.lock +++ b/bun.lock @@ -23,7 +23,7 @@ }, "packages/adapters": { "name": "@archon/adapters", - "version": "0.3.5", + "version": "0.3.6", "dependencies": { "@archon/core": "workspace:*", "@archon/git": "workspace:*", @@ -41,7 +41,7 @@ }, "packages/cli": { "name": "@archon/cli", - "version": "0.3.5", + "version": "0.3.6", "bin": { "archon": "./src/cli.ts", }, @@ -51,6 +51,7 @@ "@archon/git": "workspace:*", "@archon/isolation": "workspace:*", "@archon/paths": "workspace:*", + "@archon/providers": "workspace:*", "@archon/server": "workspace:*", "@archon/workflows": "workspace:*", "@clack/prompts": "^1.0.0", @@ -62,14 +63,13 @@ }, "packages/core": { "name": "@archon/core", - "version": "0.3.5", + "version": "0.3.6", "dependencies": { - "@anthropic-ai/claude-agent-sdk": "^0.2.89", "@archon/git": "workspace:*", "@archon/isolation": "workspace:*", "@archon/paths": "workspace:*", + "@archon/providers": "workspace:*", "@archon/workflows": "workspace:*", - "@openai/codex-sdk": "^0.116.0", "pg": "^8.11.0", "zod": "^3", }, @@ -83,7 +83,7 @@ }, "packages/docs-web": { "name": "@archon/docs-web", - "version": "0.3.5", + "version": "0.3.6", "dependencies": { "@astrojs/starlight": "^0.38.0", "astro": "^6.1.0", @@ -92,7 +92,7 @@ }, "packages/git": { "name": "@archon/git", - "version": "0.3.5", + "version": "0.3.6", "dependencies": { "@archon/paths": "workspace:*", }, @@ -102,7 +102,7 @@ }, "packages/isolation": { "name": "@archon/isolation", - "version": "0.3.5", + "version": "0.3.6", "dependencies": { "@archon/git": "workspace:*", "@archon/paths": "workspace:*", @@ -113,7 +113,7 @@ }, "packages/paths": { "name": "@archon/paths", - "version": "0.3.5", + "version": "0.3.6", "dependencies": { "dotenv": "^17", "pino": "^9", @@ -123,14 +123,30 @@ "typescript": "^5.0.0", }, }, + "packages/providers": { + "name": "@archon/providers", + "version": "0.3.6", + "dependencies": { + "@anthropic-ai/claude-agent-sdk": "^0.2.89", + "@archon/paths": "workspace:*", + "@openai/codex-sdk": "^0.116.0", + }, + "devDependencies": { + "pino": "^9", + }, + "peerDependencies": { + "typescript": "^5.0.0", + }, + }, "packages/server": { "name": "@archon/server", - "version": "0.3.5", + "version": "0.3.6", "dependencies": { "@archon/adapters": "workspace:*", "@archon/core": "workspace:*", "@archon/git": "workspace:*", "@archon/paths": "workspace:*", + "@archon/providers": "workspace:*", "@archon/workflows": "workspace:*", "@hono/zod-openapi": "^0.19.6", "dotenv": "^17.2.3", @@ -143,7 +159,7 @@ }, "packages/web": { "name": "@archon/web", - "version": "0.3.5", + "version": "0.3.6", "dependencies": { "@dagrejs/dagre": "^2.0.4", "@radix-ui/react-alert-dialog": "^1.1.15", @@ -195,10 +211,11 @@ }, "packages/workflows": { "name": "@archon/workflows", - "version": "0.3.5", + "version": "0.3.6", "dependencies": { "@archon/git": "workspace:*", "@archon/paths": "workspace:*", + "@archon/providers": "workspace:*", "@hono/zod-openapi": "^0.19.6", "zod": "^3.25.28", }, @@ -231,6 +248,8 @@ "@archon/paths": ["@archon/paths@workspace:packages/paths"], + "@archon/providers": ["@archon/providers@workspace:packages/providers"], + "@archon/server": ["@archon/server@workspace:packages/server"], "@archon/web": ["@archon/web@workspace:packages/web"], @@ -2437,7 +2456,7 @@ "@antfu/ni/tinyexec": ["tinyexec@1.0.2", "", {}, "sha512-W/KYk+NFhkmsYpuHq5JykngiOCnxeVL8v8dFnqxSD8qEEdRfXk1SDM6JzNqcERbcGYj9tMrDQBYV9cjgnunFIg=="], - "@archon/core/@anthropic-ai/claude-agent-sdk": ["@anthropic-ai/claude-agent-sdk@0.2.89", "", { "dependencies": { "@anthropic-ai/sdk": "^0.74.0", "@modelcontextprotocol/sdk": "^1.27.1" }, "optionalDependencies": { "@img/sharp-darwin-arm64": "^0.34.2", "@img/sharp-darwin-x64": "^0.34.2", "@img/sharp-linux-arm": "^0.34.2", "@img/sharp-linux-arm64": "^0.34.2", "@img/sharp-linux-x64": "^0.34.2", "@img/sharp-linuxmusl-arm64": "^0.34.2", "@img/sharp-linuxmusl-x64": "^0.34.2", "@img/sharp-win32-arm64": "^0.34.2", "@img/sharp-win32-x64": "^0.34.2" }, "peerDependencies": { "zod": "^4.0.0" } }, "sha512-/9W0lyBGuGHw1uu7pQafsp6BLpxfqCv1QYE0Z/eZTX6lGHht4j4Q+O3UImzjsiyEE9cGkOAwZBGAEHDEqt+QUA=="], + "@archon/providers/@anthropic-ai/claude-agent-sdk": ["@anthropic-ai/claude-agent-sdk@0.2.89", "", { "dependencies": { "@anthropic-ai/sdk": "^0.74.0", "@modelcontextprotocol/sdk": "^1.27.1" }, "optionalDependencies": { "@img/sharp-darwin-arm64": "^0.34.2", "@img/sharp-darwin-x64": "^0.34.2", "@img/sharp-linux-arm": "^0.34.2", "@img/sharp-linux-arm64": "^0.34.2", "@img/sharp-linux-x64": "^0.34.2", "@img/sharp-linuxmusl-arm64": "^0.34.2", "@img/sharp-linuxmusl-x64": "^0.34.2", "@img/sharp-win32-arm64": "^0.34.2", "@img/sharp-win32-x64": "^0.34.2" }, "peerDependencies": { "zod": "^4.0.0" } }, "sha512-/9W0lyBGuGHw1uu7pQafsp6BLpxfqCv1QYE0Z/eZTX6lGHht4j4Q+O3UImzjsiyEE9cGkOAwZBGAEHDEqt+QUA=="], "@astrojs/markdown-remark/remark-parse": ["remark-parse@11.0.0", "", { "dependencies": { "@types/mdast": "^4.0.0", "mdast-util-from-markdown": "^2.0.0", "micromark-util-types": "^2.0.0", "unified": "^11.0.0" } }, "sha512-FCxlKLNGknS5ba/1lmpYijMUzX2esxW5xQqjWxw2eHFfS2MSdaHVINFmhjo+qN1WhZhNimq0dZATN9pH0IDrpA=="], diff --git a/eslint.config.mjs b/eslint.config.mjs index 69bf635bd5..a7ba5b4c74 100644 --- a/eslint.config.mjs +++ b/eslint.config.mjs @@ -20,6 +20,7 @@ export default tseslint.config( '**/*.js', '*.mjs', '**/*.test.ts', + '**/src/test/**', // Test helper files (mock factories, fixtures) '*.d.ts', // Root-level declaration files (not in tsconfig project scope) '**/*.generated.d.ts', // Auto-generated declaration files (e.g. openapi-typescript output) 'packages/web/vite.config.ts', // Vite config doesn't need type-checked linting diff --git a/packages/cli/package.json b/packages/cli/package.json index bd8c7390bf..f39e530ffd 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -17,6 +17,7 @@ "@archon/git": "workspace:*", "@archon/isolation": "workspace:*", "@archon/paths": "workspace:*", + "@archon/providers": "workspace:*", "@archon/server": "workspace:*", "@archon/workflows": "workspace:*", "@clack/prompts": "^1.0.0", diff --git a/packages/core/package.json b/packages/core/package.json index 9199551431..4739c5328f 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -9,7 +9,6 @@ "./types": "./src/types/index.ts", "./db": "./src/db/index.ts", "./db/*": "./src/db/*.ts", - "./providers": "./src/providers/index.ts", "./operations": "./src/operations/index.ts", "./operations/*": "./src/operations/*.ts", "./workflows": "./src/workflows/index.ts", @@ -23,17 +22,16 @@ "./state/*": "./src/state/*.ts" }, "scripts": { - "test": "bun test src/providers/codex-binary-guard.test.ts && bun test src/utils/codex-binary-resolver.test.ts && bun test src/utils/codex-binary-resolver-dev.test.ts && bun test src/providers/claude.test.ts src/providers/codex.test.ts src/providers/factory.test.ts && bun test src/handlers/command-handler.test.ts && bun test src/handlers/clone.test.ts && bun test src/db/adapters/postgres.test.ts && bun test src/db/adapters/sqlite.test.ts src/db/codebases.test.ts src/db/connection.test.ts src/db/conversations.test.ts src/db/env-vars.test.ts src/db/isolation-environments.test.ts src/db/messages.test.ts src/db/sessions.test.ts src/db/workflow-events.test.ts src/db/workflows.test.ts src/utils/defaults-copy.test.ts src/utils/worktree-sync.test.ts src/utils/conversation-lock.test.ts src/utils/credential-sanitizer.test.ts src/utils/port-allocation.test.ts src/utils/error.test.ts src/utils/error-formatter.test.ts src/utils/github-graphql.test.ts src/utils/env-leak-scanner.test.ts src/config/ src/state/ && bun test src/utils/path-validation.test.ts && bun test src/services/cleanup-service.test.ts && bun test src/services/title-generator.test.ts && bun test src/workflows/ && bun test src/operations/workflow-operations.test.ts && bun test src/operations/isolation-operations.test.ts && bun test src/orchestrator/orchestrator.test.ts && bun test src/orchestrator/orchestrator-agent.test.ts && bun test src/orchestrator/orchestrator-isolation.test.ts", + "test": "bun test src/handlers/command-handler.test.ts && bun test src/handlers/clone.test.ts && bun test src/db/adapters/postgres.test.ts && bun test src/db/adapters/sqlite.test.ts src/db/codebases.test.ts src/db/connection.test.ts src/db/conversations.test.ts src/db/env-vars.test.ts src/db/isolation-environments.test.ts src/db/messages.test.ts src/db/sessions.test.ts src/db/workflow-events.test.ts src/db/workflows.test.ts src/utils/defaults-copy.test.ts src/utils/worktree-sync.test.ts src/utils/conversation-lock.test.ts src/utils/credential-sanitizer.test.ts src/utils/port-allocation.test.ts src/utils/error.test.ts src/utils/error-formatter.test.ts src/utils/github-graphql.test.ts src/utils/env-leak-scanner.test.ts src/config/ src/state/ && bun test src/utils/path-validation.test.ts && bun test src/services/cleanup-service.test.ts && bun test src/services/title-generator.test.ts && bun test src/workflows/ && bun test src/operations/workflow-operations.test.ts && bun test src/operations/isolation-operations.test.ts && bun test src/orchestrator/orchestrator.test.ts && bun test src/orchestrator/orchestrator-agent.test.ts && bun test src/orchestrator/orchestrator-isolation.test.ts", "type-check": "bun x tsc --noEmit", "build": "echo 'No build needed - Bun runs TypeScript directly'" }, "dependencies": { - "@anthropic-ai/claude-agent-sdk": "^0.2.89", "@archon/git": "workspace:*", "@archon/isolation": "workspace:*", "@archon/paths": "workspace:*", + "@archon/providers": "workspace:*", "@archon/workflows": "workspace:*", - "@openai/codex-sdk": "^0.116.0", "pg": "^8.11.0", "zod": "^3" }, diff --git a/packages/core/src/config/config-types.ts b/packages/core/src/config/config-types.ts index 290ba48228..7dd74ac8ba 100644 --- a/packages/core/src/config/config-types.ts +++ b/packages/core/src/config/config-types.ts @@ -10,25 +10,12 @@ * Global configuration (non-secret user preferences) * Located at ~/.archon/config.yaml */ -import type { ModelReasoningEffort, WebSearchMode } from '../types'; -export interface CodexProviderDefaults { - model?: string; - modelReasoningEffort?: ModelReasoningEffort; - webSearchMode?: WebSearchMode; - additionalDirectories?: string[]; - /** Path to the Codex CLI binary. Overrides auto-detection in compiled Archon builds. - * Only relevant for the Codex provider; ignored for Claude. */ - codexBinaryPath?: string; -} +// Provider config defaults — canonical definitions live in @archon/providers/types. +// Imported and re-exported here so existing consumers don't break. +import type { ClaudeProviderDefaults, CodexProviderDefaults } from '@archon/providers/types'; -export interface ClaudeCodexProviderDefaults { - model?: string; - /** Claude Code settingSources — controls which CLAUDE.md files are loaded. - * @default ['project'] - * @see https://github.com/anthropics/claude-agent-sdk */ - settingSources?: ('project' | 'user')[]; -} +export type { ClaudeProviderDefaults, CodexProviderDefaults }; export interface GlobalConfig { /** @@ -47,7 +34,7 @@ export interface GlobalConfig { * Assistant-specific defaults (model, reasoning effort, etc.) */ assistants?: { - claude?: ClaudeCodexProviderDefaults; + claude?: ClaudeProviderDefaults; codex?: CodexProviderDefaults; }; @@ -118,7 +105,7 @@ export interface RepoConfig { * Assistant-specific defaults for this repository */ assistants?: { - claude?: ClaudeCodexProviderDefaults; + claude?: ClaudeProviderDefaults; codex?: CodexProviderDefaults; }; @@ -217,7 +204,7 @@ export interface MergedConfig { botName: string; assistant: 'claude' | 'codex'; assistants: { - claude: ClaudeCodexProviderDefaults; + claude: ClaudeProviderDefaults; codex: CodexProviderDefaults; }; streaming: { @@ -281,7 +268,7 @@ export interface SafeConfig { botName: string; assistant: 'claude' | 'codex'; assistants: { - claude: Pick; + claude: Pick; codex: Pick; }; streaming: { diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 0f3cce7e79..a0c897481f 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -24,8 +24,6 @@ export { type IWebPlatformAdapter, isWebAdapter, type MessageMetadata, - type MessageChunk, - type IAgentProvider, } from './types'; // ============================================================================= @@ -52,13 +50,6 @@ export * as messageDb from './db/messages'; // Re-export SessionNotFoundError for error handling export { SessionNotFoundError } from './db/sessions'; -// ============================================================================= -// Agent Providers -// ============================================================================= -export { ClaudeProvider } from './providers/claude'; -export { CodexProvider } from './providers/codex'; -export { getAgentProvider } from './providers/factory'; - // ============================================================================= // Workflows // ============================================================================= diff --git a/packages/core/src/orchestrator/orchestrator-agent.test.ts b/packages/core/src/orchestrator/orchestrator-agent.test.ts index 8995a34046..b1e155a8f8 100644 --- a/packages/core/src/orchestrator/orchestrator-agent.test.ts +++ b/packages/core/src/orchestrator/orchestrator-agent.test.ts @@ -93,10 +93,11 @@ mock.module('@archon/workflows/executor', () => ({ executeWorkflow: mockExecuteWorkflow, })); -mock.module('../providers/factory', () => ({ +mock.module('@archon/providers', () => ({ getAgentProvider: mock(() => ({ sendQuery: mock(async function* () {}), getType: mock(() => 'claude'), + getCapabilities: mock(() => ({})), })), })); diff --git a/packages/core/src/orchestrator/orchestrator-agent.ts b/packages/core/src/orchestrator/orchestrator-agent.ts index 86f704b2fc..ca86f79a68 100644 --- a/packages/core/src/orchestrator/orchestrator-agent.ts +++ b/packages/core/src/orchestrator/orchestrator-agent.ts @@ -13,9 +13,9 @@ import type { HandleMessageContext, Conversation, Codebase, - AgentRequestOptions, AttachedFile, } from '../types'; +import type { SendQueryOptions } from '@archon/providers/types'; import { ConversationNotFoundError } from '../types'; import * as db from '../db/conversations'; import * as codebaseDb from '../db/codebases'; @@ -24,7 +24,7 @@ import * as commandHandler from '../handlers/command-handler'; import { formatToolCall } from '@archon/workflows/utils/tool-formatter'; import { classifyAndFormatError } from '../utils/error-formatter'; import { toError } from '../utils/error'; -import { getAgentProvider } from '../providers/factory'; +import { getAgentProvider } from '@archon/providers'; import { getArchonHome, getArchonWorkspacesPath } from '@archon/paths'; import { syncArchonToWorktree } from '../utils/worktree-sync'; import { syncWorkspace, toRepoPath } from '@archon/git'; @@ -758,10 +758,9 @@ export async function handleMessage( // Reuse the config already loaded during workflow discovery (avoids a second disk read). // Fall back to loadConfig only when no codebase is scoped (discoveredConfig is undefined). const config = discoveredConfig ?? (await loadConfig()); - const requestOptions: AgentRequestOptions = { - ...(conversation.ai_assistant_type === 'claude' && config.assistants.claude.settingSources - ? { settingSources: config.assistants.claude.settingSources } - : {}), + const providerKey = conversation.ai_assistant_type as 'claude' | 'codex'; + const requestOptions: SendQueryOptions = { + assistantConfig: (config.assistants[providerKey] ?? {}) as Record, }; const mode = platform.getStreamingMode(); @@ -831,7 +830,7 @@ async function handleStreamMode( isolationHints: HandleMessageContext['isolationHints'], conversation: Conversation, issueContext?: string, - requestOptions?: AgentRequestOptions + requestOptions?: SendQueryOptions ): Promise { const allMessages: string[] = []; let newSessionId: string | undefined; @@ -947,7 +946,7 @@ async function handleBatchMode( isolationHints: HandleMessageContext['isolationHints'], conversation: Conversation, issueContext?: string, - requestOptions?: AgentRequestOptions + requestOptions?: SendQueryOptions ): Promise { const allChunks: { type: string; content: string }[] = []; const assistantMessages: string[] = []; diff --git a/packages/core/src/orchestrator/orchestrator-isolation.test.ts b/packages/core/src/orchestrator/orchestrator-isolation.test.ts index 4d5ddb86a6..6aabc41597 100644 --- a/packages/core/src/orchestrator/orchestrator-isolation.test.ts +++ b/packages/core/src/orchestrator/orchestrator-isolation.test.ts @@ -50,7 +50,7 @@ mock.module('../handlers/command-handler', () => ({ })), })); -mock.module('../providers/factory', () => ({ +mock.module('@archon/providers', () => ({ getAgentProvider: mock(() => null), })); diff --git a/packages/core/src/orchestrator/orchestrator.test.ts b/packages/core/src/orchestrator/orchestrator.test.ts index 18d7f6109f..8f99efff64 100644 --- a/packages/core/src/orchestrator/orchestrator.test.ts +++ b/packages/core/src/orchestrator/orchestrator.test.ts @@ -82,7 +82,7 @@ mock.module('../handlers/command-handler', () => ({ // AI provider mock const mockGetAgentProvider = mock(() => null); -mock.module('../providers/factory', () => ({ +mock.module('@archon/providers', () => ({ getAgentProvider: mockGetAgentProvider, })); @@ -699,8 +699,8 @@ describe('orchestrator-agent handleMessage', () => { // ─── settingSources forwarding ──────────────────────────────────────── - describe('settingSources forwarding', () => { - test('passes settingSources from config to AI provider for claude', async () => { + describe('assistantConfig forwarding', () => { + test('passes assistantConfig with settingSources for claude', async () => { mockLoadConfig.mockResolvedValueOnce({ botName: 'Archon', assistant: 'claude', @@ -725,11 +725,13 @@ describe('orchestrator-agent handleMessage', () => { expect.any(String), expect.any(String), expect.anything(), - expect.objectContaining({ settingSources: ['project', 'user'] }) + expect.objectContaining({ + assistantConfig: expect.objectContaining({ settingSources: ['project', 'user'] }), + }) ); }); - test('does not pass settingSources for non-claude assistant', async () => { + test('passes codex assistantConfig for codex assistant', async () => { const codexConversation: Conversation = { ...mockConversation, ai_assistant_type: 'codex', @@ -758,11 +760,12 @@ describe('orchestrator-agent handleMessage', () => { await handleMessage(platform, 'chat-456', 'hello'); - // settingSources should NOT be in requestOptions since assistant type is codex + // Should pass codex assistantConfig, not claude's const callArgs = codexClient.sendQuery.mock.calls[0]; const requestOptions = callArgs?.[3] as Record | undefined; expect(requestOptions).toBeDefined(); expect(requestOptions).not.toHaveProperty('settingSources'); + expect(requestOptions?.assistantConfig).toBeDefined(); }); }); diff --git a/packages/core/src/providers/factory.test.ts b/packages/core/src/providers/factory.test.ts deleted file mode 100644 index 6867a1bf13..0000000000 --- a/packages/core/src/providers/factory.test.ts +++ /dev/null @@ -1,48 +0,0 @@ -import { describe, test, expect } from 'bun:test'; -import { getAgentProvider } from './factory'; - -describe('factory', () => { - describe('getAgentProvider', () => { - test('returns ClaudeProvider for claude type', () => { - const provider = getAgentProvider('claude'); - - expect(provider).toBeDefined(); - expect(provider.getType()).toBe('claude'); - expect(typeof provider.sendQuery).toBe('function'); - }); - - test('returns CodexProvider for codex type', () => { - const provider = getAgentProvider('codex'); - - expect(provider).toBeDefined(); - expect(provider.getType()).toBe('codex'); - expect(typeof provider.sendQuery).toBe('function'); - }); - - test('throws error for unknown type', () => { - expect(() => getAgentProvider('unknown')).toThrow( - "Unknown provider type: unknown. Supported types: 'claude', 'codex'" - ); - }); - - test('throws error for empty string', () => { - expect(() => getAgentProvider('')).toThrow( - "Unknown provider type: . Supported types: 'claude', 'codex'" - ); - }); - - test('is case sensitive - Claude throws', () => { - expect(() => getAgentProvider('Claude')).toThrow( - "Unknown provider type: Claude. Supported types: 'claude', 'codex'" - ); - }); - - test('each call returns new instance', () => { - const provider1 = getAgentProvider('claude'); - const provider2 = getAgentProvider('claude'); - - // Each call should return a new instance - expect(provider1).not.toBe(provider2); - }); - }); -}); diff --git a/packages/core/src/providers/index.ts b/packages/core/src/providers/index.ts deleted file mode 100644 index 55c0a55160..0000000000 --- a/packages/core/src/providers/index.ts +++ /dev/null @@ -1,16 +0,0 @@ -/** - * Agent Providers - * - * Prefer importing from '@archon/core' for most use cases: - * import { ClaudeProvider, getAgentProvider } from '@archon/core'; - * - * Use this submodule path when you only need provider-specific code: - * import { ClaudeProvider } from '@archon/core/providers'; - */ - -export { ClaudeProvider } from './claude'; -export { CodexProvider } from './codex'; -export { getAgentProvider } from './factory'; - -// Re-export types for consumers importing from this submodule directly -export type { IAgentProvider, MessageChunk } from '../types'; diff --git a/packages/core/src/services/title-generator.test.ts b/packages/core/src/services/title-generator.test.ts index ddea0d7df0..0d85e43c78 100644 --- a/packages/core/src/services/title-generator.test.ts +++ b/packages/core/src/services/title-generator.test.ts @@ -36,7 +36,7 @@ const mockGetAgentProvider = mock(() => ({ getType: () => 'claude', })); -mock.module('../providers/factory', () => ({ +mock.module('@archon/providers', () => ({ getAgentProvider: mockGetAgentProvider, })); @@ -167,11 +167,14 @@ describe('title-generator', () => { expect(optionsArg.model).toBeUndefined(); }); - test('passes tools: [] to disable tool access', async () => { + test('passes nodeConfig with allowed_tools: [] to disable tool access', async () => { await generateAndSetTitle('conv-11', 'Some message', 'claude', '/tmp'); - const optionsArg = mockSendQuery.mock.calls[0][3] as { model?: string; tools?: string[] }; - expect(optionsArg.tools).toEqual([]); + const optionsArg = mockSendQuery.mock.calls[0][3] as { + model?: string; + nodeConfig?: { allowed_tools?: string[] }; + }; + expect(optionsArg.nodeConfig?.allowed_tools).toEqual([]); }); test('handles double failure gracefully (AI fails + fallback DB write fails)', async () => { diff --git a/packages/core/src/services/title-generator.ts b/packages/core/src/services/title-generator.ts index 97412029cc..fdb9cdaab8 100644 --- a/packages/core/src/services/title-generator.ts +++ b/packages/core/src/services/title-generator.ts @@ -5,7 +5,7 @@ * Optionally uses TITLE_GENERATION_MODEL env var for a cheaper/faster model. * Designed to be fire-and-forget — never throws, all errors logged internally. */ -import { getAgentProvider } from '../providers/factory'; +import { getAgentProvider } from '@archon/providers'; import * as conversationDb from '../db/conversations'; import { createLogger } from '@archon/paths'; @@ -52,7 +52,7 @@ export async function generateAndSetTitle( for await (const chunk of client.sendQuery(titlePrompt, cwd, undefined, { model: titleModel, - tools: [], // No tool access — pure text generation + nodeConfig: { allowed_tools: [] }, // No tool access — pure text generation })) { if (chunk.type === 'assistant') { generatedTitle += chunk.content; diff --git a/packages/core/src/types/index.ts b/packages/core/src/types/index.ts index 095c04a73a..c847122c74 100644 --- a/packages/core/src/types/index.ts +++ b/packages/core/src/types/index.ts @@ -3,9 +3,11 @@ */ import type { TransitionTrigger } from '../state/session-transitions'; import type { WorkflowDefinition } from '@archon/workflows/schemas/workflow'; -import type { McpServerConfig, AgentDefinition } from '@anthropic-ai/claude-agent-sdk'; import { z } from 'zod'; +// MessageChunk imported for use in IPlatformAdapter/IWebPlatformAdapter below +import type { MessageChunk } from '@archon/providers/types'; + /** * Custom error for when a conversation is not found during update operations * Allows callers to programmatically handle this specific error case @@ -182,53 +184,7 @@ export function isWebAdapter(adapter: IPlatformAdapter): adapter is IWebPlatform return adapter.getPlatformType() === 'web'; } -/** - * Message chunk from AI assistant. - * Discriminated union with per-type required fields for type safety. - */ -export interface TokenUsage { - input: number; - output: number; - total?: number; - cost?: number; -} - -export type MessageChunk = - | { type: 'assistant'; content: string } - | { type: 'system'; content: string } - | { type: 'thinking'; content: string } - | { - type: 'result'; - sessionId?: string; - tokens?: TokenUsage; - structuredOutput?: unknown; - isError?: boolean; - errorSubtype?: string; - cost?: number; - stopReason?: string; - numTurns?: number; - modelUsage?: Record; - } - | { type: 'rate_limit'; rateLimitInfo: Record } - | { - type: 'tool'; - toolName: string; - toolInput?: Record; - /** Stable per-call ID from the underlying SDK (e.g. Claude `tool_use_id`). - * When present, the platform adapter uses it directly instead of generating - * one — guarantees `tool_call`/`tool_result` pair correctly even when - * multiple tools with the same name run concurrently. */ - toolCallId?: string; - } - | { - type: 'tool_result'; - toolName: string; - toolOutput: string; - /** Matching ID for the originating `tool` chunk. See `tool` variant above. */ - toolCallId?: string; - } - | { type: 'workflow_dispatch'; workerConversationId: string; workflowName: string }; - +// Re-export workflow schema types for config-types.ts compatibility import type { ModelReasoningEffort, WebSearchMode } from '@archon/workflows/schemas/workflow'; export type { ModelReasoningEffort, WebSearchMode }; import type { @@ -237,147 +193,3 @@ import type { SandboxSettings, } from '@archon/workflows/schemas/dag-node'; export type { EffortLevel, ThinkingConfig, SandboxSettings }; - -export interface AgentRequestOptions { - model?: string; - modelReasoningEffort?: ModelReasoningEffort; - webSearchMode?: WebSearchMode; - additionalDirectories?: string[]; - /** - * Restrict the set of built-in tools available to the assistant. - * - `[]` — disable all built-in tools (Claude SDK only; Codex ignores this field) - * - `string[]` — restrict to the named tools - * Omit entirely to use the assistant's default tool set. - * Note: `undefined` (omitted) and `[]` have different semantics — do not confuse them. - */ - tools?: string[]; - /** - * Remove specific tools from the assistant's available set. - * Applied after `tools` whitelist (if both are set, denied tools are removed from the whitelist result). - * Claude SDK only — Codex ignores this field. - */ - disallowedTools?: string[]; - /** - * Structured output schema. - * Claude: passed as outputFormat option to Claude Agent SDK. - * Codex: passed as outputSchema in TurnOptions to Codex SDK (v0.116.0+). - * Shape: { type: 'json_schema', schema: } - */ - outputFormat?: { type: 'json_schema'; schema: Record }; - /** SDK hooks configuration. Passed directly to Claude Agent SDK Options.hooks. Claude only — ignored for Codex. */ - hooks?: Partial< - Record< - string, - { - matcher?: string; - hooks: (( - input: unknown, - toolUseID: string | undefined, - options: { signal: AbortSignal } - ) => Promise)[]; - timeout?: number; - }[] - > - >; - /** - * MCP server configuration passed to Claude Agent SDK Options.mcpServers. - * Uses SDK type directly — @archon/core already depends on the SDK. - * Claude only — Codex ignores this. - */ - mcpServers?: Record; - /** Tools to auto-allow without permission prompts (e.g., MCP tool wildcards). - * Passed to Claude Agent SDK Options.allowedTools. Claude only. */ - allowedTools?: string[]; - /** Custom subagent definitions passed to Claude Agent SDK Options.agents. - * Used for per-node skill scoping via AgentDefinition wrapping. Claude only. */ - agents?: Record; - /** Name of agent definition for the main thread. References a key in `agents`. Claude only. */ - agent?: string; - /** - * Abort signal for cancelling in-flight AI requests. - * When aborted, the AI client should terminate the subprocess/query gracefully. - */ - abortSignal?: AbortSignal; - /** - * When false (default), skips writing session transcript to ~/.claude/projects/. - * Claude Agent SDK v0.2.74+. The SDK default is true, but Archon overrides it to false - * to avoid disk pollution. Set to true only when session persistence is explicitly needed. - */ - persistSession?: boolean; - /** - * When true, the SDK copies the prior session's history into a new session file - * before appending, leaving the original untouched. Use with `resume` to safely - * preserve conversation context without risk of corrupting the source session. - * Claude only — ignored for Codex. - */ - forkSession?: boolean; - /** - * Claude Code settingSources — controls which CLAUDE.md files are loaded. - * Passed directly to Claude Agent SDK Options.settingSources. - * Claude only — ignored for Codex. - * @default ['project'] - */ - settingSources?: ('project' | 'user')[]; - /** - * Additional env vars merged into Claude subprocess environment after buildSubprocessEnv(). - * Final env: { ...buildSubprocessEnv(), ...env } (auth tokens conditionally filtered). - * Claude only — Codex SDK does not support env injection. - */ - env?: Record; - /** - * Controls reasoning depth for Claude. Claude only — ignored for Codex. - */ - effort?: EffortLevel; - /** - * Controls Claude's thinking/reasoning behavior. Claude only — ignored for Codex. - */ - thinking?: ThinkingConfig; - /** - * Maximum USD cost budget. SDK returns error_max_budget_usd result if exceeded. - * Claude only — ignored for Codex. - */ - maxBudgetUsd?: number; - /** - * Per-node system prompt string. Overrides the default claude_code preset. - * Claude only — ignored for Codex. - */ - systemPrompt?: string; - /** - * Fallback model if primary fails. Claude only — ignored for Codex. - */ - fallbackModel?: string; - /** - * SDK beta feature flags. Claude only — ignored for Codex. - */ - betas?: string[]; - /** - * OS-level sandbox settings passed to Claude subprocess. - * Claude only — ignored for Codex. - */ - sandbox?: SandboxSettings; -} - -/** - * Generic agent provider interface - * Allows supporting multiple agent providers (Claude, Codex, etc.) - */ -export interface IAgentProvider { - /** - * Send a message and get streaming response - * @param prompt - User message or prompt - * @param cwd - Working directory for the provider - * @param resumeSessionId - Optional session ID to resume - * @param options - Optional request options (model, provider-specific settings) - */ - sendQuery( - prompt: string, - cwd: string, - resumeSessionId?: string, - options?: AgentRequestOptions - ): AsyncGenerator; - - /** - * Get the provider type identifier - */ - getType(): string; -} diff --git a/packages/core/src/workflows/store-adapter.test.ts b/packages/core/src/workflows/store-adapter.test.ts index 36fda8759b..f193a2075c 100644 --- a/packages/core/src/workflows/store-adapter.test.ts +++ b/packages/core/src/workflows/store-adapter.test.ts @@ -44,7 +44,7 @@ mock.module('../db/codebases', () => ({ getCodebase: mockGetCodebase, })); -mock.module('../providers/factory', () => ({ +mock.module('@archon/providers', () => ({ getAgentProvider: mock(() => ({})), })); diff --git a/packages/core/src/workflows/store-adapter.ts b/packages/core/src/workflows/store-adapter.ts index e370460f9f..67040fda93 100644 --- a/packages/core/src/workflows/store-adapter.ts +++ b/packages/core/src/workflows/store-adapter.ts @@ -10,7 +10,7 @@ import * as workflowDb from '../db/workflows'; import * as workflowEventDb from '../db/workflow-events'; import * as codebaseDb from '../db/codebases'; import * as envVarDb from '../db/env-vars'; -import { getAgentProvider } from '../providers/factory'; +import { getAgentProvider } from '@archon/providers'; import { loadConfig as loadMergedConfig } from '../config/config-loader'; import { createLogger } from '@archon/paths'; diff --git a/packages/docs-web/src/content/docs/reference/architecture.md b/packages/docs-web/src/content/docs/reference/architecture.md index 4aaa03d144..1a5badb8f7 100644 --- a/packages/docs-web/src/content/docs/reference/architecture.md +++ b/packages/docs-web/src/content/docs/reference/architecture.md @@ -328,7 +328,7 @@ interface MessageChunk { ### Implementation Guide -**1. Create provider file:** `packages/core/src/providers/your-assistant.ts` +**1. Create provider file:** `packages/providers/src/your-assistant/provider.ts` **2. Implement the interface:** @@ -377,7 +377,7 @@ export class YourAssistantProvider implements IAgentProvider { } ``` -**3. Register in factory:** `packages/core/src/providers/factory.ts` +**3. Register in factory:** `packages/providers/src/factory.ts` ```typescript import { YourAssistantProvider } from './your-assistant'; @@ -440,7 +440,7 @@ if (trigger && shouldCreateNewSession(trigger)) { Different SDKs use different event types. Map them to MessageChunk types: -**Claude Code SDK** (`packages/core/src/providers/claude.ts`): +**Claude Code SDK** (`packages/providers/src/claude/provider.ts`): ```typescript for await (const msg of query({ prompt, options })) { @@ -462,7 +462,7 @@ for await (const msg of query({ prompt, options })) { } ``` -**Codex SDK** (`packages/core/src/providers/codex.ts`): +**Codex SDK** (`packages/providers/src/codex/provider.ts`): ```typescript for await (const event of result.events) { @@ -1238,12 +1238,12 @@ Post single comment on issue with summary ### Adding a New AI Agent Provider -- [ ] Create `packages/core/src/providers/your-assistant.ts` +- [ ] Create `packages/providers/src/your-assistant/provider.ts` - [ ] Implement `IAgentProvider` interface - [ ] Map SDK events to `MessageChunk` types - [ ] Handle session creation and resumption - [ ] Implement error handling and recovery -- [ ] Add to `packages/core/src/providers/factory.ts` +- [ ] Add to `packages/providers/src/factory.ts` - [ ] Add environment variables to `.env.example` - [ ] Test session persistence across restarts - [ ] Test plan-to-execute transition (new session) @@ -1364,7 +1364,7 @@ Context is passed as a dedicated `issueContext` parameter to `handleMessage()`, **For detailed implementation examples, see:** - Platform adapter: `packages/adapters/src/chat/telegram/adapter.ts`, `packages/adapters/src/forge/github/adapter.ts` -- AI provider: `packages/core/src/providers/claude.ts`, `packages/core/src/providers/codex.ts` +- AI provider: `packages/providers/src/claude/provider.ts`, `packages/providers/src/codex/provider.ts` - Isolation provider: `packages/isolation/src/providers/worktree.ts` - Isolation resolver: `packages/isolation/src/resolver.ts` - Isolation factory: `packages/isolation/src/factory.ts` diff --git a/packages/providers/package.json b/packages/providers/package.json new file mode 100644 index 0000000000..2ef285486a --- /dev/null +++ b/packages/providers/package.json @@ -0,0 +1,33 @@ +{ + "name": "@archon/providers", + "version": "0.3.6", + "type": "module", + "main": "./src/index.ts", + "types": "./src/index.ts", + "exports": { + ".": "./src/index.ts", + "./types": "./src/types.ts", + "./claude/provider": "./src/claude/provider.ts", + "./claude/config": "./src/claude/config.ts", + "./codex/provider": "./src/codex/provider.ts", + "./codex/config": "./src/codex/config.ts", + "./codex/binary-resolver": "./src/codex/binary-resolver.ts", + "./errors": "./src/errors.ts", + "./factory": "./src/factory.ts" + }, + "scripts": { + "test": "bun test src/claude/provider.test.ts && bun test src/codex/provider.test.ts && bun test src/factory.test.ts && bun test src/codex/binary-guard.test.ts && bun test src/codex/binary-resolver.test.ts && bun test src/codex/binary-resolver-dev.test.ts", + "type-check": "bun x tsc --noEmit" + }, + "dependencies": { + "@anthropic-ai/claude-agent-sdk": "^0.2.89", + "@archon/paths": "workspace:*", + "@openai/codex-sdk": "^0.116.0" + }, + "devDependencies": { + "pino": "^9" + }, + "peerDependencies": { + "typescript": "^5.0.0" + } +} diff --git a/packages/providers/src/claude/config.ts b/packages/providers/src/claude/config.ts new file mode 100644 index 0000000000..3dca726e5f --- /dev/null +++ b/packages/providers/src/claude/config.ts @@ -0,0 +1,31 @@ +/** + * Typed config parsing for Claude provider defaults. + * Validates and narrows the opaque assistantConfig to typed fields. + */ +import type { ClaudeProviderDefaults } from '../types'; + +// Re-export so consumers can import the type from either location +export type { ClaudeProviderDefaults } from '../types'; + +/** + * Parse raw assistantConfig into typed Claude defaults. + * Defensive: invalid fields are silently dropped (not thrown). + */ +export function parseClaudeConfig(raw: Record): ClaudeProviderDefaults { + const result: ClaudeProviderDefaults = {}; + + if (typeof raw.model === 'string') { + result.model = raw.model; + } + + if (Array.isArray(raw.settingSources)) { + const valid = raw.settingSources.filter( + (s): s is 'project' | 'user' => s === 'project' || s === 'user' + ); + if (valid.length > 0) { + result.settingSources = valid; + } + } + + return result; +} diff --git a/packages/providers/src/claude/index.ts b/packages/providers/src/claude/index.ts new file mode 100644 index 0000000000..cc540542e4 --- /dev/null +++ b/packages/providers/src/claude/index.ts @@ -0,0 +1,8 @@ +export { ClaudeProvider } from './provider'; +export { parseClaudeConfig, type ClaudeProviderDefaults } from './config'; +export { + loadMcpConfig, + buildSDKHooksFromYAML, + withFirstMessageTimeout, + getProcessUid, +} from './provider'; diff --git a/packages/core/src/providers/claude.test.ts b/packages/providers/src/claude/provider.test.ts similarity index 77% rename from packages/core/src/providers/claude.test.ts rename to packages/providers/src/claude/provider.test.ts index b760837278..29503bb517 100644 --- a/packages/core/src/providers/claude.test.ts +++ b/packages/providers/src/claude/provider.test.ts @@ -1,4 +1,4 @@ -import { describe, test, expect, mock, beforeEach, afterEach, spyOn } from 'bun:test'; +import { describe, test, expect, mock, beforeEach, spyOn } from 'bun:test'; import { createMockLogger } from '../test/mocks/logger'; const mockLogger = createMockLogger(); @@ -16,11 +16,8 @@ mock.module('@anthropic-ai/claude-agent-sdk', () => ({ query: mockQuery, })); -import { ClaudeProvider } from './claude'; -import * as claudeModule from './claude'; -import * as codebaseDb from '../db/codebases'; -import * as envLeakScanner from '../utils/env-leak-scanner'; -import * as configLoader from '../config/config-loader'; +import { ClaudeProvider } from './provider'; +import * as claudeModule from './provider'; describe('ClaudeProvider', () => { let client: ClaudeProvider; @@ -62,6 +59,26 @@ describe('ClaudeProvider', () => { }); }); + describe('getCapabilities', () => { + test('returns full capability set for Claude provider', () => { + const caps = client.getCapabilities(); + expect(caps).toEqual({ + sessionResume: true, + mcp: true, + hooks: true, + skills: true, + toolRestrictions: true, + structuredOutput: true, + envInjection: true, + costControl: true, + effortControl: true, + thinkingControl: true, + fallbackModel: true, + sandbox: true, + }); + }); + }); + describe('sendQuery', () => { test('yields text events from assistant messages', async () => { mockQuery.mockImplementation(async function* () { @@ -306,7 +323,6 @@ describe('ClaudeProvider', () => { }); // Consume the generator - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('my prompt', '/my/workspace', undefined, { model: 'sonnet', })) { @@ -328,7 +344,6 @@ describe('ClaudeProvider', () => { // Empty generator }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/workspace')) { // consume } @@ -343,7 +358,6 @@ describe('ClaudeProvider', () => { // Empty generator }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/workspace', undefined, { persistSession: true, })) { @@ -363,7 +377,6 @@ describe('ClaudeProvider', () => { // Empty generator }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('prompt', '/workspace', 'session-to-resume')) { // consume } @@ -447,9 +460,6 @@ describe('ClaudeProvider', () => { }); test('subprocess env passes through all process.env keys (no allowlist filtering)', async () => { - // With the allowlist removed, buildSubprocessEnv returns { ...process.env }. - // CWD .env leakage and CLAUDECODE markers are handled at entry point by - // stripCwdEnv(), not by buildSubprocessEnv(). See #1067, #1097. const originalKey = process.env.CUSTOM_USER_KEY; process.env.CUSTOM_USER_KEY = 'user-trusted-value'; @@ -457,7 +467,6 @@ describe('ClaudeProvider', () => { // Empty generator }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/workspace')) { // consume } @@ -549,35 +558,29 @@ describe('ClaudeProvider', () => { }); test('classifies "Operation aborted" errors as crash and retries', async () => { - // Simulates the SDK cleanup race: PostToolUse hook writes to a closed pipe - // after a DAG node abort. Should be classified as 'crash' (not 'unknown') - // so the retry path is taken. const error = new Error('Operation aborted'); mockQuery.mockImplementation(async function* () { throw error; }); const consumeGenerator = async (): Promise => { - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/workspace')) { // consume } }; - // crash classification = retried up to 3 times → 4 total calls + // crash classification = retried up to 3 times -> 4 total calls await expect(consumeGenerator()).rejects.toThrow(/Claude Code crash/); expect(mockQuery).toHaveBeenCalledTimes(4); }, 5_000); test('classifies mixed-case "OPERATION ABORTED" errors as crash', async () => { - // Pattern matching uses .toLowerCase() — case must not matter const error = new Error('OPERATION ABORTED'); mockQuery.mockImplementation(async function* () { throw error; }); const consumeGenerator = async (): Promise => { - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/workspace')) { // consume } @@ -588,8 +591,6 @@ describe('ClaudeProvider', () => { }, 5_000); test('captures all stderr output for diagnostics', async () => { - // When the subprocess crashes, the enriched error should include all stderr, - // not just lines matching error keywords mockQuery.mockImplementation(async function* (args: { options: { stderr?: (data: string) => void }; }) { @@ -608,7 +609,7 @@ describe('ClaudeProvider', () => { } }; - // Use rejects so assertions always execute — prevents vacuous pass when mock doesn't throw + // Use rejects so assertions always execute const err = await consumeGenerator().catch((e: unknown) => e as Error); expect(err).toBeInstanceOf(Error); // The error should contain stderr context from ALL captured lines @@ -617,14 +618,13 @@ describe('ClaudeProvider', () => { expect(err.message).toContain('startup diagnostic'); }, 5_000); - test('passes settingSources from request options', async () => { + test('passes settingSources from assistantConfig', async () => { mockQuery.mockImplementation(async function* () { yield { type: 'result', session_id: 'test-session' }; }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/tmp', undefined, { - settingSources: ['project', 'user'], + assistantConfig: { settingSources: ['project', 'user'] }, })) { // consume } @@ -639,7 +639,6 @@ describe('ClaudeProvider', () => { yield { type: 'result', session_id: 'test-session' }; }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/tmp')) { // consume } @@ -654,7 +653,6 @@ describe('ClaudeProvider', () => { yield { type: 'result', session_id: 'sid' }; }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/tmp', undefined, { env: { MY_SECRET: 'abc123' }, })) { @@ -675,8 +673,7 @@ describe('ClaudeProvider', () => { yield { type: 'result', session_id: 'sid' }; }); - // HOME is always in process.env — override it to verify priority - // eslint-disable-next-line @typescript-eslint/no-unused-vars + // HOME is always in process.env -- override it to verify priority for await (const _ of client.sendQuery('test', '/tmp', undefined, { env: { HOME: '/custom/home' }, })) { @@ -689,13 +686,14 @@ describe('ClaudeProvider', () => { expect(env.HOME).toBe('/custom/home'); }); - test('passes effort to SDK when provided', async () => { + test('passes effort to SDK via nodeConfig', async () => { mockQuery.mockImplementation(async function* () { yield { type: 'result', session_id: 'sid' }; }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars - for await (const _ of client.sendQuery('test', '/tmp', undefined, { effort: 'high' })) { + for await (const _ of client.sendQuery('test', '/tmp', undefined, { + nodeConfig: { effort: 'high' }, + })) { // consume } @@ -704,12 +702,11 @@ describe('ClaudeProvider', () => { expect(callArgs.options.effort).toBe('high'); }); - test('omits effort from SDK when not provided', async () => { + test('omits effort from SDK when not provided in nodeConfig', async () => { mockQuery.mockImplementation(async function* () { yield { type: 'result', session_id: 'sid' }; }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/tmp')) { // consume } @@ -719,14 +716,13 @@ describe('ClaudeProvider', () => { expect(callArgs.options).not.toHaveProperty('effort'); }); - test('passes thinking object to SDK', async () => { + test('passes thinking object to SDK via nodeConfig', async () => { mockQuery.mockImplementation(async function* () { yield { type: 'result', session_id: 'sid' }; }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/tmp', undefined, { - thinking: { type: 'enabled', budgetTokens: 8000 }, + nodeConfig: { thinking: { type: 'enabled', budgetTokens: 8000 } }, })) { // consume } @@ -741,7 +737,6 @@ describe('ClaudeProvider', () => { yield { type: 'result', session_id: 'sid' }; }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/tmp', undefined, { maxBudgetUsd: 5.0 })) { // consume } @@ -756,7 +751,6 @@ describe('ClaudeProvider', () => { yield { type: 'result', session_id: 'sid' }; }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/tmp', undefined, { systemPrompt: 'You are a security reviewer', })) { @@ -773,7 +767,6 @@ describe('ClaudeProvider', () => { yield { type: 'result', session_id: 'sid' }; }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/tmp')) { // consume } @@ -788,7 +781,6 @@ describe('ClaudeProvider', () => { yield { type: 'result', session_id: 'sid' }; }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/tmp', undefined, { fallbackModel: 'claude-haiku-4-5', })) { @@ -800,14 +792,13 @@ describe('ClaudeProvider', () => { expect(callArgs.options.fallbackModel).toBe('claude-haiku-4-5'); }); - test('passes betas array to SDK', async () => { + test('passes betas array to SDK via nodeConfig', async () => { mockQuery.mockImplementation(async function* () { yield { type: 'result', session_id: 'sid' }; }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test', '/tmp', undefined, { - betas: ['context-1m-2025-08-07'], + nodeConfig: { betas: ['context-1m-2025-08-07'] }, })) { // consume } @@ -817,15 +808,16 @@ describe('ClaudeProvider', () => { expect(callArgs.options.betas).toEqual(['context-1m-2025-08-07']); }); - test('passes sandbox object to SDK', async () => { + test('passes sandbox object to SDK via nodeConfig', async () => { mockQuery.mockImplementation(async function* () { yield { type: 'result', session_id: 'sid' }; }); const sandbox = { enabled: true, network: { allowedDomains: [] } }; - // eslint-disable-next-line @typescript-eslint/no-unused-vars - for await (const _ of client.sendQuery('test', '/tmp', undefined, { sandbox })) { + for await (const _ of client.sendQuery('test', '/tmp', undefined, { + nodeConfig: { sandbox }, + })) { // consume } @@ -857,157 +849,6 @@ describe('ClaudeProvider', () => { expect(chunks[0]).toEqual({ type: 'assistant', content: 'Real content' }); }); }); - - describe('pre-spawn env leak gate', () => { - let spyFindByDefaultCwd: ReturnType; - let spyFindByPathPrefix: ReturnType; - let spyScan: ReturnType; - - beforeEach(() => { - spyFindByDefaultCwd = spyOn(codebaseDb, 'findCodebaseByDefaultCwd').mockResolvedValue(null); - spyFindByPathPrefix = spyOn(codebaseDb, 'findCodebaseByPathPrefix').mockResolvedValue(null); - spyScan = spyOn(envLeakScanner, 'scanPathForSensitiveKeys').mockReturnValue({ - path: '/workspace', - findings: [], - }); - mockQuery.mockImplementation(async function* () { - yield { type: 'result', session_id: 'sid-gate' }; - }); - }); - - afterEach(() => { - spyFindByDefaultCwd.mockRestore(); - spyFindByPathPrefix.mockRestore(); - spyScan.mockRestore(); - }); - - test('throws EnvLeakError when .env contains sensitive keys and registered codebase has no consent', async () => { - spyFindByDefaultCwd.mockResolvedValueOnce({ - id: 'codebase-1', - allow_env_keys: false, - default_cwd: '/workspace', - }); - spyScan.mockReturnValueOnce({ - path: '/workspace', - findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }], - }); - - await expect(async () => { - for await (const _ of client.sendQuery('test', '/workspace')) { - // consume - } - }).toThrow('Cannot run workflow'); - }); - - test('skips scan entirely when cwd is not a registered codebase', async () => { - // Both lookups return null (default from beforeEach) → unregistered cwd. - // Even if sensitive keys would be present, the pre-spawn check must not run - // because the canonical gate is registerRepoAtPath, not sendQuery. - spyScan.mockReturnValue({ - path: '/workspace', - findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }], - }); - - const chunks = []; - for await (const chunk of client.sendQuery('test', '/workspace')) { - chunks.push(chunk); - } - - expect(spyScan).not.toHaveBeenCalled(); - expect(chunks).toHaveLength(1); - }); - - test('skips scan when codebase has allow_env_keys: true', async () => { - spyFindByDefaultCwd.mockResolvedValueOnce({ - id: 'codebase-1', - allow_env_keys: true, - default_cwd: '/workspace', - }); - - const chunks = []; - for await (const chunk of client.sendQuery('test', '/workspace')) { - chunks.push(chunk); - } - - expect(spyScan).not.toHaveBeenCalled(); - expect(chunks).toHaveLength(1); - }); - - test('proceeds without scanning when cwd has no registered codebase', async () => { - // Unregistered cwd — the pre-spawn safety net is out of scope. - const chunks = []; - for await (const chunk of client.sendQuery('test', '/workspace')) { - chunks.push(chunk); - } - - expect(spyScan).not.toHaveBeenCalled(); - expect(chunks).toHaveLength(1); - }); - - test('skips scan when allowTargetRepoKeys is true in merged config', async () => { - spyFindByDefaultCwd.mockResolvedValueOnce({ - id: 'codebase-1', - allow_env_keys: false, - default_cwd: '/workspace', - }); - const spyLoadConfig = spyOn(configLoader, 'loadConfig').mockResolvedValueOnce({ - allowTargetRepoKeys: true, - } as Awaited>); - // Even though scanner would return a finding, the config bypass must short-circuit - spyScan.mockReturnValueOnce({ - path: '/workspace', - findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }], - }); - - const chunks = []; - for await (const chunk of client.sendQuery('test', '/workspace')) { - chunks.push(chunk); - } - - expect(spyScan).not.toHaveBeenCalled(); - expect(chunks).toHaveLength(1); - spyLoadConfig.mockRestore(); - }); - - test('falls back to scanner when loadConfig throws (fail-closed)', async () => { - spyFindByDefaultCwd.mockResolvedValueOnce({ - id: 'codebase-1', - allow_env_keys: false, - default_cwd: '/workspace', - }); - const spyLoadConfig = spyOn(configLoader, 'loadConfig').mockRejectedValueOnce( - new Error('YAML parse error') - ); - spyScan.mockReturnValueOnce({ - path: '/workspace', - findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }], - }); - - await expect(async () => { - for await (const _ of client.sendQuery('test', '/workspace')) { - // consume - } - }).toThrow('Cannot run workflow'); - expect(spyScan).toHaveBeenCalled(); - spyLoadConfig.mockRestore(); - }); - - test('uses prefix lookup for worktree paths when exact match returns null', async () => { - spyFindByPathPrefix.mockResolvedValueOnce({ - id: 'codebase-1', - allow_env_keys: true, - default_cwd: '/workspace/source', - }); - - const chunks = []; - for await (const chunk of client.sendQuery('test', '/workspace/worktrees/feature')) { - chunks.push(chunk); - } - - expect(spyFindByPathPrefix).toHaveBeenCalledWith('/workspace/worktrees/feature'); - expect(spyScan).not.toHaveBeenCalled(); - }); - }); }); describe('withFirstMessageTimeout', () => { diff --git a/packages/core/src/providers/claude.ts b/packages/providers/src/claude/provider.ts similarity index 60% rename from packages/core/src/providers/claude.ts rename to packages/providers/src/claude/provider.ts index 0d8c6d4596..7b2f0f44df 100644 --- a/packages/core/src/providers/claude.ts +++ b/packages/providers/src/claude/provider.ts @@ -4,8 +4,7 @@ * * Type Safety Pattern: * - Uses `Options` type from SDK for query configuration - * - SDK message types (SDKMessage, SDKAssistantMessage, etc.) have strict - * type checking that requires explicit type handling for content blocks + * - SDK message types have strict type checking for content blocks * - Content blocks are typed via inline assertions for clarity * * Authentication: @@ -19,29 +18,19 @@ import { type HookCallback, type HookCallbackMatcher, } from '@anthropic-ai/claude-agent-sdk'; -// The `/embed` entry point uses `import ... with { type: 'file' }` to embed -// the SDK's `cli.js` into the compiled binary's $bunfs virtual filesystem, -// then extracts it to a temp path at runtime so the subprocess can exec it. -// Without this, the SDK falls back to resolving `cli.js` from -// `import.meta.url` of its own module — which bun freezes at build time to -// the build host's absolute node_modules path, producing a "Module not found -// /Users/runner/..." error on any machine other than the CI runner. -// Safe in dev too: resolves to the real on-disk cli.js. import cliPath from '@anthropic-ai/claude-agent-sdk/embed'; -import { - type AgentRequestOptions, - type IAgentProvider, - type MessageChunk, - type TokenUsage, +import type { + IAgentProvider, + SendQueryOptions, + MessageChunk, + TokenUsage, + ProviderCapabilities, + NodeConfig, } from '../types'; +import { parseClaudeConfig } from './config'; import { createLogger } from '@archon/paths'; -// No env filtering here — process.env is already clean: -// stripCwdEnv() at entry point stripped CWD .env keys + CLAUDECODE markers, -// then ~/.archon/.env was loaded as the trusted source. All keys the user sets -// in ~/.archon/.env are intentional and pass through to the subprocess. -import { scanPathForSensitiveKeys, EnvLeakError } from '../utils/env-leak-scanner'; -import * as codebaseDb from '../db/codebases'; -import { loadConfig } from '../config/config-loader'; +import { readFile } from 'fs/promises'; +import { resolve, isAbsolute } from 'path'; /** Lazy-initialized logger (deferred so test mocks can intercept createLogger) */ let cachedLog: ReturnType | undefined; @@ -52,14 +41,12 @@ function getLog(): ReturnType { /** * Content block type for assistant messages - * Represents text or tool_use blocks from Claude API responses */ interface ContentBlock { type: 'text' | 'tool_use'; text?: string; name?: string; input?: Record; - /** Stable Anthropic `tool_use_id` — used to pair `tool_call`/`tool_result` events. */ id?: string; } @@ -73,7 +60,6 @@ function normalizeClaudeUsage(usage?: { const output = usage.output_tokens; if (typeof input !== 'number' || typeof output !== 'number') return undefined; const total = usage.total_tokens; - return { input, output, @@ -87,14 +73,6 @@ function normalizeClaudeUsage(usage?: { * process.env is already clean at this point: * - stripCwdEnv() at entry point removed CWD .env keys + CLAUDECODE markers * - ~/.archon/.env loaded with override:true as the trusted source - * - * Auth mode is determined by the SDK based on what tokens are present: - * - Tokens in env → SDK uses them (explicit auth) - * - No tokens → SDK uses `claude /login` credentials (global auth) - * - User controls this by what they put in ~/.archon/.env - * - * We log the detected mode for diagnostics but don't filter — the user's - * config is trusted. See coleam00/Archon#1067 for design rationale. */ function buildSubprocessEnv(): NodeJS.ProcessEnv { const hasExplicitTokens = Boolean( @@ -105,23 +83,14 @@ function buildSubprocessEnv(): NodeJS.ProcessEnv { { authMode }, authMode === 'global' ? 'using_global_auth' : 'using_explicit_tokens' ); - return { ...process.env }; } -/** Max retries for transient subprocess failures (3 = 4 total attempts). - * SDK subprocess crashes (exit code 1) are often intermittent — AJV schema validation - * regressions, stale HTTP/2 connections, and other transient SDK issues typically - * succeed on retry 3 or 4. See: anthropics/claude-code#22973, claude-code-action#853 */ +/** Max retries for transient subprocess failures */ const MAX_SUBPROCESS_RETRIES = 3; - -/** Delay between retries in milliseconds */ const RETRY_BASE_DELAY_MS = 2000; -/** Patterns indicating rate limiting in stderr/error messages */ const RATE_LIMIT_PATTERNS = ['rate limit', 'too many requests', '429', 'overloaded']; - -/** Patterns indicating auth issues in stderr/error messages */ const AUTH_PATTERNS = [ 'credit balance', 'unauthorized', @@ -130,17 +99,7 @@ const AUTH_PATTERNS = [ '401', '403', ]; - -/** Patterns indicating the subprocess crashed (transient, worth retrying) */ -const SUBPROCESS_CRASH_PATTERNS = [ - 'exited with code', - 'killed', - 'signal', - // "Operation aborted" can appear when the SDK's PostToolUse hook tries to write() - // back to a subprocess pipe that was closed by an abort signal. This is a race - // condition in SDK cleanup — safe to classify as a crash and retry. - 'operation aborted', -]; +const SUBPROCESS_CRASH_PATTERNS = ['exited with code', 'killed', 'signal', 'operation aborted']; function classifySubprocessError( errorMessage: string, @@ -153,7 +112,6 @@ function classifySubprocessError( return 'unknown'; } -/** Default timeout for first SDK message (ms). Configurable via env var. */ function getFirstEventTimeoutMs(): number { const raw = process.env.ARCHON_CLAUDE_FIRST_EVENT_TIMEOUT_MS; if (raw) { @@ -163,7 +121,6 @@ function getFirstEventTimeoutMs(): number { return 60_000; } -/** Build a diagnostic payload for claude.first_event_timeout log */ function buildFirstEventHangDiagnostics( subprocessEnv: Record, model: string | undefined @@ -182,16 +139,11 @@ function buildFirstEventHangDiagnostics( }; } -/** Sentinel error class to identify timeout rejections in withFirstMessageTimeout. */ class FirstEventTimeoutError extends Error {} /** * Wraps an async generator so that the first call to .next() must resolve - * within `timeoutMs`. If it doesn't, aborts the controller and throws a - * descriptive error. Subsequent .next() calls are forwarded directly. - * - * Uses Promise.race() — not just AbortController — because the pathological - * case is "SDK ignores abort", so we need an independent unblocking mechanism. + * within `timeoutMs`. If it doesn't, aborts the controller and throws. */ export async function* withFirstMessageTimeout( gen: AsyncGenerator, @@ -199,7 +151,6 @@ export async function* withFirstMessageTimeout( timeoutMs: number, diagnostics: Record ): AsyncGenerator { - // Race first event against timeout let timerId: ReturnType | undefined; let firstValue: IteratorResult; try { @@ -230,31 +181,310 @@ export async function* withFirstMessageTimeout( if (firstValue.done) return; yield firstValue.value; - - // Forward remaining events directly yield* gen; } /** - * Returns the current process UID, or undefined on platforms that don't support it (e.g. Windows). - * Exported for testing — spyOn(claudeModule, 'getProcessUid') works cross-platform. + * Returns the current process UID, or undefined on platforms that don't support it. */ export function getProcessUid(): number | undefined { return typeof process.getuid === 'function' ? process.getuid() : undefined; } +// ─── MCP Config Loading (absorbed from dag-executor) ─────────────────────── + /** - * Claude AI agent provider - * Implements generic IAgentProvider interface + * Expand $VAR_NAME references in string-valued records from process.env. + */ +function expandEnvVarsInRecord( + record: Record, + missingVars: string[] +): Record { + const result: Record = {}; + for (const [key, val] of Object.entries(record)) { + if (typeof val !== 'string') { + getLog().warn({ key, valueType: typeof val }, 'mcp_env_value_coerced_to_string'); + result[key] = String(val); + continue; + } + result[key] = val.replace(/\$([A-Z_][A-Z0-9_]*)/g, (_, varName: string) => { + const envVal = process.env[varName]; + if (envVal === undefined) { + missingVars.push(varName); + } + return envVal ?? ''; + }); + } + return result; +} + +function expandEnvVars(config: Record): { + expanded: Record; + missingVars: string[]; +} { + const result: Record = {}; + const missingVars: string[] = []; + for (const [serverName, serverConfig] of Object.entries(config)) { + if (typeof serverConfig !== 'object' || serverConfig === null) { + getLog().warn({ serverName, valueType: typeof serverConfig }, 'mcp_server_config_not_object'); + continue; + } + const server = { ...(serverConfig as Record) }; + if (server.env && typeof server.env === 'object') { + server.env = expandEnvVarsInRecord(server.env as Record, missingVars); + } + if (server.headers && typeof server.headers === 'object') { + server.headers = expandEnvVarsInRecord( + server.headers as Record, + missingVars + ); + } + result[serverName] = server; + } + return { expanded: result, missingVars }; +} + +/** + * Load MCP server config from a JSON file and expand environment variables. + */ +export async function loadMcpConfig( + mcpPath: string, + cwd: string +): Promise<{ servers: Record; serverNames: string[]; missingVars: string[] }> { + const fullPath = isAbsolute(mcpPath) ? mcpPath : resolve(cwd, mcpPath); + + let raw: string; + try { + raw = await readFile(fullPath, 'utf-8'); + } catch (err) { + const e = err as NodeJS.ErrnoException; + if (e.code === 'ENOENT') { + throw new Error(`MCP config file not found: ${mcpPath} (resolved to ${fullPath})`); + } + throw new Error(`Failed to read MCP config file: ${mcpPath} — ${e.message}`); + } + + let parsed: Record; + try { + parsed = JSON.parse(raw) as Record; + } catch (parseErr) { + const detail = (parseErr as SyntaxError).message; + throw new Error(`MCP config file is not valid JSON: ${mcpPath} — ${detail}`); + } + + if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) { + throw new Error(`MCP config must be a JSON object (Record): ${mcpPath}`); + } + + const { expanded, missingVars } = expandEnvVars(parsed); + const serverNames = Object.keys(expanded); + return { servers: expanded, serverNames, missingVars }; +} + +// ─── SDK Hooks Building (absorbed from dag-executor) ─────────────────────── + +/** YAML hook matcher shape (matches @archon/workflows/schemas/dag-node WorkflowNodeHooks) */ +interface YAMLHookMatcher { + matcher?: string; + response: unknown; + timeout?: number; +} + +type SDKHooksMap = Partial< + Record< + string, + { + matcher?: string; + hooks: (( + input: unknown, + toolUseID: string | undefined, + options: { signal: AbortSignal } + ) => Promise)[]; + timeout?: number; + }[] + > +>; + +/** + * Convert declarative YAML hook definitions to SDK HookCallbackMatcher arrays. + */ +export function buildSDKHooksFromYAML( + nodeHooks: Record +): SDKHooksMap { + const sdkHooks: SDKHooksMap = {}; + + for (const [event, matchers] of Object.entries(nodeHooks)) { + if (!matchers) continue; + sdkHooks[event] = matchers.map(m => ({ + ...(m.matcher ? { matcher: m.matcher } : {}), + hooks: [async (): Promise => m.response], + ...(m.timeout ? { timeout: m.timeout } : {}), + })); + } + + if (Object.keys(sdkHooks).length === 0) { + getLog().warn( + { nodeHooksKeys: Object.keys(nodeHooks) }, + 'claude.hooks_build_produced_empty_map' + ); + } + + return sdkHooks; +} + +// ─── NodeConfig → SDK Options Translation ────────────────────────────────── + +/** + * Translate nodeConfig into Claude SDK-specific options. + * Called inside sendQuery when nodeConfig is present (workflow path). + * Returns user-facing warnings that the caller should yield as system chunks. + */ +async function applyNodeConfig( + options: Options, + nodeConfig: NodeConfig, + cwd: string +): Promise { + const warnings: string[] = []; + // allowed_tools → tools + if (nodeConfig.allowed_tools !== undefined) { + options.tools = nodeConfig.allowed_tools; + } + + // denied_tools → disallowedTools + if (nodeConfig.denied_tools !== undefined) { + options.disallowedTools = nodeConfig.denied_tools; + } + + // hooks → build SDK hooks + if (nodeConfig.hooks) { + const builtHooks = buildSDKHooksFromYAML( + nodeConfig.hooks as Record + ); + if (Object.keys(builtHooks).length > 0) { + // Merge with existing hooks (PostToolUse capture hook) + const existingHooks = options.hooks as SDKHooksMap | undefined; + for (const [event, matchers] of Object.entries(builtHooks)) { + if (!matchers) continue; + const existing = existingHooks?.[event] as HookCallbackMatcher[] | undefined; + if (existing) { + (options.hooks as Record)[event] = [ + ...(matchers as HookCallbackMatcher[]), + ...existing, + ]; + } else { + (options.hooks as Record)[event] = + matchers as HookCallbackMatcher[]; + } + } + } + } + + // mcp → load config and set mcpServers + allowedTools wildcards + if (nodeConfig.mcp) { + const mcpPath = nodeConfig.mcp; + const { servers, serverNames, missingVars } = await loadMcpConfig(mcpPath, cwd); + options.mcpServers = servers as Options['mcpServers']; + const mcpWildcards = serverNames.map(name => `mcp__${name}__*`); + options.allowedTools = [...(options.allowedTools ?? []), ...mcpWildcards]; + getLog().info({ serverNames, mcpPath }, 'claude.mcp_config_loaded'); + if (missingVars.length > 0) { + const uniqueVars = [...new Set(missingVars)]; + getLog().warn({ missingVars: uniqueVars }, 'claude.mcp_env_vars_missing'); + warnings.push( + `MCP config references undefined env vars: ${uniqueVars.join(', ')}. These will be empty strings — MCP servers may fail to authenticate.` + ); + } + // Haiku models don't support tool search (lazy loading for many tools) + if (options.model?.toLowerCase().includes('haiku')) { + getLog().warn({ model: options.model }, 'claude.mcp_haiku_tool_search_unsupported'); + warnings.push( + 'Using Haiku model with MCP servers — tool search (lazy loading for many tools) is not supported on Haiku. Consider using Sonnet or Opus.' + ); + } + } + + // skills → AgentDefinition wrapping + if (nodeConfig.skills) { + const skills = nodeConfig.skills; + const agentId = 'dag-node-skills'; + const agentTools = options.tools ? [...(options.tools as string[]), 'Skill'] : ['Skill']; + const agentDef: { + description: string; + prompt: string; + skills: string[]; + tools: string[]; + model?: string; + } = { + description: 'DAG node with skills', + prompt: `You have preloaded skills: ${skills.join(', ')}. Use them when relevant.`, + skills, + tools: agentTools, + }; + if (options.model) agentDef.model = options.model; + options.agents = { [agentId]: agentDef }; + options.agent = agentId; + if (!options.allowedTools?.includes('Skill')) { + options.allowedTools = [...(options.allowedTools ?? []), 'Skill']; + } + getLog().info({ skills, agentId }, 'claude.skills_agent_created'); + } + + // effort + if (nodeConfig.effort !== undefined) { + options.effort = nodeConfig.effort as Options['effort']; + } + + // thinking + if (nodeConfig.thinking !== undefined) { + options.thinking = nodeConfig.thinking as Options['thinking']; + } + + // sandbox + if (nodeConfig.sandbox !== undefined) { + options.sandbox = nodeConfig.sandbox as Options['sandbox']; + } + + // betas + if (nodeConfig.betas !== undefined) { + options.betas = nodeConfig.betas as Options['betas']; + } + + // output_format (from nodeConfig, overrides base outputFormat if present) + if (nodeConfig.output_format) { + options.outputFormat = { + type: 'json_schema', + schema: nodeConfig.output_format, + } as Options['outputFormat']; + } + + // maxBudgetUsd from nodeConfig + if (nodeConfig.maxBudgetUsd !== undefined) { + options.maxBudgetUsd = nodeConfig.maxBudgetUsd; + } + + // systemPrompt from nodeConfig + if (nodeConfig.systemPrompt !== undefined) { + options.systemPrompt = nodeConfig.systemPrompt; + } + + // fallbackModel from nodeConfig + if (nodeConfig.fallbackModel !== undefined) { + options.fallbackModel = nodeConfig.fallbackModel; + } + + return warnings; +} + +// ─── Claude Provider ─────────────────────────────────────────────────────── + +/** + * Claude AI agent provider. + * Implements IAgentProvider with full SDK integration. */ export class ClaudeProvider implements IAgentProvider { private readonly retryBaseDelayMs: number; constructor(options?: { retryBaseDelayMs?: number }) { - // Claude Code SDK silently rejects bypassPermissions when running as root (UID 0). - // Check once at construction time so the error surfaces early, not on first query. - // IS_SANDBOX=1 bypasses this check — the SDK itself honours this env var in sandboxed - // environments (Docker, VPS, CI) where running as root is expected. if (getProcessUid() === 0 && process.env.IS_SANDBOX !== '1') { throw new Error( 'Claude Code SDK does not support bypassPermissions when running as root (UID 0). ' + @@ -264,50 +494,40 @@ export class ClaudeProvider implements IAgentProvider { this.retryBaseDelayMs = options?.retryBaseDelayMs ?? RETRY_BASE_DELAY_MS; } + getCapabilities(): ProviderCapabilities { + return { + sessionResume: true, + mcp: true, + hooks: true, + skills: true, + toolRestrictions: true, + structuredOutput: true, + envInjection: true, + costControl: true, + effortControl: true, + thinkingControl: true, + fallbackModel: true, + sandbox: true, + }; + } + /** * Send a query to Claude and stream responses. * Includes retry logic for transient failures (up to 3 retries with exponential backoff). - * Enriches errors with stderr context and classification. */ + // TODO(#1135): Pre-spawn env-leak gate was removed during provider extraction. + // Caller-side enforcement (orchestrator, dag-executor) is tracked in #1135. + // Providers must NOT implement security gates — the platform guarantees safety + // before a provider runs. async *sendQuery( prompt: string, cwd: string, resumeSessionId?: string, - requestOptions?: AgentRequestOptions + requestOptions?: SendQueryOptions ): AsyncGenerator { - // Pre-spawn: check for env key leak if codebase is not explicitly consented. - // Use prefix lookup so worktree paths (e.g. .../worktrees/feature-branch) still - // match the registered source cwd (e.g. .../source). - const codebase = - (await codebaseDb.findCodebaseByDefaultCwd(cwd)) ?? - (await codebaseDb.findCodebaseByPathPrefix(cwd)); - if (codebase && !codebase.allow_env_keys) { - // Fail-closed: a config load failure (corrupt YAML, permission denied) - // must NOT silently bypass the gate. Catch, log, and treat as - // `allowTargetRepoKeys = false` so the scanner still runs. - let allowTargetRepoKeys = false; - try { - const merged = await loadConfig(cwd); - allowTargetRepoKeys = merged.allowTargetRepoKeys; - } catch (configErr) { - getLog().warn({ err: configErr, cwd }, 'env_leak_gate.config_load_failed_gate_enforced'); - } - if (!allowTargetRepoKeys) { - const report = scanPathForSensitiveKeys(cwd); - if (report.findings.length > 0) { - throw new EnvLeakError(report, 'spawn-existing'); - } - } - } - - // Note: If subprocess crashes mid-stream after yielding chunks, those chunks - // are already consumed by the caller. Retry starts a fresh subprocess, so the - // caller may receive partial output from the failed attempt followed by full - // output from the retry. This is a known limitation of async generator retries. let lastError: Error | undefined; for (let attempt = 0; attempt <= MAX_SUBPROCESS_RETRIES; attempt++) { - // Check if already aborted before starting attempt if (requestOptions?.abortSignal?.aborted) { throw new Error('Query aborted'); } @@ -315,7 +535,6 @@ export class ClaudeProvider implements IAgentProvider { const stderrLines: string[] = []; const toolResultQueue: { toolName: string; toolOutput: string; toolCallId?: string }[] = []; - // Create per-attempt abort controller and wire to caller's signal const controller = new AbortController(); if (requestOptions?.abortSignal) { requestOptions.abortSignal.addEventListener( @@ -327,69 +546,38 @@ export class ClaudeProvider implements IAgentProvider { ); } + // Parse assistantConfig for typed defaults + const assistantDefaults = parseClaudeConfig(requestOptions?.assistantConfig ?? {}); + const options: Options = { cwd, pathToClaudeCodeExecutable: cliPath, env: requestOptions?.env ? { ...buildSubprocessEnv(), ...requestOptions.env } : buildSubprocessEnv(), - model: requestOptions?.model, + model: requestOptions?.model ?? assistantDefaults.model, abortController: controller, - ...(requestOptions?.tools !== undefined ? { tools: requestOptions.tools } : {}), - ...(requestOptions?.disallowedTools !== undefined - ? { disallowedTools: requestOptions.disallowedTools } - : {}), - // Pass outputFormat for json_schema structured output (Claude Agent SDK v0.2.45+) ...(requestOptions?.outputFormat !== undefined ? { outputFormat: requestOptions.outputFormat } : {}), - // Note: hooks are merged below (line with `hooks: { ... }`) — not spread here - // Pass MCP servers for per-node MCP support (Claude Agent SDK v0.2.74+) - ...(requestOptions?.mcpServers !== undefined - ? { mcpServers: requestOptions.mcpServers } + ...(requestOptions?.maxBudgetUsd !== undefined + ? { maxBudgetUsd: requestOptions.maxBudgetUsd } : {}), - // Pass allowedTools for MCP tool wildcards (e.g., 'mcp__github__*') - ...(requestOptions?.allowedTools !== undefined - ? { allowedTools: requestOptions.allowedTools } + ...(requestOptions?.fallbackModel !== undefined + ? { fallbackModel: requestOptions.fallbackModel } : {}), - // Pass agents/agent for per-node skill scoping via AgentDefinition wrapping - ...(requestOptions?.agents !== undefined ? { agents: requestOptions.agents } : {}), - ...(requestOptions?.agent !== undefined ? { agent: requestOptions.agent } : {}), - // Skip writing session transcripts to ~/.claude/projects/ — Archon manages its own - // session persistence. persistSession: false reduces disk I/O and keeps the session - // directory clean. Claude Agent SDK v0.2.74+. ...(requestOptions?.persistSession !== undefined ? { persistSession: requestOptions.persistSession } : {}), - // When forkSession is true, the SDK copies the prior session's history into a new - // session file, leaving the original untouched — safe to use on retries. ...(requestOptions?.forkSession !== undefined ? { forkSession: requestOptions.forkSession } : {}), - // Forward Claude-only SDK options (effort, thinking, maxBudgetUsd, fallbackModel, betas, sandbox) - ...(requestOptions?.effort !== undefined ? { effort: requestOptions.effort } : {}), - ...(requestOptions?.thinking !== undefined ? { thinking: requestOptions.thinking } : {}), - ...(requestOptions?.maxBudgetUsd !== undefined - ? { maxBudgetUsd: requestOptions.maxBudgetUsd } - : {}), - ...(requestOptions?.fallbackModel !== undefined - ? { fallbackModel: requestOptions.fallbackModel } - : {}), - // betas: string[] from user config; SDK expects SdkBeta[] (string literal union). - // User-provided values are validated upstream — cast is safe. - ...(requestOptions?.betas !== undefined - ? { betas: requestOptions.betas as Options['betas'] } - : {}), - ...(requestOptions?.sandbox !== undefined ? { sandbox: requestOptions.sandbox } : {}), permissionMode: 'bypassPermissions', allowDangerouslySkipPermissions: true, systemPrompt: requestOptions?.systemPrompt ?? { type: 'preset', preset: 'claude_code' }, - settingSources: requestOptions?.settingSources ?? ['project'], - // Merge user-provided hooks with our PostToolUse capture hook + settingSources: assistantDefaults.settingSources ?? ['project'], hooks: { - ...(requestOptions?.hooks ?? {}), PostToolUse: [ - ...((requestOptions?.hooks?.PostToolUse ?? []) as HookCallbackMatcher[]), { hooks: [ (async (input: Record): Promise<{ continue: true }> => { @@ -400,7 +588,6 @@ export class ClaudeProvider implements IAgentProvider { typeof toolResponse === 'string' ? toolResponse : JSON.stringify(toolResponse ?? ''); - // Truncate large outputs (e.g., file reads) to prevent DB bloat const maxLen = 10_000; toolResultQueue.push({ toolName, @@ -412,16 +599,10 @@ export class ClaudeProvider implements IAgentProvider { ], }, ], - // Without this, errored / interrupted / permission-denied tools never produce - // a paired tool_result chunk and the corresponding UI card spins forever. - // SDK type: PostToolUseFailureHookInput { tool_name, tool_use_id, error, is_interrupt? } PostToolUseFailure: [ - ...((requestOptions?.hooks?.PostToolUseFailure ?? []) as HookCallbackMatcher[]), { hooks: [ (async (input: Record): Promise<{ continue: true }> => { - // Always return { continue: true } even on internal errors so a - // malformed SDK payload can never crash the hook dispatch silently. try { const toolName = (input as { tool_name?: string }).tool_name ?? 'unknown'; const toolUseId = (input as { tool_use_id?: string }).tool_use_id; @@ -449,9 +630,6 @@ export class ClaudeProvider implements IAgentProvider { stderr: (data: string) => { const output = data.trim(); if (!output) return; - - // Always capture stderr for diagnostics — previous filtering discarded - // useful SDK startup output, leaving stderrContext empty on crashes. stderrLines.push(output); const isError = @@ -473,6 +651,13 @@ export class ClaudeProvider implements IAgentProvider { }, }; + // Apply nodeConfig if present (workflow path) — translates YAML to SDK options + const nodeConfigWarnings: string[] = []; + if (requestOptions?.nodeConfig) { + const warns = await applyNodeConfig(options, requestOptions.nodeConfig, cwd); + nodeConfigWarnings.push(...warns); + } + if (resumeSessionId) { options.resume = resumeSessionId; getLog().debug( @@ -484,6 +669,11 @@ export class ClaudeProvider implements IAgentProvider { } try { + // Yield nodeConfig warnings before starting the query + for (const warning of nodeConfigWarnings) { + yield { type: 'system' as const, content: `⚠️ ${warning}` }; + } + const rawEvents = query({ prompt, options }); const timeoutMs = getFirstEventTimeoutMs(); const diagnostics = buildFirstEventHangDiagnostics( @@ -492,7 +682,6 @@ export class ClaudeProvider implements IAgentProvider { ); const events = withFirstMessageTimeout(rawEvents, controller, timeoutMs, diagnostics); for await (const msg of events) { - // Drain tool results captured by PostToolUse hook before processing the next message while (toolResultQueue.length > 0) { const tr = toolResultQueue.shift(); if (tr) { @@ -522,7 +711,6 @@ export class ClaudeProvider implements IAgentProvider { } } } else if (msg.type === 'system') { - // Check MCP server connection status from system/init const sysMsg = msg as { subtype?: string; mcp_servers?: { name: string; status: string }[]; @@ -581,10 +769,6 @@ export class ClaudeProvider implements IAgentProvider { }; } } - // Drain any remaining tool results from the hook queue. - // Must mirror the in-loop drain — PostToolUseFailure results commonly land - // here (they fire just before the SDK's terminal `result` message), so - // dropping toolCallId here would defeat the stable-pairing fix. while (toolResultQueue.length > 0) { const tr = toolResultQueue.shift(); if (tr) { @@ -596,11 +780,10 @@ export class ClaudeProvider implements IAgentProvider { }; } } - return; // Success - exit retry loop + return; } catch (error) { const err = error as Error; - // Don't retry aborted queries if (controller.signal.aborted) { throw new Error('Query aborted'); } @@ -613,7 +796,6 @@ export class ClaudeProvider implements IAgentProvider { 'query_error' ); - // Don't retry auth errors - they won't resolve if (errorClass === 'auth') { const enrichedError = new Error( `Claude Code auth error: ${err.message}${stderrContext ? ` (${stderrContext})` : ''}` @@ -622,7 +804,6 @@ export class ClaudeProvider implements IAgentProvider { throw enrichedError; } - // Retry transient failures (rate limit, crash) if ( attempt < MAX_SUBPROCESS_RETRIES && (errorClass === 'rate_limit' || errorClass === 'crash') @@ -634,7 +815,6 @@ export class ClaudeProvider implements IAgentProvider { continue; } - // Final failure - enrich and throw const enrichedMessage = stderrContext ? `Claude Code ${errorClass}: ${err.message} (stderr: ${stderrContext})` : `Claude Code ${errorClass}: ${err.message}`; @@ -644,13 +824,9 @@ export class ClaudeProvider implements IAgentProvider { } } - // Should not reach here, but handle defensively throw lastError ?? new Error('Claude Code query failed after retries'); } - /** - * Get the assistant type identifier - */ getType(): string { return 'claude'; } diff --git a/packages/core/src/providers/codex-binary-guard.test.ts b/packages/providers/src/codex/binary-guard.test.ts similarity index 77% rename from packages/core/src/providers/codex-binary-guard.test.ts rename to packages/providers/src/codex/binary-guard.test.ts index 6a0047b948..891262cf47 100644 --- a/packages/core/src/providers/codex-binary-guard.test.ts +++ b/packages/providers/src/codex/binary-guard.test.ts @@ -2,7 +2,7 @@ * Tests for Codex binary resolution in compiled binary mode. * * Separate file because mock.module('@archon/paths') with BUNDLED_IS_BINARY=true - * conflicts with codex.test.ts which mocks it without BUNDLED_IS_BINARY. + * conflicts with provider.test.ts which mocks it without BUNDLED_IS_BINARY. * Must run in its own bun test invocation (see package.json test script). */ import { describe, test, expect, mock, beforeEach } from 'bun:test'; @@ -45,37 +45,16 @@ mock.module('@openai/codex-sdk', () => ({ Codex: MockCodex, })); -// Mock resolver — controls binary resolution behavior per test +// Mock resolver -- controls binary resolution behavior per test const mockResolveCodexBinaryPath = mock( (_configPath?: string): Promise => Promise.resolve('/tmp/test-archon/vendor/codex/codex') ); -mock.module('../utils/codex-binary-resolver', () => ({ +mock.module('./binary-resolver', () => ({ resolveCodexBinaryPath: mockResolveCodexBinaryPath, })); -// Config mock with configurable return value -const mockLoadConfig = mock(() => - Promise.resolve({ - allowTargetRepoKeys: false, - assistants: { codex: {} }, - }) -); - -// Mock db and config dependencies to prevent real DB access -mock.module('../db/codebases', () => ({ - findCodebaseByDefaultCwd: mock(() => Promise.resolve(null)), - findCodebaseByPathPrefix: mock(() => Promise.resolve(null)), -})); -mock.module('../config/config-loader', () => ({ - loadConfig: mockLoadConfig, -})); -mock.module('../utils/env-leak-scanner', () => ({ - scanPathForSensitiveKeys: mock(() => ({ findings: [] })), - EnvLeakError: class extends Error {}, -})); - -import { CodexProvider, resetCodexSingleton } from './codex'; +import { CodexProvider, resetCodexSingleton } from './provider'; describe('CodexProvider binary mode resolution', () => { beforeEach(() => { @@ -83,19 +62,12 @@ describe('CodexProvider binary mode resolution', () => { MockCodex.mockClear(); mockStartThread.mockClear(); mockResolveCodexBinaryPath.mockClear(); - mockLoadConfig.mockClear(); capturedOptions = undefined; // Restore default mock implementations mockResolveCodexBinaryPath.mockImplementation(() => Promise.resolve('/tmp/test-archon/vendor/codex/codex') ); - mockLoadConfig.mockImplementation(() => - Promise.resolve({ - allowTargetRepoKeys: false, - assistants: { codex: {} }, - }) - ); }); test('passes resolved binary path to Codex constructor via codexPathOverride', async () => { @@ -161,14 +133,11 @@ describe('CodexProvider binary mode resolution', () => { expect(capturedOptions?.codexPathOverride).toBeUndefined(); }); - test('passes config codexBinaryPath to resolver', async () => { - mockLoadConfig.mockResolvedValueOnce({ - allowTargetRepoKeys: false, - assistants: { codex: { codexBinaryPath: '/user/custom/codex' } }, - }); - + test('passes config codexBinaryPath to resolver via assistantConfig', async () => { const client = new CodexProvider(); - const generator = client.sendQuery('test prompt', '/tmp/test'); + const generator = client.sendQuery('test prompt', '/tmp/test', undefined, { + assistantConfig: { codexBinaryPath: '/user/custom/codex' }, + }); for await (const _chunk of generator) { // drain diff --git a/packages/core/src/utils/codex-binary-resolver-dev.test.ts b/packages/providers/src/codex/binary-resolver-dev.test.ts similarity index 92% rename from packages/core/src/utils/codex-binary-resolver-dev.test.ts rename to packages/providers/src/codex/binary-resolver-dev.test.ts index ac8761ee02..9635d8d59c 100644 --- a/packages/core/src/utils/codex-binary-resolver-dev.test.ts +++ b/packages/providers/src/codex/binary-resolver-dev.test.ts @@ -11,7 +11,7 @@ mock.module('@archon/paths', () => ({ getArchonHome: mock(() => '/tmp/test-archon-home'), })); -import { resolveCodexBinaryPath } from './codex-binary-resolver'; +import { resolveCodexBinaryPath } from './binary-resolver'; describe('resolveCodexBinaryPath (dev mode)', () => { test('returns undefined when BUNDLED_IS_BINARY is false', async () => { diff --git a/packages/core/src/utils/codex-binary-resolver.test.ts b/packages/providers/src/codex/binary-resolver.test.ts similarity index 98% rename from packages/core/src/utils/codex-binary-resolver.test.ts rename to packages/providers/src/codex/binary-resolver.test.ts index 3425a6fa17..1df4e7c6f6 100644 --- a/packages/core/src/utils/codex-binary-resolver.test.ts +++ b/packages/providers/src/codex/binary-resolver.test.ts @@ -16,7 +16,7 @@ mock.module('@archon/paths', () => ({ getArchonHome: mock(() => '/tmp/test-archon-home'), })); -import * as resolver from './codex-binary-resolver'; +import * as resolver from './binary-resolver'; describe('resolveCodexBinaryPath (binary mode)', () => { const originalEnv = process.env.CODEX_BIN_PATH; diff --git a/packages/core/src/utils/codex-binary-resolver.ts b/packages/providers/src/codex/binary-resolver.ts similarity index 96% rename from packages/core/src/utils/codex-binary-resolver.ts rename to packages/providers/src/codex/binary-resolver.ts index e927918c95..a1e0f01a5b 100644 --- a/packages/core/src/utils/codex-binary-resolver.ts +++ b/packages/providers/src/codex/binary-resolver.ts @@ -5,9 +5,6 @@ * native Codex CLI binary, which breaks in compiled binaries where * `import.meta.url` is frozen to the build host's path. * - * This module resolves an alternative path and passes it to the SDK's - * `codexPathOverride` constructor option, bypassing the broken resolution. - * * Resolution order: * 1. `CODEX_BIN_PATH` environment variable * 2. `assistants.codex.codexBinaryPath` in config diff --git a/packages/providers/src/codex/config.ts b/packages/providers/src/codex/config.ts new file mode 100644 index 0000000000..f8d6f2d7e6 --- /dev/null +++ b/packages/providers/src/codex/config.ts @@ -0,0 +1,46 @@ +/** + * Typed config parsing for Codex provider defaults. + * Validates and narrows the opaque assistantConfig to typed fields. + */ +import type { CodexProviderDefaults } from '../types'; + +// Re-export so consumers can import the type from either location +export type { CodexProviderDefaults } from '../types'; + +/** + * Parse raw assistantConfig into typed Codex defaults. + * Defensive: invalid fields are silently dropped. + */ +export function parseCodexConfig(raw: Record): CodexProviderDefaults { + const result: CodexProviderDefaults = {}; + + if (typeof raw.model === 'string') { + result.model = raw.model; + } + + const validEfforts = ['minimal', 'low', 'medium', 'high', 'xhigh']; + if ( + typeof raw.modelReasoningEffort === 'string' && + validEfforts.includes(raw.modelReasoningEffort) + ) { + result.modelReasoningEffort = + raw.modelReasoningEffort as CodexProviderDefaults['modelReasoningEffort']; + } + + const validSearchModes = ['disabled', 'cached', 'live']; + if (typeof raw.webSearchMode === 'string' && validSearchModes.includes(raw.webSearchMode)) { + result.webSearchMode = raw.webSearchMode as CodexProviderDefaults['webSearchMode']; + } + + if (Array.isArray(raw.additionalDirectories)) { + result.additionalDirectories = raw.additionalDirectories.filter( + (d): d is string => typeof d === 'string' + ); + } + + if (typeof raw.codexBinaryPath === 'string') { + result.codexBinaryPath = raw.codexBinaryPath; + } + + return result; +} diff --git a/packages/providers/src/codex/index.ts b/packages/providers/src/codex/index.ts new file mode 100644 index 0000000000..71302f6884 --- /dev/null +++ b/packages/providers/src/codex/index.ts @@ -0,0 +1,3 @@ +export { CodexProvider, resetCodexSingleton } from './provider'; +export { parseCodexConfig, type CodexProviderDefaults } from './config'; +export { resolveCodexBinaryPath, fileExists } from './binary-resolver'; diff --git a/packages/core/src/providers/codex.test.ts b/packages/providers/src/codex/provider.test.ts similarity index 81% rename from packages/core/src/providers/codex.test.ts rename to packages/providers/src/codex/provider.test.ts index 16bcfa76c6..1a5c3c926f 100644 --- a/packages/core/src/providers/codex.test.ts +++ b/packages/providers/src/codex/provider.test.ts @@ -1,4 +1,4 @@ -import { describe, test, expect, mock, beforeEach, afterEach, spyOn } from 'bun:test'; +import { describe, test, expect, mock, beforeEach } from 'bun:test'; import { createMockLogger } from '../test/mocks/logger'; const mockLogger = createMockLogger(); @@ -39,9 +39,7 @@ mock.module('@openai/codex-sdk', () => ({ Codex: MockCodex, })); -import { CodexProvider } from './codex'; -import * as codebaseDb from '../db/codebases'; -import * as envLeakScanner from '../utils/env-leak-scanner'; +import { CodexProvider } from './provider'; describe('CodexProvider', () => { let client: CodexProvider; @@ -67,6 +65,26 @@ describe('CodexProvider', () => { }); }); + describe('getCapabilities', () => { + test('returns limited capability set for Codex provider', () => { + const caps = client.getCapabilities(); + expect(caps).toEqual({ + sessionResume: true, + mcp: false, + hooks: false, + skills: false, + toolRestrictions: false, + structuredOutput: true, + envInjection: false, + costControl: false, + effortControl: false, + thinkingControl: false, + fallbackModel: false, + sandbox: false, + }); + }); + }); + describe('sendQuery', () => { test('yields text events from agent_message items', async () => { mockRunStreamed.mockResolvedValue({ @@ -114,8 +132,6 @@ describe('CodexProvider', () => { chunks.push(chunk); } - // Codex item.completed fires once the command is fully done, so we emit - // start + result back-to-back to close the UI tool card immediately. expect(chunks[0]).toEqual({ type: 'tool', toolName: 'npm test' }); expect(chunks[1]).toEqual({ type: 'tool_result', @@ -184,10 +200,10 @@ describe('CodexProvider', () => { chunks.push(chunk); } - expect(chunks[0]).toEqual({ type: 'tool', toolName: '🔍 Searching: codex sdk' }); + expect(chunks[0]).toEqual({ type: 'tool', toolName: '\u{1F50D} Searching: codex sdk' }); expect(chunks[1]).toEqual({ type: 'tool_result', - toolName: '🔍 Searching: codex sdk', + toolName: '\u{1F50D} Searching: codex sdk', toolOutput: '', }); }); @@ -216,7 +232,7 @@ describe('CodexProvider', () => { expect(chunks[0]).toEqual({ type: 'system', - content: '📋 Tasks:\n✅ Scan repo\n⬜ Add tests', + content: '\u{1F4CB} Tasks:\n\u2705 Scan repo\n\u2B1C Add tests', }); expect(chunks).toHaveLength(2); }); @@ -253,11 +269,11 @@ describe('CodexProvider', () => { expect(chunks).toHaveLength(3); // todoV1 + todoV2 + result expect(chunks[0]).toEqual({ type: 'system', - content: '📋 Tasks:\n⬜ Scan repo\n⬜ Add tests', + content: '\u{1F4CB} Tasks:\n\u2B1C Scan repo\n\u2B1C Add tests', }); expect(chunks[1]).toEqual({ type: 'system', - content: '📋 Tasks:\n✅ Scan repo\n⬜ Add tests', + content: '\u{1F4CB} Tasks:\n\u2705 Scan repo\n\u2B1C Add tests', }); }); @@ -287,7 +303,7 @@ describe('CodexProvider', () => { expect(chunks[0]).toEqual({ type: 'system', - content: '✅ File changes:\n➕ src/new.ts\n📝 src/app.ts\n➖ src/old.ts', + content: '\u2705 File changes:\n\u2795 src/new.ts\n\u{1F4DD} src/app.ts\n\u2796 src/old.ts', }); }); @@ -314,7 +330,7 @@ describe('CodexProvider', () => { expect(chunks[0]).toEqual({ type: 'system', - content: '❌ File changes:\n📝 src/locked.ts\nPermission denied', + content: '\u274C File changes:\n\u{1F4DD} src/locked.ts\nPermission denied', }); }); @@ -340,7 +356,7 @@ describe('CodexProvider', () => { expect(chunks[0]).toEqual({ type: 'system', - content: '❌ File change failed: Disk full', + content: '\u274C File change failed: Disk full', }); expect(mockLogger.warn).toHaveBeenCalledWith( expect.objectContaining({ status: 'failed' }), @@ -366,7 +382,7 @@ describe('CodexProvider', () => { expect(chunks[0]).toEqual({ type: 'system', - content: '❌ File change failed', + content: '\u274C File change failed', }); }); @@ -397,18 +413,18 @@ describe('CodexProvider', () => { } // First mcp call (in_progress on item.completed): start + empty result - expect(chunks[0]).toEqual({ type: 'tool', toolName: '🔌 MCP: fs/readFile' }); + expect(chunks[0]).toEqual({ type: 'tool', toolName: '\u{1F50C} MCP: fs/readFile' }); expect(chunks[1]).toEqual({ type: 'tool_result', - toolName: '🔌 MCP: fs/readFile', + toolName: '\u{1F50C} MCP: fs/readFile', toolOutput: '', }); // Second mcp call (failed): start + error result so the UI card closes - expect(chunks[2]).toEqual({ type: 'tool', toolName: '🔌 MCP: fs/readFile' }); + expect(chunks[2]).toEqual({ type: 'tool', toolName: '\u{1F50C} MCP: fs/readFile' }); expect(chunks[3]).toEqual({ type: 'tool_result', - toolName: '🔌 MCP: fs/readFile', - toolOutput: '❌ Error: Permission denied', + toolName: '\u{1F50C} MCP: fs/readFile', + toolOutput: '\u274C Error: Permission denied', }); expect(mockLogger.warn).toHaveBeenCalledWith( expect.objectContaining({ server: 'fs', tool: 'readFile' }), @@ -440,19 +456,22 @@ describe('CodexProvider', () => { chunks.push(chunk); } - // Each item now emits start + empty result so the UI cards always close. - expect(chunks[0]).toEqual({ type: 'tool', toolName: '🔌 MCP: readFile' }); + expect(chunks[0]).toEqual({ type: 'tool', toolName: '\u{1F50C} MCP: readFile' }); expect(chunks[1]).toEqual({ type: 'tool_result', - toolName: '🔌 MCP: readFile', + toolName: '\u{1F50C} MCP: readFile', + toolOutput: '', + }); + expect(chunks[2]).toEqual({ type: 'tool', toolName: '\u{1F50C} MCP: fs' }); + expect(chunks[3]).toEqual({ + type: 'tool_result', + toolName: '\u{1F50C} MCP: fs', toolOutput: '', }); - expect(chunks[2]).toEqual({ type: 'tool', toolName: '🔌 MCP: fs' }); - expect(chunks[3]).toEqual({ type: 'tool_result', toolName: '🔌 MCP: fs', toolOutput: '' }); - expect(chunks[4]).toEqual({ type: 'tool', toolName: '🔌 MCP: MCP tool' }); + expect(chunks[4]).toEqual({ type: 'tool', toolName: '\u{1F50C} MCP: MCP tool' }); expect(chunks[5]).toEqual({ type: 'tool_result', - toolName: '🔌 MCP: MCP tool', + toolName: '\u{1F50C} MCP: MCP tool', toolOutput: '', }); }); @@ -473,11 +492,11 @@ describe('CodexProvider', () => { chunks.push(chunk); } - expect(chunks[0]).toEqual({ type: 'tool', toolName: '🔌 MCP: db/query' }); + expect(chunks[0]).toEqual({ type: 'tool', toolName: '\u{1F50C} MCP: db/query' }); expect(chunks[1]).toEqual({ type: 'tool_result', - toolName: '🔌 MCP: db/query', - toolOutput: '❌ Error: MCP tool failed', + toolName: '\u{1F50C} MCP: db/query', + toolOutput: '\u274C Error: MCP tool failed', }); }); @@ -503,12 +522,11 @@ describe('CodexProvider', () => { chunks.push(chunk); } - // Completed MCP calls now emit tool + tool_result so the UI card closes. expect(chunks).toHaveLength(3); - expect(chunks[0]).toEqual({ type: 'tool', toolName: '🔌 MCP: fs/readFile' }); + expect(chunks[0]).toEqual({ type: 'tool', toolName: '\u{1F50C} MCP: fs/readFile' }); expect(chunks[1]).toEqual({ type: 'tool_result', - toolName: '🔌 MCP: fs/readFile', + toolName: '\u{1F50C} MCP: fs/readFile', toolOutput: JSON.stringify([{ type: 'text', text: 'file contents' }]), }); expect(chunks[2]).toEqual({ @@ -525,7 +543,6 @@ describe('CodexProvider', () => { })(), }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test prompt', '/my/workspace')) { // consume } @@ -548,7 +565,6 @@ describe('CodexProvider', () => { })(), }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test prompt', '/workspace', 'existing-thread')) { // consume } @@ -585,7 +601,6 @@ describe('CodexProvider', () => { } expect(mockResumeThread).toHaveBeenCalled(); - // Verify fallback startThread is called with correct config options expect(mockStartThread).toHaveBeenCalledWith( expect.objectContaining({ workingDirectory: '/workspace', @@ -595,7 +610,6 @@ describe('CodexProvider', () => { approvalPolicy: 'never', }) ); - // Verify error was logged expect(mockLogger.error).toHaveBeenCalledWith( { err: resumeError, sessionId: 'bad-thread-id' }, 'resume_thread_failed' @@ -612,19 +626,20 @@ describe('CodexProvider', () => { }); }); - test('passes model and codex options to thread options', async () => { + test('passes model and codex options via assistantConfig to thread options', async () => { mockRunStreamed.mockResolvedValue({ events: (async function* () { yield { type: 'turn.completed', usage: defaultUsage }; })(), }); - // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of client.sendQuery('test prompt', '/workspace', undefined, { model: 'gpt-5.2-codex', - modelReasoningEffort: 'medium', - webSearchMode: 'live', - additionalDirectories: ['/other/repo'], + assistantConfig: { + modelReasoningEffort: 'medium', + webSearchMode: 'live', + additionalDirectories: ['/other/repo'], + }, })) { // consume } @@ -740,13 +755,11 @@ describe('CodexProvider', () => { chunks.push(chunk); } - // Verify item.started logging with correct format expect(mockLogger.debug).toHaveBeenCalledWith( { eventType: 'item.started', itemType: 'command_execution', itemId: 'item-1' }, 'item_started' ); - // Verify item.completed logging includes command context expect(mockLogger.debug).toHaveBeenCalledWith( { eventType: 'item.completed', @@ -771,7 +784,7 @@ describe('CodexProvider', () => { chunks.push(chunk); } - expect(chunks[0]).toEqual({ type: 'system', content: '⚠️ Something went wrong' }); + expect(chunks[0]).toEqual({ type: 'system', content: '\u26A0\uFE0F Something went wrong' }); expect(mockLogger.error).toHaveBeenCalledWith( { message: 'Something went wrong' }, 'stream_error' @@ -818,7 +831,10 @@ describe('CodexProvider', () => { chunks.push(chunk); } - expect(chunks[0]).toEqual({ type: 'system', content: '❌ Turn failed: Rate limit exceeded' }); + expect(chunks[0]).toEqual({ + type: 'system', + content: '\u274C Turn failed: Rate limit exceeded', + }); expect(mockLogger.error).toHaveBeenCalledWith( { errorMessage: 'Rate limit exceeded' }, 'turn_failed' @@ -837,7 +853,10 @@ describe('CodexProvider', () => { chunks.push(chunk); } - expect(chunks[0]).toEqual({ type: 'system', content: '❌ Turn failed: Unknown error' }); + expect(chunks[0]).toEqual({ + type: 'system', + content: '\u274C Turn failed: Unknown error', + }); expect(mockLogger.error).toHaveBeenCalledWith( { errorMessage: 'Unknown error' }, 'turn_failed' @@ -1001,109 +1020,109 @@ describe('CodexProvider', () => { expect(mockRunStreamed).toHaveBeenCalledTimes(1); }); }); - }); - - describe('pre-spawn env leak gate', () => { - let spyFindByDefaultCwd: ReturnType; - let spyFindByPathPrefix: ReturnType; - let spyScan: ReturnType; - beforeEach(() => { - // Restore a working runStreamed default so retry-test bleed doesn't break gate tests - mockRunStreamed.mockResolvedValue({ - events: (async function* () { - yield { type: 'turn.completed', usage: defaultUsage }; - })(), - }); - spyFindByDefaultCwd = spyOn(codebaseDb, 'findCodebaseByDefaultCwd').mockResolvedValue(null); - spyFindByPathPrefix = spyOn(codebaseDb, 'findCodebaseByPathPrefix').mockResolvedValue(null); - spyScan = spyOn(envLeakScanner, 'scanPathForSensitiveKeys').mockReturnValue({ - path: '/workspace', - findings: [], - }); - }); + describe('structured output normalization', () => { + test('populates structuredOutput on result when outputFormat is set and text is valid JSON', async () => { + const jsonPayload = { status: 'ok', count: 42 }; + mockRunStreamed.mockResolvedValueOnce({ + events: (async function* () { + yield { + type: 'item.completed', + item: { type: 'agent_message', id: 'msg-1', text: JSON.stringify(jsonPayload) }, + }; + yield { type: 'turn.completed', usage: defaultUsage }; + })(), + }); - afterEach(() => { - spyFindByDefaultCwd.mockRestore(); - spyFindByPathPrefix.mockRestore(); - spyScan.mockRestore(); - }); + const chunks = []; + for await (const chunk of client.sendQuery('test', '/tmp', undefined, { + outputFormat: { type: 'json_schema', schema: { type: 'object' } }, + })) { + chunks.push(chunk); + } - test('throws EnvLeakError when .env contains sensitive keys and registered codebase has no consent', async () => { - spyFindByDefaultCwd.mockResolvedValueOnce({ - id: 'codebase-1', - allow_env_keys: false, - default_cwd: '/workspace', - }); - spyScan.mockReturnValueOnce({ - path: '/workspace', - findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }], + const resultChunk = chunks.find(c => c.type === 'result'); + expect(resultChunk).toBeDefined(); + expect(resultChunk!.type === 'result' && resultChunk!.structuredOutput).toEqual( + jsonPayload + ); }); - const consumeGenerator = async (): Promise => { - for await (const _ of client.sendQuery('test', '/workspace')) { - // consume + test('yields system warning when outputFormat is set but text is not valid JSON', async () => { + mockRunStreamed.mockResolvedValueOnce({ + events: (async function* () { + yield { + type: 'item.completed', + item: { type: 'agent_message', id: 'msg-1', text: 'not json at all' }, + }; + yield { type: 'turn.completed', usage: defaultUsage }; + })(), + }); + + const chunks = []; + for await (const chunk of client.sendQuery('test', '/tmp', undefined, { + outputFormat: { type: 'json_schema', schema: { type: 'object' } }, + })) { + chunks.push(chunk); } - }; - await expect(consumeGenerator()).rejects.toThrow('Cannot run workflow'); - }); + const systemChunk = chunks.find(c => c.type === 'system'); + expect(systemChunk).toBeDefined(); + expect(systemChunk!.type === 'system' && systemChunk!.content).toContain( + 'Structured output requested but Codex returned non-JSON' + ); - test('skips scan entirely when cwd is not a registered codebase', async () => { - // Both lookups return null (default from beforeEach). Pre-spawn safety net - // is only for registered codebases; unregistered paths go through registerRepoAtPath. - spyScan.mockReturnValue({ - path: '/workspace', - findings: [{ file: '.env', keys: ['ANTHROPIC_API_KEY'] }], + const resultChunk = chunks.find(c => c.type === 'result'); + expect(resultChunk).toBeDefined(); + expect(resultChunk!.type === 'result' && resultChunk!.structuredOutput).toBeUndefined(); }); - const chunks = []; - for await (const chunk of client.sendQuery('test', '/workspace')) { - chunks.push(chunk); - } + test('does not populate structuredOutput when outputFormat is not set', async () => { + mockRunStreamed.mockResolvedValueOnce({ + events: (async function* () { + yield { + type: 'item.completed', + item: { type: 'agent_message', id: 'msg-1', text: '{"valid":"json"}' }, + }; + yield { type: 'turn.completed', usage: defaultUsage }; + })(), + }); - expect(spyScan).not.toHaveBeenCalled(); - }); + const chunks = []; + for await (const chunk of client.sendQuery('test', '/tmp')) { + chunks.push(chunk); + } - test('skips scan when codebase has allow_env_keys: true', async () => { - spyFindByDefaultCwd.mockResolvedValueOnce({ - id: 'codebase-1', - allow_env_keys: true, - default_cwd: '/workspace', + const resultChunk = chunks.find(c => c.type === 'result'); + expect(resultChunk).toBeDefined(); + expect(resultChunk!.type === 'result' && resultChunk!.structuredOutput).toBeUndefined(); }); - const chunks = []; - for await (const chunk of client.sendQuery('test', '/workspace')) { - chunks.push(chunk); - } - - expect(spyScan).not.toHaveBeenCalled(); - }); - - test('proceeds without scanning when cwd has no registered codebase', async () => { - const chunks = []; - for await (const chunk of client.sendQuery('test', '/workspace')) { - chunks.push(chunk); - } + test('handles nodeConfig.output_format path', async () => { + const jsonPayload = { key: 'value' }; + mockRunStreamed.mockResolvedValueOnce({ + events: (async function* () { + yield { + type: 'item.completed', + item: { type: 'agent_message', id: 'msg-1', text: JSON.stringify(jsonPayload) }, + }; + yield { type: 'turn.completed', usage: defaultUsage }; + })(), + }); - expect(spyScan).not.toHaveBeenCalled(); - }); + const chunks = []; + for await (const chunk of client.sendQuery('test', '/tmp', undefined, { + nodeConfig: { output_format: { type: 'object' } }, + })) { + chunks.push(chunk); + } - test('uses prefix lookup for worktree paths when exact match returns null', async () => { - spyFindByDefaultCwd.mockResolvedValueOnce(null); - spyFindByPathPrefix.mockResolvedValueOnce({ - id: 'codebase-1', - allow_env_keys: true, - default_cwd: '/workspace/source', + const resultChunk = chunks.find(c => c.type === 'result'); + expect(resultChunk).toBeDefined(); + expect(resultChunk!.type === 'result' && resultChunk!.structuredOutput).toEqual( + jsonPayload + ); }); - - const chunks = []; - for await (const chunk of client.sendQuery('test', '/workspace/worktrees/feature')) { - chunks.push(chunk); - } - - expect(spyFindByPathPrefix).toHaveBeenCalledWith('/workspace/worktrees/feature'); - expect(spyScan).not.toHaveBeenCalled(); }); }); }); diff --git a/packages/core/src/providers/codex.ts b/packages/providers/src/codex/provider.ts similarity index 68% rename from packages/core/src/providers/codex.ts rename to packages/providers/src/codex/provider.ts index 387d959ce5..996ca33ff6 100644 --- a/packages/core/src/providers/codex.ts +++ b/packages/providers/src/codex/provider.ts @@ -1,9 +1,6 @@ /** * Codex SDK wrapper * Provides async generator interface for streaming Codex responses - * - * With Bun runtime, we can directly import ESM packages without the - * dynamic import workaround that was needed for CommonJS/Node.js. */ import { Codex, @@ -11,17 +8,16 @@ import { type TurnOptions, type TurnCompletedEvent, } from '@openai/codex-sdk'; -import { - type AgentRequestOptions, - type IAgentProvider, - type MessageChunk, - type TokenUsage, +import type { + IAgentProvider, + SendQueryOptions, + MessageChunk, + TokenUsage, + ProviderCapabilities, } from '../types'; +import { parseCodexConfig } from './config'; +import { resolveCodexBinaryPath } from './binary-resolver'; import { createLogger } from '@archon/paths'; -import { scanPathForSensitiveKeys, EnvLeakError } from '../utils/env-leak-scanner'; -import * as codebaseDb from '../db/codebases'; -import { loadConfig } from '../config/config-loader'; -import { resolveCodexBinaryPath } from '../utils/codex-binary-resolver'; /** Lazy-initialized logger (deferred so test mocks can intercept createLogger) */ let cachedLog: ReturnType | undefined; @@ -42,13 +38,10 @@ export function resetCodexSingleton(): void { /** * Get or create Codex SDK instance. - * Async because in compiled binary mode, binary path resolution is async. - * Once initialized, the binary path is fixed for the process lifetime. */ async function getCodex(configCodexBinaryPath?: string): Promise { if (codexInstance) return codexInstance; - // Prevent concurrent initialization race if (!codexInitPromise) { codexInitPromise = (async (): Promise => { const codexPathOverride = await resolveCodexBinaryPath(configCodexBinaryPath); @@ -56,7 +49,6 @@ async function getCodex(configCodexBinaryPath?: string): Promise { codexInstance = instance; return instance; })().catch(err => { - // Clear promise so next call can retry (e.g. after user installs Codex) codexInitPromise = null; throw err; }); @@ -66,19 +58,23 @@ async function getCodex(configCodexBinaryPath?: string): Promise { /** * Build thread options for Codex SDK - * Extracted to avoid duplication across thread creation paths */ -function buildThreadOptions(cwd: string, options?: AgentRequestOptions): ThreadOptions { +function buildThreadOptions( + cwd: string, + model?: string, + assistantConfig?: Record +): ThreadOptions { + const config = parseCodexConfig(assistantConfig ?? {}); return { workingDirectory: cwd, skipGitRepoCheck: true, - sandboxMode: 'danger-full-access', // Full filesystem access (needed for git worktree operations) - networkAccessEnabled: true, // Allow network calls (GitHub CLI, HTTP requests) - approvalPolicy: 'never', // Auto-approve all operations without user confirmation - model: options?.model, - modelReasoningEffort: options?.modelReasoningEffort, - webSearchMode: options?.webSearchMode, - additionalDirectories: options?.additionalDirectories, + sandboxMode: 'danger-full-access', + networkAccessEnabled: true, + approvalPolicy: 'never', + model: model ?? config.model, + modelReasoningEffort: config.modelReasoningEffort, + webSearchMode: config.webSearchMode, + additionalDirectories: config.additionalDirectories, }; } @@ -110,17 +106,9 @@ function buildModelAccessMessage(model?: string): string { return `❌ Model "${selectedModel}" is not available for your account.\n\n${fixLine}\n\n${workflowLine}`; } -/** Max retries for transient failures (3 = 4 total attempts). - * Mirrors ClaudeProvider retry logic — Codex process crashes are similarly intermittent. */ const MAX_SUBPROCESS_RETRIES = 3; - -/** Delay between retries in milliseconds */ const RETRY_BASE_DELAY_MS = 2000; - -/** Patterns indicating rate limiting in error messages */ const RATE_LIMIT_PATTERNS = ['rate limit', 'too many requests', '429', 'overloaded']; - -/** Patterns indicating auth issues in error messages */ const AUTH_PATTERNS = [ 'credit balance', 'unauthorized', @@ -129,8 +117,6 @@ const AUTH_PATTERNS = [ '401', '403', ]; - -/** Patterns indicating a transient process crash (worth retrying) */ const SUBPROCESS_CRASH_PATTERNS = ['exited with code', 'killed', 'signal', 'codex exec']; function classifyCodexError( @@ -156,8 +142,8 @@ function extractUsageFromCodexEvent(event: TurnCompletedEvent): TokenUsage { } /** - * Codex AI agent provider - * Implements generic IAgentProvider interface + * Codex AI agent provider. + * Implements IAgentProvider with Codex SDK integration. */ export class CodexProvider implements IAgentProvider { private readonly retryBaseDelayMs: number; @@ -166,75 +152,56 @@ export class CodexProvider implements IAgentProvider { this.retryBaseDelayMs = options?.retryBaseDelayMs ?? RETRY_BASE_DELAY_MS; } - /** - * Send a query to Codex and stream responses - * @param prompt - User message or prompt - * @param cwd - Working directory for Codex - * @param resumeSessionId - Optional thread ID to resume - */ + getCapabilities(): ProviderCapabilities { + return { + sessionResume: true, + mcp: false, + hooks: false, + skills: false, + toolRestrictions: false, + structuredOutput: true, + envInjection: false, + costControl: false, + effortControl: false, + thinkingControl: false, + fallbackModel: false, + sandbox: false, + }; + } + + // TODO(#1135): Pre-spawn env-leak gate was removed during provider extraction. + // Caller-side enforcement (orchestrator, dag-executor) is tracked in #1135. async *sendQuery( prompt: string, cwd: string, resumeSessionId?: string, - options?: AgentRequestOptions + requestOptions?: SendQueryOptions ): AsyncGenerator { - // Load config once — used for env-leak gate and (on first call) codexBinaryPath resolution. - let mergedConfig: Awaited> | undefined; - try { - mergedConfig = await loadConfig(cwd); - } catch (configErr) { - // Fail-closed: config load failure enforces the env-leak gate (allowTargetRepoKeys stays false) - getLog().warn({ err: configErr, cwd }, 'env_leak_gate.config_load_failed_gate_enforced'); - } - - // Pre-spawn: check for env key leak if codebase is not explicitly consented. - // Use prefix lookup so worktree paths (e.g. .../worktrees/feature-branch) still - // match the registered source cwd (e.g. .../source). - const codebase = - (await codebaseDb.findCodebaseByDefaultCwd(cwd)) ?? - (await codebaseDb.findCodebaseByPathPrefix(cwd)); - if (codebase && !codebase.allow_env_keys) { - // Fail-closed: a config load failure must NOT silently bypass the gate. - const allowTargetRepoKeys = mergedConfig?.allowTargetRepoKeys ?? false; - if (!allowTargetRepoKeys) { - const report = scanPathForSensitiveKeys(cwd); - if (report.findings.length > 0) { - throw new EnvLeakError(report, 'spawn-existing'); - } - } - } + const assistantConfig = requestOptions?.assistantConfig ?? {}; + const codexConfig = parseCodexConfig(assistantConfig); - // Initialize Codex SDK with binary path override (resolved from env/config/vendor). - // In dev mode, resolveCodexBinaryPath returns undefined and the SDK uses node_modules. - // In binary mode, it resolves from env/config/vendor or throws with install instructions. - const codex = await getCodex(mergedConfig?.assistants.codex.codexBinaryPath); - const threadOptions = buildThreadOptions(cwd, options); + // Initialize Codex SDK with binary path override + const codex = await getCodex(codexConfig.codexBinaryPath); + const threadOptions = buildThreadOptions(cwd, requestOptions?.model, assistantConfig); - // Check if already aborted before starting - if (options?.abortSignal?.aborted) { + if (requestOptions?.abortSignal?.aborted) { throw new Error('Query aborted'); } - // Track if we fell back from a failed resume (to notify user) let sessionResumeFailed = false; - - // Get or create thread (synchronous operations!) let thread; if (resumeSessionId) { getLog().debug({ sessionId: resumeSessionId }, 'resuming_thread'); try { - // NOTE: resumeThread is synchronous, not async - // IMPORTANT: Must pass options when resuming! thread = codex.resumeThread(resumeSessionId, threadOptions); } catch (error) { getLog().error({ err: error, sessionId: resumeSessionId }, 'resume_thread_failed'); - // Fall back to creating new thread try { thread = codex.startThread(threadOptions); } catch (startError) { const err = startError as Error; if (isModelAccessError(err.message)) { - throw new Error(buildModelAccessMessage(options?.model)); + throw new Error(buildModelAccessMessage(requestOptions?.model)); } throw new Error(`Codex query failed: ${err.message}`); } @@ -242,19 +209,17 @@ export class CodexProvider implements IAgentProvider { } } else { getLog().debug({ cwd }, 'starting_new_thread'); - // NOTE: startThread is synchronous, not async try { thread = codex.startThread(threadOptions); } catch (error) { const err = error as Error; if (isModelAccessError(err.message)) { - throw new Error(buildModelAccessMessage(options?.model)); + throw new Error(buildModelAccessMessage(requestOptions?.model)); } throw new Error(`Codex query failed: ${err.message}`); } } - // Notify user if session resume failed (don't silently lose context) if (sessionResumeFailed) { yield { type: 'system', @@ -266,12 +231,10 @@ export class CodexProvider implements IAgentProvider { let lastError: Error | undefined; for (let attempt = 0; attempt <= MAX_SUBPROCESS_RETRIES; attempt++) { - // Check abort signal before each attempt - if (options?.abortSignal?.aborted) { + if (requestOptions?.abortSignal?.aborted) { throw new Error('Query aborted'); } - // On retries, create a fresh thread (crashed thread is invalid) if (attempt > 0) { getLog().debug({ cwd, attempt }, 'starting_new_thread'); try { @@ -279,34 +242,38 @@ export class CodexProvider implements IAgentProvider { } catch (startError) { const err = startError as Error; if (isModelAccessError(err.message)) { - throw new Error(buildModelAccessMessage(options?.model)); + throw new Error(buildModelAccessMessage(requestOptions?.model)); } throw new Error(`Codex query failed: ${err.message}`); } } try { - // Build per-turn options (structured output schema, abort signal) const turnOptions: TurnOptions = {}; - if (options?.outputFormat) { - turnOptions.outputSchema = options.outputFormat.schema; + const hasOutputFormat = !!( + requestOptions?.outputFormat ?? requestOptions?.nodeConfig?.output_format + ); + if (requestOptions?.outputFormat) { + turnOptions.outputSchema = requestOptions.outputFormat.schema; + } + // Also check nodeConfig.output_format (workflow path) + if (requestOptions?.nodeConfig?.output_format && !requestOptions?.outputFormat) { + turnOptions.outputSchema = requestOptions.nodeConfig.output_format; } - if (options?.abortSignal) { - turnOptions.signal = options.abortSignal; + // Track accumulated text for structured output normalization + let accumulatedText = ''; + if (requestOptions?.abortSignal) { + turnOptions.signal = requestOptions.abortSignal; } - // Run streamed query (this IS async) const result = await thread.runStreamed(prompt, turnOptions); - // Process streaming events for await (const event of result.events) { - // Check abort signal between events - if (options?.abortSignal?.aborted) { + if (requestOptions?.abortSignal?.aborted) { getLog().info('query_aborted_between_events'); break; } - // Log progress for item.started (visibility fix for Codex appearing to hang) if (event.type === 'item.started') { const item = event.item; getLog().debug( @@ -315,17 +282,14 @@ export class CodexProvider implements IAgentProvider { ); } - // Handle error events if (event.type === 'error') { getLog().error({ message: event.message }, 'stream_error'); - // Don't send MCP timeout errors (they're optional) if (!event.message.includes('MCP client')) { yield { type: 'system', content: `⚠️ ${event.message}` }; } continue; } - // Handle turn failed events if (event.type === 'turn.failed') { const errorObj = event.error as { message?: string } | undefined; const errorMessage = errorObj?.message ?? 'Unknown error'; @@ -337,11 +301,9 @@ export class CodexProvider implements IAgentProvider { break; } - // Handle item.completed events - map to MessageChunk types if (event.type === 'item.completed') { const item = event.item; - // Log progress with context for debugging const logContext: Record = { eventType: event.type, itemType: item.type, @@ -354,17 +316,13 @@ export class CodexProvider implements IAgentProvider { switch (item.type) { case 'agent_message': - // Agent text response if (item.text) { + if (hasOutputFormat) accumulatedText += item.text; yield { type: 'assistant', content: item.text }; } break; case 'command_execution': - // Tool/command execution. The Codex SDK only emits item.completed - // once the command has fully run, so we emit the start + result - // back-to-back to close the UI's tool card immediately. Without - // the paired tool_result, the card spins forever until lock release. if (item.command) { yield { type: 'tool', toolName: item.command }; const exitSuffix = @@ -382,7 +340,6 @@ export class CodexProvider implements IAgentProvider { break; case 'reasoning': - // Agent reasoning/thinking if (item.text) { yield { type: 'thinking', content: item.text }; } @@ -392,7 +349,6 @@ export class CodexProvider implements IAgentProvider { if (item.query) { const searchToolName = `🔍 Searching: ${item.query}`; yield { type: 'tool', toolName: searchToolName }; - // Web search items only fire on completion, so close the card immediately. yield { type: 'tool_result', toolName: searchToolName, toolOutput: '' }; } else { getLog().debug({ itemId: item.id }, 'web_search_missing_query'); @@ -466,13 +422,16 @@ export class CodexProvider implements IAgentProvider { : (item.tool ?? item.server ?? 'MCP tool'); const mcpToolName = `🔌 MCP: ${toolInfo}`; - // Always emit start+result so the UI card closes. item.completed - // fires once the call is final (completed or failed). yield { type: 'tool', toolName: mcpToolName }; if (item.status === 'failed') { getLog().warn( - { server: item.server, tool: item.tool, error: item.error, itemId: item.id }, + { + server: item.server, + tool: item.tool, + error: item.error, + itemId: item.id, + }, 'mcp_tool_call_failed' ); const errMsg = item.error?.message @@ -480,8 +439,6 @@ export class CodexProvider implements IAgentProvider { : '❌ Error: MCP tool failed'; yield { type: 'tool_result', toolName: mcpToolName, toolOutput: errMsg }; } else { - // status === 'completed' (or 'in_progress', which shouldn't reach - // item.completed but is closed defensively). let toolOutput = ''; if (item.result?.content) { if (Array.isArray(item.result.content)) { @@ -502,32 +459,49 @@ export class CodexProvider implements IAgentProvider { } break; } - - // Other item types are ignored (like file edits, etc.) } } - // Handle turn.completed event if (event.type === 'turn.completed') { getLog().debug('turn_completed'); - // Yield result with thread ID for persistence const usage = extractUsageFromCodexEvent(event); + + // Codex returns structured output inline in agent_message text. + // Normalize: parse as JSON and put on structuredOutput so the + // dag-executor can handle all providers uniformly. + let structuredOutput: unknown; + if (hasOutputFormat && accumulatedText) { + try { + structuredOutput = JSON.parse(accumulatedText); + getLog().debug('codex.structured_output_parsed'); + } catch { + getLog().warn( + { outputPreview: accumulatedText.slice(0, 200) }, + 'codex.structured_output_not_json' + ); + yield { + type: 'system', + content: + '⚠️ Structured output requested but Codex returned non-JSON text. ' + + 'Downstream $nodeId.output.field references may not evaluate correctly.', + }; + } + } + yield { type: 'result', sessionId: thread.id ?? undefined, tokens: usage, + ...(structuredOutput !== undefined ? { structuredOutput } : {}), }; - // CRITICAL: Break out of event loop - turn is complete! - // Without this, the loop waits for stream to end (causes 90s timeout) break; } } - return; // Success - exit retry loop + return; } catch (error) { const err = error as Error; - // Don't retry aborted queries - if (options?.abortSignal?.aborted) { + if (requestOptions?.abortSignal?.aborted) { throw new Error('Query aborted'); } @@ -537,19 +511,16 @@ export class CodexProvider implements IAgentProvider { 'query_error' ); - // Model access errors are never retryable if (errorClass === 'model_access') { - throw new Error(buildModelAccessMessage(options?.model)); + throw new Error(buildModelAccessMessage(requestOptions?.model)); } - // Auth errors won't resolve on retry if (errorClass === 'auth') { const enrichedError = new Error(`Codex auth error: ${err.message}`); enrichedError.cause = error; throw enrichedError; } - // Retry transient failures (rate limit, crash) if ( attempt < MAX_SUBPROCESS_RETRIES && (errorClass === 'rate_limit' || errorClass === 'crash') @@ -561,20 +532,15 @@ export class CodexProvider implements IAgentProvider { continue; } - // Final failure - enrich and throw const enrichedError = new Error(`Codex ${errorClass}: ${err.message}`); enrichedError.cause = error; throw enrichedError; } } - // Should not reach here, but handle defensively throw lastError ?? new Error('Codex query failed after retries'); } - /** - * Get the assistant type identifier - */ getType(): string { return 'codex'; } diff --git a/packages/providers/src/errors.ts b/packages/providers/src/errors.ts new file mode 100644 index 0000000000..15849d3c92 --- /dev/null +++ b/packages/providers/src/errors.ts @@ -0,0 +1,14 @@ +/** + * Standardized error for unknown provider types. + * Thrown by getAgentProvider() — all surfaces (CLI, server, orchestrator, workflows) + * get the same error shape and message format. + */ +export class UnknownProviderError extends Error { + constructor( + public readonly requestedProvider: string, + public readonly registeredProviders: string[] + ) { + super(`Unknown provider: '${requestedProvider}'. Available: ${registeredProviders.join(', ')}`); + this.name = 'UnknownProviderError'; + } +} diff --git a/packages/providers/src/factory.test.ts b/packages/providers/src/factory.test.ts new file mode 100644 index 0000000000..fcc62c09a6 --- /dev/null +++ b/packages/providers/src/factory.test.ts @@ -0,0 +1,65 @@ +import { describe, test, expect } from 'bun:test'; +import { getAgentProvider } from './factory'; +import { UnknownProviderError } from './errors'; + +describe('factory', () => { + describe('getAgentProvider', () => { + test('returns ClaudeProvider for claude type', () => { + const provider = getAgentProvider('claude'); + + expect(provider).toBeDefined(); + expect(provider.getType()).toBe('claude'); + expect(typeof provider.sendQuery).toBe('function'); + }); + + test('returns CodexProvider for codex type', () => { + const provider = getAgentProvider('codex'); + + expect(provider).toBeDefined(); + expect(provider.getType()).toBe('codex'); + expect(typeof provider.sendQuery).toBe('function'); + }); + + test('throws UnknownProviderError for unknown type', () => { + expect(() => getAgentProvider('unknown')).toThrow(UnknownProviderError); + expect(() => getAgentProvider('unknown')).toThrow( + "Unknown provider: 'unknown'. Available: claude, codex" + ); + }); + + test('throws UnknownProviderError for empty string', () => { + expect(() => getAgentProvider('')).toThrow(UnknownProviderError); + expect(() => getAgentProvider('')).toThrow("Unknown provider: ''"); + }); + + test('is case sensitive - Claude throws', () => { + expect(() => getAgentProvider('Claude')).toThrow(UnknownProviderError); + expect(() => getAgentProvider('Claude')).toThrow("Unknown provider: 'Claude'"); + }); + + test('each call returns new instance', () => { + const provider1 = getAgentProvider('claude'); + const provider2 = getAgentProvider('claude'); + + // Each call should return a new instance + expect(provider1).not.toBe(provider2); + }); + + test('providers expose getCapabilities', () => { + const claude = getAgentProvider('claude'); + const codex = getAgentProvider('codex'); + + expect(typeof claude.getCapabilities).toBe('function'); + expect(typeof codex.getCapabilities).toBe('function'); + + const claudeCaps = claude.getCapabilities(); + const codexCaps = codex.getCapabilities(); + + // Claude supports more features than Codex + expect(claudeCaps.mcp).toBe(true); + expect(codexCaps.mcp).toBe(false); + expect(claudeCaps.hooks).toBe(true); + expect(codexCaps.hooks).toBe(false); + }); + }); +}); diff --git a/packages/core/src/providers/factory.ts b/packages/providers/src/factory.ts similarity index 63% rename from packages/core/src/providers/factory.ts rename to packages/providers/src/factory.ts index 9e3b60f3bf..836f3edce5 100644 --- a/packages/core/src/providers/factory.ts +++ b/packages/providers/src/factory.ts @@ -2,13 +2,17 @@ * Agent Provider Factory * * Dynamically instantiates the appropriate agent provider based on type string. - * Supports Claude and Codex providers. + * Built-in providers only: Claude and Codex. */ -import type { IAgentProvider } from '../types'; -import { ClaudeProvider } from './claude'; -import { CodexProvider } from './codex'; +import type { IAgentProvider } from './types'; +import { ClaudeProvider } from './claude/provider'; +import { CodexProvider } from './codex/provider'; +import { UnknownProviderError } from './errors'; import { createLogger } from '@archon/paths'; +/** Built-in provider types. */ +const REGISTERED_PROVIDERS = ['claude', 'codex'] as const; + /** Lazy-initialized logger (deferred so test mocks can intercept createLogger) */ let cachedLog: ReturnType | undefined; function getLog(): ReturnType { @@ -17,11 +21,11 @@ function getLog(): ReturnType { } /** - * Get the appropriate agent provider based on type + * Get the appropriate agent provider based on type. * * @param type - Provider type identifier ('claude' or 'codex') * @returns Instantiated agent provider - * @throws Error if provider type is unknown + * @throws UnknownProviderError if provider type is not registered */ export function getAgentProvider(type: string): IAgentProvider { switch (type) { @@ -32,6 +36,6 @@ export function getAgentProvider(type: string): IAgentProvider { getLog().debug({ provider: 'codex' }, 'provider_selected'); return new CodexProvider(); default: - throw new Error(`Unknown provider type: ${type}. Supported types: 'claude', 'codex'`); + throw new UnknownProviderError(type, [...REGISTERED_PROVIDERS]); } } diff --git a/packages/providers/src/index.ts b/packages/providers/src/index.ts new file mode 100644 index 0000000000..b46cb84111 --- /dev/null +++ b/packages/providers/src/index.ts @@ -0,0 +1,31 @@ +// Types (contract layer — re-exported for convenience) +export type { + IAgentProvider, + AgentRequestOptions, + SendQueryOptions, + NodeConfig, + ProviderCapabilities, + MessageChunk, + TokenUsage, +} from './types'; + +// Provider config types (canonical definitions in ./types, re-exported via config modules) +// Import from ./types directly or from the config modules — both work. + +// Factory +export { getAgentProvider } from './factory'; + +// Error +export { UnknownProviderError } from './errors'; + +// Provider classes +export { ClaudeProvider } from './claude/provider'; +export { CodexProvider } from './codex/provider'; + +// Config parsers +export { parseClaudeConfig, type ClaudeProviderDefaults } from './claude/config'; +export { parseCodexConfig, type CodexProviderDefaults } from './codex/config'; + +// Utilities (needed by consumers) +export { resetCodexSingleton } from './codex/provider'; +export { resolveCodexBinaryPath, fileExists } from './codex/binary-resolver'; diff --git a/packages/providers/src/test/mocks/logger.ts b/packages/providers/src/test/mocks/logger.ts new file mode 100644 index 0000000000..79e1198b8a --- /dev/null +++ b/packages/providers/src/test/mocks/logger.ts @@ -0,0 +1,28 @@ +import { mock } from 'bun:test'; +import type { Logger } from 'pino'; + +export interface MockLogger extends Logger { + fatal: ReturnType; + error: ReturnType; + warn: ReturnType; + info: ReturnType; + debug: ReturnType; + trace: ReturnType; + child: ReturnType; +} + +export function createMockLogger(): MockLogger { + const logger = { + fatal: mock(() => undefined), + error: mock(() => undefined), + warn: mock(() => undefined), + info: mock(() => undefined), + debug: mock(() => undefined), + trace: mock(() => undefined), + child: mock(() => logger), + bindings: mock(() => ({ module: 'test' })), + isLevelEnabled: mock(() => true), + level: 'info', + } as unknown as MockLogger; + return logger; +} diff --git a/packages/providers/src/types.ts b/packages/providers/src/types.ts new file mode 100644 index 0000000000..e0f196a500 --- /dev/null +++ b/packages/providers/src/types.ts @@ -0,0 +1,178 @@ +// CONTRACT LAYER — no SDK imports, no runtime deps. +// @archon/workflows and @archon/core import from this subpath (@archon/providers/types). +// HARD RULE: This file must never import SDK packages or other @archon/* packages. + +// ─── Provider Config Defaults ────────────────────────────────────────────── +// Canonical definitions — @archon/core/config/config-types.ts imports from here. +// Single source of truth for provider-specific config shapes. + +export interface ClaudeProviderDefaults { + model?: string; + /** Claude Code settingSources — controls which CLAUDE.md files are loaded. + * @default ['project'] + */ + settingSources?: ('project' | 'user')[]; +} + +export interface CodexProviderDefaults { + model?: string; + /** Structurally matches @archon/workflows ModelReasoningEffort */ + modelReasoningEffort?: 'minimal' | 'low' | 'medium' | 'high' | 'xhigh'; + /** Structurally matches @archon/workflows WebSearchMode */ + webSearchMode?: 'disabled' | 'cached' | 'live'; + additionalDirectories?: string[]; + /** Path to the Codex CLI binary. Overrides auto-detection in compiled Archon builds. */ + codexBinaryPath?: string; +} + +/** + * Token usage statistics from AI provider responses. + */ +export interface TokenUsage { + input: number; + output: number; + total?: number; + cost?: number; +} + +/** + * Message chunk from AI assistant. + * Discriminated union with per-type required fields for type safety. + */ +export type MessageChunk = + | { type: 'assistant'; content: string } + | { type: 'system'; content: string } + | { type: 'thinking'; content: string } + | { + type: 'result'; + sessionId?: string; + tokens?: TokenUsage; + structuredOutput?: unknown; + isError?: boolean; + errorSubtype?: string; + cost?: number; + stopReason?: string; + numTurns?: number; + modelUsage?: Record; + } + | { type: 'rate_limit'; rateLimitInfo: Record } + | { + type: 'tool'; + toolName: string; + toolInput?: Record; + /** Stable per-call ID from the underlying SDK (e.g. Claude `tool_use_id`). + * When present, the platform adapter uses it directly instead of generating + * one — guarantees `tool_call`/`tool_result` pair correctly even when + * multiple tools with the same name run concurrently. */ + toolCallId?: string; + } + | { + type: 'tool_result'; + toolName: string; + toolOutput: string; + /** Matching ID for the originating `tool` chunk. See `tool` variant above. */ + toolCallId?: string; + } + | { type: 'workflow_dispatch'; workerConversationId: string; workflowName: string }; + +/** + * Universal request options accepted by all providers. + * Provider-specific fields go through `nodeConfig` and `assistantConfig` in SendQueryOptions. + */ +export interface AgentRequestOptions { + model?: string; + abortSignal?: AbortSignal; + systemPrompt?: string; + outputFormat?: { type: 'json_schema'; schema: Record }; + env?: Record; + maxBudgetUsd?: number; + fallbackModel?: string; + /** Session fork flag — when true, copies prior session history before appending. */ + forkSession?: boolean; + /** When false, skip writing session transcript to disk. */ + persistSession?: boolean; +} + +/** + * Raw node configuration from workflow YAML. + * Providers translate fields they understand; unknown fields are ignored. + */ +export interface NodeConfig { + mcp?: string; + hooks?: unknown; + skills?: string[]; + allowed_tools?: string[]; + denied_tools?: string[]; + effort?: string; + thinking?: unknown; + sandbox?: unknown; + betas?: string[]; + output_format?: Record; + maxBudgetUsd?: number; + systemPrompt?: string; + fallbackModel?: string; + idle_timeout?: number; + [key: string]: unknown; +} + +/** + * Extended options for sendQuery, adding workflow-specific context. + * The orchestrator path uses base AgentRequestOptions fields only. + * The workflow path additionally passes nodeConfig and assistantConfig. + */ +export interface SendQueryOptions extends AgentRequestOptions { + /** Raw YAML node config — provider translates internally to SDK-specific options. */ + nodeConfig?: NodeConfig; + /** Per-provider defaults from .archon/config.yaml assistants section. */ + assistantConfig?: Record; +} + +/** + * Provider capability flags. The dag-executor uses these for capability warnings + * when a node specifies features the target provider doesn't support. + */ +export interface ProviderCapabilities { + sessionResume: boolean; + mcp: boolean; + hooks: boolean; + skills: boolean; + toolRestrictions: boolean; + structuredOutput: boolean; + envInjection: boolean; + costControl: boolean; + effortControl: boolean; + thinkingControl: boolean; + fallbackModel: boolean; + sandbox: boolean; +} + +/** + * Generic agent provider interface. + * Allows supporting multiple agent providers (Claude, Codex, etc.) + */ +export interface IAgentProvider { + /** + * Send a message and get streaming response. + * @param prompt - User message or prompt + * @param cwd - Working directory for the provider + * @param resumeSessionId - Optional session ID to resume + * @param options - Optional request options (universal + nodeConfig + assistantConfig) + */ + sendQuery( + prompt: string, + cwd: string, + resumeSessionId?: string, + options?: SendQueryOptions + ): AsyncGenerator; + + /** + * Get the provider type identifier (e.g. 'claude', 'codex'). + */ + getType(): string; + + /** + * Get the provider's capability flags. + * Used by the dag-executor to warn when nodes specify unsupported features. + */ + getCapabilities(): ProviderCapabilities; +} diff --git a/packages/providers/tsconfig.json b/packages/providers/tsconfig.json new file mode 100644 index 0000000000..144d879a1c --- /dev/null +++ b/packages/providers/tsconfig.json @@ -0,0 +1,8 @@ +{ + "extends": "../../tsconfig.json", + "compilerOptions": { + "noEmit": true + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist", "**/*.test.ts"] +} diff --git a/packages/server/package.json b/packages/server/package.json index 58fd364c6f..ac5c4b7187 100644 --- a/packages/server/package.json +++ b/packages/server/package.json @@ -15,6 +15,7 @@ "@archon/core": "workspace:*", "@archon/git": "workspace:*", "@archon/paths": "workspace:*", + "@archon/providers": "workspace:*", "@archon/workflows": "workspace:*", "@hono/zod-openapi": "^0.19.6", "dotenv": "^17.2.3", diff --git a/packages/server/src/adapters/web.ts b/packages/server/src/adapters/web.ts index 20570824e3..50d3c0e5f3 100644 --- a/packages/server/src/adapters/web.ts +++ b/packages/server/src/adapters/web.ts @@ -2,7 +2,8 @@ * Web platform adapter implementing IPlatformAdapter with SSE stream management. * Bridge between the orchestrator and the React frontend via Server-Sent Events. */ -import type { IWebPlatformAdapter, MessageChunk, MessageMetadata } from '@archon/core'; +import type { IWebPlatformAdapter, MessageMetadata } from '@archon/core'; +import type { MessageChunk } from '@archon/providers/types'; import { createLogger } from '@archon/paths'; import { MessagePersistence } from './web/persistence'; import { SSETransport, type SSEWriter } from './web/transport'; diff --git a/packages/workflows/package.json b/packages/workflows/package.json index 7126c5ffff..1c0e89514c 100644 --- a/packages/workflows/package.json +++ b/packages/workflows/package.json @@ -25,6 +25,7 @@ "dependencies": { "@archon/git": "workspace:*", "@archon/paths": "workspace:*", + "@archon/providers": "workspace:*", "@hono/zod-openapi": "^0.19.6", "zod": "^3.25.28" }, diff --git a/packages/workflows/src/dag-executor.test.ts b/packages/workflows/src/dag-executor.test.ts index 77beaa3a91..86d00f5e60 100644 --- a/packages/workflows/src/dag-executor.test.ts +++ b/packages/workflows/src/dag-executor.test.ts @@ -31,8 +31,8 @@ import { checkTriggerRule, substituteNodeOutputRefs, executeDagWorkflow, - loadMcpConfig, } from './dag-executor'; +import { loadMcpConfig } from '@archon/providers/claude/provider'; import type { DagNode, BashNode, ScriptNode, NodeOutput, WorkflowRun } from './schemas'; import { discoverWorkflows } from './workflow-discovery'; import { parseWorkflow } from './loader'; @@ -93,6 +93,37 @@ function createMockStore(): IWorkflowStore { }; } +/** All-true capabilities for Claude mock */ +const mockClaudeCapabilities = () => ({ + sessionResume: true, + mcp: true, + hooks: true, + skills: true, + toolRestrictions: true, + structuredOutput: true, + envInjection: true, + costControl: true, + effortControl: true, + thinkingControl: true, + fallbackModel: true, + sandbox: true, +}); +/** Limited capabilities for Codex mock */ +const mockCodexCapabilities = () => ({ + sessionResume: true, + mcp: false, + hooks: false, + skills: false, + toolRestrictions: false, + structuredOutput: true, + envInjection: false, + costControl: false, + effortControl: false, + thinkingControl: false, + fallbackModel: false, + sandbox: false, +}); + /** Mock AI sendQuery generator */ const mockSendQueryDag = mock(function* () { yield { type: 'assistant', content: 'DAG AI response' }; @@ -102,6 +133,7 @@ const mockSendQueryDag = mock(function* () { const mockGetAgentProviderDag = mock(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); function createMockDeps(storeOverride?: IWorkflowStore): WorkflowDeps { @@ -762,6 +794,7 @@ describe('executeDagWorkflow -- tool restrictions', () => { mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); try { await rm(testDir, { recursive: true, force: true }); @@ -796,13 +829,15 @@ describe('executeDagWorkflow -- tool restrictions', () => { expect(mockSendQueryDag.mock.calls.length).toBeGreaterThan(0); const optionsArg = mockSendQueryDag.mock.calls[0][3] as Record; - expect(optionsArg?.tools).toEqual(['Read', 'Grep']); + const nodeConfig = optionsArg?.nodeConfig as Record; + expect(nodeConfig?.allowed_tools).toEqual(['Read', 'Grep']); }); it('warns user when Codex DAG node has denied_tools only', async () => { mockGetAgentProviderDag.mockReturnValue({ sendQuery: mockSendQueryDag, getType: () => 'codex', + getCapabilities: mockCodexCapabilities, }); const mockDeps = createMockDeps(); @@ -832,7 +867,9 @@ describe('executeDagWorkflow -- tool restrictions', () => { const sendMessage = platform.sendMessage as ReturnType; const messages = sendMessage.mock.calls.map((call: unknown[]) => call[1] as string); - const warning = messages.find(m => m.includes('denied_tools') && m.includes('Codex')); + const warning = messages.find( + m => m.includes('allowed_tools/denied_tools') && m.includes('codex') + ); expect(warning).toBeDefined(); }); @@ -859,7 +896,8 @@ describe('executeDagWorkflow -- tool restrictions', () => { expect(mockSendQueryDag.mock.calls.length).toBeGreaterThan(0); const optionsArg = mockSendQueryDag.mock.calls[0][3] as Record; - expect(optionsArg?.tools).toEqual([]); + const nodeConfig = optionsArg?.nodeConfig as Record; + expect(nodeConfig?.allowed_tools).toEqual([]); }); it('passes hooks to sendQuery options for Claude node', async () => { @@ -896,8 +934,9 @@ describe('executeDagWorkflow -- tool restrictions', () => { expect(mockSendQueryDag.mock.calls.length).toBeGreaterThan(0); const optionsArg = mockSendQueryDag.mock.calls[0][3] as Record; - expect(optionsArg?.hooks).toBeDefined(); - const hooks = optionsArg?.hooks as Record; + const nodeConfig = optionsArg?.nodeConfig as Record; + expect(nodeConfig?.hooks).toBeDefined(); + const hooks = nodeConfig?.hooks as Record; expect(hooks.PreToolUse).toHaveLength(1); }); @@ -905,6 +944,7 @@ describe('executeDagWorkflow -- tool restrictions', () => { mockGetAgentProviderDag.mockReturnValue({ sendQuery: mockSendQueryDag, getType: () => 'codex', + getCapabilities: mockCodexCapabilities, }); const mockDeps = createMockDeps(); @@ -941,7 +981,7 @@ describe('executeDagWorkflow -- tool restrictions', () => { const sendMessage = platform.sendMessage as ReturnType; const messages = sendMessage.mock.calls.map((call: unknown[]) => call[1] as string); - const warning = messages.find(m => m.includes('hooks') && m.includes('Codex')); + const warning = messages.find(m => m.includes('hooks') && m.includes('codex')); expect(warning).toBeDefined(); }); }); @@ -964,6 +1004,7 @@ describe('executeDagWorkflow -- bash nodes', () => { mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); }); @@ -1228,6 +1269,7 @@ describe('executeDagWorkflow -- output_format structured output', () => { mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); try { await rm(testDir, { recursive: true, force: true }); @@ -1393,15 +1435,16 @@ describe('executeDagWorkflow -- output_format structured output', () => { }); it('passes outputFormat to Codex nodes and uses inline JSON response', async () => { - // Codex returns structured output inline as agent_message text (no structuredOutput field) + // Codex provider normalizes inline JSON into structuredOutput on the result chunk const classifyJson = { run_code_review: 'true', run_tests: 'false' }; mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'codex', + getCapabilities: mockCodexCapabilities, })); mockSendQueryDag.mockImplementation(function* () { yield { type: 'assistant', content: JSON.stringify(classifyJson) }; - yield { type: 'result', sessionId: 'codex-sid-1' }; + yield { type: 'result', sessionId: 'codex-sid-1', structuredOutput: classifyJson }; }); const mockDeps = createMockDeps(); @@ -1464,14 +1507,15 @@ describe('executeDagWorkflow -- output_format structured output', () => { }); it('does not warn about missing structuredOutput for Codex nodes', async () => { - // Codex returns structured output inline — no structuredOutput field on result + // Codex provider normalizes inline JSON into structuredOutput on the result chunk mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'codex', + getCapabilities: mockCodexCapabilities, })); mockSendQueryDag.mockImplementation(function* () { yield { type: 'assistant', content: '{"status":"ok"}' }; - yield { type: 'result', sessionId: 'codex-sid-2' }; + yield { type: 'result', sessionId: 'codex-sid-2', structuredOutput: { status: 'ok' } }; }); const mockDeps = createMockDeps(); @@ -1528,6 +1572,7 @@ describe('executeDagWorkflow -- when condition parse errors (fail-closed)', () = mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); mockSendQueryDag.mockImplementation(function* () { yield { type: 'assistant', content: 'AI response' }; @@ -1539,6 +1584,7 @@ describe('executeDagWorkflow -- when condition parse errors (fail-closed)', () = mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); try { await rm(testDir, { recursive: true, force: true }); @@ -1656,6 +1702,7 @@ describe('executeDagWorkflow -- node-level retry for transient errors', () => { mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); mockSendQueryDag.mockImplementation(function* () { yield { type: 'assistant', content: 'DAG AI response' }; @@ -1667,6 +1714,7 @@ describe('executeDagWorkflow -- node-level retry for transient errors', () => { mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); try { await rm(testDir, { recursive: true, force: true }); @@ -1845,6 +1893,7 @@ describe('executeDagWorkflow -- tool_called event persistence', () => { mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); }); @@ -1953,6 +2002,7 @@ describe('executeDagWorkflow -- tool_completed event emission', () => { mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); }); @@ -2222,6 +2272,7 @@ describe('executeDagWorkflow -- skills options', () => { mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); try { await rm(testDir, { recursive: true, force: true }); @@ -2256,17 +2307,9 @@ describe('executeDagWorkflow -- skills options', () => { expect(mockSendQueryDag.mock.calls.length).toBeGreaterThan(0); const optionsArg = mockSendQueryDag.mock.calls[0][3] as Record; - // agents contains the agent definition - const agents = optionsArg?.agents as Record>; - expect(agents).toBeDefined(); - expect(agents['dag-node-review']).toBeDefined(); - expect(agents['dag-node-review'].skills).toEqual(['codebase-search', 'test-runner']); - // tools always includes 'Skill' explicitly - expect(agents['dag-node-review'].tools).toEqual(['Skill']); - // agent references the key - expect(optionsArg?.agent).toBe('dag-node-review'); - // allowedTools includes 'Skill' for the parent session - expect(optionsArg?.allowedTools).toContain('Skill'); + const nodeConfig = optionsArg?.nodeConfig as Record; + // skills are passed in nodeConfig — provider translates to agents internally + expect(nodeConfig?.skills).toEqual(['codebase-search', 'test-runner']); }); it('appends Skill to existing allowed_tools list when node has both', async () => { @@ -2302,17 +2345,17 @@ describe('executeDagWorkflow -- skills options', () => { expect(mockSendQueryDag.mock.calls.length).toBeGreaterThan(0); const optionsArg = mockSendQueryDag.mock.calls[0][3] as Record; - const agents = optionsArg?.agents as Record>; - // Agent tools = allowed_tools + Skill - expect(agents['dag-node-review'].tools).toEqual(['Read', 'Grep', 'Skill']); - // Parent session also gets Skill - expect(optionsArg?.allowedTools).toContain('Skill'); + const nodeConfig = optionsArg?.nodeConfig as Record; + // skills and allowed_tools are both in nodeConfig — provider merges internally + expect(nodeConfig?.skills).toEqual(['codebase-search']); + expect(nodeConfig?.allowed_tools).toEqual(['Read', 'Grep']); }); it('warns user when Codex DAG node has skills and does not pass agents', async () => { mockGetAgentProviderDag.mockReturnValue({ sendQuery: mockSendQueryDag, getType: () => 'codex', + getCapabilities: mockCodexCapabilities, }); const mockDeps = createMockDeps(); @@ -2343,15 +2386,8 @@ describe('executeDagWorkflow -- skills options', () => { // Warning sent to user const sendMessage = platform.sendMessage as ReturnType; const messages = sendMessage.mock.calls.map((call: unknown[]) => call[1] as string); - const warning = messages.find(m => m.includes('skills') && m.includes('Codex')); + const warning = messages.find(m => m.includes('skills') && m.includes('codex')); expect(warning).toBeDefined(); - - // No agents/agent passed to Codex sendQuery - if (mockSendQueryDag.mock.calls.length > 0) { - const optionsArg = mockSendQueryDag.mock.calls[0][3] as Record; - expect(optionsArg?.agents).toBeUndefined(); - expect(optionsArg?.agent).toBeUndefined(); - } }); }); @@ -2469,6 +2505,7 @@ describe('executeDagWorkflow -- resume with priorCompletedNodes', () => { mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); try { await rm(testDir, { recursive: true, force: true }); @@ -3583,6 +3620,7 @@ describe('executeDagWorkflow -- break after result (no hang on subprocess exit)' mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); }); @@ -3595,6 +3633,7 @@ describe('executeDagWorkflow -- break after result (no hang on subprocess exit)' mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); try { await rm(testDir, { recursive: true, force: true }); @@ -3705,6 +3744,7 @@ describe('executeDagWorkflow -- terminal node output selection', () => { mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); }); @@ -3716,6 +3756,7 @@ describe('executeDagWorkflow -- terminal node output selection', () => { mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); try { await rm(testDir, { recursive: true, force: true }); @@ -3958,6 +3999,7 @@ describe('executeDagWorkflow -- credit exhaustion', () => { mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); mockSendQueryDag.mockImplementation(function* () { yield { type: 'assistant', content: 'DAG AI response' }; @@ -3978,6 +4020,7 @@ describe('executeDagWorkflow -- credit exhaustion', () => { mockGetAgentProviderDag.mockReturnValue({ sendQuery: creditExhaustedQuery, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, }); const store = createMockStore(); @@ -4029,6 +4072,7 @@ describe('executeDagWorkflow -- approval node', () => { mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); }); @@ -4036,6 +4080,7 @@ describe('executeDagWorkflow -- approval node', () => { mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); try { await rm(testDir, { recursive: true, force: true }); @@ -4336,6 +4381,7 @@ describe('executeDagWorkflow -- env var injection', () => { mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); }); @@ -4343,6 +4389,7 @@ describe('executeDagWorkflow -- env var injection', () => { mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); try { await rm(testDir, { recursive: true, force: true }); @@ -4427,6 +4474,7 @@ describe('executeDagWorkflow -- Claude SDK advanced options', () => { mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); }); @@ -4558,7 +4606,8 @@ describe('executeDagWorkflow -- Claude SDK advanced options', () => { expect(mockSendQueryDag.mock.calls.length).toBeGreaterThan(0); const optionsArg = mockSendQueryDag.mock.calls[0][3] as Record; - expect(optionsArg?.effort).toBe('high'); + const nodeConfig = optionsArg?.nodeConfig as Record; + expect(nodeConfig?.effort).toBe('high'); }); it('per-node effort overrides workflow-level effort', async () => { @@ -4588,13 +4637,15 @@ describe('executeDagWorkflow -- Claude SDK advanced options', () => { expect(mockSendQueryDag.mock.calls.length).toBeGreaterThan(0); const optionsArg = mockSendQueryDag.mock.calls[0][3] as Record; - expect(optionsArg?.effort).toBe('max'); + const nodeConfig = optionsArg?.nodeConfig as Record; + expect(nodeConfig?.effort).toBe('max'); }); it('warns user when Codex node has Claude-only options (effort)', async () => { mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'codex', + getCapabilities: mockCodexCapabilities, })); const mockDeps = createMockDeps(); @@ -4643,6 +4694,7 @@ describe('executeDagWorkflow -- cost tracking', () => { mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); }); @@ -4845,6 +4897,7 @@ describe('executeDagWorkflow -- script nodes', () => { mockGetAgentProviderDag.mockImplementation(() => ({ sendQuery: mockSendQueryDag, getType: () => 'claude', + getCapabilities: mockClaudeCapabilities, })); }); diff --git a/packages/workflows/src/dag-executor.ts b/packages/workflows/src/dag-executor.ts index af86b2e055..993f56162b 100644 --- a/packages/workflows/src/dag-executor.ts +++ b/packages/workflows/src/dag-executor.ts @@ -5,18 +5,21 @@ * Independent nodes within the same layer run concurrently via Promise.allSettled. * Captures all assistant output regardless of streaming mode for $node_id.output substitution. */ -import { readFile } from 'fs/promises'; -import { resolve, isAbsolute } from 'path'; +import { resolve } from 'path'; import { execFileAsync } from '@archon/git'; import { discoverScripts } from './script-discovery'; import type { - WorkflowAgentOptions, IWorkflowPlatform, WorkflowMessageMetadata, - WorkflowTokenUsage, WorkflowConfig, WorkflowDeps, } from './deps'; +import type { + SendQueryOptions, + NodeConfig, + ProviderCapabilities, + TokenUsage, +} from '@archon/providers/types'; import type { DagNode, ApprovalNode, @@ -28,7 +31,6 @@ import type { NodeOutput, TriggerRule, WorkflowRun, - WorkflowNodeHooks, EffortLevel, ThinkingConfig, SandboxSettings, @@ -228,137 +230,16 @@ export function substituteNodeOutputRefs( ); } -/** SDK-compatible hook structure returned by buildSDKHooksFromYAML */ -type SDKHooksMap = NonNullable; - -/** - * Convert declarative YAML hook definitions to SDK HookCallbackMatcher arrays. - * Each YAML matcher's `response` is wrapped in `async () => response`. - */ -export function buildSDKHooksFromYAML(nodeHooks: WorkflowNodeHooks): SDKHooksMap { - const sdkHooks: SDKHooksMap = {}; - - for (const [event, matchers] of Object.entries(nodeHooks)) { - if (!matchers) continue; - sdkHooks[event] = matchers.map(m => ({ - ...(m.matcher ? { matcher: m.matcher } : {}), - hooks: [async (): Promise => m.response], - ...(m.timeout ? { timeout: m.timeout } : {}), - })); - } - - if (Object.keys(sdkHooks).length === 0) { - getLog().warn({ nodeHooksKeys: Object.keys(nodeHooks) }, 'dag.hooks_build_produced_empty_map'); - } - - return sdkHooks; -} - -/** - * Load MCP server config from a JSON file and expand environment variables. - * Format: Record matching the SDK's expected shape. - * $VAR_NAME references in env/headers values are expanded from process.env. - * Secrets are NEVER logged. - */ -export async function loadMcpConfig( - mcpPath: string, - cwd: string -): Promise<{ servers: Record; serverNames: string[]; missingVars: string[] }> { - const fullPath = isAbsolute(mcpPath) ? mcpPath : resolve(cwd, mcpPath); - - let raw: string; - try { - raw = await readFile(fullPath, 'utf-8'); - } catch (err) { - const e = err as NodeJS.ErrnoException; - if (e.code === 'ENOENT') { - throw new Error(`MCP config file not found: ${mcpPath} (resolved to ${fullPath})`); - } - throw new Error(`Failed to read MCP config file: ${mcpPath} — ${e.message}`); - } - - let parsed: Record; - try { - parsed = JSON.parse(raw) as Record; - } catch (parseErr) { - const detail = (parseErr as SyntaxError).message; - throw new Error(`MCP config file is not valid JSON: ${mcpPath} — ${detail}`); - } - - if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) { - throw new Error(`MCP config must be a JSON object (Record): ${mcpPath}`); - } - - const { expanded, missingVars } = expandEnvVars(parsed); - const serverNames = Object.keys(expanded); - - return { servers: expanded, serverNames, missingVars }; -} - -/** - * Expand $VAR_NAME references in a string-valued record from process.env. - * Undefined env vars are replaced with empty string; their names are collected in missingVars. - * Non-string values are coerced to string with a warning. - */ -function expandEnvVarsInRecord( - record: Record, - missingVars: string[] -): Record { - const result: Record = {}; - for (const [key, val] of Object.entries(record)) { - if (typeof val !== 'string') { - getLog().warn({ key, valueType: typeof val }, 'dag.mcp_env_value_coerced_to_string'); - result[key] = String(val); - continue; - } - result[key] = val.replace(/\$([A-Z_][A-Z0-9_]*)/g, (_, varName: string) => { - const envVal = process.env[varName]; - if (envVal === undefined) { - missingVars.push(varName); - } - return envVal ?? ''; - }); - } - return result; -} - -/** - * Expand $VAR_NAME references in 'env' and 'headers' string values from process.env. - * Other fields (command, args, url) are left untouched. - * Undefined env vars are replaced with empty string and collected in missingVars. - */ -function expandEnvVars(config: Record): { - expanded: Record; - missingVars: string[]; -} { - const result: Record = {}; - const missingVars: string[] = []; - for (const [serverName, serverConfig] of Object.entries(config)) { - if (typeof serverConfig !== 'object' || serverConfig === null) { - getLog().warn( - { serverName, valueType: typeof serverConfig }, - 'dag.mcp_server_config_not_object' - ); - continue; - } - const server = { ...(serverConfig as Record) }; - if (server.env && typeof server.env === 'object') { - server.env = expandEnvVarsInRecord(server.env as Record, missingVars); - } - if (server.headers && typeof server.headers === 'object') { - server.headers = expandEnvVarsInRecord( - server.headers as Record, - missingVars - ); - } - result[serverName] = server; - } - return { expanded: result, missingVars }; -} +// buildSDKHooksFromYAML moved to @archon/providers/src/claude/provider.ts +// loadMcpConfig moved to @archon/providers/src/claude/provider.ts /** * Resolve per-node provider and model. * Node-level overrides take precedence over workflow defaults. + * + * Provider-agnostic: builds universal base options + raw nodeConfig. + * The provider internally translates nodeConfig to SDK-specific options. + * Capability warnings inform users when features are unsupported. */ async function resolveNodeProviderAndModel( node: DagNode, @@ -368,12 +249,13 @@ async function resolveNodeProviderAndModel( platform: IWorkflowPlatform, conversationId: string, workflowRunId: string, - cwd: string, - workflowLevelOptions: WorkflowLevelOptions + _cwd: string, + workflowLevelOptions: WorkflowLevelOptions, + deps: WorkflowDeps ): Promise<{ provider: 'claude' | 'codex'; model: string | undefined; - options: WorkflowAgentOptions | undefined; + options: SendQueryOptions | undefined; }> { let provider: 'claude' | 'codex'; @@ -397,225 +279,90 @@ async function resolveNodeProviderAndModel( ); } - // Warn if Codex node has allowed_tools or denied_tools (unsupported per-call) - if ( - provider === 'codex' && - (node.allowed_tools !== undefined || node.denied_tools !== undefined) - ) { - getLog().warn({ nodeId: node.id }, 'dag_node_tool_restrictions_ignored_codex'); - const delivered = await safeSendMessage( - platform, - conversationId, - `Warning: Node '${node.id}' has allowed_tools/denied_tools set but uses Codex — per-node tool restrictions are not supported for Codex. Configure MCP servers globally in the Codex CLI config instead.`, - { workflowId: workflowRunId, nodeName: node.id } - ); - if (!delivered) { - getLog().error({ nodeId: node.id, workflowRunId }, 'dag_node_codex_warning_delivery_failed'); - } - } - - // Warn if Codex node has hooks (unsupported) - if (provider === 'codex' && node.hooks) { - getLog().warn({ nodeId: node.id }, 'dag_node_hooks_ignored_codex'); - const delivered = await safeSendMessage( - platform, - conversationId, - `Warning: Node '${node.id}' has hooks set but uses Codex provider — hooks are Claude-only and will be ignored.`, - { workflowId: workflowRunId, nodeName: node.id } - ); - if (!delivered) { - getLog().error({ nodeId: node.id, workflowRunId }, 'dag_node_hooks_warning_delivery_failed'); + // Get provider capabilities for capability warnings + const aiClient = deps.getAgentProvider(provider); + const caps = aiClient.getCapabilities(); + + // Capability warnings — inform users when features are unsupported + const capChecks: [string, keyof ProviderCapabilities, boolean][] = [ + [ + 'allowed_tools/denied_tools', + 'toolRestrictions', + node.allowed_tools !== undefined || node.denied_tools !== undefined, + ], + ['hooks', 'hooks', node.hooks !== undefined], + ['mcp', 'mcp', node.mcp !== undefined], + ['skills', 'skills', node.skills !== undefined && node.skills.length > 0], + ['effort', 'effortControl', (node.effort ?? workflowLevelOptions.effort) !== undefined], + ['thinking', 'thinkingControl', (node.thinking ?? workflowLevelOptions.thinking) !== undefined], + ['maxBudgetUsd', 'costControl', node.maxBudgetUsd !== undefined], + [ + 'fallbackModel', + 'fallbackModel', + (node.fallbackModel ?? workflowLevelOptions.fallbackModel) !== undefined, + ], + ['sandbox', 'sandbox', (node.sandbox ?? workflowLevelOptions.sandbox) !== undefined], + ]; + + const unsupported: string[] = []; + for (const [field, cap, isSet] of capChecks) { + if (isSet && !caps[cap]) { + unsupported.push(field); } } - // Warn if Codex node has mcp (unsupported per-call) - if (provider === 'codex' && node.mcp) { - getLog().warn({ nodeId: node.id }, 'dag.mcp_ignored_codex'); + if (unsupported.length > 0) { + getLog().warn({ nodeId: node.id, provider, unsupported }, 'dag.unsupported_capabilities'); const delivered = await safeSendMessage( platform, conversationId, - `Warning: Node '${node.id}' has mcp config but uses Codex — per-node MCP servers are not supported for Codex. Configure MCP servers globally in the Codex CLI config instead.`, + `Warning: Node '${node.id}' uses ${unsupported.join(', ')} but ${provider} doesn't support ${unsupported.length === 1 ? 'it' : 'them'} — ${unsupported.length === 1 ? 'this will be' : 'these will be'} ignored.`, { workflowId: workflowRunId, nodeName: node.id } ); if (!delivered) { - getLog().error({ nodeId: node.id, workflowRunId }, 'dag.mcp_warning_delivery_failed'); + getLog().error({ nodeId: node.id, workflowRunId }, 'dag.capability_warning_delivery_failed'); } } - // Warn if Codex node has skills (unsupported) - if (provider === 'codex' && node.skills) { - getLog().warn({ nodeId: node.id }, 'dag.skills_ignored_codex'); - const delivered = await safeSendMessage( - platform, - conversationId, - `Warning: Node '${node.id}' has skills set but uses Codex — per-node skills are not supported for Codex.`, - { workflowId: workflowRunId, nodeName: node.id } - ); - if (!delivered) { - getLog().error({ nodeId: node.id, workflowRunId }, 'dag.skills_warning_delivery_failed'); - } + // Build universal base options + const baseOptions: SendQueryOptions = {}; + if (model) baseOptions.model = model; + if (config.envVars && Object.keys(config.envVars).length > 0) { + baseOptions.env = config.envVars; } - - // Warn if Codex node has Claude-only SDK options (effort, thinking, maxBudgetUsd, systemPrompt, fallbackModel, betas, sandbox) - if (provider === 'codex') { - const claudeOnlyFields = [ - ['effort', node.effort ?? workflowLevelOptions.effort], - ['thinking', node.thinking ?? workflowLevelOptions.thinking], - ['maxBudgetUsd', node.maxBudgetUsd], - ['systemPrompt', node.systemPrompt], - ['fallbackModel', node.fallbackModel ?? workflowLevelOptions.fallbackModel], - ['betas', node.betas ?? workflowLevelOptions.betas], - ['sandbox', node.sandbox ?? workflowLevelOptions.sandbox], - ] as const; - const present = claudeOnlyFields.filter(([, val]) => val !== undefined).map(([name]) => name); - if (present.length > 0) { - getLog().warn({ nodeId: node.id, fields: present }, 'dag.claude_options_ignored_codex'); - const delivered = await safeSendMessage( - platform, - conversationId, - `Warning: Node '${node.id}' has Claude-only options (${present.join(', ')}) but uses Codex — these will be ignored.`, - { workflowId: workflowRunId, nodeName: node.id } - ); - if (!delivered) { - getLog().error( - { nodeId: node.id, workflowRunId }, - 'dag.claude_options_warning_delivery_failed' - ); - } - } + if (node.systemPrompt !== undefined) baseOptions.systemPrompt = node.systemPrompt; + if (node.maxBudgetUsd !== undefined) baseOptions.maxBudgetUsd = node.maxBudgetUsd; + const fb = node.fallbackModel ?? workflowLevelOptions.fallbackModel; + if (fb) baseOptions.fallbackModel = fb; + if (node.output_format) { + baseOptions.outputFormat = { type: 'json_schema', schema: node.output_format }; } - let options: WorkflowAgentOptions | undefined; - if (provider === 'codex') { - options = { - model, - modelReasoningEffort: config.assistants.codex.modelReasoningEffort, - webSearchMode: config.assistants.codex.webSearchMode, - additionalDirectories: config.assistants.codex.additionalDirectories, - }; - if (node.output_format) { - options.outputFormat = { type: 'json_schema', schema: node.output_format }; - } - } else { - const claudeOptions: WorkflowAgentOptions = {}; - if (model) claudeOptions.model = model; - // Propagate settingSources from config (controls which CLAUDE.md files the SDK loads) - if (config.assistants.claude.settingSources) { - claudeOptions.settingSources = config.assistants.claude.settingSources; - } - if (provider === 'claude' && node.output_format) { - claudeOptions.outputFormat = { - type: 'json_schema', - schema: node.output_format, - }; - } - if (node.allowed_tools !== undefined) claudeOptions.tools = node.allowed_tools; - if (node.denied_tools !== undefined) claudeOptions.disallowedTools = node.denied_tools; - if (node.hooks) { - const builtHooks = buildSDKHooksFromYAML(node.hooks); - if (Object.keys(builtHooks).length > 0) claudeOptions.hooks = builtHooks; - } - // Load MCP config if specified - if (node.mcp) { - try { - const { servers, serverNames, missingVars } = await loadMcpConfig(node.mcp, cwd); - // loadMcpConfig returns Record from JSON; cast to the structural - // union type — the SDK validates server configs at connection time - claudeOptions.mcpServers = servers as unknown as WorkflowAgentOptions['mcpServers']; - // Auto-allow all MCP tools via wildcards - const mcpWildcards = serverNames.map(name => `mcp__${name}__*`); - claudeOptions.allowedTools = [...(claudeOptions.allowedTools ?? []), ...mcpWildcards]; - getLog().info({ nodeId: node.id, serverNames, mcpPath: node.mcp }, 'dag.mcp_config_loaded'); - // Warn user about missing env vars (likely secrets that will cause auth failures) - if (missingVars.length > 0) { - const uniqueVars = [...new Set(missingVars)]; - getLog().warn({ nodeId: node.id, missingVars: uniqueVars }, 'dag.mcp_env_vars_missing'); - const delivered = await safeSendMessage( - platform, - conversationId, - `Warning: Node '${node.id}' MCP config references undefined env vars: ${uniqueVars.join(', ')}. These will be empty strings — MCP servers may fail to authenticate.`, - { workflowId: workflowRunId, nodeName: node.id } - ); - if (!delivered) { - getLog().error( - { nodeId: node.id, workflowRunId }, - 'dag.mcp_env_vars_warning_delivery_failed' - ); - } - } - // Warn if Haiku model is used with MCP (tool search not supported) - if (model?.toLowerCase().includes('haiku')) { - getLog().warn({ nodeId: node.id, model }, 'dag.mcp_haiku_tool_search_unsupported'); - const haikuDelivered = await safeSendMessage( - platform, - conversationId, - `Warning: Node '${node.id}' uses Haiku model with MCP servers — tool search (lazy loading for many tools) is not supported on Haiku. Consider using Sonnet or Opus.`, - { workflowId: workflowRunId, nodeName: node.id } - ); - if (!haikuDelivered) { - getLog().error( - { nodeId: node.id, workflowRunId }, - 'dag.mcp_haiku_warning_delivery_failed' - ); - } - } - } catch (mcpErr) { - const errMsg = (mcpErr as Error).message; - getLog().error( - { nodeId: node.id, mcpPath: node.mcp, error: errMsg }, - 'dag.mcp_config_load_failed' - ); - throw new Error(`Node '${node.id}': ${errMsg}`); - } - } - // Wrap node in AgentDefinition when skills are specified - if (node.skills) { - const agentId = `dag-node-${node.id}`; - // Always include 'Skill' explicitly — SDK behavior for undefined tools is undocumented - const agentTools = claudeOptions.tools ? [...claudeOptions.tools, 'Skill'] : ['Skill']; - const agentDef: { - description: string; - prompt: string; - skills: string[]; - tools: string[]; - model?: string; - } = { - description: `DAG node '${node.id}'`, - prompt: `You have preloaded skills: ${node.skills.join(', ')}. Use them when relevant.`, - skills: node.skills, - tools: agentTools, - }; - if (claudeOptions.model) agentDef.model = claudeOptions.model; + // Build raw nodeConfig — provider translates internally + const nodeConfig: NodeConfig = { + mcp: node.mcp, + hooks: node.hooks, + skills: node.skills, + allowed_tools: node.allowed_tools, + denied_tools: node.denied_tools, + effort: node.effort ?? workflowLevelOptions.effort, + thinking: node.thinking ?? workflowLevelOptions.thinking, + sandbox: node.sandbox ?? workflowLevelOptions.sandbox, + betas: node.betas ?? workflowLevelOptions.betas, + output_format: node.output_format, + maxBudgetUsd: node.maxBudgetUsd, + systemPrompt: node.systemPrompt, + fallbackModel: fb, + }; - claudeOptions.agents = { [agentId]: agentDef }; - claudeOptions.agent = agentId; - // Ensure 'Skill' is in allowedTools for the parent session - if (!claudeOptions.allowedTools?.includes('Skill')) { - claudeOptions.allowedTools = [...(claudeOptions.allowedTools ?? []), 'Skill']; - } - getLog().info({ nodeId: node.id, skills: node.skills, agentId }, 'dag.skills_agent_created'); - } - // Inject per-project env vars (config file + DB) into subprocess env - if (config.envVars && Object.keys(config.envVars).length > 0) { - claudeOptions.env = config.envVars; - } + // Pass assistantConfig from config — provider parses internally + const assistantConfig = config.assistants[provider] ?? {}; - // Per-node overrides take precedence over workflow-level defaults; maxBudgetUsd and systemPrompt are per-node only - const effort = node.effort ?? workflowLevelOptions.effort; - if (effort !== undefined) claudeOptions.effort = effort; - const thinking = node.thinking ?? workflowLevelOptions.thinking; - if (thinking !== undefined) claudeOptions.thinking = thinking; - if (node.maxBudgetUsd !== undefined) claudeOptions.maxBudgetUsd = node.maxBudgetUsd; - if (node.systemPrompt !== undefined) claudeOptions.systemPrompt = node.systemPrompt; - const fallbackModel = node.fallbackModel ?? workflowLevelOptions.fallbackModel; - if (fallbackModel !== undefined) claudeOptions.fallbackModel = fallbackModel; - const betas = node.betas ?? workflowLevelOptions.betas; - if (betas !== undefined) claudeOptions.betas = betas; - const sandbox = node.sandbox ?? workflowLevelOptions.sandbox; - if (sandbox !== undefined) claudeOptions.sandbox = sandbox; - - options = Object.keys(claudeOptions).length > 0 ? claudeOptions : undefined; - } + const options: SendQueryOptions = { + ...baseOptions, + nodeConfig, + assistantConfig: assistantConfig as Record, + }; return { provider, model, options }; } @@ -717,7 +464,7 @@ async function executeNodeInternal( workflowRun: WorkflowRun, node: CommandNode | PromptNode, provider: 'claude' | 'codex', - nodeOptions: WorkflowAgentOptions | undefined, + nodeOptions: SendQueryOptions | undefined, artifactsDir: string, logDir: string, baseBranch: string, @@ -825,7 +572,7 @@ async function executeNodeInternal( let nodeOutputText = ''; // Always accumulate regardless of streaming mode let structuredOutput: unknown; let newSessionId: string | undefined; - let nodeTokens: WorkflowTokenUsage | undefined; + let nodeTokens: TokenUsage | undefined; let nodeCostUsd: number | undefined; let nodeStopReason: string | undefined; let nodeNumTurns: number | undefined; @@ -836,7 +583,7 @@ async function executeNodeInternal( const nodeAbortController = new AbortController(); // Fork when resuming — leaves the source session untouched so retries are safe. const shouldForkSession = resumeSessionId !== undefined; - const nodeOptionsWithAbort: WorkflowAgentOptions | undefined = { + const nodeOptionsWithAbort: SendQueryOptions | undefined = { ...nodeOptions, abortSignal: nodeAbortController.signal, ...(shouldForkSession ? { forkSession: true } : {}), @@ -1026,11 +773,16 @@ async function executeNodeInternal( } break; // Result is the "I'm done" signal — don't wait for subprocess to exit } else if (msg.type === 'system' && msg.content) { - // Surface MCP connection failures to the user - if (msg.content.startsWith('MCP server connection failed:')) { + // Forward provider warnings (⚠️) and MCP connection failures to the user. + // Providers yield system chunks for user-actionable issues (missing env vars, + // Haiku+MCP, structured output failures, etc.) + if ( + msg.content.startsWith('MCP server connection failed:') || + msg.content.startsWith('⚠️') + ) { getLog().warn( - { nodeId: node.id, mcpStatus: msg.content }, - 'dag.mcp_server_connection_failed' + { nodeId: node.id, systemContent: msg.content }, + 'dag.provider_warning_forwarded' ); const delivered = await safeSendMessage( platform, @@ -1040,8 +792,8 @@ async function executeNodeInternal( ); if (!delivered) { getLog().error( - { nodeId: node.id, mcpStatus: msg.content, workflowRunId: workflowRun.id }, - 'dag.mcp_connection_failure_delivery_failed' + { nodeId: node.id, workflowRunId: workflowRun.id }, + 'dag.provider_warning_delivery_failed' ); } } else { @@ -1054,8 +806,10 @@ async function executeNodeInternal( // rate_limit chunks: already log.warn'd in claude.ts; not surfaced to SSE per design } - // When output_format is set and the SDK returned structured_output, - // use it instead of the concatenated assistant text (which includes prose) + // When output_format is set and the provider returned structured_output, + // use it instead of the concatenated assistant text (which includes prose). + // Each provider normalizes its own structured output onto the result chunk — + // no provider-specific branching here. if (nodeOptions?.outputFormat) { if (structuredOutput !== undefined) { try { @@ -1070,26 +824,9 @@ async function executeNodeInternal( ); } getLog().debug({ nodeId: node.id, streamingMode }, 'dag.structured_output_override'); - } else if (provider === 'codex') { - // Codex returns structured output inline in agent_message text - // (already accumulated in nodeOutputText). Validate it is valid JSON - // so downstream $nodeId.output.field references can parse it. - try { - JSON.parse(nodeOutputText); - getLog().debug({ nodeId: node.id }, 'dag.codex_structured_output_valid_json'); - } catch { - getLog().warn( - { nodeId: node.id, outputPreview: nodeOutputText.slice(0, 200) }, - 'dag.codex_structured_output_not_json' - ); - await safeSendMessage( - platform, - conversationId, - `Warning: Node '${node.id}' requested output_format but Codex returned non-JSON output. Downstream conditions referencing \`$${node.id}.output.field\` may not evaluate correctly.`, - nodeContext - ); - } } else { + // Provider did not populate structuredOutput — warn the user. + // If the provider detected invalid output, it already yielded a system warning. getLog().warn( { nodeId: node.id, workflowRunId: workflowRun.id }, 'dag.structured_output_missing' @@ -1097,7 +834,7 @@ async function executeNodeInternal( await safeSendMessage( platform, conversationId, - `Warning: Node '${node.id}' requested output_format but the SDK did not return structured output. Downstream conditions may not evaluate correctly.`, + `Warning: Node '${node.id}' requested output_format but the provider did not return structured output. Downstream conditions may not evaluate correctly.`, nodeContext ); } @@ -1663,30 +1400,32 @@ async function executeScriptNode( } /** - * Build WorkflowAgentOptions from resolved provider, model, and config. - * Caller is responsible for resolving per-node overrides before passing model. + * Build SendQueryOptions from resolved provider, model, and config. + * Uses the same nodeConfig + assistantConfig pattern as resolveNodeProviderAndModel. */ function buildLoopNodeOptions( provider: 'claude' | 'codex', model: string | undefined, - config: WorkflowConfig -): WorkflowAgentOptions | undefined { - const codexOptions = - provider === 'codex' - ? { - modelReasoningEffort: config.assistants.codex.modelReasoningEffort, - webSearchMode: config.assistants.codex.webSearchMode, - additionalDirectories: config.assistants.codex.additionalDirectories, - } - : undefined; - - const claudeOptions = - provider === 'claude' && config.assistants.claude.settingSources - ? { settingSources: config.assistants.claude.settingSources } - : undefined; - - if (!model && !codexOptions && !claudeOptions) return undefined; - return { ...(model ? { model } : {}), ...codexOptions, ...claudeOptions }; + config: WorkflowConfig, + workflowLevelOptions?: WorkflowLevelOptions +): SendQueryOptions { + const options: SendQueryOptions = {}; + if (model) options.model = model; + if (config.envVars && Object.keys(config.envVars).length > 0) { + options.env = config.envVars; + } + options.assistantConfig = (config.assistants[provider] ?? {}) as Record; + // Pass workflow-level options as nodeConfig so providers can apply them + if (workflowLevelOptions) { + options.nodeConfig = { + effort: workflowLevelOptions.effort, + thinking: workflowLevelOptions.thinking, + sandbox: workflowLevelOptions.sandbox, + betas: workflowLevelOptions.betas, + fallbackModel: workflowLevelOptions.fallbackModel, + }; + } + return options; } /** @@ -1712,7 +1451,8 @@ async function executeLoopNode( docsDir: string, nodeOutputs: Map, config: WorkflowConfig, - issueContext?: string + issueContext?: string, + workflowLevelOptions?: WorkflowLevelOptions ): Promise { const loop = node.loop; const msgContext = { workflowId: workflowRun.id, nodeName: node.id }; @@ -1745,7 +1485,12 @@ async function executeLoopNode( let loopTotalCostUsd: number | undefined; let loopFinalStopReason: string | undefined; let loopTotalNumTurns: number | undefined; - const resolvedOptions = buildLoopNodeOptions(workflowProvider, workflowModel, config); + const resolvedOptions = buildLoopNodeOptions( + workflowProvider, + workflowModel, + config, + workflowLevelOptions + ); // Helper to log event store errors consistently const logEventStoreError = (err: Error, iteration: number): void => { @@ -1817,7 +1562,7 @@ async function executeLoopNode( ); const finalPrompt = substituteNodeOutputRefs(substitutedPrompt, nodeOutputs); - const iterationOptions: WorkflowAgentOptions | undefined = { + const iterationOptions: SendQueryOptions | undefined = { ...resolvedOptions, abortSignal: iterationAbortController.signal, }; @@ -2283,7 +2028,8 @@ async function executeApprovalNode( conversationId, workflowRun.id, cwd, - workflowLevelOptions + workflowLevelOptions, + deps ); const output = await executeNodeInternal( @@ -2643,7 +2389,8 @@ export async function executeDagWorkflow( docsDir, nodeOutputs, config, - issueContext + issueContext, + workflowLevelOptions ); return { nodeId: node.id, output }; } @@ -2733,7 +2480,8 @@ export async function executeDagWorkflow( conversationId, workflowRun.id, cwd, - workflowLevelOptions + workflowLevelOptions, + deps ); // 5. Determine session — parallel or context:fresh → always fresh diff --git a/packages/workflows/src/deps.ts b/packages/workflows/src/deps.ts index f4aa79197e..171c653be7 100644 --- a/packages/workflows/src/deps.ts +++ b/packages/workflows/src/deps.ts @@ -3,50 +3,37 @@ * * Defines narrow interfaces for what the workflow engine needs from external systems. * Callers in @archon/core satisfy these structurally — no adapter wrappers needed. + * + * Provider types are imported directly from @archon/providers/types (contract layer). + * No more mirror copies — single source of truth for IAgentProvider, MessageChunk, etc. */ import type { IWorkflowStore } from './store'; +import type { ModelReasoningEffort, WebSearchMode } from './schemas'; import type { - ModelReasoningEffort, - WebSearchMode, - EffortLevel, - ThinkingConfig, - SandboxSettings, -} from './schemas'; + IAgentProvider, + MessageChunk, + TokenUsage, + SendQueryOptions, + NodeConfig, + ProviderCapabilities, +} from '@archon/providers/types'; -// --------------------------------------------------------------------------- -// Workflow-local type copies — structurally identical to the originals in -// @archon/core/types, but duplicated here to avoid a circular dependency -// (@archon/workflows must not depend on @archon/core). -// Keep these in sync with their counterparts if the originals change. -// --------------------------------------------------------------------------- +// Re-export provider types so existing workflow engine consumers don't break +export type { + IAgentProvider, + MessageChunk, + TokenUsage, + SendQueryOptions, + NodeConfig, + ProviderCapabilities, +}; -export interface WorkflowTokenUsage { - input: number; - output: number; - total?: number; - cost?: number; -} +// Backwards compat alias — deprecated, prefer direct import from @archon/providers/types +export type WorkflowTokenUsage = TokenUsage; -export type WorkflowMessageChunk = - | { type: 'assistant'; content: string } - | { type: 'system'; content: string } - | { type: 'thinking'; content: string } - | { - type: 'result'; - sessionId?: string; - tokens?: WorkflowTokenUsage; - structuredOutput?: unknown; - isError?: boolean; - errorSubtype?: string; - cost?: number; - stopReason?: string; - numTurns?: number; - modelUsage?: Record; - } - | { type: 'rate_limit'; rateLimitInfo: Record } - | { type: 'tool'; toolName: string; toolInput?: Record } - | { type: 'tool_result'; toolName: string; toolOutput: string } - | { type: 'workflow_dispatch'; workerConversationId: string; workflowName: string }; +// --------------------------------------------------------------------------- +// Platform-specific types (NOT mirrors — unique to workflow engine) +// --------------------------------------------------------------------------- export interface WorkflowMessageMetadata { category?: @@ -60,144 +47,8 @@ export interface WorkflowMessageMetadata { workflowResult?: { workflowName: string; runId: string }; } -export interface WorkflowAgentOptions { - model?: string; - modelReasoningEffort?: ModelReasoningEffort; - webSearchMode?: WebSearchMode; - additionalDirectories?: string[]; - /** - * Controls which CLAUDE.md files the SDK loads. - * Mirrors Claude Agent SDK Options.settingSources. - * Claude only — ignored for Codex. - */ - settingSources?: ('project' | 'user')[]; - tools?: string[]; - disallowedTools?: string[]; - outputFormat?: { type: 'json_schema'; schema: Record }; - /** - * SDK hooks callbacks. Structural match for Partial>. - * Inline type avoids @archon/workflows depending on @anthropic-ai/claude-agent-sdk. - * Claude only — ignored for Codex. - */ - hooks?: Partial< - Record< - string, - { - matcher?: string; - hooks: (( - input: unknown, - toolUseID: string | undefined, - options: { signal: AbortSignal } - ) => Promise)[]; - timeout?: number; - }[] - > - >; - /** - * MCP server configuration. Structural match for Record. - * Discriminated union mirrors the SDK types so that WorkflowAgentOptions is - * assignable to AgentRequestOptions without casts. - * @archon/workflows must not depend on @anthropic-ai/claude-agent-sdk. - * Claude only — ignored for Codex. - */ - mcpServers?: Record< - string, - | { type?: 'stdio'; command: string; args?: string[]; env?: Record } - | { type: 'sse'; url: string; headers?: Record } - | { type: 'http'; url: string; headers?: Record } - >; - /** - * Tools to auto-allow without permission prompts. - * Used for MCP tool wildcards (e.g., 'mcp__github__*'). - * Claude only — ignored for Codex. - */ - allowedTools?: string[]; - /** - * Custom subagent definitions. Structural match for Record. - * Used when a DAG node has skills — the node is wrapped in an AgentDefinition. - * @archon/workflows must not depend on @anthropic-ai/claude-agent-sdk. - * Claude only — ignored for Codex. - */ - agents?: Record< - string, - { - description: string; - prompt: string; - tools?: string[]; - model?: string; - skills?: string[]; - } - >; - /** - * Name of the agent definition to use for the main thread. - * References a key in `agents`. Claude only. - */ - agent?: string; - /** - * Additional env vars to merge into the Claude subprocess environment. - * Merged after buildSubprocessEnv() (auth tokens conditionally filtered): { ...buildSubprocessEnv(), ...env }. - * Claude only — ignored for Codex (Codex SDK does not expose env injection). - */ - env?: Record; - abortSignal?: AbortSignal; - /** - * When false (default), skips writing session transcript to ~/.claude/projects/. - * Claude Agent SDK v0.2.74+. The SDK default is true, but Archon overrides it to false - * to avoid disk pollution. Set to true only when session persistence is explicitly needed. - */ - persistSession?: boolean; - /** - * When true, the SDK copies the prior session's history into a new session file - * before appending, leaving the original untouched. Use with `resume` to safely - * preserve conversation context without risk of corrupting the source session. - * Claude only — ignored for Codex. - */ - forkSession?: boolean; - /** - * Controls reasoning depth for Claude. Claude only — ignored for Codex. - * Maps to SDK Options.effort. - */ - effort?: EffortLevel; - /** - * Controls Claude's thinking/reasoning behavior. Claude only — ignored for Codex. - * Maps to SDK Options.thinking (ThinkingConfig). - * String shorthand is resolved at the schema level before reaching here. - */ - thinking?: ThinkingConfig; - /** - * Maximum USD cost for this node. SDK returns error_max_budget_usd if exceeded. - * Claude only — ignored for Codex. - */ - maxBudgetUsd?: number; - /** - * Per-node system prompt override. Replaces the default claude_code preset. - * Claude only — ignored for Codex. - */ - systemPrompt?: string; - /** - * Fallback model if primary model fails. Claude only — ignored for Codex. - */ - fallbackModel?: string; - /** - * SDK beta features to enable (e.g., 'context-1m-2025-08-07'). - * Claude only — ignored for Codex. - */ - betas?: string[]; - /** - * OS-level sandbox restrictions for the Claude subprocess. - * Layers on top of worktree isolation — NOT a replacement for it. - * Claude only — ignored for Codex. - * Structural match for SDK SandboxSettings. - */ - sandbox?: SandboxSettings; -} - // --------------------------------------------------------------------------- // Narrow platform interface (subset of IPlatformAdapter) -// -// Intentionally excludes ensureThread(), start(), and stop() — the workflow -// engine operates within an already-established conversation context and -// never manages platform lifecycle or threading itself. // --------------------------------------------------------------------------- export interface IWorkflowPlatform { @@ -208,32 +59,12 @@ export interface IWorkflowPlatform { ): Promise; getStreamingMode(): 'stream' | 'batch'; getPlatformType(): string; - sendStructuredEvent?(conversationId: string, event: WorkflowMessageChunk): Promise; + sendStructuredEvent?(conversationId: string, event: MessageChunk): Promise; emitRetract?(conversationId: string): Promise; } -// --------------------------------------------------------------------------- -// Narrow agent provider interface (subset of IAgentProvider) -// --------------------------------------------------------------------------- - -export interface IWorkflowAgentProvider { - sendQuery( - prompt: string, - cwd: string, - resumeSessionId?: string, - options?: WorkflowAgentOptions - ): AsyncGenerator; - getType(): string; -} - -export type AgentProviderFactory = (provider: 'claude' | 'codex') => IWorkflowAgentProvider; - // --------------------------------------------------------------------------- // Narrow config interface (subset of MergedConfig) -// -// Only includes fields the workflow engine actually reads. Platform-level -// concerns (streaming modes, concurrency, botName, paths, copyDefaults) are -// deliberately excluded — those are @archon/core's responsibility. // --------------------------------------------------------------------------- export interface WorkflowConfig { @@ -241,10 +72,6 @@ export interface WorkflowConfig { assistant: 'claude' | 'codex'; baseBranch?: string; docsPath?: string; - /** - * Merged per-project env vars (config file + DB). Injected into Options.env on Claude SDK calls. - * Populated by executeWorkflow — loadConfig returns file-based vars; DB vars merged on top after. - */ envVars?: Record; commands: { folder?: string }; defaults?: { @@ -254,7 +81,6 @@ export interface WorkflowConfig { assistants: { claude: { model?: string; - /** Controls which CLAUDE.md files are loaded by the SDK. Claude only. */ settingSources?: ('project' | 'user')[]; }; codex: { @@ -266,6 +92,12 @@ export interface WorkflowConfig { }; } +// --------------------------------------------------------------------------- +// Agent provider factory type +// --------------------------------------------------------------------------- + +export type AgentProviderFactory = (provider: 'claude' | 'codex') => IAgentProvider; + // --------------------------------------------------------------------------- // WorkflowDeps — the single injection point // --------------------------------------------------------------------------- diff --git a/packages/workflows/src/hooks.test.ts b/packages/workflows/src/hooks.test.ts index 6bdaa6085a..eac6076bac 100644 --- a/packages/workflows/src/hooks.test.ts +++ b/packages/workflows/src/hooks.test.ts @@ -1,6 +1,6 @@ import { describe, test, expect } from 'bun:test'; import { parseNodeHooks } from './loader'; -import { buildSDKHooksFromYAML } from './dag-executor'; +import { buildSDKHooksFromYAML } from '@archon/providers/claude/provider'; import type { WorkflowNodeHooks } from './schemas'; import { parseWorkflow } from './loader';