diff --git a/CHANGELOG.md b/CHANGELOG.md index a95096e95b..76259ddb8f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **`$LOOP_PREV_OUTPUT` workflow variable (loop nodes only)** — exposes the previous iteration's cleaned output (after `` tag stripping) to the current iteration's prompt. Empty on the first iteration and on the first iteration after resuming from an interactive approval gate. Enables `fresh_context: true` loops to reference what the prior pass said or did without carrying full session history. (#1367) +### Changed + +- **Provider/model resolution: trust the SDK, drop allow-lists.** Removed `inferProviderFromModel` and `isModelCompatible` entirely. Provider is now resolved via a flat explicit chain — `node.provider ?? workflow.provider ?? config.assistant` — and never inferred from the model string. Model strings pass through to the SDK unchanged; the SDK validates them at request time. Codex's stream loop now matches Claude's contract (every terminal close emits exactly one `result` chunk; `error` events without a recovering `turn.completed` synthesize `result.isError` with subtype `codex_stream_incomplete`; `turn.failed` becomes `codex_turn_failed`). AI nodes that exit the streaming loop with empty assistant text and no structured output now fail loudly with `dag.node_empty_output` instead of completing as silent zero-output successes. Provider-id typos (workflow-level and per-node) are caught at YAML load time. **Migration**: workflows that previously relied on cross-provider model inference (e.g. `model: gpt-5.2-codex` with no `provider:`, expecting Archon to pick `codex` because Claude's allow-list rejected the string) must now set `provider:` explicitly. Workflows that already set both `provider:` and `model:` — and workflows that set only `model:` matching `config.assistant` — keep working unchanged. (#1463) + ### Fixed - **Claude provider crashed in dev mode with `error: unknown option '--no-env-file'`.** The Claude Agent SDK switched from shipping `cli.js` to per-platform native binaries (via optional deps) in the 0.2.x series. Archon's `shouldPassNoEnvFile` predicate kept emitting the Bun-only `--no-env-file` flag in dev mode (when the SDK resolves its bundled binary), which the native binary rejects. Tightened the predicate to only emit the flag for explicitly-configured Bun-runnable JS entry points (`.js`/`.mjs`/`.cjs`). Target-repo `.env` isolation is unchanged — `stripCwdEnv()` at process boot remains the primary guard, and the native Claude binary does not auto-load `.env` from its cwd. (#1461) diff --git a/CLAUDE.md b/CLAUDE.md index 28d337c44e..de588e5987 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -501,10 +501,9 @@ assistants: 3. SDK defaults **Model Validation:** -- Workflows are validated at load time for provider/model compatibility -- Claude models: `sonnet`, `opus`, `haiku`, `claude-*`, `inherit` -- Codex models: Any model except Claude-specific aliases -- Invalid combinations fail workflow loading with clear error messages +- Workflows are validated at load time for provider _identity_ only — `provider:` (workflow-level and per-node) must be a registered provider id, otherwise the YAML is rejected with `Unknown provider ''. Registered: claude, codex, pi`. +- Model strings are NOT validated by Archon. Whatever the user writes in `model:` is forwarded verbatim to the resolved SDK. Vendor SDKs ship new models faster than Archon can update; the SDK and the upstream API are the source of truth for what names exist. +- Provider is resolved via an explicit chain: `node.provider ?? workflow.provider ?? config.assistant`. Model never influences provider selection. ### Running the App in Worktrees diff --git a/packages/docs-web/src/content/docs/book/quick-reference.md b/packages/docs-web/src/content/docs/book/quick-reference.md index a0c34643c3..6275f5487d 100644 --- a/packages/docs-web/src/content/docs/book/quick-reference.md +++ b/packages/docs-web/src/content/docs/book/quick-reference.md @@ -293,7 +293,7 @@ defaults: | `Routing unclear — falling back to archon-assist` | No workflow matched the input | Use an explicit workflow name: `archon workflow run my-workflow "..."` | | `Worktree already exists for branch X` | Prior run left a worktree | Run `archon complete X` or `archon isolation cleanup` | | `Not a git repository` | Running outside a repo | `cd` into a git repo first — workflow and isolation commands require one | -| `Model X is not valid for provider Y` | Provider/model mismatch | Each provider accepts specific models — check the provider's `isModelCompatible` rules. Claude accepts `sonnet`, `opus`, `haiku`, `claude-*`; Codex accepts other models. | +| `Unknown provider 'X'. Registered: claude, codex, pi` | Typo in `provider:` (workflow root or node-level) | Set `provider:` to one of the registered ids. Model strings themselves are not validated at load time — the SDK rejects unknown models at request time. | | `$BASE_BRANCH referenced but could not be detected` | No base branch set and auto-detection failed | Set `worktree.baseBranch` in `.archon/config.yaml` or ensure `main`/`master` exists | | Workflow hangs with no output | Node idle timeout hit | Increase `idle_timeout` on the node (milliseconds) | diff --git a/packages/docs-web/src/content/docs/contributing/adding-a-community-provider.md b/packages/docs-web/src/content/docs/contributing/adding-a-community-provider.md index 4a521a4a8d..ef23e8cd56 100644 --- a/packages/docs-web/src/content/docs/contributing/adding-a-community-provider.md +++ b/packages/docs-web/src/content/docs/contributing/adding-a-community-provider.md @@ -124,7 +124,6 @@ export function registerYourProvider(): void { displayName: 'Your Provider (community)', factory: () => new YourProvider(), capabilities: YOUR_CAPABILITIES, - isModelCompatible: (model) => /* pattern check */, builtIn: false, // ← important: community providers are NOT built-in }); } @@ -147,7 +146,7 @@ Co-locate tests next to your code. The Pi tests use this isolation pattern: - Mock the SDK (`mock.module` at the top of the file, before importing your provider). - Tests that touch `mock.module` are split into separate `bun test` invocations in `packages/providers/package.json` (see existing entries for the Pi files). Bun's `mock.module` is process-global and irreversible — splitting prevents cross-file pollution. -- Registry test (`packages/providers/src/registry.test.ts`): add a `describe` block asserting `builtIn: false`, idempotent registration, and `isModelCompatible` behavior. +- Registry test (`packages/providers/src/registry.test.ts`): add a `describe` block asserting `builtIn: false` and idempotent registration. ### 5. Capability discipline diff --git a/packages/docs-web/src/content/docs/guides/authoring-workflows.md b/packages/docs-web/src/content/docs/guides/authoring-workflows.md index 2e3f4f9e37..408fdb8e90 100644 --- a/packages/docs-web/src/content/docs/guides/authoring-workflows.md +++ b/packages/docs-web/src/content/docs/guides/authoring-workflows.md @@ -602,16 +602,15 @@ provider: claude # Any registered provider (default: from config) model: sonnet # Model override (default: from config assistants.claude.model) ``` -**Claude models:** -- `sonnet` - Fast, balanced (recommended) -- `opus` - Powerful, expensive -- `haiku` - Fast, lightweight -- `claude-*` - Full model IDs (e.g., `claude-3-5-sonnet-20241022`) -- `inherit` - Use model from previous session +**Model strings:** Whatever you write in `model:` is forwarded verbatim to the resolved provider's SDK. Archon doesn't keep an internal allow-list, because vendor SDKs ship new models faster than this doc can. The provider's API decides whether the string is valid at request time. -**Codex models:** -- Any OpenAI model ID (e.g., `gpt-5.3-codex`, `o5-pro`) -- Cannot use Claude model aliases +Common shapes you'll see in practice: + +- **Claude (Anthropic):** family aliases (`sonnet`, `opus`, `haiku`), full model IDs (`claude-opus-4-7`, `claude-3-5-sonnet-20241022`), context-window suffixed forms (`opus[1m]`, `claude-opus-4-7[1m]`), or `inherit` to reuse the previous session's model. +- **Codex (OpenAI):** any OpenAI model ID — `gpt-5.3-codex`, `gpt-5.2`, `o5-pro`, etc. +- **Pi (community):** `/` refs — e.g. `google/gemini-2.5-pro`, `openrouter/qwen/qwen3-coder`. + +If the SDK rejects the string at request time, the node fails loudly with the SDK's error message — Archon never silently re-routes a model from one provider to another based on the string. ### Codex-Specific Options @@ -676,18 +675,19 @@ nodes: **Platforms:** `interactive` only affects the web platform. CLI, Slack, Telegram, and GitHub always run workflows in foreground mode regardless of this setting. -### Model Validation +### Provider Validation -Workflows are validated at load time: -- Provider/model compatibility checked -- Invalid combinations fail with clear error messages -- Validation errors shown in `/workflow list` +Workflows are validated at load time for **provider identity only**: +- Both the workflow-level `provider:` and any per-node `provider:` overrides must name a registered provider (`claude`, `codex`, `pi`). +- Validation errors are shown in `/workflow list`. Example validation error: ``` -Model "sonnet" is not compatible with provider "codex" +Unknown provider 'claud'. Registered: claude, codex, pi ``` +Model strings are not validated at load time — they're forwarded to the SDK as-is and validated by the upstream API at request time. + ### Resource Validation (CLI) To validate that all referenced command files, MCP config files, and skill directories exist on disk, run: diff --git a/packages/docs-web/src/content/docs/reference/architecture.md b/packages/docs-web/src/content/docs/reference/architecture.md index be3dd7639e..1b92153f4f 100644 --- a/packages/docs-web/src/content/docs/reference/architecture.md +++ b/packages/docs-web/src/content/docs/reference/architecture.md @@ -414,7 +414,6 @@ export function registerBuiltinProviders(): void { displayName: 'Your Assistant', factory: () => new YourAssistantProvider(), capabilities: YOUR_ASSISTANT_CAPABILITIES, - isModelCompatible: (model) => /* pattern check */, builtIn: true, }, // ...existing entries diff --git a/packages/providers/src/codex/provider.test.ts b/packages/providers/src/codex/provider.test.ts index 669826ebc3..ffc0dbc119 100644 --- a/packages/providers/src/codex/provider.test.ts +++ b/packages/providers/src/codex/provider.test.ts @@ -870,10 +870,13 @@ describe('CodexProvider', () => { ); }); - test('handles error events', async () => { + test('error events followed by turn.completed yield a clean result (recoverable)', async () => { + // SDK error events that are followed by turn.completed indicate the SDK + // recovered internally. The dropped error message is logged but not + // surfaced \u2014 only one terminal result chunk is yielded. mockRunStreamed.mockResolvedValue({ events: (async function* () { - yield { type: 'error', message: 'Something went wrong' }; + yield { type: 'error', message: 'Transient blip' }; yield { type: 'turn.completed', usage: defaultUsage }; })(), }); @@ -883,14 +886,44 @@ describe('CodexProvider', () => { chunks.push(chunk); } - expect(chunks[0]).toEqual({ type: 'system', content: '\u26A0\uFE0F Something went wrong' }); - expect(mockLogger.error).toHaveBeenCalledWith( - { message: 'Something went wrong' }, - 'stream_error' - ); + expect(chunks).toHaveLength(1); + expect(chunks[0]).toEqual({ + type: 'result', + sessionId: 'new-thread-id', + tokens: { input: 10, output: 5 }, + }); + expect(mockLogger.error).toHaveBeenCalledWith({ message: 'Transient blip' }, 'stream_error'); + }); + + test('error event followed by stream close yields fail-stop result.isError', async () => { + // The SDK sends an error event (e.g. "model not supported") and the + // iterator closes without turn.completed or turn.failed. The provider + // synthesizes a fail-stop result so the dag-executor's msg.isError + // branch catches the failure \u2014 same chunk shape as Claude. + mockRunStreamed.mockResolvedValue({ + events: (async function* () { + yield { type: 'error', message: "'opus[1m]' model is not supported" }; + })(), + }); + + const chunks = []; + for await (const chunk of client.sendQuery('test', '/workspace')) { + chunks.push(chunk); + } + + expect(chunks).toHaveLength(1); + expect(chunks[0]).toEqual({ + type: 'result', + sessionId: 'new-thread-id', + isError: true, + errorSubtype: 'codex_stream_incomplete', + errors: ["'opus[1m]' model is not supported"], + }); }); - test('suppresses MCP timeout errors', async () => { + test('MCP client errors followed by turn.completed yield clean result', async () => { + // MCP client errors are non-fatal \u2014 Codex retries internally. + // Only after turn.completed do we know the SDK recovered. mockRunStreamed.mockResolvedValue({ events: (async function* () { yield { type: 'error', message: 'MCP client connection timeout' }; @@ -903,22 +936,46 @@ describe('CodexProvider', () => { chunks.push(chunk); } - // Should only have the result, not the MCP error expect(chunks).toHaveLength(1); expect(chunks[0]).toEqual({ type: 'result', sessionId: 'new-thread-id', tokens: { input: 10, output: 5 }, }); - - // Error is still logged even though not sent to user + // Logged but not surfaced as failure expect(mockLogger.error).toHaveBeenCalledWith( { message: 'MCP client connection timeout' }, 'stream_error' ); }); - test('handles turn.failed events', async () => { + test('MCP-only error followed by stream close still fails (no terminal = failure)', async () => { + // The stream-incomplete fail-stop fires whenever the iterator closes + // without a terminal event \u2014 that's an SDK contract violation + // regardless of cause. But the captured error message does NOT carry + // the MCP-client text, since MCP errors are filtered from capture. + mockRunStreamed.mockResolvedValue({ + events: (async function* () { + yield { type: 'error', message: 'MCP client transport closed' }; + })(), + }); + + const chunks = []; + for await (const chunk of client.sendQuery('test', '/workspace')) { + chunks.push(chunk); + } + + expect(chunks).toHaveLength(1); + expect(chunks[0]).toMatchObject({ + type: 'result', + isError: true, + errorSubtype: 'codex_stream_incomplete', + }); + const errors = (chunks[0] as { errors?: string[] }).errors; + expect(errors?.[0]).not.toContain('MCP client'); + }); + + test('turn.failed yields result.isError with codex_turn_failed subtype', async () => { mockRunStreamed.mockResolvedValue({ events: (async function* () { yield { type: 'turn.failed', error: { message: 'Rate limit exceeded' } }; @@ -930,9 +987,13 @@ describe('CodexProvider', () => { chunks.push(chunk); } + expect(chunks).toHaveLength(1); expect(chunks[0]).toEqual({ - type: 'system', - content: '\u274C Turn failed: Rate limit exceeded', + type: 'result', + sessionId: 'new-thread-id', + isError: true, + errorSubtype: 'codex_turn_failed', + errors: ['Rate limit exceeded'], }); expect(mockLogger.error).toHaveBeenCalledWith( { errorMessage: 'Rate limit exceeded' }, @@ -940,7 +1001,7 @@ describe('CodexProvider', () => { ); }); - test('handles turn.failed without error message', async () => { + test('turn.failed without error message yields fail-stop with Unknown error', async () => { mockRunStreamed.mockResolvedValue({ events: (async function* () { yield { type: 'turn.failed', error: null }; @@ -952,9 +1013,13 @@ describe('CodexProvider', () => { chunks.push(chunk); } + expect(chunks).toHaveLength(1); expect(chunks[0]).toEqual({ - type: 'system', - content: '\u274C Turn failed: Unknown error', + type: 'result', + sessionId: 'new-thread-id', + isError: true, + errorSubtype: 'codex_turn_failed', + errors: ['Unknown error'], }); expect(mockLogger.error).toHaveBeenCalledWith( { errorMessage: 'Unknown error' }, @@ -962,6 +1027,31 @@ describe('CodexProvider', () => { ); }); + test('iterator that closes with zero events yields codex_stream_incomplete with default message', async () => { + // Bare-stream-close fallback: no error event, no terminal event, + // iterator just ends. Locks in the default message used when there is + // no captured non-MCP error to attribute the failure to. + mockRunStreamed.mockResolvedValue({ + events: (async function* () { + // no events + })(), + }); + + const chunks = []; + for await (const chunk of client.sendQuery('test', '/workspace')) { + chunks.push(chunk); + } + + expect(chunks).toHaveLength(1); + expect(chunks[0]).toEqual({ + type: 'result', + sessionId: 'new-thread-id', + isError: true, + errorSubtype: 'codex_stream_incomplete', + errors: ['Codex stream closed without turn.completed or turn.failed'], + }); + }); + test('throws on runStreamed error', async () => { const networkError = new Error('Network failure'); mockRunStreamed.mockRejectedValue(networkError); diff --git a/packages/providers/src/codex/provider.ts b/packages/providers/src/codex/provider.ts index b9e1d493e9..89a0796b94 100644 --- a/packages/providers/src/codex/provider.ts +++ b/packages/providers/src/codex/provider.ts @@ -196,6 +196,13 @@ async function* streamCodexEvents( const state: CodexStreamState = {}; let accumulatedText = ''; + // If the iterator closes without a terminal event (e.g. the model was + // rejected before the turn even started), we synthesize a fail-stop result + // after the loop so the dag-executor's `msg.isError` branch catches it + // — matching Claude's contract. Both terminal branches below `return`, + // so reaching the post-loop block can only mean no terminal fired. + let lastNonMcpError: string | undefined; + for await (const event of events) { if (abortSignal?.aborted) { getLog().info('query_aborted_between_events'); @@ -213,8 +220,14 @@ async function* streamCodexEvents( if (event.type === 'error') { const errorEvent = event as { message: string }; getLog().error({ message: errorEvent.message }, 'stream_error'); + // MCP client errors are non-fatal — Codex retries internally and may + // still reach turn.completed. Other errors are captured; whether they + // are fatal is decided when the stream terminates: turn.completed + // means the SDK recovered, so the captured error is dropped; loop + // closure without a terminal means the captured error caused the + // stream to abort and is surfaced as the failure cause. if (!errorEvent.message.includes('MCP client')) { - yield { type: 'system', content: `⚠️ ${errorEvent.message}` }; + lastNonMcpError = errorEvent.message; } continue; } @@ -223,8 +236,14 @@ async function* streamCodexEvents( const errorObj = (event as { error?: { message?: string } }).error; const errorMessage = errorObj?.message ?? 'Unknown error'; getLog().error({ errorMessage }, 'turn_failed'); - yield { type: 'system', content: `❌ Turn failed: ${errorMessage}` }; - break; + yield { + type: 'result', + sessionId: threadId ?? undefined, + isError: true, + errorSubtype: 'codex_turn_failed', + errors: [errorMessage], + }; + return; } if (event.type === 'item.completed') { @@ -419,9 +438,27 @@ async function* streamCodexEvents( tokens: usage, ...(structuredOutput !== undefined ? { structuredOutput } : {}), }; - break; + return; } } + + // Reaching here means the iterator closed without yielding turn.completed + // or turn.failed (both branches `return` immediately). Common cause: model + // rejected by the API (model not supported, auth refused) before the turn + // started. Surface as a fail-stop. The dag-executor's `msg.isError` branch + // (dag-executor.ts: throws `Node '' failed: SDK returned `) + // turns this into a thrown node failure — distinct from the empty-output + // guard further down, which returns `{ state: 'failed' }` for AI nodes + // that streamed nothing but never raised an isError. + const message = lastNonMcpError ?? 'Codex stream closed without turn.completed or turn.failed'; + getLog().error({ message }, 'stream_incomplete'); + yield { + type: 'result', + sessionId: threadId ?? undefined, + isError: true, + errorSubtype: 'codex_stream_incomplete', + errors: [message], + }; } // ─── Error Classification & Retry ──────────────────────────────────────── diff --git a/packages/providers/src/community/pi/index.ts b/packages/providers/src/community/pi/index.ts index 5f06e9edaa..ce0a286eda 100644 --- a/packages/providers/src/community/pi/index.ts +++ b/packages/providers/src/community/pi/index.ts @@ -1,5 +1,4 @@ export { PI_CAPABILITIES } from './capabilities'; export { parsePiConfig, type PiProviderDefaults } from './config'; -export { isPiModelCompatible, parsePiModelRef, type PiModelRef } from './model-ref'; export { PiProvider } from './provider'; export { registerPiProvider } from './registration'; diff --git a/packages/providers/src/community/pi/model-ref.test.ts b/packages/providers/src/community/pi/model-ref.test.ts index d0001186e2..2bd093973d 100644 --- a/packages/providers/src/community/pi/model-ref.test.ts +++ b/packages/providers/src/community/pi/model-ref.test.ts @@ -1,6 +1,6 @@ import { describe, expect, test } from 'bun:test'; -import { isPiModelCompatible, parsePiModelRef } from './model-ref'; +import { parsePiModelRef } from './model-ref'; describe('parsePiModelRef', () => { test('parses simple provider/model', () => { @@ -48,21 +48,3 @@ describe('parsePiModelRef', () => { expect(parsePiModelRef('')).toBeUndefined(); }); }); - -describe('isPiModelCompatible', () => { - test('accepts valid provider/model refs', () => { - expect(isPiModelCompatible('google/gemini-2.5-pro')).toBe(true); - expect(isPiModelCompatible('anthropic/claude-opus-4-5')).toBe(true); - expect(isPiModelCompatible('openrouter/qwen/qwen3-coder')).toBe(true); - }); - - test('rejects Claude aliases', () => { - expect(isPiModelCompatible('sonnet')).toBe(false); - expect(isPiModelCompatible('opus')).toBe(false); - expect(isPiModelCompatible('haiku')).toBe(false); - }); - - test('rejects claude-prefixed models without provider', () => { - expect(isPiModelCompatible('claude-sonnet-4')).toBe(false); - }); -}); diff --git a/packages/providers/src/community/pi/model-ref.ts b/packages/providers/src/community/pi/model-ref.ts index 2d67c05fec..3b7fbd66fe 100644 --- a/packages/providers/src/community/pi/model-ref.ts +++ b/packages/providers/src/community/pi/model-ref.ts @@ -30,13 +30,3 @@ export function parsePiModelRef(raw: string): PiModelRef | undefined { return { provider, modelId }; } - -/** - * Registry-level `isModelCompatible` check. - * Syntactic only — Pi's actual model catalog is validated at `sendQuery` time - * via `getModel(provider, modelId)`, which is more trustworthy than keeping - * an Archon-side allowlist in sync. - */ -export function isPiModelCompatible(model: string): boolean { - return parsePiModelRef(model) !== undefined; -} diff --git a/packages/providers/src/community/pi/registration.ts b/packages/providers/src/community/pi/registration.ts index 01c9e5ea0f..dd8447fd5d 100644 --- a/packages/providers/src/community/pi/registration.ts +++ b/packages/providers/src/community/pi/registration.ts @@ -1,7 +1,6 @@ import { isRegisteredProvider, registerProvider } from '../../registry'; import { PI_CAPABILITIES } from './capabilities'; -import { isPiModelCompatible } from './model-ref'; import { PiProvider } from './provider'; /** @@ -20,7 +19,6 @@ export function registerPiProvider(): void { displayName: 'Pi (community)', factory: () => new PiProvider(), capabilities: PI_CAPABILITIES, - isModelCompatible: isPiModelCompatible, builtIn: false, }); } diff --git a/packages/providers/src/registry.test.ts b/packages/providers/src/registry.test.ts index 64b879a91c..ee3e04ee04 100644 --- a/packages/providers/src/registry.test.ts +++ b/packages/providers/src/registry.test.ts @@ -49,7 +49,6 @@ function makeMockRegistration( displayName: `Mock ${id}`, factory: () => makeMockProvider(id), capabilities: makeMockProvider(id).getCapabilities(), - isModelCompatible: () => true, builtIn: false, ...overrides, }; @@ -183,7 +182,6 @@ describe('registry', () => { expect(reg.displayName).toBe('Claude (Anthropic)'); expect(reg.builtIn).toBe(true); expect(typeof reg.factory).toBe('function'); - expect(typeof reg.isModelCompatible).toBe('function'); }); test('throws for unknown provider', () => { @@ -251,27 +249,6 @@ describe('registry', () => { }); }); - describe('built-in model compatibility', () => { - test('Claude registration matches Claude model patterns', () => { - const reg = getRegistration('claude'); - expect(reg.isModelCompatible('sonnet')).toBe(true); - expect(reg.isModelCompatible('opus')).toBe(true); - expect(reg.isModelCompatible('haiku')).toBe(true); - expect(reg.isModelCompatible('inherit')).toBe(true); - expect(reg.isModelCompatible('claude-3.5-sonnet')).toBe(true); - expect(reg.isModelCompatible('gpt-4')).toBe(false); - }); - - test('Codex registration rejects Claude model patterns', () => { - const reg = getRegistration('codex'); - expect(reg.isModelCompatible('sonnet')).toBe(false); - expect(reg.isModelCompatible('claude-3.5-sonnet')).toBe(false); - expect(reg.isModelCompatible('inherit')).toBe(false); - expect(reg.isModelCompatible('gpt-4')).toBe(true); - expect(reg.isModelCompatible('o3-mini')).toBe(true); - }); - }); - describe('registerCommunityProviders (aggregator)', () => { test('registers all bundled community providers', () => { registerCommunityProviders(); @@ -325,17 +302,6 @@ describe('registry', () => { expect(caps.sandbox).toBe(false); }); - test('isModelCompatible accepts provider/model refs, rejects aliases', () => { - registerPiProvider(); - const reg = getRegistration('pi'); - expect(reg.isModelCompatible('google/gemini-2.5-pro')).toBe(true); - expect(reg.isModelCompatible('anthropic/claude-opus-4-5')).toBe(true); - expect(reg.isModelCompatible('openrouter/qwen/qwen3-coder')).toBe(true); - expect(reg.isModelCompatible('sonnet')).toBe(false); - expect(reg.isModelCompatible('claude-3.5-sonnet')).toBe(false); - expect(reg.isModelCompatible('')).toBe(false); - }); - test('appears in getProviderInfoList with builtIn: false', () => { registerPiProvider(); const info = getProviderInfoList().find(p => p.id === 'pi'); diff --git a/packages/providers/src/registry.ts b/packages/providers/src/registry.ts index 1ae16759dc..7006ab4961 100644 --- a/packages/providers/src/registry.ts +++ b/packages/providers/src/registry.ts @@ -83,7 +83,7 @@ export function getRegisteredProviders(): ProviderRegistration[] { } /** - * Get API-safe provider info (excludes factory and isModelCompatible). + * Get API-safe provider info (excludes the factory). */ export function getProviderInfoList(): ProviderInfo[] { return getRegisteredProviders().map(({ id, displayName, capabilities, builtIn }) => ({ @@ -112,10 +112,6 @@ export function registerBuiltinProviders(): void { displayName: 'Claude (Anthropic)', factory: () => new ClaudeProvider(), capabilities: CLAUDE_CAPABILITIES, - isModelCompatible: (model: string): boolean => { - const aliases = ['sonnet', 'opus', 'haiku']; - return aliases.includes(model) || model.startsWith('claude-') || model === 'inherit'; - }, builtIn: true, }, { @@ -123,12 +119,6 @@ export function registerBuiltinProviders(): void { displayName: 'Codex (OpenAI)', factory: () => new CodexProvider(), capabilities: CODEX_CAPABILITIES, - isModelCompatible: (model: string): boolean => { - const claudeAliases = ['sonnet', 'opus', 'haiku']; - return ( - !claudeAliases.includes(model) && !model.startsWith('claude-') && model !== 'inherit' - ); - }, builtIn: true, }, ]; diff --git a/packages/providers/src/types.ts b/packages/providers/src/types.ts index d6cb8b4a87..fe47eff6c4 100644 --- a/packages/providers/src/types.ts +++ b/packages/providers/src/types.ts @@ -265,13 +265,6 @@ export interface ProviderRegistration { /** Static capability declaration — used for dag-executor warnings */ capabilities: ProviderCapabilities; - /** - * Model compatibility check. Returns true if the model string - * is valid for this provider. Used by workflow validation and - * provider inference from model names. - */ - isModelCompatible: (model: string) => boolean; - /** Whether this is a built-in (maintained by core team) or community provider */ builtIn: boolean; } diff --git a/packages/workflows/src/dag-executor.test.ts b/packages/workflows/src/dag-executor.test.ts index 6ae086b3bb..24eee1ff01 100644 --- a/packages/workflows/src/dag-executor.test.ts +++ b/packages/workflows/src/dag-executor.test.ts @@ -4483,17 +4483,21 @@ describe('executeDagWorkflow -- terminal node output selection', () => { expect(result).toBe('Final summary text'); }); - it('returns undefined when the single terminal node produces no output', async () => { + it('fails node when the AI stream closes with no assistant output', async () => { + // Empty assistant output on AI nodes (`command:`/`prompt:`) typically + // indicates a silent provider rejection or stream interruption that + // didn't yield a result.isError chunk. Treat it as a node failure + // rather than a successful empty completion. mockSendQueryDag.mockImplementation(async function* () { - // No assistant content — empty output yield { type: 'result', sessionId: 'sess-empty' }; }); - const mockDeps = createMockDeps(); + const store = createMockStore(); + const mockDeps = createMockDeps(store); const platform = createMockPlatform(); const workflowRun = makeWorkflowRun(); - const result = await executeDagWorkflow( + await executeDagWorkflow( mockDeps, platform, 'conv-dag', @@ -4509,7 +4513,120 @@ describe('executeDagWorkflow -- terminal node output selection', () => { minimalConfig ); - expect(result).toBeUndefined(); + const eventCalls = (store.createWorkflowEvent as ReturnType).mock.calls; + const nodeFailedEvents = eventCalls.filter( + (call: unknown[]) => (call[0] as Record).event_type === 'node_failed' + ); + expect(nodeFailedEvents.length).toBeGreaterThan(0); + const failedData = (nodeFailedEvents[0][0] as Record).data as Record< + string, + unknown + >; + expect(failedData.error).toContain('produced no assistant output'); + // Workflow-level failure must propagate, not just the node event. + expect(store.failWorkflowRun).toHaveBeenCalled(); + }); + + it('does NOT fail node when stream yields no assistant text but a structuredOutput is present', async () => { + // Output-format nodes legitimately produce zero free-form text — the + // useful payload is the structuredOutput field. The empty-output guard + // must spare them. + mockSendQueryDag.mockImplementation(async function* () { + yield { + type: 'result', + sessionId: 'sess-structured', + structuredOutput: { category: 'math' }, + }; + }); + + const store = createMockStore(); + const mockDeps = createMockDeps(store); + const platform = createMockPlatform(); + const workflowRun = makeWorkflowRun(); + + await executeDagWorkflow( + mockDeps, + platform, + 'conv-dag', + testDir, + { + name: 'structured-only-dag', + nodes: [ + { + id: 'classify', + prompt: 'Classify this', + output_format: { type: 'object', properties: {} }, + }, + ], + }, + workflowRun, + 'claude', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + minimalConfig + ); + + const eventCalls = (store.createWorkflowEvent as ReturnType).mock.calls; + const nodeFailedEvents = eventCalls.filter( + (call: unknown[]) => (call[0] as Record).event_type === 'node_failed' + ); + expect(nodeFailedEvents.length).toBe(0); + const nodeCompletedEvents = eventCalls.filter( + (call: unknown[]) => (call[0] as Record).event_type === 'node_completed' + ); + expect(nodeCompletedEvents.length).toBeGreaterThan(0); + }); + + it('fails the run when a node specifies an unknown provider (defense-in-depth at execution time)', async () => { + // Loader-time validation also catches this (loader.ts iterates dagNodes + // after parsing), but the dag-executor's resolveNodeProviderAndModel + // throws as defense-in-depth in case a code path bypasses the loader. + const store = createMockStore(); + const mockDeps = createMockDeps(store); + const platform = createMockPlatform(); + const workflowRun = makeWorkflowRun(); + + await executeDagWorkflow( + mockDeps, + platform, + 'conv-dag', + testDir, + { + name: 'unknown-provider-dag', + nodes: [ + { + id: 'bad', + command: 'my-cmd', + provider: 'claud', // typo + }, + ], + }, + workflowRun, + 'claude', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + minimalConfig + ); + + expect(store.failWorkflowRun).toHaveBeenCalled(); + // The "unknown provider" detail surfaces on the node_failed event; the + // workflow-level fail message is a generic "no successful nodes" summary. + const eventCalls = (store.createWorkflowEvent as ReturnType).mock.calls; + const nodeFailedEvents = eventCalls.filter( + (call: unknown[]) => (call[0] as Record).event_type === 'node_failed' + ); + expect(nodeFailedEvents.length).toBeGreaterThan(0); + const nodeFailedData = (nodeFailedEvents[0][0] as Record).data as Record< + string, + unknown + >; + expect(nodeFailedData.error).toContain("unknown provider 'claud'"); }); it('excludes intermediate nodes with dependents from terminal set (fan-in DAG)', async () => { @@ -5660,6 +5777,7 @@ describe('executeDagWorkflow -- cost tracking', () => { let callCount = 0; mockSendQueryDag.mockImplementation(function* () { callCount++; + yield { type: 'assistant', content: `Step ${String(callCount)} output` }; yield { type: 'result', sessionId: `sid-${String(callCount)}`, cost: 0.001 }; }); @@ -5701,6 +5819,7 @@ describe('executeDagWorkflow -- cost tracking', () => { it('omits total_cost_usd from completeWorkflowRun when no cost yielded', async () => { mockSendQueryDag.mockImplementation(function* () { + yield { type: 'assistant', content: 'Some output' }; yield { type: 'result', sessionId: 'sid-no-cost' }; }); diff --git a/packages/workflows/src/dag-executor.ts b/packages/workflows/src/dag-executor.ts index 3ba9824566..07426427f6 100644 --- a/packages/workflows/src/dag-executor.ts +++ b/packages/workflows/src/dag-executor.ts @@ -21,7 +21,11 @@ import type { ProviderCapabilities, TokenUsage, } from '@archon/providers/types'; -import { getProviderCapabilities } from '@archon/providers'; +import { + getProviderCapabilities, + getRegisteredProviders, + isRegisteredProvider, +} from '@archon/providers'; import type { DagNode, ApprovalNode, @@ -49,7 +53,6 @@ import { formatToolCall } from './utils/tool-formatter'; import { createLogger } from '@archon/paths'; import { getWorkflowEventEmitter } from './event-emitter'; import { evaluateCondition } from './condition-evaluator'; -import { inferProviderFromModel, isModelCompatible } from './model-validation'; import { logNodeStart, logNodeComplete, @@ -341,7 +344,17 @@ async function resolveNodeProviderAndModel( model: string | undefined; options: SendQueryOptions | undefined; }> { - const provider: string = node.provider ?? inferProviderFromModel(node.model, workflowProvider); + // Provider is explicit: node.provider ?? workflow.provider. Model never + // influences provider selection. Model strings pass through to the SDK. + const provider: string = node.provider ?? workflowProvider; + if (!isRegisteredProvider(provider)) { + throw new Error( + `Node '${node.id}': unknown provider '${provider}'. ` + + `Registered: ${getRegisteredProviders() + .map(p => p.id) + .join(', ')}` + ); + } const providerAssistantConfig = config.assistants[provider]; const model: string | undefined = @@ -350,12 +363,6 @@ async function resolveNodeProviderAndModel( ? workflowModel : (providerAssistantConfig?.model as string | undefined)); - if (!isModelCompatible(provider, model)) { - throw new Error( - `Node '${node.id}': model "${model ?? 'default'}" is not compatible with provider "${provider}"` - ); - } - // Get provider capabilities for capability warnings (static lookup, no instantiation) const caps = getProviderCapabilities(provider); @@ -1101,6 +1108,49 @@ async function executeNodeInternal( return { state: 'failed', output: nodeOutputText, error: creditError }; } + // Empty assistant output is a failure for AI nodes — a provider stream + // that closed cleanly with zero content typically means a silent + // rejection or interruption that didn't produce a result.isError chunk. + // Bash/script/approval nodes don't reach this path; they have their + // own dispatch and never stream through this loop. + // + // Idle-timeout exits are exempt: the timeout warning at line 1017 has + // already told the user the node "completed via idle timeout"; flipping + // that to a failure here would directly contradict the on-screen message. + if (nodeOutputText.trim() === '' && structuredOutput === undefined && !nodeIdleTimedOut) { + const duration = Date.now() - nodeStartTime; + const emptyError = `Node '${node.id}' produced no assistant output. The provider stream closed without yielding content — likely a silent provider rejection or stream interruption.`; + getLog().error({ nodeId: node.id, durationMs: duration }, 'dag.node_empty_output'); + await logNodeError(logDir, workflowRun.id, node.id, emptyError); + + deps.store + .createWorkflowEvent({ + workflow_run_id: workflowRun.id, + event_type: 'node_failed', + step_name: node.id, + data: { error: emptyError, duration_ms: duration }, + }) + .catch((err: Error) => { + getLog().error( + { err, workflowRunId: workflowRun.id, eventType: 'node_failed' }, + 'workflow_event_persist_failed' + ); + }); + + emitter.emit({ + type: 'node_failed', + runId: workflowRun.id, + nodeId: node.id, + nodeName: node.command ?? node.id, + error: emptyError, + }); + + lastNodeCancelCheck.delete(`${workflowRun.id}:${node.id}`); + lastNodeActivityUpdate.delete(`${workflowRun.id}:${node.id}`); + + return { state: 'failed', output: '', error: emptyError }; + } + const duration = Date.now() - nodeStartTime; getLog().info({ nodeId: node.id, durationMs: duration }, 'dag_node_completed'); await logNodeComplete(logDir, workflowRun.id, node.id, node.command ?? '', { @@ -1982,6 +2032,52 @@ async function executeLoopNode( ); } + // Empty assistant output is an iteration failure for AI loops — same + // contract as the single-shot AI-node guard in executeNodeInternal. A + // provider stream that closed cleanly with zero content typically means + // a silent rejection or interruption; left unchecked, an interactive + // loop would pause with a blank gate or burn the full max_iterations + // budget producing nothing. Idle-timeout exits are exempt — the + // notification above has already told the user the iteration completed + // via timeout, and flipping that to a failure would contradict it. + if (!iterationIdleTimedOut && fullOutput.trim() === '') { + const iterationDuration = Date.now() - iterationStart; + const emptyError = + 'Loop iteration produced no assistant output. The provider stream closed without yielding content — likely a silent provider rejection or stream interruption.'; + getLog().error( + { nodeId: node.id, iteration: i, durationMs: iterationDuration }, + 'loop_node.iteration_empty_output' + ); + getWorkflowEventEmitter().emit({ + type: 'loop_iteration_failed', + runId: workflowRun.id, + nodeId: node.id, + iteration: i, + error: emptyError, + }); + deps.store + .createWorkflowEvent({ + workflow_run_id: workflowRun.id, + event_type: 'loop_iteration_failed', + step_name: node.id, + data: { + iteration: i, + error: emptyError, + duration: iterationDuration, + nodeId: node.id, + }, + }) + .catch((evtErr: Error) => { + logEventStoreError(evtErr, i); + }); + return { + state: 'failed', + output: '', + error: `Loop iteration ${i} failed: ${emptyError}`, + costUsd: loopTotalCostUsd, + }; + } + // Batch mode: send accumulated output if (platform.getStreamingMode() === 'batch' && cleanOutput) { await safeSendMessage(platform, conversationId, cleanOutput, msgContext); @@ -2610,9 +2706,19 @@ export async function executeDagWorkflow( // 3b. Loop node dispatch — manages its own AI sessions and iteration if (isLoopNode(node)) { - // Resolve per-node provider/model overrides (same logic as other node types) - const loopProvider: string = - node.provider ?? inferProviderFromModel(node.model, workflowProvider); + // Resolve per-node provider/model overrides (same logic as other node types). + // Provider is explicit; model passes through to the SDK. Throw on an + // unknown provider so the outer catch below emits the standard + // node_failed event + user-facing message — the same path + // resolveNodeProviderAndModel uses for non-loop nodes. + const loopProvider: string = node.provider ?? workflowProvider; + if (!isRegisteredProvider(loopProvider)) { + throw new Error( + `Node '${node.id}': unknown provider '${loopProvider}'. Registered: ${getRegisteredProviders() + .map(p => p.id) + .join(', ')}` + ); + } const loopAssistantConfig = config.assistants[loopProvider]; const loopModel: string | undefined = node.model ?? @@ -2620,17 +2726,6 @@ export async function executeDagWorkflow( ? workflowModel : (loopAssistantConfig?.model as string | undefined)); - if (!isModelCompatible(loopProvider, loopModel)) { - return { - nodeId: node.id, - output: { - state: 'failed' as const, - output: '', - error: `Node '${node.id}': model "${loopModel ?? 'default'}" is not compatible with provider "${loopProvider}"`, - }, - }; - } - const output = await executeLoopNode( deps, platform, diff --git a/packages/workflows/src/executor-preamble.test.ts b/packages/workflows/src/executor-preamble.test.ts index 4739770940..a5b16dfb83 100644 --- a/packages/workflows/src/executor-preamble.test.ts +++ b/packages/workflows/src/executor-preamble.test.ts @@ -68,6 +68,14 @@ mock.module('./event-emitter', () => ({ getWorkflowEventEmitter: mock(() => mockEmitter), })); +// --------------------------------------------------------------------------- +// Bootstrap provider registry (executor calls isRegisteredProvider at workflow level) +// --------------------------------------------------------------------------- + +import { registerBuiltinProviders, clearRegistry } from '@archon/providers'; +clearRegistry(); +registerBuiltinProviders(); + // --------------------------------------------------------------------------- // Import after mocks // --------------------------------------------------------------------------- diff --git a/packages/workflows/src/executor.test.ts b/packages/workflows/src/executor.test.ts index 424e09a642..92d9cf5b81 100644 --- a/packages/workflows/src/executor.test.ts +++ b/packages/workflows/src/executor.test.ts @@ -468,10 +468,11 @@ describe('executeWorkflow', () => { expect(mockExecuteDagWorkflow).toHaveBeenCalledTimes(1); }); - it('infers claude provider when workflow sets a claude model alias', async () => { + it('passes workflow.model through unchanged when workflow.provider is unset', async () => { const store = makeStore(); const deps = makeDeps(store); - // config.assistant defaults to 'claude', model 'sonnet' is a claude alias + // Provider falls back to config.assistant ('claude'); model is forwarded + // verbatim. The SDK is the source of truth for what model strings work. await executeWorkflow( deps, makePlatform(), @@ -484,7 +485,26 @@ describe('executeWorkflow', () => { expect(mockExecuteDagWorkflow).toHaveBeenCalledTimes(1); }); - it('throws when model is incompatible with explicit provider', async () => { + it('passes provider+model through to the SDK without re-routing on model name', async () => { + // Provider is explicit; the model string is forwarded verbatim to + // whichever SDK the resolved provider names. A workflow that sets + // provider:codex with a Claude-looking model gets the request handed + // to the codex SDK as-is — the SDK decides whether to accept it. + const store = makeStore(); + const deps = makeDeps(store); + await executeWorkflow( + deps, + makePlatform(), + 'conv-1', + '/tmp', + makeWorkflow({ provider: 'codex', model: 'sonnet' }), + 'test message', + 'db-conv-1' + ); + expect(mockExecuteDagWorkflow).toHaveBeenCalledTimes(1); + }); + + it('throws when workflow.provider is not a registered provider', async () => { const store = makeStore(); const deps = makeDeps(store); await expect( @@ -493,11 +513,11 @@ describe('executeWorkflow', () => { makePlatform(), 'conv-1', '/tmp', - makeWorkflow({ provider: 'codex', model: 'sonnet' }), + makeWorkflow({ provider: 'claud', model: 'sonnet' }), 'test message', 'db-conv-1' ) - ).rejects.toThrow('not compatible'); + ).rejects.toThrow(/unknown provider 'claud'/); }); }); diff --git a/packages/workflows/src/executor.ts b/packages/workflows/src/executor.ts index 99176cbe26..77226621bf 100644 --- a/packages/workflows/src/executor.ts +++ b/packages/workflows/src/executor.ts @@ -13,7 +13,7 @@ import { executeDagWorkflow } from './dag-executor'; import { logWorkflowStart, logWorkflowError } from './logger'; import { formatDuration, parseDbTimestamp } from './utils/duration'; import { getWorkflowEventEmitter } from './event-emitter'; -import { inferProviderFromModel, isModelCompatible } from './model-validation'; +import { isRegisteredProvider, getRegisteredProviders } from '@archon/providers'; import { classifyError } from './executor-shared'; /** Lazy-initialized logger (deferred so test mocks can intercept createLogger) */ @@ -276,29 +276,21 @@ export async function executeWorkflow( const docsDir = config.docsPath ?? 'docs/'; - // Resolve provider and model once (used by all nodes) - // When workflow sets a model but not a provider, infer provider from the model. - // e.g. model: sonnet → provider: claude, even if config.assistant is codex. - let resolvedProvider: string; - let providerSource: string; - if (workflow.provider) { - resolvedProvider = workflow.provider; - providerSource = 'workflow definition'; - } else if (workflow.model) { - resolvedProvider = inferProviderFromModel(workflow.model, config.assistant); - providerSource = 'inferred from workflow model'; - } else { - resolvedProvider = config.assistant; - providerSource = 'config'; - } - const assistantDefaults = config.assistants[resolvedProvider]; - const resolvedModel = workflow.model ?? (assistantDefaults?.model as string | undefined); - if (!isModelCompatible(resolvedProvider, resolvedModel)) { + // Resolve provider and model once (used by all nodes). + // Provider is explicit: node.provider ?? workflow.provider ?? config.assistant. + // Model strings pass through to the SDK as-is — the SDK validates at request time. + const resolvedProvider: string = workflow.provider ?? config.assistant; + const providerSource = workflow.provider ? 'workflow definition' : 'config'; + if (!isRegisteredProvider(resolvedProvider)) { throw new Error( - `Model "${resolvedModel}" is not compatible with provider "${resolvedProvider}". ` + - 'Update your workflow or config.' + `Workflow '${workflow.name}': unknown provider '${resolvedProvider}'. ` + + `Registered: ${getRegisteredProviders() + .map(p => p.id) + .join(', ')}` ); } + const assistantDefaults = config.assistants[resolvedProvider]; + const resolvedModel = workflow.model ?? (assistantDefaults?.model as string | undefined); getLog().info( { diff --git a/packages/workflows/src/loader.test.ts b/packages/workflows/src/loader.test.ts index 7b0be0bebd..219670f6bf 100644 --- a/packages/workflows/src/loader.test.ts +++ b/packages/workflows/src/loader.test.ts @@ -28,7 +28,7 @@ mock.module('@archon/paths', () => ({ createLogger: mock(() => mockLogger), })); -// Bootstrap provider registry (needed by isModelCompatible in dag-node schema) +// Bootstrap provider registry (needed by isRegisteredProvider checks at load time) import { registerBuiltinProviders, clearRegistry } from '@archon/providers'; clearRegistry(); registerBuiltinProviders(); @@ -326,13 +326,13 @@ nodes: expect(workflows[0].provider).toBeUndefined(); }); - it('should treat invalid provider as undefined (executor handles fallback)', async () => { + it('should reject unknown provider at load time', async () => { const workflowDir = join(testDir, '.archon', 'workflows'); await mkdir(workflowDir, { recursive: true }); const yamlInvalidProvider = `name: invalid-provider description: Invalid provider specified -provider: invalid +provider: claud nodes: - id: test command: test @@ -340,33 +340,37 @@ nodes: await writeFile(join(workflowDir, 'test.yaml'), yamlInvalidProvider); const result = await discoverWorkflows(testDir, { loadDefaults: false }); - const workflows = result.workflows.map(ws => ws.workflow); - // Unknown providers are accepted (validated against registry at execution time) - expect(workflows).toHaveLength(1); - expect(workflows[0].provider).toBe('invalid'); + expect(result.workflows).toHaveLength(0); + expect(result.errors).toHaveLength(1); + expect(result.errors[0].errorType).toBe('validation_error'); + expect(result.errors[0].error).toContain("Unknown provider 'claud'"); }); - it('should reject claude model with codex provider at load time', async () => { + it('should accept any model string with a known provider (SDK validates at run time)', async () => { + // Whatever the user wrote in `model:` passes through to the SDK; the + // SDK is the source of truth for what model strings exist. Errors + // surface at run time, not load time. const workflowDir = join(testDir, '.archon', 'workflows'); await mkdir(workflowDir, { recursive: true }); - const invalidYaml = `name: invalid-model -description: Invalid model/provider pairing -provider: codex -model: sonnet + const yaml = `name: any-model +description: Any model string with a known provider +provider: claude +model: claude-opus-4-7[1m] nodes: - id: test command: test `; - await writeFile(join(workflowDir, 'invalid.yaml'), invalidYaml); + await writeFile(join(workflowDir, 'any-model.yaml'), yaml); const result = await discoverWorkflows(testDir, { loadDefaults: false }); + const workflows = result.workflows.map(ws => ws.workflow); - expect(result.workflows).toHaveLength(0); - expect(result.errors).toHaveLength(1); - expect(result.errors[0].errorType).toBe('validation_error'); - expect(result.errors[0].error).toContain('not compatible'); + expect(result.errors).toHaveLength(0); + expect(workflows).toHaveLength(1); + expect(workflows[0].provider).toBe('claude'); + expect(workflows[0].model).toBe('claude-opus-4-7[1m]'); }); it('should parse codex options fields', async () => { diff --git a/packages/workflows/src/loader.ts b/packages/workflows/src/loader.ts index 8b607da74d..3109c680d7 100644 --- a/packages/workflows/src/loader.ts +++ b/packages/workflows/src/loader.ts @@ -4,7 +4,7 @@ import type { WorkflowDefinition, WorkflowLoadError, DagNode, WorkflowNodeHooks } from './schemas'; import { isLoopNode, isApprovalNode, isCancelNode, isScriptNode } from './schemas'; import { createLogger } from '@archon/paths'; -import { isModelCompatible } from './model-validation'; +import { isRegisteredProvider, getRegisteredProviders } from '@archon/providers'; import { dagNodeSchema, BASH_NODE_AI_FIELDS, @@ -277,17 +277,36 @@ export function parseWorkflow(content: string, filename: string): ParseResult { typeof raw.provider === 'string' && raw.provider.length > 0 ? raw.provider : undefined; const model = typeof raw.model === 'string' ? raw.model : undefined; - // Validate model/provider compatibility at workflow level - if (provider && model && !isModelCompatible(provider, model)) { + // Validate provider identity at load time, both at the workflow level and + // per node. Model strings are NOT validated — they pass through to the SDK + // at run time, which is the source of truth for what model names exist + // (vendor SDKs ship new models faster than Archon can update). + if (provider && !isRegisteredProvider(provider)) { return { workflow: null, error: { filename, - error: `Model "${model}" is not compatible with provider "${provider}"`, + error: `Unknown provider '${provider}'. Registered: ${getRegisteredProviders() + .map(p => p.id) + .join(', ')}`, errorType: 'validation_error', }, }; } + for (const node of dagNodes) { + if (node.provider !== undefined && !isRegisteredProvider(node.provider)) { + return { + workflow: null, + error: { + filename, + error: `Node '${node.id}': unknown provider '${node.provider}'. Registered: ${getRegisteredProviders() + .map(p => p.id) + .join(', ')}`, + errorType: 'validation_error', + }, + }; + } + } // Validate modelReasoningEffort — warn and ignore invalid values (preserve original behavior) const modelReasoningEffortResult = modelReasoningEffortSchema.safeParse( diff --git a/packages/workflows/src/model-validation.test.ts b/packages/workflows/src/model-validation.test.ts deleted file mode 100644 index 2247fd7c05..0000000000 --- a/packages/workflows/src/model-validation.test.ts +++ /dev/null @@ -1,80 +0,0 @@ -import { describe, it, expect, beforeAll } from 'bun:test'; -import { registerBuiltinProviders, clearRegistry } from '@archon/providers'; -import { isModelCompatible, inferProviderFromModel } from './model-validation'; - -// Bootstrap registry once for all tests (idempotent) -beforeAll(() => { - clearRegistry(); - registerBuiltinProviders(); -}); - -describe('model-validation (registry-driven)', () => { - describe('isModelCompatible', () => { - it('should accept any model when model is undefined', () => { - expect(isModelCompatible('claude')).toBe(true); - expect(isModelCompatible('codex')).toBe(true); - }); - - it('should accept Claude models with claude provider', () => { - expect(isModelCompatible('claude', 'sonnet')).toBe(true); - expect(isModelCompatible('claude', 'opus')).toBe(true); - expect(isModelCompatible('claude', 'haiku')).toBe(true); - expect(isModelCompatible('claude', 'inherit')).toBe(true); - expect(isModelCompatible('claude', 'claude-opus-4-6')).toBe(true); - }); - - it('should reject non-Claude models with claude provider', () => { - expect(isModelCompatible('claude', 'gpt-5.3-codex')).toBe(false); - expect(isModelCompatible('claude', 'gpt-4')).toBe(false); - }); - - it('should accept Codex/OpenAI models with codex provider', () => { - expect(isModelCompatible('codex', 'gpt-5.3-codex')).toBe(true); - expect(isModelCompatible('codex', 'gpt-5.2-codex')).toBe(true); - expect(isModelCompatible('codex', 'gpt-4')).toBe(true); - expect(isModelCompatible('codex', 'o1-mini')).toBe(true); - }); - - it('should reject Claude models with codex provider', () => { - expect(isModelCompatible('codex', 'sonnet')).toBe(false); - expect(isModelCompatible('codex', 'opus')).toBe(false); - expect(isModelCompatible('codex', 'claude-opus-4-6')).toBe(false); - }); - - it('should handle empty string model', () => { - // Empty string is falsy, so treated as "no model specified" - expect(isModelCompatible('claude', '')).toBe(true); - expect(isModelCompatible('codex', '')).toBe(true); - }); - - it('should throw on unknown providers (fail-fast)', () => { - expect(() => isModelCompatible('my-llm', 'any-model')).toThrow(/Unknown provider 'my-llm'/); - }); - }); - - describe('inferProviderFromModel', () => { - it('should return default when model is undefined', () => { - expect(inferProviderFromModel(undefined, 'claude')).toBe('claude'); - expect(inferProviderFromModel(undefined, 'codex')).toBe('codex'); - }); - - it('should return default when model is empty string', () => { - expect(inferProviderFromModel('', 'claude')).toBe('claude'); - expect(inferProviderFromModel('', 'codex')).toBe('codex'); - }); - - it('should infer claude from Claude model names', () => { - expect(inferProviderFromModel('sonnet', 'codex')).toBe('claude'); - expect(inferProviderFromModel('opus', 'codex')).toBe('claude'); - expect(inferProviderFromModel('haiku', 'codex')).toBe('claude'); - expect(inferProviderFromModel('inherit', 'codex')).toBe('claude'); - expect(inferProviderFromModel('claude-opus-4-6', 'codex')).toBe('claude'); - }); - - it('should infer codex from non-Claude model names', () => { - expect(inferProviderFromModel('gpt-5.3-codex', 'claude')).toBe('codex'); - expect(inferProviderFromModel('gpt-4', 'claude')).toBe('codex'); - expect(inferProviderFromModel('o1-mini', 'claude')).toBe('codex'); - }); - }); -}); diff --git a/packages/workflows/src/model-validation.ts b/packages/workflows/src/model-validation.ts deleted file mode 100644 index 0140defce5..0000000000 --- a/packages/workflows/src/model-validation.ts +++ /dev/null @@ -1,41 +0,0 @@ -/** - * Registry-driven model validation. - * - * All provider/model compatibility checks delegate to ProviderRegistration entries - * in the provider registry. No hardcoded provider knowledge lives here. - */ -import { getRegistration, getRegisteredProviders, isRegisteredProvider } from '@archon/providers'; - -/** - * Infer provider from a model name by iterating BUILT-IN registrations only. - * Community providers must be selected explicitly via `provider:` in YAML. - * - * Returns undefined if no built-in provider matches (caller falls back to config default). - */ -export function inferProviderFromModel(model: string | undefined, defaultProvider: string): string { - if (!model) return defaultProvider; - - for (const reg of getRegisteredProviders()) { - if (reg.builtIn && reg.isModelCompatible(model)) return reg.id; - } - - // No built-in matched — fall back to default - return defaultProvider; -} - -/** - * Check if a model is compatible with a provider using the registry. - * Returns true if no model is specified (any provider accepts no-model). - * Throws on unknown providers (fail-fast — matches getProviderCapabilities behavior). - */ -export function isModelCompatible(provider: string, model?: string): boolean { - if (!model) return true; - if (!isRegisteredProvider(provider)) { - throw new Error( - `Unknown provider '${provider}'. Registered providers: ${getRegisteredProviders() - .map(p => p.id) - .join(', ')}` - ); - } - return getRegistration(provider).isModelCompatible(model); -} diff --git a/packages/workflows/src/schemas/dag-node.ts b/packages/workflows/src/schemas/dag-node.ts index d41c6270c3..794f14ea78 100644 --- a/packages/workflows/src/schemas/dag-node.ts +++ b/packages/workflows/src/schemas/dag-node.ts @@ -15,7 +15,6 @@ import { stepRetryConfigSchema } from './retry'; import { loopNodeConfigSchema } from './loop'; import { workflowNodeHooksSchema } from './hooks'; import { isValidCommandName } from '../command-validation'; -import { isModelCompatible } from '../model-validation'; // --------------------------------------------------------------------------- // TriggerRule @@ -365,10 +364,13 @@ export const LOOP_NODE_AI_FIELDS: readonly string[] = BASH_NODE_AI_FIELDS.filter * - Non-empty id * - Exactly one of command/prompt/bash/loop (mutual exclusivity) * - command name validity (via isValidCommandName) - * - Model/provider compatibility (via isModelCompatible) * - idle_timeout must be a finite positive number * - retry not allowed on loop nodes * - timeout on bash must be positive + * + * Note: provider identity is validated in loader.ts (workflow-level) and + * dag-executor.ts (node-level). Model strings are passed through to the SDK + * unchanged — the SDK is the source of truth for what model names exist. */ export const dagNodeSchema = dagNodeBaseSchema .extend({ @@ -522,24 +524,6 @@ export const dagNodeSchema = dagNodeBaseSchema path: ['idle_timeout'], }); } - - // Provider/model compatibility (AI nodes only) - if (!hasBash && !hasLoop && !hasScript && data.provider && data.model) { - try { - if (!isModelCompatible(data.provider, data.model)) { - ctx.addIssue({ - code: z.ZodIssueCode.custom, - message: `model "${data.model}" is not compatible with provider "${data.provider}"`, - }); - } - } catch (e) { - // isModelCompatible throws on unknown providers — surface as a validation issue - ctx.addIssue({ - code: z.ZodIssueCode.custom, - message: (e as Error).message, - }); - } - } }) .transform((data): DagNode => { const id = data.id.trim();