From dfbb4acd43fb980e741f7fbaffc52b8513a64454 Mon Sep 17 00:00:00 2001 From: Cole Medin Date: Sat, 18 Apr 2026 12:19:43 -0500 Subject: [PATCH] fix(workflows): fail loudly on SDK isError results (#1208) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, `dag-executor` only failed nodes/iterations when the SDK returned an `error_max_budget_usd` result. Every other `isError: true` subtype — including `error_during_execution` — was silently `break`ed out of the stream with whatever partial output had accumulated, letting failed runs masquerade as successful ones with empty output. This is the most likely explanation for the "5-second crash" symptom in #1208: iterations finish instantly with empty text, the loop keeps going, and only the `claude.result_is_error` log tips the user off. Changes: - Capture the SDK's `errors: string[]` detail on result messages (previously discarded) and surface it through `MessageChunk.errors`. - Log `errors`, `stopReason` alongside `errorSubtype` in `claude.result_is_error` so users can see what actually failed. - Throw from both the general node path and the loop iteration path on any `isError: true` result, including the subtype and SDK errors detail in the thrown message. Note: this does not implement auto-retry. See PR comments on #1121 and the analysis on #1208 — a retry-with-fresh-session approach for loop iterations is not obviously correct until we see what `error_during_execution` actually carries in the reporter's env. This change is the observability + fail-loud step that has to come first so that signal is no longer silent. Co-Authored-By: Claude Opus 4.7 --- packages/providers/src/claude/provider.ts | 10 +- packages/providers/src/types.ts | 2 + packages/workflows/src/dag-executor.test.ts | 116 ++++++++++++++++++++ packages/workflows/src/dag-executor.ts | 41 +++++++ 4 files changed, 168 insertions(+), 1 deletion(-) diff --git a/packages/providers/src/claude/provider.ts b/packages/providers/src/claude/provider.ts index 26935bf373..0821319317 100644 --- a/packages/providers/src/claude/provider.ts +++ b/packages/providers/src/claude/provider.ts @@ -740,6 +740,7 @@ async function* streamClaudeMessages( total_cost_usd?: number; stop_reason?: string | null; num_turns?: number; + errors?: string[]; model_usage?: Record< string, { @@ -751,9 +752,15 @@ async function* streamClaudeMessages( >; }; const tokens = normalizeClaudeUsage(resultMsg.usage); + const sdkErrors = Array.isArray(resultMsg.errors) ? resultMsg.errors : undefined; if (resultMsg.is_error) { getLog().error( - { sessionId: resultMsg.session_id, errorSubtype: resultMsg.subtype }, + { + sessionId: resultMsg.session_id, + errorSubtype: resultMsg.subtype, + stopReason: resultMsg.stop_reason, + errors: sdkErrors, + }, 'claude.result_is_error' ); } @@ -765,6 +772,7 @@ async function* streamClaudeMessages( ? { structuredOutput: resultMsg.structured_output } : {}), ...(resultMsg.is_error ? { isError: true, errorSubtype: resultMsg.subtype } : {}), + ...(resultMsg.is_error && sdkErrors?.length ? { errors: sdkErrors } : {}), ...(resultMsg.total_cost_usd !== undefined ? { cost: resultMsg.total_cost_usd } : {}), ...(resultMsg.stop_reason != null ? { stopReason: resultMsg.stop_reason } : {}), ...(resultMsg.num_turns !== undefined ? { numTurns: resultMsg.num_turns } : {}), diff --git a/packages/providers/src/types.ts b/packages/providers/src/types.ts index 63444ac112..9822abd261 100644 --- a/packages/providers/src/types.ts +++ b/packages/providers/src/types.ts @@ -72,6 +72,8 @@ export type MessageChunk = structuredOutput?: unknown; isError?: boolean; errorSubtype?: string; + /** SDK-provided error detail strings. Populated when isError is true. */ + errors?: string[]; cost?: number; stopReason?: string; numTurns?: number; diff --git a/packages/workflows/src/dag-executor.test.ts b/packages/workflows/src/dag-executor.test.ts index c5822197e5..0c745b39e5 100644 --- a/packages/workflows/src/dag-executor.test.ts +++ b/packages/workflows/src/dag-executor.test.ts @@ -3594,6 +3594,70 @@ describe('executeDagWorkflow -- resume with priorCompletedNodes', () => { expect(sessionArg).toBe('loop-session-1'); }); + it('loop iteration fails loudly when SDK returns error_during_execution', async () => { + // Regression test for #1208: previously the loop silently broke on isError + // results and kept iterating with empty output, producing "5-second crashes" + // that masqueraded as successful iterations. + mockSendQueryDag.mockImplementation(function* () { + yield { + type: 'result', + isError: true, + errorSubtype: 'error_during_execution', + errors: ['Subprocess crashed mid-turn'], + sessionId: 'bad-session', + }; + }); + + const store = createMockStore(); + const mockDeps = createMockDeps(store); + const platform = createMockPlatform(); + const workflowRun = makeWorkflowRun(); + + await executeDagWorkflow( + mockDeps, + platform, + 'conv-dag', + testDir, + { + name: 'loop-iteration-err', + nodes: [ + { + id: 'work', + loop: { + prompt: 'Do the work. Say DONE.', + until: 'DONE', + max_iterations: 5, + }, + }, + ], + }, + workflowRun, + 'claude', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + minimalConfig + ); + + // Should fail after one iteration rather than burning through max_iterations + expect(mockSendQueryDag.mock.calls.length).toBe(1); + // The loop_iteration_failed event should carry the subtype and SDK errors detail + const eventCalls = (store.createWorkflowEvent as ReturnType).mock.calls; + const iterFailedEvents = eventCalls.filter( + (call: unknown[]) => + (call[0] as Record).event_type === 'loop_iteration_failed' + ); + expect(iterFailedEvents.length).toBeGreaterThan(0); + const failedData = (iterFailedEvents[0][0] as Record).data as Record< + string, + unknown + >; + expect(failedData.error).toContain('error_during_execution'); + expect(failedData.error).toContain('Subprocess crashed mid-turn'); + }); + it('non-interactive loop is unaffected (no pause)', async () => { mockSendQueryDag.mockImplementation(function* () { yield { type: 'assistant', content: 'Still working...' }; @@ -4617,6 +4681,58 @@ describe('executeDagWorkflow -- Claude SDK advanced options', () => { expect(capMessage).toBeDefined(); }); + it('fails node when SDK returns error_during_execution result', async () => { + // Regression test for #1208: previously we only failed on error_max_budget_usd + // and silently broke on all other isError subtypes, letting failed nodes + // masquerade as successes with empty output. + mockSendQueryDag.mockImplementation(function* () { + yield { + type: 'result', + isError: true, + errorSubtype: 'error_during_execution', + errors: ['Tool call failed: permission denied'], + sessionId: 'sid-err', + }; + }); + + const store = createMockStore(); + const mockDeps = createMockDeps(store); + const platform = createMockPlatform(); + const workflowRun = makeWorkflowRun(); + + await executeDagWorkflow( + mockDeps, + platform, + 'conv-dag', + testDir, + { + name: 'err-exec-test', + nodes: [{ id: 'step1', command: 'my-cmd' }], + }, + workflowRun, + 'claude', + undefined, + join(testDir, 'artifacts'), + join(testDir, 'logs'), + 'main', + 'docs/', + minimalConfig + ); + + // The node_failed event should carry the subtype and SDK errors detail + const eventCalls = (store.createWorkflowEvent as ReturnType).mock.calls; + const nodeFailedEvents = eventCalls.filter( + (call: unknown[]) => (call[0] as Record).event_type === 'node_failed' + ); + expect(nodeFailedEvents.length).toBeGreaterThan(0); + const failedData = (nodeFailedEvents[0][0] as Record).data as Record< + string, + unknown + >; + expect(failedData.error).toContain('error_during_execution'); + expect(failedData.error).toContain('permission denied'); + }); + it('forwards workflow-level effort to node when no per-node override', async () => { const mockDeps = createMockDeps(); const platform = createMockPlatform(); diff --git a/packages/workflows/src/dag-executor.ts b/packages/workflows/src/dag-executor.ts index 3680af28b5..a1dd05564b 100644 --- a/packages/workflows/src/dag-executor.ts +++ b/packages/workflows/src/dag-executor.ts @@ -764,6 +764,25 @@ async function executeNodeInternal( `Node '${node.id}' exceeded cost cap${cap !== undefined ? ` of $${cap.toFixed(2)}` : ''}.` ); } + // Fail loudly on any other SDK error result. Previously we broke out of + // the stream silently, producing empty/partial output without signaling + // failure — which let failed iterations masquerade as successes (#1208). + if (msg.isError) { + const subtype = msg.errorSubtype ?? 'unknown'; + const errorsDetail = msg.errors?.length ? ` — ${msg.errors.join('; ')}` : ''; + getLog().error( + { + nodeId: node.id, + errorSubtype: subtype, + errors: msg.errors, + sessionId: msg.sessionId, + stopReason: msg.stopReason, + durationMs: Date.now() - nodeStartTime, + }, + 'dag.node_sdk_error_result' + ); + throw new Error(`Node '${node.id}' failed: SDK returned ${subtype}${errorsDetail}`); + } break; // Result is the "I'm done" signal — don't wait for subprocess to exit } else if (msg.type === 'system' && msg.content) { // Forward provider warnings (⚠️) and MCP connection failures to the user. @@ -1626,6 +1645,28 @@ async function executeLoopNode( if (msg.numTurns !== undefined) { loopTotalNumTurns = (loopTotalNumTurns ?? 0) + msg.numTurns; } + // Fail the iteration loudly on SDK error results. Previously we broke + // silently, producing empty output and continuing to the next iteration — + // which made `error_during_execution` on resumed interactive loops look + // like a "5-second crash" that kept burning iterations (#1208). + if (msg.isError) { + const subtype = msg.errorSubtype ?? 'unknown'; + const errorsDetail = msg.errors?.length ? ` — ${msg.errors.join('; ')}` : ''; + getLog().error( + { + nodeId: node.id, + iteration: i, + errorSubtype: subtype, + errors: msg.errors, + sessionId: msg.sessionId, + stopReason: msg.stopReason, + }, + 'loop_node.iteration_sdk_error' + ); + throw new Error( + `Loop '${node.id}' iteration ${String(i)} failed: SDK returned ${subtype}${errorsDetail}` + ); + } break; // Result is the "I'm done" signal — don't wait for subprocess to exit } else if (msg.type === 'tool' && msg.toolName) { const now = Date.now();