diff --git a/CHANGELOG.md b/CHANGELOG.md index 76259ddb8f..929641a35b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - **Claude provider crashed in dev mode with `error: unknown option '--no-env-file'`.** The Claude Agent SDK switched from shipping `cli.js` to per-platform native binaries (via optional deps) in the 0.2.x series. Archon's `shouldPassNoEnvFile` predicate kept emitting the Bun-only `--no-env-file` flag in dev mode (when the SDK resolves its bundled binary), which the native binary rejects. Tightened the predicate to only emit the flag for explicitly-configured Bun-runnable JS entry points (`.js`/`.mjs`/`.cjs`). Target-repo `.env` isolation is unchanged — `stripCwdEnv()` at process boot remains the primary guard, and the native Claude binary does not auto-load `.env` from its cwd. (#1461) +- **Pi structured-output now tolerates reasoning-model prose preamble.** `tryParseStructuredOutput` previously returned `undefined` whenever the assistant text wasn't bare or code-fenced JSON, even when the JSON object was clearly emitted right after a "Let me evaluate..." preamble. Reasoning models — observed on Minimax M2.7 — routinely "think out loud" before emitting structured output despite explicit JSON-only prompts. The parser now falls back to a forward scan from the first `{` when the clean parse fails, recovering the structured output without changing the success path for fully compliant models. 
(#1440) ## [0.3.9] - 2026-04-22 diff --git a/packages/docs-web/src/content/docs/getting-started/ai-assistants.md b/packages/docs-web/src/content/docs/getting-started/ai-assistants.md index 7a65b97adf..08993fc8a2 100644 --- a/packages/docs-web/src/content/docs/getting-started/ai-assistants.md +++ b/packages/docs-web/src/content/docs/getting-started/ai-assistants.md @@ -375,7 +375,7 @@ nodes: | Codebase env vars (`envInjection`) | ✅ | `.archon/config.yaml` `env:` section | | MCP servers | ❌ | Pi rejects MCP by design | | Claude-SDK hooks | ❌ | Claude-specific format | -| Structured output | ✅ (best-effort) | `output_format:` — schema is appended to the prompt and JSON is parsed out of the assistant text (bare or ```json```-fenced); degrades cleanly when the model emits prose. Not SDK-enforced like Claude/Codex. | +| Structured output | ✅ (best-effort) | `output_format:` — schema is appended to the prompt and JSON is parsed out of the assistant text. Handles bare JSON, ```json```-fenced JSON, and reasoning-model prose preambles like `Let me evaluate... {...}` (Minimax M2.x pattern). Output with prose after the JSON still degrades cleanly to the missing-structured-output warning. Not SDK-enforced like Claude/Codex. | | Cost limits (`maxBudgetUsd`) | ❌ | tracked in result chunk, not enforced | | Fallback model | ❌ | not native in Pi | | Sandbox | ❌ | not native in Pi | diff --git a/packages/providers/src/community/pi/event-bridge.test.ts b/packages/providers/src/community/pi/event-bridge.test.ts index cc176f7943..d0bf9a35b7 100644 --- a/packages/providers/src/community/pi/event-bridge.test.ts +++ b/packages/providers/src/community/pi/event-bridge.test.ts @@ -401,15 +401,63 @@ describe('tryParseStructuredOutput', () => { expect(tryParseStructuredOutput(' ')).toBeUndefined(); }); - test('returns undefined when model wraps JSON in prose', () => { - // Realistic failure mode — model ignores "JSON only" instruction and adds - // explanatory text before/after. 
Caller degrades via the executor's - // missing-structured-output warning path. + test('returns undefined when model wraps JSON in prose with trailing text', () => { + // Caller degrades via the executor's missing-structured-output warning. + // Forward scan starts at the JSON object but JSON.parse rejects the + // trailing prose, so we fail closed rather than guess. const prose = 'Here is the JSON you requested:\n{"ok":true}\nLet me know if you need anything else.'; expect(tryParseStructuredOutput(prose)).toBeUndefined(); }); + test('parses preamble + trailing JSON (Minimax M2.7 reasoning-model pattern)', () => { + // Real-world failure mode observed on Minimax M2.7: the model "thinks out + // loud" before emitting the JSON-only output we asked for. Forward scan + // from the first `{` (preamble has no braces) recovers the payload. + const minimax = + 'Now I have all the inputs. Let me evaluate the three gates:\n\n' + + '**Gate A — Direction alignment**: aligned\n' + + '**Gate B — Scope**: focused\n' + + '**Gate C — Template**: partial\n\n' + + '{"verdict":"review","direction_alignment":"aligned","scope_assessment":"focused","template_quality":"partial"}'; + expect(tryParseStructuredOutput(minimax)).toEqual({ + verdict: 'review', + direction_alignment: 'aligned', + scope_assessment: 'focused', + template_quality: 'partial', + }); + }); + + test('parses preamble + trailing nested JSON via forward scan', () => { + // Forward scan lands on the outer `{` and JSON.parse handles the nesting. + const nested = + 'Reasoning before the JSON.\n' + '{"verdict":"review","details":{"foo":1,"bar":[1,2,3]}}'; + expect(tryParseStructuredOutput(nested)).toEqual({ + verdict: 'review', + details: { foo: 1, bar: [1, 2, 3] }, + }); + }); + + test('parses preamble + JSON containing `{` inside a string value', () => { + // Forward scan lands on the JSON object's outer `{`; JSON.parse handles + // the in-string `{`. 
Preamble must not itself contain `{`, otherwise the + // forward scan would start there and fail. + const tricky = + 'Brief preamble with no extra braces.\n' + '{"key":"value with { inside","ok":true}'; + expect(tryParseStructuredOutput(tricky)).toEqual({ + key: 'value with { inside', + ok: true, + }); + }); + + test('returns undefined when prose contains a brace-bearing example after the real JSON', () => { + // Conservative-failure regression. A backward-scan strategy would silently + // return the trailing example; forward scan starts at the real payload, + // JSON.parse rejects the trailing prose+example, and we fail closed. + const withExample = '{"actual":"value"}\nFor example: {"verdict":"review"}'; + expect(tryParseStructuredOutput(withExample)).toBeUndefined(); + }); + test('returns undefined on malformed JSON', () => { expect(tryParseStructuredOutput('{not valid}')).toBeUndefined(); expect(tryParseStructuredOutput('{"unclosed":')).toBeUndefined(); diff --git a/packages/providers/src/community/pi/event-bridge.ts b/packages/providers/src/community/pi/event-bridge.ts index aa5363ce86..4adde52809 100644 --- a/packages/providers/src/community/pi/event-bridge.ts +++ b/packages/providers/src/community/pi/event-bridge.ts @@ -153,10 +153,14 @@ export function buildResultChunk(messages: readonly unknown[]): MessageChunk { /** * Attempt to parse a Pi assistant transcript as the structured-output JSON - * requested via `outputFormat`. Handles two common model failure modes: + * requested via `outputFormat`. Handles three common model failure modes: * - trailing/leading whitespace (always stripped) * - markdown code fences (```json ... ``` or bare ``` ... ```) that models * emit despite the "no code fences" instruction in the prompt + * - prose preamble followed by a single trailing JSON object — pattern + * observed on Minimax M2.7 ("Now I have all the inputs. Let me evaluate + * the three gates: ... {...}"). 
Reasoning models tend to "think out loud" + * before emitting structured output despite explicit JSON-only prompts. * * Returns the parsed value on success, `undefined` on any failure. Callers * treat `undefined` as "structured output unavailable" and degrade via the @@ -171,11 +175,30 @@ export function tryParseStructuredOutput(text: string): unknown { .replace(/^```(?:json)?\s*\n?/i, '') .replace(/\n?\s*```\s*$/, '') .trim(); + + // Tier 1: clean parse — fast path for fully compliant outputs. try { return JSON.parse(cleaned); } catch { - return undefined; + // fall through + } + + // Tier 2: scan forward to the FIRST `{` and parse from there. Recovers the + // preamble-then-JSON pattern reasoning models emit. A backward scan from + // the last `{` was considered but rejected: it silently returns the wrong + // object when the prose contains a brace-bearing example after the real + // payload (e.g. `{"actual":1}\nFor example: {"x":2}` would yield `{x:2}`), + // breaking the conservative-failure contract callers rely on. + const firstBrace = cleaned.indexOf('{'); + if (firstBrace > 0) { + try { + return JSON.parse(cleaned.slice(firstBrace)); + } catch { + // fall through + } } + + return undefined; } /**