From acc0851005ed0d48cd41a2624f6c38435f3c0040 Mon Sep 17 00:00:00 2001 From: Rasmus Widing Date: Mon, 27 Apr 2026 18:54:09 +0300 Subject: [PATCH 1/3] fix(providers/pi): tolerate prose preamble in structured-output responses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pi has no SDK-level JSON-mode parameter and Minimax's Anthropic-compat proxy doesn't expose response_format support, so reasoning models like Minimax M2.7 routinely "think out loud" before emitting the JSON we asked for, e.g.: "Now I have all the inputs. Let me evaluate the three gates: **Gate A — Direction alignment**: ... {\"verdict\":\"review\",\"direction_alignment\":\"aligned\",...}" The previous tryParseStructuredOutput stripped fences then JSON.parse'd the whole string, which fails the moment a single character of prose appears before the {. The maintainer-review-pr workflow's gate node was hitting this on ~70% of runs, propagating to 7-of-10 silent failures in a sequential review batch this afternoon. Add two preamble-tolerant fallback tiers: Tier 1 (existing): clean JSON.parse — fast path for compliant models Tier 2 (new): scan backward to last `{`, parse from there — flat JSON with reasoning preamble (Minimax M2.7 case) Tier 3 (new): scan forward to first `{`, parse from there — nested JSON with preamble (Tier 2 lands inside a child object and fails) The existing "trailing-prose-too" failure case (preamble + JSON + postamble) still returns undefined — handling it would require brace- depth tracking and isn't worth the cost. The two new tiers cover the failure mode actually observed in production. Tested against the real Minimax preamble pattern and a synthetic preamble + nested-JSON case. 39 event-bridge tests pass. No SDK changes, no Pi extensions, no Minimax API dependencies. Pure Archon-side parser hardening — backward-compatible and benefits every verbose-preamble model routed through Pi, not just Minimax. --- .../src/community/pi/event-bridge.test.ts | 37 +++++++++++++++++-- .../src/community/pi/event-bridge.ts | 37 ++++++++++++++++++- 2 files changed, 68 insertions(+), 6 deletions(-) diff --git a/packages/providers/src/community/pi/event-bridge.test.ts b/packages/providers/src/community/pi/event-bridge.test.ts index cc176f7943..2b18f3c379 100644 --- a/packages/providers/src/community/pi/event-bridge.test.ts +++ b/packages/providers/src/community/pi/event-bridge.test.ts @@ -401,15 +401,44 @@ describe('tryParseStructuredOutput', () => { expect(tryParseStructuredOutput(' ')).toBeUndefined(); }); - test('returns undefined when model wraps JSON in prose', () => { - // Realistic failure mode — model ignores "JSON only" instruction and adds - // explanatory text before/after. Caller degrades via the executor's - // missing-structured-output warning path. + test('returns undefined when model wraps JSON in prose with trailing text', () => { + // Caller degrades via the executor's missing-structured-output warning. + // Trailing prose after the JSON breaks every tier — the regex would have + // to track brace depth to handle this case, which isn't worth the cost. const prose = 'Here is the JSON you requested:\n{"ok":true}\nLet me know if you need anything else.'; expect(tryParseStructuredOutput(prose)).toBeUndefined(); }); + test('parses preamble + trailing flat JSON (Minimax M2.7 reasoning-model pattern)', () => { + // Real-world failure mode observed on Minimax M2.7: the model "thinks out + // loud" before emitting the JSON-only output we asked for. Tier 2's + // backward scan from the last `{` recovers the structured output. + const minimax = + 'Now I have all the inputs. Let me evaluate the three gates:\n\n' + + '**Gate A — Direction alignment**: aligned\n' + + '**Gate B — Scope**: focused\n' + + '**Gate C — Template**: partial\n\n' + + '{"verdict":"review","direction_alignment":"aligned","scope_assessment":"focused","template_quality":"partial"}'; + expect(tryParseStructuredOutput(minimax)).toEqual({ + verdict: 'review', + direction_alignment: 'aligned', + scope_assessment: 'focused', + template_quality: 'partial', + }); + }); + + test('parses preamble + trailing nested JSON via tier-3 forward scan', () => { + // Tier 2's backward scan lands inside the nested object and fails; + // Tier 3's forward scan recovers by parsing from the outer `{`. + const nested = + 'Reasoning before the JSON.\n' + '{"verdict":"review","details":{"foo":1,"bar":[1,2,3]}}'; + expect(tryParseStructuredOutput(nested)).toEqual({ + verdict: 'review', + details: { foo: 1, bar: [1, 2, 3] }, + }); + }); + test('returns undefined on malformed JSON', () => { expect(tryParseStructuredOutput('{not valid}')).toBeUndefined(); expect(tryParseStructuredOutput('{"unclosed":')).toBeUndefined(); diff --git a/packages/providers/src/community/pi/event-bridge.ts b/packages/providers/src/community/pi/event-bridge.ts index aa5363ce86..a6a3d0c328 100644 --- a/packages/providers/src/community/pi/event-bridge.ts +++ b/packages/providers/src/community/pi/event-bridge.ts @@ -153,10 +153,14 @@ export function buildResultChunk(messages: readonly unknown[]): MessageChunk { /** * Attempt to parse a Pi assistant transcript as the structured-output JSON - * requested via `outputFormat`. Handles two common model failure modes: + * requested via `outputFormat`. Handles three common model failure modes: * - trailing/leading whitespace (always stripped) * - markdown code fences (```json ... ``` or bare ``` ... ```) that models * emit despite the "no code fences" instruction in the prompt + * - prose preamble followed by a single trailing JSON object — pattern + * observed on Minimax M2.7 ("Now I have all the inputs. Let me evaluate + * the three gates: ... {...}"). Reasoning models tend to "think out loud" + * before emitting structured output despite explicit JSON-only prompts. * * Returns the parsed value on success, `undefined` on any failure. Callers * treat `undefined` as "structured output unavailable" and degrade via the @@ -171,11 +175,40 @@ export function tryParseStructuredOutput(text: string): unknown { .replace(/^```(?:json)?\s*\n?/i, '') .replace(/\n?\s*```\s*$/, '') .trim(); + + // Tier 1: clean parse — fast path for compliant models (Claude, GPT, Gemini, + // most Anthropic-API responses). try { return JSON.parse(cleaned); } catch { - return undefined; + // Fall through to preamble-tolerant tiers. + } + + // Tier 2: scan backward to the LAST `{` and parse from there. Catches the + // common reasoning-model preamble pattern ("Let me evaluate... {flat JSON}"). + // For nested JSON in the response, the last `{` lands inside a child object + // and parsing fails — Tier 3 picks it up. + const lastBrace = cleaned.lastIndexOf('{'); + if (lastBrace > 0) { + try { + return JSON.parse(cleaned.slice(lastBrace)); + } catch { + // Fall through. + } } + + // Tier 3: scan forward to the FIRST `{` and parse from there. Catches the + // preamble + nested JSON case missed by Tier 2 (last `{` was inside a child). + const firstBrace = cleaned.indexOf('{'); + if (firstBrace > 0 && firstBrace !== lastBrace) { + try { + return JSON.parse(cleaned.slice(firstBrace)); + } catch { + // Final fall through to undefined. + } + } + + return undefined; } /** From ac958c742b86e13a5b5a0afebb065b31f14ab933 Mon Sep 17 00:00:00 2001 From: Rasmus Widing Date: Mon, 27 Apr 2026 19:14:26 +0300 Subject: [PATCH 2/3] fix(providers/pi): address CodeRabbit review on #1440 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Drop the redundant `firstBrace !== lastBrace` guard on Tier 3. When the only `{` past position 0 is the same one Tier 2 just tried, Tier 3 will re-run JSON.parse on the same input and fail identically — one redundant call accepted for simpler control flow. (CodeRabbit cleanup) - Tighten the Tier 1 comment to drop the model name list (will date) and tighten the Tier 2 comment to scope its claim to "preamble + flat JSON" rather than overgeneralized "nested JSON". (CodeRabbit comment-quality cleanup) - Replace the bare `// Fall through...` comments with `// fall through` inside the catch blocks. Necessary because eslint's `no-empty` rule flags bare `catch {}` blocks. (lint compliance) - Add a regression test for `{` inside a string value that pins the Tier 2 → Tier 3 cascade composition: lastIndexOf lands inside the string brace, JSON.parse rejects the resulting `{ inside","ok":true}` fragment, Tier 3 forward-scans to the JSON object's outer `{` and parses cleanly. Note: the preamble must not itself contain `{`, otherwise Tier 3 lands on it instead of on the JSON object — covered in the test comment so the constraint isn't lost. (CodeRabbit test coverage) - Update `packages/docs-web/src/content/docs/getting-started/ai-assistants.md:378` — the previous "degrades cleanly when the model emits prose" claim is now partially false (preamble-only patterns are recovered). New text enumerates the three handled forms (bare, fenced, prose-preamble) and clarifies the trailing-text-interleaved case still degrades. (CodeRabbit docs-impact) - Add a CHANGELOG entry under `## [Unreleased] / Fixed`. CodeRabbit's "critical" suffix-check suggestion (`cleaned.slice(brace) .trimEnd().endsWith('}')`) is not applied. JSON.parse already rejects trailing non-whitespace, so any successful parse from a `{`-prefixed slice already ends with `}` after trim — the check is tautological in all cases the function actually reaches it. The actual concern (a self-contained `{...}` fragment in preamble that happens to be valid JSON, with no real answer in the response) wouldn't be caught by the suffix check either, since the fragment IS a valid JSON object that ends with `}`. Real defense against that case would require schema validation, which lives at the call site (executor's `structured_output_missing` warning path), not in this parser. Tests: 40 pass / 0 fail. Lint, format, type-check all clean. --- CHANGELOG.md | 1 + .../docs/getting-started/ai-assistants.md | 2 +- .../src/community/pi/event-bridge.test.ts | 14 ++++++++++ .../src/community/pi/event-bridge.ts | 26 +++++++++++-------- 4 files changed, 31 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 76259ddb8f..929641a35b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - **Claude provider crashed in dev mode with `error: unknown option '--no-env-file'`.** The Claude Agent SDK switched from shipping `cli.js` to per-platform native binaries (via optional deps) in the 0.2.x series. Archon's `shouldPassNoEnvFile` predicate kept emitting the Bun-only `--no-env-file` flag in dev mode (when the SDK resolves its bundled binary), which the native binary rejects. Tightened the predicate to only emit the flag for explicitly-configured Bun-runnable JS entry points (`.js`/`.mjs`/`.cjs`). Target-repo `.env` isolation is unchanged — `stripCwdEnv()` at process boot remains the primary guard, and the native Claude binary does not auto-load `.env` from its cwd. (#1461) +- **Pi structured-output now tolerates reasoning-model prose preamble.** `tryParseStructuredOutput` previously returned `undefined` whenever the assistant text wasn't pure JSON, even when the JSON object was clearly emitted at the end of a "Let me evaluate..." preamble. Reasoning models — observed on Minimax M2.7 — routinely "think out loud" before emitting structured output despite explicit JSON-only prompts. The parser now falls back to a forward-scan from the first `{` when the clean parse fails, recovering the structured output without changing the success path for fully compliant models. (#1440) ## [0.3.9] - 2026-04-22 diff --git a/packages/docs-web/src/content/docs/getting-started/ai-assistants.md b/packages/docs-web/src/content/docs/getting-started/ai-assistants.md index 7a65b97adf..08993fc8a2 100644 --- a/packages/docs-web/src/content/docs/getting-started/ai-assistants.md +++ b/packages/docs-web/src/content/docs/getting-started/ai-assistants.md @@ -375,7 +375,7 @@ nodes: | Codebase env vars (`envInjection`) | ✅ | `.archon/config.yaml` `env:` section | | MCP servers | ❌ | Pi rejects MCP by design | | Claude-SDK hooks | ❌ | Claude-specific format | -| Structured output | ✅ (best-effort) | `output_format:` — schema is appended to the prompt and JSON is parsed out of the assistant text (bare or ```json```-fenced); degrades cleanly when the model emits prose. Not SDK-enforced like Claude/Codex. | +| Structured output | ✅ (best-effort) | `output_format:` — schema is appended to the prompt and JSON is parsed out of the assistant text. Handles bare JSON, ```json```-fenced, and reasoning-model prose preambles like `Let me evaluate... {...}` (Minimax M2.x pattern). Trailing-text-interleaved cases still degrade cleanly to the missing-structured-output warning. Not SDK-enforced like Claude/Codex. | | Cost limits (`maxBudgetUsd`) | ❌ | tracked in result chunk, not enforced | | Fallback model | ❌ | not native in Pi | | Sandbox | ❌ | not native in Pi | diff --git a/packages/providers/src/community/pi/event-bridge.test.ts b/packages/providers/src/community/pi/event-bridge.test.ts index 2b18f3c379..55acbe189f 100644 --- a/packages/providers/src/community/pi/event-bridge.test.ts +++ b/packages/providers/src/community/pi/event-bridge.test.ts @@ -439,6 +439,20 @@ describe('tryParseStructuredOutput', () => { }); }); + test('parses preamble + JSON containing `{` inside a string value', () => { + // Pins the cascade composition. Tier 2's lastIndexOf('{') lands inside + // the string value's brace; slicing to `{ inside","ok":true}` makes + // JSON.parse fail. Tier 3 forward-scans to the JSON object's outer + // opening `{` and parses cleanly. Preamble must not itself contain + // `{`, otherwise Tier 3 lands there instead of on the JSON object. + const tricky = + 'Brief preamble with no extra braces.\n' + '{"key":"value with { inside","ok":true}'; + expect(tryParseStructuredOutput(tricky)).toEqual({ + key: 'value with { inside', + ok: true, + }); + }); + test('returns undefined on malformed JSON', () => { expect(tryParseStructuredOutput('{not valid}')).toBeUndefined(); expect(tryParseStructuredOutput('{"unclosed":')).toBeUndefined(); diff --git a/packages/providers/src/community/pi/event-bridge.ts b/packages/providers/src/community/pi/event-bridge.ts index a6a3d0c328..411c07f3d6 100644 --- a/packages/providers/src/community/pi/event-bridge.ts +++ b/packages/providers/src/community/pi/event-bridge.ts @@ -176,35 +176,39 @@ export function tryParseStructuredOutput(text: string): unknown { .replace(/\n?\s*```\s*$/, '') .trim(); - // Tier 1: clean parse — fast path for compliant models (Claude, GPT, Gemini, - // most Anthropic-API responses). + // Tier 1: clean parse — fast path for fully compliant outputs. try { return JSON.parse(cleaned); } catch { - // Fall through to preamble-tolerant tiers. + // fall through } // Tier 2: scan backward to the LAST `{` and parse from there. Catches the - // common reasoning-model preamble pattern ("Let me evaluate... {flat JSON}"). - // For nested JSON in the response, the last `{` lands inside a child object - // and parsing fails — Tier 3 picks it up. + // common preamble-then-flat-JSON pattern. With a flat (non-nested) JSON + // response the last `{` is the only `{`, and slicing recovers it. + // `lastBrace > 0` excludes position 0, which Tier 1 already attempted. const lastBrace = cleaned.lastIndexOf('{'); if (lastBrace > 0) { try { return JSON.parse(cleaned.slice(lastBrace)); } catch { - // Fall through. + // fall through } } - // Tier 3: scan forward to the FIRST `{` and parse from there. Catches the - // preamble + nested JSON case missed by Tier 2 (last `{` was inside a child). + // Tier 3: scan forward to the FIRST `{`. With a preamble-then-nested-JSON + // pattern, Tier 2 lands inside a child object and JSON.parse rejects the + // trailing `}}`; the outer `{` is the recovery point. When the only `{` + // is at position 0, Tier 1 already tried it. When there's exactly one + // `{` past position 0, Tier 2 already tried that exact slice; this tier + // re-runs JSON.parse on the same input and fails identically — one + // redundant call we accept for a simpler control flow. const firstBrace = cleaned.indexOf('{'); - if (firstBrace > 0 && firstBrace !== lastBrace) { + if (firstBrace > 0) { try { return JSON.parse(cleaned.slice(firstBrace)); } catch { - // Final fall through to undefined. + // fall through } } From bf77ac979bdb628cabd70f0f36513bef360fe30c Mon Sep 17 00:00:00 2001 From: Rasmus Widing Date: Wed, 29 Apr 2026 12:22:53 +0300 Subject: [PATCH 3/3] fix(providers/pi): drop Tier 2 backward-scan to avoid brace-postamble footgun MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CodeRabbit flagged a real correctness issue: a backward scan from the last `{` silently returns the wrong JSON object when the response contains a brace-bearing example after the real payload (e.g. `{"actual":1}\n For example: {"x":2}` parses the trailing example instead of failing). This breaks the conservative-failure contract callers rely on. Tier 2 was always strictly worse than Tier 3 anyway: every preamble pattern Tier 2 handled, Tier 3 handles too, and Tier 3 doesn't have the multi-fragment hazard. Dropping Tier 2 entirely removes the footgun and keeps the parser simple (clean parse → forward scan → undefined). Tests updated: - Renamed/clarified existing tests to reference the surviving forward-scan tier instead of the deleted backward-scan tier (no behavior change for the cases they cover — preamble has no extra braces). - Added a regression test for the brace-postamble footgun: input with a trailing example like `{"actual":"value"}\nFor example: {"verdict":"review"}` must return undefined under the new contract. --- .../src/community/pi/event-bridge.test.ts | 31 +++++++++++-------- .../src/community/pi/event-bridge.ts | 26 ++++------------ 2 files changed, 24 insertions(+), 33 deletions(-) diff --git a/packages/providers/src/community/pi/event-bridge.test.ts b/packages/providers/src/community/pi/event-bridge.test.ts index 55acbe189f..d0bf9a35b7 100644 --- a/packages/providers/src/community/pi/event-bridge.test.ts +++ b/packages/providers/src/community/pi/event-bridge.test.ts @@ -403,17 +403,17 @@ describe('tryParseStructuredOutput', () => { test('returns undefined when model wraps JSON in prose with trailing text', () => { // Caller degrades via the executor's missing-structured-output warning. - // Trailing prose after the JSON breaks every tier — the regex would have - // to track brace depth to handle this case, which isn't worth the cost. + // Forward scan starts at the JSON object but JSON.parse rejects the + // trailing prose, so we fail closed rather than guess. const prose = 'Here is the JSON you requested:\n{"ok":true}\nLet me know if you need anything else.'; expect(tryParseStructuredOutput(prose)).toBeUndefined(); }); - test('parses preamble + trailing flat JSON (Minimax M2.7 reasoning-model pattern)', () => { + test('parses preamble + trailing JSON (Minimax M2.7 reasoning-model pattern)', () => { // Real-world failure mode observed on Minimax M2.7: the model "thinks out - // loud" before emitting the JSON-only output we asked for. Tier 2's - // backward scan from the last `{` recovers the structured output. + // loud" before emitting the JSON-only output we asked for. Forward scan + // from the first `{` (preamble has no braces) recovers the payload. const minimax = 'Now I have all the inputs. Let me evaluate the three gates:\n\n' + '**Gate A — Direction alignment**: aligned\n' + @@ -428,9 +428,8 @@ describe('tryParseStructuredOutput', () => { }); }); - test('parses preamble + trailing nested JSON via tier-3 forward scan', () => { - // Tier 2's backward scan lands inside the nested object and fails; - // Tier 3's forward scan recovers by parsing from the outer `{`. + test('parses preamble + trailing nested JSON via forward scan', () => { + // Forward scan lands on the outer `{` and JSON.parse handles the nesting. const nested = 'Reasoning before the JSON.\n' + '{"verdict":"review","details":{"foo":1,"bar":[1,2,3]}}'; expect(tryParseStructuredOutput(nested)).toEqual({ @@ -440,11 +439,9 @@ describe('tryParseStructuredOutput', () => { }); test('parses preamble + JSON containing `{` inside a string value', () => { - // Pins the cascade composition. Tier 2's lastIndexOf('{') lands inside - // the string value's brace; slicing to `{ inside","ok":true}` makes - // JSON.parse fail. Tier 3 forward-scans to the JSON object's outer - // opening `{` and parses cleanly. Preamble must not itself contain - // `{`, otherwise Tier 3 lands there instead of on the JSON object. + // Forward scan lands on the JSON object's outer `{`; JSON.parse handles + // the in-string `{`. Preamble must not itself contain `{`, otherwise the + // forward scan would start there and fail. const tricky = 'Brief preamble with no extra braces.\n' + '{"key":"value with { inside","ok":true}'; expect(tryParseStructuredOutput(tricky)).toEqual({ @@ -453,6 +450,14 @@ describe('tryParseStructuredOutput', () => { }); }); + test('returns undefined when prose contains a brace-bearing example after the real JSON', () => { + // Conservative-failure regression. A backward-scan strategy would silently + // return the trailing example; forward scan starts at the real payload, + // JSON.parse rejects the trailing prose+example, and we fail closed. + const withExample = '{"actual":"value"}\nFor example: {"verdict":"review"}'; + expect(tryParseStructuredOutput(withExample)).toBeUndefined(); + }); + test('returns undefined on malformed JSON', () => { expect(tryParseStructuredOutput('{not valid}')).toBeUndefined(); expect(tryParseStructuredOutput('{"unclosed":')).toBeUndefined(); diff --git a/packages/providers/src/community/pi/event-bridge.ts b/packages/providers/src/community/pi/event-bridge.ts index 411c07f3d6..4adde52809 100644 --- a/packages/providers/src/community/pi/event-bridge.ts +++ b/packages/providers/src/community/pi/event-bridge.ts @@ -183,26 +183,12 @@ export function tryParseStructuredOutput(text: string): unknown { // fall through } - // Tier 2: scan backward to the LAST `{` and parse from there. Catches the - // common preamble-then-flat-JSON pattern. With a flat (non-nested) JSON - // response the last `{` is the only `{`, and slicing recovers it. - // `lastBrace > 0` excludes position 0, which Tier 1 already attempted. - const lastBrace = cleaned.lastIndexOf('{'); - if (lastBrace > 0) { - try { - return JSON.parse(cleaned.slice(lastBrace)); - } catch { - // fall through - } - } - - // Tier 3: scan forward to the FIRST `{`. With a preamble-then-nested-JSON - // pattern, Tier 2 lands inside a child object and JSON.parse rejects the - // trailing `}}`; the outer `{` is the recovery point. When the only `{` - // is at position 0, Tier 1 already tried it. When there's exactly one - // `{` past position 0, Tier 2 already tried that exact slice; this tier - // re-runs JSON.parse on the same input and fails identically — one - // redundant call we accept for a simpler control flow. + // Tier 2: scan forward to the FIRST `{` and parse from there. Recovers the + // preamble-then-JSON pattern reasoning models emit. A backward scan from + // the last `{` was considered but rejected: it silently returns the wrong + // object when the prose contains a brace-bearing example after the real + // payload (e.g. `{"actual":1}\nFor example: {"x":2}` would yield `{x:2}`), + // breaking the conservative-failure contract callers rely on. const firstBrace = cleaned.indexOf('{'); if (firstBrace > 0) { try {