From 9da72041495785edee05d9f9f3860f80baf61e76 Mon Sep 17 00:00:00 2001 From: EQCommunication <97274751+EQCommunication@users.noreply.github.com> Date: Wed, 18 Feb 2026 00:30:59 +0900 Subject: [PATCH] fix: sanitize lone surrogates in API request body to prevent JSON parse errors Anthropic API's serde_json strictly enforces RFC 8259 and rejects JSON containing lone surrogate characters. JavaScript's JSON.stringify() allows lone surrogates (per ECMA-262), so when tool outputs contain non-UTF-8 data (binary files, terminal output), the API returns a 400 error. This adds sanitization in the custom fetch wrapper to replace lone surrogates (U+D800-U+DFFF) with U+FFFD before sending the request body. Uses String.prototype.toWellFormed() (ES2024) with regex fallback. Fixes the 'no low surrogate in string' JSON parse error reported across multiple tools. Related: anthropics/claude-code#1709 Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode) Co-authored-by: Sisyphus --- packages/opencode/src/provider/provider.ts | 5 +++ .../src/util/sanitize-surrogates.test.ts | 38 +++++++++++++++++++ .../opencode/src/util/sanitize-surrogates.ts | 6 +++ 3 files changed, 49 insertions(+) create mode 100644 packages/opencode/src/util/sanitize-surrogates.test.ts create mode 100644 packages/opencode/src/util/sanitize-surrogates.ts diff --git a/packages/opencode/src/provider/provider.ts b/packages/opencode/src/provider/provider.ts index cdbad663784..30ca52f1016 100644 --- a/packages/opencode/src/provider/provider.ts +++ b/packages/opencode/src/provider/provider.ts @@ -5,6 +5,7 @@ import { Config } from "../config/config" import { mapValues, mergeDeep, omit, pickBy, sortBy } from "remeda" import { NoSuchModelError, type Provider as SDK } from "ai" import { Log } from "../util/log" +import { sanitizeSurrogates } from "../util/sanitize-surrogates" import { BunProc } from "../bun" import { Plugin } from "../plugin" import { ModelsDev } from "./models" @@ -1093,6 +1094,10 @@ export namespace Provider { } } + if (typeof opts.body === "string") { + opts.body = sanitizeSurrogates(opts.body) + } + return fetchFn(input, { ...opts, // @ts-ignore see here: https://github.com/oven-sh/bun/issues/16682 diff --git a/packages/opencode/src/util/sanitize-surrogates.test.ts b/packages/opencode/src/util/sanitize-surrogates.test.ts new file mode 100644 index 00000000000..d18b8fb8f54 --- /dev/null +++ b/packages/opencode/src/util/sanitize-surrogates.test.ts @@ -0,0 +1,38 @@ +import { describe, test, expect } from "bun:test" +import { sanitizeSurrogates } from "./sanitize-surrogates" + +describe("sanitizeSurrogates", () => { + test("replaces lone high surrogate", () => { + expect(sanitizeSurrogates("\uD800")).toBe("\uFFFD") + }) + + test("replaces lone low surrogate", () => { + expect(sanitizeSurrogates("\uDC00")).toBe("\uFFFD") + }) + + test("preserves valid surrogate pair", () => { + const emoji = "\uD83D\uDE00" + expect(sanitizeSurrogates(emoji)).toBe(emoji) + }) + + test("preserves normal text", () => { + expect(sanitizeSurrogates("hello world")).toBe("hello world") + }) + + test("preserves Korean text", () => { + expect(sanitizeSurrogates("안녕하세요")).toBe("안녕하세요") + }) + + test("preserves empty string", () => { + expect(sanitizeSurrogates("")).toBe("") + }) + + test("replaces surrogate in middle", () => { + expect(sanitizeSurrogates("hello\uD800world")).toBe("hello\uFFFDworld") + }) + + test("result is well-formed", () => { + const result = sanitizeSurrogates("test\uD800\uDBFF\uDC00data\uDFFF") + expect(result.isWellFormed()).toBe(true) + }) +}) diff --git a/packages/opencode/src/util/sanitize-surrogates.ts b/packages/opencode/src/util/sanitize-surrogates.ts new file mode 100644 index 00000000000..1de3fdbbcd3 --- /dev/null +++ b/packages/opencode/src/util/sanitize-surrogates.ts @@ -0,0 +1,6 @@ +export function sanitizeSurrogates(s: string): string { + if (typeof s !== "string" || s.length === 0) return s + if (typeof s.isWellFormed === "function" && s.isWellFormed()) return s + if (typeof s.toWellFormed === "function") return s.toWellFormed() + return s.replace(/[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?