diff --git a/packages/opencode/src/provider/provider.ts b/packages/opencode/src/provider/provider.ts index cdbad663784..30ca52f1016 100644 --- a/packages/opencode/src/provider/provider.ts +++ b/packages/opencode/src/provider/provider.ts @@ -5,6 +5,7 @@ import { Config } from "../config/config" import { mapValues, mergeDeep, omit, pickBy, sortBy } from "remeda" import { NoSuchModelError, type Provider as SDK } from "ai" import { Log } from "../util/log" +import { sanitizeSurrogates } from "../util/sanitize-surrogates" import { BunProc } from "../bun" import { Plugin } from "../plugin" import { ModelsDev } from "./models" @@ -1093,6 +1094,10 @@ export namespace Provider { } } + if (typeof opts.body === "string") { + opts.body = sanitizeSurrogates(opts.body) + } + return fetchFn(input, { ...opts, // @ts-ignore see here: https://github.com/oven-sh/bun/issues/16682 diff --git a/packages/opencode/src/util/sanitize-surrogates.test.ts b/packages/opencode/src/util/sanitize-surrogates.test.ts new file mode 100644 index 00000000000..d18b8fb8f54 --- /dev/null +++ b/packages/opencode/src/util/sanitize-surrogates.test.ts @@ -0,0 +1,38 @@ +import { describe, test, expect } from "bun:test" +import { sanitizeSurrogates } from "./sanitize-surrogates" + +describe("sanitizeSurrogates", () => { + test("replaces lone high surrogate", () => { + expect(sanitizeSurrogates("\uD800")).toBe("\uFFFD") + }) + + test("replaces lone low surrogate", () => { + expect(sanitizeSurrogates("\uDC00")).toBe("\uFFFD") + }) + + test("preserves valid surrogate pair", () => { + const emoji = "\uD83D\uDE00" + expect(sanitizeSurrogates(emoji)).toBe(emoji) + }) + + test("preserves normal text", () => { + expect(sanitizeSurrogates("hello world")).toBe("hello world") + }) + + test("preserves Korean text", () => { + expect(sanitizeSurrogates("안녕하세요")).toBe("안녕하세요") + }) + + test("preserves empty string", () => { + expect(sanitizeSurrogates("")).toBe("") + }) + + test("replaces surrogate in middle", () => { + expect(sanitizeSurrogates("hello\uD800world")).toBe("hello\uFFFDworld") + }) + + test("result is well-formed", () => { + const result = sanitizeSurrogates("test\uD800\uDBFF\uDC00data\uDFFF") + expect(result.isWellFormed()).toBe(true) + }) +}) diff --git a/packages/opencode/src/util/sanitize-surrogates.ts b/packages/opencode/src/util/sanitize-surrogates.ts new file mode 100644 index 00000000000..1de3fdbbcd3 --- /dev/null +++ b/packages/opencode/src/util/sanitize-surrogates.ts @@ -0,0 +1,6 @@ +export function sanitizeSurrogates(s: string): string { + if (typeof s !== "string" || s.length === 0) return s + if (typeof s.isWellFormed === "function" && s.isWellFormed()) return s + if (typeof s.toWellFormed === "function") return s.toWellFormed() + return s.replace(/[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?