From cd1f109a0f18ee9f3874e90545d39de534afe5be Mon Sep 17 00:00:00 2001 From: Kiet Ho Date: Thu, 7 May 2026 21:05:57 -0700 Subject: [PATCH] fix(host-service): restore terminal modes on reattach via headless xterm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Renderer refreshes mid-session were dropping kitty keyboard, bracketed paste, focus reporting, mouse, and other DEC private modes — codex CLI's Shift+Enter started submitting instead of inserting a newline because the mode-setting escape (`\x1b[>7u`) was emitted once at startup, broadcast straight to the live socket, and never made it into the FIFO replay. Mirror VSCode's `XtermSerializer` approach: feed every PTY chunk through an `@xterm/headless` instance that tracks current mode state. On `replayBuffer`, prepend a preamble built from the tracker so a freshly created xterm rejoins in the modes the running program already believes are active. The preamble is now sent on every attach (even when the FIFO is empty), so the renderer is never wedged at default modes. Two headless-specific quirks worth flagging: - `Terminal.write` is async-buffered; we use the internal `_core._writeBuffer.writeSync(...)` so `term.modes` is up-to-date when we build the preamble. Same engine, same path xterm's own SerializeAddon takes. - `vtExtensions.kittyKeyboard` is in the public typings but headless's option sanitizer drops it, so kitty handlers early-return. Inject it on `rawOptions` post-construction. Plan: plans/20260507-terminal-mode-replay.md --- bun.lock | 1 + packages/host-service/package.json | 1 + .../terminal/terminal-mode-tracker.test.ts | 114 +++++++++++++++ .../src/terminal/terminal-mode-tracker.ts | 130 ++++++++++++++++++ .../host-service/src/terminal/terminal.ts | 55 +++++++- plans/20260507-terminal-mode-replay.md | 64 +++++++++ 6 files changed, 360 insertions(+), 5 deletions(-) create mode 100644 packages/host-service/src/terminal/terminal-mode-tracker.test.ts create mode 100644 packages/host-service/src/terminal/terminal-mode-tracker.ts create mode 100644 plans/20260507-terminal-mode-replay.md diff --git a/bun.lock b/bun.lock index c17af3d7927..0a1a0fa427f 100644 --- a/bun.lock +++ b/bun.lock @@ -791,6 +791,7 @@ "@t3-oss/env-core": "^0.13.8", "@trpc/client": "^11.7.1", "@trpc/server": "^11.7.1", + "@xterm/headless": "6.1.0-beta.197", "better-sqlite3": "12.6.2", "drizzle-orm": "0.45.2", "hono": "^4.8.5", diff --git a/packages/host-service/package.json b/packages/host-service/package.json index d48953e4b65..5cb243b8d1b 100644 --- a/packages/host-service/package.json +++ b/packages/host-service/package.json @@ -68,6 +68,7 @@ "@t3-oss/env-core": "^0.13.8", "@trpc/client": "^11.7.1", "@trpc/server": "^11.7.1", + "@xterm/headless": "6.1.0-beta.197", "better-sqlite3": "12.6.2", "drizzle-orm": "0.45.2", "hono": "^4.8.5", diff --git a/packages/host-service/src/terminal/terminal-mode-tracker.test.ts b/packages/host-service/src/terminal/terminal-mode-tracker.test.ts new file mode 100644 index 00000000000..0cea78b6ca4 --- /dev/null +++ b/packages/host-service/src/terminal/terminal-mode-tracker.test.ts @@ -0,0 +1,114 @@ +import { describe, expect, test } from "bun:test"; +import { createModeTracker } from "./terminal-mode-tracker"; + +const enc = new TextEncoder(); +const dec = new TextDecoder(); + +function preambleString(tracker: ReturnType): string { + const bytes = tracker.buildPreamble(); + return bytes ? dec.decode(bytes) : ""; +} + +describe("createModeTracker", () => { + test("default state needs no preamble", () => { + const t = createModeTracker(120, 32); + expect(t.buildPreamble()).toBeNull(); + t.dispose(); + }); + + test("kitty keyboard push survives many KB of unrelated output", () => { + const t = createModeTracker(120, 32); + t.feed(enc.encode("\x1b[>7u")); + + // 200 KB of filler — well past the host-service FIFO's 64 KiB cap. + // Tracker state is independent of the FIFO so flags should hold. + const filler = "x".repeat(2048); + for (let i = 0; i < 100; i += 1) { + t.feed(enc.encode(filler)); + } + + expect(preambleString(t)).toBe("\x1b[=7;1u"); + t.dispose(); + }); + + test("preamble drops kitty after explicit pop", () => { + const t = createModeTracker(120, 32); + t.feed(enc.encode("\x1b[>7u")); + expect(preambleString(t)).toBe("\x1b[=7;1u"); + + t.feed(enc.encode("\x1b[ { + const t = createModeTracker(120, 32); + t.feed(enc.encode("\x1b[>7u")); + t.feed(enc.encode("\x1b[=0;1u")); + expect(t.buildPreamble()).toBeNull(); + t.dispose(); + }); + + test("bracketed paste mode is captured", () => { + const t = createModeTracker(120, 32); + t.feed(enc.encode("\x1b[?2004h")); + expect(preambleString(t)).toContain("\x1b[?2004h"); + t.feed(enc.encode("\x1b[?2004l")); + expect(preambleString(t)).not.toContain("?2004"); + t.dispose(); + }); + + test("focus reporting and mouse tracking are captured", () => { + // `?1002h` is button-tracking, NOT SGR encoding (`?1006h`). xterm.js's + // public IModes doesn't expose mouse encoding format, so the preamble + // can't restore it — clients reattaching mid-session keep the default + // X10 encoding. Acceptable today; revisit if a TUI relying on SGR + // breaks on reattach. + const t = createModeTracker(120, 32); + t.feed(enc.encode("\x1b[?1004h\x1b[?1002h")); + const preamble = preambleString(t); + expect(preamble).toContain("\x1b[?1004h"); + expect(preamble).toContain("\x1b[?1002h"); + t.dispose(); + }); + + test("multi-mode preamble lists DEC modes before kitty", () => { + // Order matters: a peer applying the preamble should see DEC modes + // settle before the kitty Set, so a kitty-aware program reading back + // state via `\x1b[?u` query gets a consistent answer. + const t = createModeTracker(120, 32); + t.feed(enc.encode("\x1b[?2004h\x1b[?1004h\x1b[>7u")); + const p = preambleString(t); + expect(p.indexOf("\x1b[?2004h")).toBeGreaterThanOrEqual(0); + expect(p.indexOf("\x1b[?1004h")).toBeGreaterThanOrEqual(0); + expect(p.indexOf("\x1b[=7;1u")).toBeGreaterThan(p.indexOf("\x1b[?2004h")); + t.dispose(); + }); + + test("show-cursor only emitted when explicitly hidden", () => { + const t = createModeTracker(120, 32); + expect(t.buildPreamble()).toBeNull(); + t.feed(enc.encode("\x1b[?25l")); + expect(preambleString(t)).toContain("\x1b[?25l"); + t.dispose(); + }); + + test("resize is idempotent and doesn't reset mode state", () => { + const t = createModeTracker(120, 32); + t.feed(enc.encode("\x1b[>7u")); + t.resize(80, 24); + t.resize(80, 24); + t.resize(160, 50); + expect(preambleString(t)).toBe("\x1b[=7;1u"); + t.dispose(); + }); + + test("escape sequences split across feeds are still parsed", () => { + const t = createModeTracker(120, 32); + t.feed(enc.encode("\x1b[")); + t.feed(enc.encode(">7")); + t.feed(enc.encode("u")); + expect(preambleString(t)).toBe("\x1b[=7;1u"); + t.dispose(); + }); +}); diff --git a/packages/host-service/src/terminal/terminal-mode-tracker.ts b/packages/host-service/src/terminal/terminal-mode-tracker.ts new file mode 100644 index 00000000000..7534da741e7 --- /dev/null +++ b/packages/host-service/src/terminal/terminal-mode-tracker.ts @@ -0,0 +1,130 @@ +// Tracks terminal-mode state (kitty keyboard, bracketed paste, focus, mouse, +// app cursor, …) by feeding every PTY-output chunk through a headless +// xterm.js. `buildPreamble()` returns the byte sequence that brings a freshly +// reattached renderer xterm back to the modes the running program already +// believes are active. +// +// Live programs typically set these modes ONCE at startup (e.g. codex emits +// `\x1b[>7u` to enable kitty keyboard). Those bytes are broadcast straight to +// the live socket and never enter the FIFO replay, so a renderer reload +// reattaches a fresh xterm with default modes — Shift+Enter starts submitting +// instead of inserting newline, paste arrives as keystrokes, etc. +// +// Pattern adapted from VSCode's XtermSerializer +// (src/vs/platform/terminal/node/ptyService.ts). + +import { Terminal as HeadlessTerminal } from "@xterm/headless"; + +export interface ModeTracker { + feed(bytes: Uint8Array): void; + resize(cols: number, rows: number): void; + buildPreamble(): Uint8Array | null; + dispose(): void; +} + +// Reaches into private xterm internals: synchronous parsing and kitty +// keyboard flags aren't on the public API, but @xterm/headless and +// @xterm/xterm share the same engine, so the shape is stable. Used the same +// way by xterm's own SerializeAddon. +type HeadlessInternals = { + _core?: { + _writeBuffer?: { writeSync(data: string | Uint8Array): void }; + coreService?: { kittyKeyboard?: { flags: number } }; + optionsService?: { + rawOptions: { vtExtensions?: { kittyKeyboard?: boolean } }; + }; + }; +}; + +export function createModeTracker(cols: number, rows: number): ModeTracker { + const term = new HeadlessTerminal({ + cols, + rows, + // Tracker reads modes, never cells — keep scrollback minimal. + scrollback: 1, + allowProposedApi: true, + }); + const internals = term as unknown as HeadlessInternals; + + // Validate the private surface up front so a future @xterm/headless + // upgrade that renames internals fails loudly at session construction + // rather than silently throwing inside every PTY-output callback. + const optionsRaw = internals._core?.optionsService?.rawOptions; + const writeBuffer = internals._core?._writeBuffer; + if (!optionsRaw || typeof writeBuffer?.writeSync !== "function") { + throw new Error( + "@xterm/headless internals not found (optionsService.rawOptions, " + + "_writeBuffer.writeSync). Likely a version-pinning regression — " + + "check that the pinned version still exposes these.", + ); + } + + // `vtExtensions.kittyKeyboard` is in the public typings but the headless + // option sanitizer silently drops it (its DEFAULT_OPTIONS table omits the + // key). Without this, kitty handlers early-return and `\x1b[>7u` is a + // no-op. Set it on rawOptions directly. + optionsRaw.vtExtensions = { kittyKeyboard: true }; + + // `Terminal.write` is async-buffered, so `term.modes` lags behind feeds. + // Pump synchronously through the internal WriteBuffer so the preamble can + // be built immediately after a feed in the WS-attach hot path. + + const buildPreamble = (): Uint8Array | null => { + const m = term.modes; + const parts: string[] = []; + + if (m.applicationCursorKeysMode) parts.push("\x1b[?1h"); + if (m.applicationKeypadMode) parts.push("\x1b[?66h"); + if (m.bracketedPasteMode) parts.push("\x1b[?2004h"); + if (m.insertMode) parts.push("\x1b[4h"); + if (m.originMode) parts.push("\x1b[?6h"); + if (m.reverseWraparoundMode) parts.push("\x1b[?45h"); + if (m.sendFocusMode) parts.push("\x1b[?1004h"); + // Inverted: defaults true, only emit when explicitly disabled. + if (!m.showCursor) parts.push("\x1b[?25l"); + if (!m.wraparoundMode) parts.push("\x1b[?7l"); + // synchronizedOutputMode intentionally omitted — re-asserting it on + // attach would suspend rendering until the next end-marker. + + switch (m.mouseTrackingMode) { + case "x10": + parts.push("\x1b[?9h"); + break; + case "vt200": + parts.push("\x1b[?1000h"); + break; + case "drag": + parts.push("\x1b[?1002h"); + break; + case "any": + parts.push("\x1b[?1003h"); + break; + case "none": + break; + } + + const kittyFlags = internals._core?.coreService?.kittyKeyboard?.flags ?? 0; + if (kittyFlags > 0) { + // `=N;1u` sets flags directly — restoring effective state to a + // fresh peer, not modeling the program's push/pop stack. + parts.push(`\x1b[=${kittyFlags};1u`); + } + + if (parts.length === 0) return null; + return new TextEncoder().encode(parts.join("")); + }; + + return { + feed(bytes) { + writeBuffer.writeSync(bytes); + }, + resize(nextCols, nextRows) { + if (term.cols === nextCols && term.rows === nextRows) return; + term.resize(nextCols, nextRows); + }, + buildPreamble, + dispose() { + term.dispose(); + }, + }; +} diff --git a/packages/host-service/src/terminal/terminal.ts b/packages/host-service/src/terminal/terminal.ts index e4af8a67baa..33ea8e33806 100644 --- a/packages/host-service/src/terminal/terminal.ts +++ b/packages/host-service/src/terminal/terminal.ts @@ -29,6 +29,10 @@ import { getTerminalBaseEnv, resolveLaunchShell, } from "./env.ts"; +import { + createModeTracker, + type ModeTracker, +} from "./terminal-mode-tracker.ts"; /** * Thin adapter exposing approximately the IPty surface that the rest of @@ -231,6 +235,13 @@ interface TerminalSession { * actually broadcast to the renderer. */ portHintDecoder: StringDecoder; + + /** + * Mirrors PTY output through a headless xterm so a reattaching renderer + * can be resynced via a mode preamble — covers kitty keyboard, bracketed + * paste, focus, mouse, etc. that the FIFO can't restore on its own. + */ + modeTracker: ModeTracker; } /** PTY lifetime is independent of socket lifetime — sockets detach/reattach freely. */ @@ -268,6 +279,11 @@ onDaemonDisconnect((err) => { } session.unsubscribeDaemon = null; } + try { + session.modeTracker.dispose(); + } catch { + // best-effort + } } sessions.clear(); }); @@ -289,6 +305,11 @@ export function __resetSessionsForTesting(): void { // best-effort } } + try { + session.modeTracker.dispose(); + } catch { + // best-effort + } } sessions.clear(); } @@ -427,11 +448,22 @@ function broadcastBytes(session: TerminalSession, bytes: Uint8Array): number { } function replayBuffer(session: TerminalSession, socket: TerminalSocket) { - if (session.buffer.length === 0) return; - let total = 0; - for (const b of session.buffer) total += b.byteLength; - const combined = new Uint8Array(total); + // Preamble first, then FIFO. Mode-setting escapes (kitty keyboard, + // bracketed paste, focus, …) are typically emitted once at startup and + // broadcast away rather than buffered, so a fresh xterm needs them + // re-asserted on every attach — even when the FIFO is empty. + const preamble = session.modeTracker.buildPreamble(); + let bufferTotal = 0; + for (const b of session.buffer) bufferTotal += b.byteLength; + const preambleLen = preamble?.byteLength ?? 0; + if (preambleLen === 0 && bufferTotal === 0) return; + + const combined = new Uint8Array(preambleLen + bufferTotal); let offset = 0; + if (preamble) { + combined.set(preamble, offset); + offset += preamble.byteLength; + } for (const b of session.buffer) { combined.set(b, offset); offset += b.byteLength; @@ -457,7 +489,9 @@ function resolveShellReady( } // Flush held marker bytes — they weren't part of a full marker if (session.scanState.heldBytes.length > 0) { - bufferOutput(session, Uint8Array.from(session.scanState.heldBytes)); + const heldBytes = Uint8Array.from(session.scanState.heldBytes); + session.modeTracker.feed(heldBytes); + bufferOutput(session, heldBytes); session.scanState.heldBytes.length = 0; } session.scanState.matchPos = 0; @@ -517,6 +551,11 @@ export function disposeSession(terminalId: string, db: HostDb) { } session.unsubscribeDaemon = null; } + try { + session.modeTracker.dispose(); + } catch { + // best-effort + } sessions.delete(terminalId); } @@ -770,6 +809,7 @@ export async function createTerminalSessionInternal({ // host-service lifetime — flag it as queued so we don't double-fire it. initialCommandQueued: isAdopted, portHintDecoder: new StringDecoder("utf8"), + modeTracker: createModeTracker(cols, rows), }; sessions.set(terminalId, session); portManager.upsertSession(terminalId, workspaceId, pty.pid); @@ -819,6 +859,10 @@ export async function createTerminalSessionInternal({ ); if (hintText.length > 0) portManager.checkOutputForHint(hintText); + // Feed the tracker on every byte — broadcast skips the FIFO, + // so this is the only path that catches startup mode escapes. + session.modeTracker.feed(bytes); + if (broadcastBytes(session, bytes) === 0) { bufferOutput(session, bytes); } @@ -1071,6 +1115,7 @@ export function registerWorkspaceTerminalRoute({ DEFAULT_TERMINAL_ROWS, ); session.pty.resize(cols, rows); + session.modeTracker.resize(cols, rows); } }, diff --git a/plans/20260507-terminal-mode-replay.md b/plans/20260507-terminal-mode-replay.md new file mode 100644 index 00000000000..dfe64b2f08f --- /dev/null +++ b/plans/20260507-terminal-mode-replay.md @@ -0,0 +1,64 @@ +# Terminal mode replay on reattach + +## Bug + +Codex TUI (and other kitty-aware TUIs) lose Shift+Enter after a renderer refresh +mid-conversation. Repro confirmed: refresh renderer → Shift+Enter submits +instead of inserting newline. + +## Why + +- Codex emits `\x1b[>7u` once at startup to enable kitty keyboard protocol. +- xterm.js encodes Shift+Enter as `\x1b[13;2u` only when `kittyKeyboard.flags > 0`. +- Host-service replay buffer is a 64 KiB FIFO (`terminal.ts:159`). Long + conversations evict the early `\x1b[>7u`. +- On reload, fresh `XTerm` (flags = 0) attaches and replay no longer contains + the kitty enable. Codex never re-pushes. +- Same class of bug latent for: bracketed paste, focus, mouse, app cursor, + cursor visibility. + +## Prior art + +| Project | Backend state | Mode replay | Solves it | +|---|---|---|---| +| VSCode | `@xterm/headless` ingesting PTY | `serialize({ excludeModes: true })` — strips modes intentionally; partial DOM-side workarounds | Partially | +| Tabby | none | none | No | +| Wave | 256 KiB FIFO | none | No | +| cmux | scrollback string | none | No | + +VSCode has the right scaffolding (headless xterm on host), wrong policy +(excludes modes from replay). + +## Fix + +Adopt VSCode's scaffolding, fix the policy: + +1. `packages/host-service`: add `@xterm/headless`, feed every PTY chunk to it + alongside the existing FIFO append. +2. In `replayBuffer` (`terminal.ts:429`), build a preamble from headless state + and send before the FIFO bytes: + - kitty: `headless._core.coreService.kittyKeyboard.flags` → `\x1b[=N;1u` + - bracketed paste: `headless.modes.bracketedPasteMode` → `\x1b[?2004h` + - focus: `\x1b[?1004h` + - mouse tracking (x10/vt200/drag/any): `\x1b[?9h` / `?1000h` / `?1002h` / + `?1003h`. Mouse *encoding* (`?1006`/`?1015`/`?1016`) is **not** covered + — xterm.js's public `IModes` doesn't expose it. Clients reattaching + keep the default X10 encoding; revisit if it bites. + - app cursor: `\x1b[?1h` + - cursor visibility: `\x1b[?25h/l` +3. Test: pump >64 KiB of arbitrary output through the writer after a kitty + push, run `replayBuffer`, assert the preamble bytes appear before the FIFO + bytes. + +## Notes + +- `@xterm/headless` is a small npm package, Node-compatible, same engine as + xterm.js so the kitty private accessor matches the renderer. +- Renderer/codex untouched — fix is fully contained in host-service. +- Codex's own self-heal paths (`with_restored`, suspend/resume) already + re-push on demand, so the new preamble doesn't conflict. + +## Out of scope + +- xterm.js / SerializeAddon upstream PR to add a `modes` opt-in. +- Compressing/persisting the FIFO across host restarts.