diff --git a/bun.lock b/bun.lock index 47f764c458b..7b66c80e765 100644 --- a/bun.lock +++ b/bun.lock @@ -794,6 +794,7 @@ "@t3-oss/env-core": "^0.13.8", "@trpc/client": "^11.7.1", "@trpc/server": "^11.7.1", + "@xterm/headless": "6.1.0-beta.197", "better-sqlite3": "12.6.2", "drizzle-orm": "0.45.2", "hono": "^4.8.5", diff --git a/packages/host-service/package.json b/packages/host-service/package.json index fd032f2ff1d..7a622eaf2cb 100644 --- a/packages/host-service/package.json +++ b/packages/host-service/package.json @@ -69,6 +69,7 @@ "@t3-oss/env-core": "^0.13.8", "@trpc/client": "^11.7.1", "@trpc/server": "^11.7.1", + "@xterm/headless": "6.1.0-beta.197", "better-sqlite3": "12.6.2", "drizzle-orm": "0.45.2", "hono": "^4.8.5", diff --git a/packages/host-service/src/terminal/terminal-mode-tracker.test.ts b/packages/host-service/src/terminal/terminal-mode-tracker.test.ts new file mode 100644 index 00000000000..0cea78b6ca4 --- /dev/null +++ b/packages/host-service/src/terminal/terminal-mode-tracker.test.ts @@ -0,0 +1,114 @@ +import { describe, expect, test } from "bun:test"; +import { createModeTracker } from "./terminal-mode-tracker"; + +const enc = new TextEncoder(); +const dec = new TextDecoder(); + +function preambleString(tracker: ReturnType): string { + const bytes = tracker.buildPreamble(); + return bytes ? dec.decode(bytes) : ""; +} + +describe("createModeTracker", () => { + test("default state needs no preamble", () => { + const t = createModeTracker(120, 32); + expect(t.buildPreamble()).toBeNull(); + t.dispose(); + }); + + test("kitty keyboard push survives many KB of unrelated output", () => { + const t = createModeTracker(120, 32); + t.feed(enc.encode("\x1b[>7u")); + + // 200 KB of filler — well past the host-service FIFO's 64 KiB cap. + // Tracker state is independent of the FIFO so flags should hold. + const filler = "x".repeat(2048); + for (let i = 0; i < 100; i += 1) { + t.feed(enc.encode(filler)); + } + + expect(preambleString(t)).toBe("\x1b[=7;1u"); + t.dispose(); + }); + + test("preamble drops kitty after explicit pop", () => { + const t = createModeTracker(120, 32); + t.feed(enc.encode("\x1b[>7u")); + expect(preambleString(t)).toBe("\x1b[=7;1u"); + + t.feed(enc.encode("\x1b[ { + const t = createModeTracker(120, 32); + t.feed(enc.encode("\x1b[>7u")); + t.feed(enc.encode("\x1b[=0;1u")); + expect(t.buildPreamble()).toBeNull(); + t.dispose(); + }); + + test("bracketed paste mode is captured", () => { + const t = createModeTracker(120, 32); + t.feed(enc.encode("\x1b[?2004h")); + expect(preambleString(t)).toContain("\x1b[?2004h"); + t.feed(enc.encode("\x1b[?2004l")); + expect(preambleString(t)).not.toContain("?2004"); + t.dispose(); + }); + + test("focus reporting and mouse tracking are captured", () => { + // `?1002h` is button-tracking, NOT SGR encoding (`?1006h`). xterm.js's + // public IModes doesn't expose mouse encoding format, so the preamble + // can't restore it — clients reattaching mid-session keep the default + // X10 encoding. Acceptable today; revisit if a TUI relying on SGR + // breaks on reattach. + const t = createModeTracker(120, 32); + t.feed(enc.encode("\x1b[?1004h\x1b[?1002h")); + const preamble = preambleString(t); + expect(preamble).toContain("\x1b[?1004h"); + expect(preamble).toContain("\x1b[?1002h"); + t.dispose(); + }); + + test("multi-mode preamble lists DEC modes before kitty", () => { + // Order matters: a peer applying the preamble should see DEC modes + // settle before the kitty Set, so a kitty-aware program reading back + // state via `\x1b[?u` query gets a consistent answer. + const t = createModeTracker(120, 32); + t.feed(enc.encode("\x1b[?2004h\x1b[?1004h\x1b[>7u")); + const p = preambleString(t); + expect(p.indexOf("\x1b[?2004h")).toBeGreaterThanOrEqual(0); + expect(p.indexOf("\x1b[?1004h")).toBeGreaterThanOrEqual(0); + expect(p.indexOf("\x1b[=7;1u")).toBeGreaterThan(p.indexOf("\x1b[?2004h")); + t.dispose(); + }); + + test("show-cursor only emitted when explicitly hidden", () => { + const t = createModeTracker(120, 32); + expect(t.buildPreamble()).toBeNull(); + t.feed(enc.encode("\x1b[?25l")); + expect(preambleString(t)).toContain("\x1b[?25l"); + t.dispose(); + }); + + test("resize is idempotent and doesn't reset mode state", () => { + const t = createModeTracker(120, 32); + t.feed(enc.encode("\x1b[>7u")); + t.resize(80, 24); + t.resize(80, 24); + t.resize(160, 50); + expect(preambleString(t)).toBe("\x1b[=7;1u"); + t.dispose(); + }); + + test("escape sequences split across feeds are still parsed", () => { + const t = createModeTracker(120, 32); + t.feed(enc.encode("\x1b[")); + t.feed(enc.encode(">7")); + t.feed(enc.encode("u")); + expect(preambleString(t)).toBe("\x1b[=7;1u"); + t.dispose(); + }); +}); diff --git a/packages/host-service/src/terminal/terminal-mode-tracker.ts b/packages/host-service/src/terminal/terminal-mode-tracker.ts new file mode 100644 index 00000000000..7534da741e7 --- /dev/null +++ b/packages/host-service/src/terminal/terminal-mode-tracker.ts @@ -0,0 +1,130 @@ +// Tracks terminal-mode state (kitty keyboard, bracketed paste, focus, mouse, +// app cursor, …) by feeding every PTY-output chunk through a headless +// xterm.js. `buildPreamble()` returns the byte sequence that brings a freshly +// reattached renderer xterm back to the modes the running program already +// believes are active. +// +// Live programs typically set these modes ONCE at startup (e.g. codex emits +// `\x1b[>7u` to enable kitty keyboard). Those bytes are broadcast straight to +// the live socket and never enter the FIFO replay, so a renderer reload +// reattaches a fresh xterm with default modes — Shift+Enter starts submitting +// instead of inserting newline, paste arrives as keystrokes, etc. +// +// Pattern adapted from VSCode's XtermSerializer +// (src/vs/platform/terminal/node/ptyService.ts). + +import { Terminal as HeadlessTerminal } from "@xterm/headless"; + +export interface ModeTracker { + feed(bytes: Uint8Array): void; + resize(cols: number, rows: number): void; + buildPreamble(): Uint8Array | null; + dispose(): void; +} + +// Reaches into private xterm internals: synchronous parsing and kitty +// keyboard flags aren't on the public API, but @xterm/headless and +// @xterm/xterm share the same engine, so the shape is stable. Used the same +// way by xterm's own SerializeAddon. +type HeadlessInternals = { + _core?: { + _writeBuffer?: { writeSync(data: string | Uint8Array): void }; + coreService?: { kittyKeyboard?: { flags: number } }; + optionsService?: { + rawOptions: { vtExtensions?: { kittyKeyboard?: boolean } }; + }; + }; +}; + +export function createModeTracker(cols: number, rows: number): ModeTracker { + const term = new HeadlessTerminal({ + cols, + rows, + // Tracker reads modes, never cells — keep scrollback minimal. + scrollback: 1, + allowProposedApi: true, + }); + const internals = term as unknown as HeadlessInternals; + + // Validate the private surface up front so a future @xterm/headless + // upgrade that renames internals fails loudly at session construction + // rather than silently throwing inside every PTY-output callback. + const optionsRaw = internals._core?.optionsService?.rawOptions; + const writeBuffer = internals._core?._writeBuffer; + if (!optionsRaw || typeof writeBuffer?.writeSync !== "function") { + throw new Error( + "@xterm/headless internals not found (optionsService.rawOptions, " + + "_writeBuffer.writeSync). Likely a version-pinning regression — " + + "check that the pinned version still exposes these.", + ); + } + + // `vtExtensions.kittyKeyboard` is in the public typings but the headless + // option sanitizer silently drops it (its DEFAULT_OPTIONS table omits the + // key). Without this, kitty handlers early-return and `\x1b[>7u` is a + // no-op. Set it on rawOptions directly. + optionsRaw.vtExtensions = { kittyKeyboard: true }; + + // `Terminal.write` is async-buffered, so `term.modes` lags behind feeds. + // Pump synchronously through the internal WriteBuffer so the preamble can + // be built immediately after a feed in the WS-attach hot path. + + const buildPreamble = (): Uint8Array | null => { + const m = term.modes; + const parts: string[] = []; + + if (m.applicationCursorKeysMode) parts.push("\x1b[?1h"); + if (m.applicationKeypadMode) parts.push("\x1b[?66h"); + if (m.bracketedPasteMode) parts.push("\x1b[?2004h"); + if (m.insertMode) parts.push("\x1b[4h"); + if (m.originMode) parts.push("\x1b[?6h"); + if (m.reverseWraparoundMode) parts.push("\x1b[?45h"); + if (m.sendFocusMode) parts.push("\x1b[?1004h"); + // Inverted: defaults true, only emit when explicitly disabled. + if (!m.showCursor) parts.push("\x1b[?25l"); + if (!m.wraparoundMode) parts.push("\x1b[?7l"); + // synchronizedOutputMode intentionally omitted — re-asserting it on + // attach would suspend rendering until the next end-marker. + + switch (m.mouseTrackingMode) { + case "x10": + parts.push("\x1b[?9h"); + break; + case "vt200": + parts.push("\x1b[?1000h"); + break; + case "drag": + parts.push("\x1b[?1002h"); + break; + case "any": + parts.push("\x1b[?1003h"); + break; + case "none": + break; + } + + const kittyFlags = internals._core?.coreService?.kittyKeyboard?.flags ?? 0; + if (kittyFlags > 0) { + // `=N;1u` sets flags directly — restoring effective state to a + // fresh peer, not modeling the program's push/pop stack. + parts.push(`\x1b[=${kittyFlags};1u`); + } + + if (parts.length === 0) return null; + return new TextEncoder().encode(parts.join("")); + }; + + return { + feed(bytes) { + writeBuffer.writeSync(bytes); + }, + resize(nextCols, nextRows) { + if (term.cols === nextCols && term.rows === nextRows) return; + term.resize(nextCols, nextRows); + }, + buildPreamble, + dispose() { + term.dispose(); + }, + }; +} diff --git a/packages/host-service/src/terminal/terminal.ts b/packages/host-service/src/terminal/terminal.ts index e4af8a67baa..33ea8e33806 100644 --- a/packages/host-service/src/terminal/terminal.ts +++ b/packages/host-service/src/terminal/terminal.ts @@ -29,6 +29,10 @@ import { getTerminalBaseEnv, resolveLaunchShell, } from "./env.ts"; +import { + createModeTracker, + type ModeTracker, +} from "./terminal-mode-tracker.ts"; /** * Thin adapter exposing approximately the IPty surface that the rest of @@ -231,6 +235,13 @@ interface TerminalSession { * actually broadcast to the renderer. */ portHintDecoder: StringDecoder; + + /** + * Mirrors PTY output through a headless xterm so a reattaching renderer + * can be resynced via a mode preamble — covers kitty keyboard, bracketed + * paste, focus, mouse, etc. that the FIFO can't restore on its own. + */ + modeTracker: ModeTracker; } /** PTY lifetime is independent of socket lifetime — sockets detach/reattach freely. */ @@ -268,6 +279,11 @@ onDaemonDisconnect((err) => { } session.unsubscribeDaemon = null; } + try { + session.modeTracker.dispose(); + } catch { + // best-effort + } } sessions.clear(); }); @@ -289,6 +305,11 @@ export function __resetSessionsForTesting(): void { // best-effort } } + try { + session.modeTracker.dispose(); + } catch { + // best-effort + } } sessions.clear(); } @@ -427,11 +448,22 @@ function broadcastBytes(session: TerminalSession, bytes: Uint8Array): number { } function replayBuffer(session: TerminalSession, socket: TerminalSocket) { - if (session.buffer.length === 0) return; - let total = 0; - for (const b of session.buffer) total += b.byteLength; - const combined = new Uint8Array(total); + // Preamble first, then FIFO. Mode-setting escapes (kitty keyboard, + // bracketed paste, focus, …) are typically emitted once at startup and + // broadcast away rather than buffered, so a fresh xterm needs them + // re-asserted on every attach — even when the FIFO is empty. + const preamble = session.modeTracker.buildPreamble(); + let bufferTotal = 0; + for (const b of session.buffer) bufferTotal += b.byteLength; + const preambleLen = preamble?.byteLength ?? 0; + if (preambleLen === 0 && bufferTotal === 0) return; + + const combined = new Uint8Array(preambleLen + bufferTotal); let offset = 0; + if (preamble) { + combined.set(preamble, offset); + offset += preamble.byteLength; + } for (const b of session.buffer) { combined.set(b, offset); offset += b.byteLength; @@ -457,7 +489,9 @@ function resolveShellReady( } // Flush held marker bytes — they weren't part of a full marker if (session.scanState.heldBytes.length > 0) { - bufferOutput(session, Uint8Array.from(session.scanState.heldBytes)); + const heldBytes = Uint8Array.from(session.scanState.heldBytes); + session.modeTracker.feed(heldBytes); + bufferOutput(session, heldBytes); session.scanState.heldBytes.length = 0; } session.scanState.matchPos = 0; @@ -517,6 +551,11 @@ export function disposeSession(terminalId: string, db: HostDb) { } session.unsubscribeDaemon = null; } + try { + session.modeTracker.dispose(); + } catch { + // best-effort + } sessions.delete(terminalId); } @@ -770,6 +809,7 @@ export async function createTerminalSessionInternal({ // host-service lifetime — flag it as queued so we don't double-fire it. initialCommandQueued: isAdopted, portHintDecoder: new StringDecoder("utf8"), + modeTracker: createModeTracker(cols, rows), }; sessions.set(terminalId, session); portManager.upsertSession(terminalId, workspaceId, pty.pid); @@ -819,6 +859,10 @@ export async function createTerminalSessionInternal({ ); if (hintText.length > 0) portManager.checkOutputForHint(hintText); + // Feed the tracker on every byte — broadcast skips the FIFO, + // so this is the only path that catches startup mode escapes. + session.modeTracker.feed(bytes); + if (broadcastBytes(session, bytes) === 0) { bufferOutput(session, bytes); } @@ -1071,6 +1115,7 @@ export function registerWorkspaceTerminalRoute({ DEFAULT_TERMINAL_ROWS, ); session.pty.resize(cols, rows); + session.modeTracker.resize(cols, rows); } }, diff --git a/plans/20260507-terminal-mode-replay.md b/plans/20260507-terminal-mode-replay.md new file mode 100644 index 00000000000..dfe64b2f08f --- /dev/null +++ b/plans/20260507-terminal-mode-replay.md @@ -0,0 +1,64 @@ +# Terminal mode replay on reattach + +## Bug + +Codex TUI (and other kitty-aware TUIs) lose Shift+Enter after a renderer refresh +mid-conversation. Repro confirmed: refresh renderer → Shift+Enter submits +instead of inserting newline. + +## Why + +- Codex emits `\x1b[>7u` once at startup to enable kitty keyboard protocol. +- xterm.js encodes Shift+Enter as `\x1b[13;2u` only when `kittyKeyboard.flags > 0`. +- Host-service replay buffer is a 64 KiB FIFO (`terminal.ts:159`). Long + conversations evict the early `\x1b[>7u`. +- On reload, fresh `XTerm` (flags = 0) attaches and replay no longer contains + the kitty enable. Codex never re-pushes. +- Same class of bug latent for: bracketed paste, focus, mouse, app cursor, + cursor visibility. + +## Prior art + +| Project | Backend state | Mode replay | Solves it | +|---|---|---|---| +| VSCode | `@xterm/headless` ingesting PTY | `serialize({ excludeModes: true })` — strips modes intentionally; partial DOM-side workarounds | Partially | +| Tabby | none | none | No | +| Wave | 256 KiB FIFO | none | No | +| cmux | scrollback string | none | No | + +VSCode has the right scaffolding (headless xterm on host), wrong policy +(excludes modes from replay). + +## Fix + +Adopt VSCode's scaffolding, fix the policy: + +1. `packages/host-service`: add `@xterm/headless`, feed every PTY chunk to it + alongside the existing FIFO append. +2. In `replayBuffer` (`terminal.ts:429`), build a preamble from headless state + and send before the FIFO bytes: + - kitty: `headless._core.coreService.kittyKeyboard.flags` → `\x1b[=N;1u` + - bracketed paste: `headless.modes.bracketedPasteMode` → `\x1b[?2004h` + - focus: `\x1b[?1004h` + - mouse tracking (x10/vt200/drag/any): `\x1b[?9h` / `?1000h` / `?1002h` / + `?1003h`. Mouse *encoding* (`?1006`/`?1015`/`?1016`) is **not** covered + — xterm.js's public `IModes` doesn't expose it. Clients reattaching + keep the default X10 encoding; revisit if it bites. + - app cursor: `\x1b[?1h` + - cursor visibility: `\x1b[?25h/l` +3. Test: pump >64 KiB of arbitrary output through the writer after a kitty + push, run `replayBuffer`, assert the preamble bytes appear before the FIFO + bytes. + +## Notes + +- `@xterm/headless` is a small npm package, Node-compatible, same engine as + xterm.js so the kitty private accessor matches the renderer. +- Renderer/codex untouched — fix is fully contained in host-service. +- Codex's own self-heal paths (`with_restored`, suspend/resume) already + re-push on demand, so the new preamble doesn't conflict. + +## Out of scope + +- xterm.js / SerializeAddon upstream PR to add a `modes` opt-in. +- Compressing/persisting the FIFO across host restarts.