From fdd58c02f4a0174f20493cda8f70f4dde6bff7cb Mon Sep 17 00:00:00 2001 From: Satya Patel Date: Sat, 6 Jun 2026 15:47:43 -0700 Subject: [PATCH 1/6] fix(host-service): mark terminal session disposed only after confirmed kill A failed daemon close left the sqlite row marked disposed while the PTY stayed alive, so workspace-scoped cleanup could never find it again. Mark the row disposed only once the kill is confirmed; failed kills stay active and reapable. --- packages/host-service/src/terminal/terminal.ts | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/packages/host-service/src/terminal/terminal.ts b/packages/host-service/src/terminal/terminal.ts index ac481a807f..b4470b44cb 100644 --- a/packages/host-service/src/terminal/terminal.ts +++ b/packages/host-service/src/terminal/terminal.ts @@ -733,14 +733,17 @@ export async function disposeSessionAndWait( portManager.unregisterSession(terminalId); - db.update(terminalSessions) - .set({ status: "disposed", endedAt: Date.now() }) - .where(eq(terminalSessions.id, terminalId)) - .run(); - const closeResult = closePromise ? await closePromise : { attempted: false, succeeded: true }; + + if (closeResult.succeeded) { + db.update(terminalSessions) + .set({ status: "disposed", endedAt: Date.now() }) + .where(eq(terminalSessions.id, terminalId)) + .run(); + } + return { terminalId, daemonCloseAttempted: closeResult.attempted, From 93c5f850c1ef585ac23474ec20b5322c8fbe3c62 Mon Sep 17 00:00:00 2001 From: Satya Patel Date: Sat, 6 Jun 2026 15:47:44 -0700 Subject: [PATCH 2/6] fix(host-service): clear dead terminal rows when destroying a workspace Delete the workspace's confirmed-dead terminal rows in the destroy saga so its session index dies with it instead of lingering as set-null orphans. Still-active rows (failed kills) are kept reachable for the reaper. 
--- .../workspace-cleanup/workspace-cleanup.ts | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/packages/host-service/src/trpc/router/workspace-cleanup/workspace-cleanup.ts b/packages/host-service/src/trpc/router/workspace-cleanup/workspace-cleanup.ts index 191c394eb0..a611c5d2ae 100644 --- a/packages/host-service/src/trpc/router/workspace-cleanup/workspace-cleanup.ts +++ b/packages/host-service/src/trpc/router/workspace-cleanup/workspace-cleanup.ts @@ -1,8 +1,8 @@ import { existsSync } from "node:fs"; import { TRPCError } from "@trpc/server"; -import { eq } from "drizzle-orm"; +import { and, eq, ne } from "drizzle-orm"; import { z } from "zod"; -import { workspaces } from "../../../db/schema"; +import { terminalSessions, workspaces } from "../../../db/schema"; import { invalidateLabelCache } from "../../../ports/static-ports"; import { runTeardown, type TeardownResult } from "../../../runtime/teardown"; import { disposeSessionsByWorkspaceId } from "../../../terminal/terminal"; @@ -276,6 +276,24 @@ async function runDestroy( warnings.push(`Failed to dispose terminal sessions: ${message}`); } + // Drop this workspace's terminal rows so its session index dies with it + // rather than lingering as `set null` orphans. Confirmed-dead rows only: + // a still-`active` row is a failed kill we keep reachable for the reaper. + try { + ctx.db + .delete(terminalSessions) + .where( + and( + eq(terminalSessions.originWorkspaceId, input.workspaceId), + ne(terminalSessions.status, "active"), + ), + ) + .run(); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + warnings.push(`Failed to clear terminal session rows: ${message}`); + } + // 2b. Worktree. Double-force unlocks the rare locked-worktree case and // clears stale metadata when the directory was manually removed. 
let worktreeRemoved = false; From 895f21446f2e2f66e89480f76297d7930c29c035 Mon Sep 17 00:00:00 2001 From: Satya Patel Date: Sat, 6 Jun 2026 15:47:44 -0700 Subject: [PATCH 3/6] feat(host-service): reap orphaned daemon PTYs on boot and on an interval The pty-daemon has no workspace mapping, so a PTY orphaned past workspace deletion can only be recovered from the daemon's live-session list. Add a reaper that kills daemon sessions whose row is dead/null-workspace, with a two-pass guard so a being-born session is never killed mid-creation. Runs at startup (drains pre-existing orphans) and every 5 minutes. --- packages/host-service/src/app.ts | 8 ++ .../host-service/src/terminal/reaper/index.ts | 5 + .../src/terminal/reaper/reaper.ts | 92 +++++++++++++++++++ 3 files changed, 105 insertions(+) create mode 100644 packages/host-service/src/terminal/reaper/index.ts create mode 100644 packages/host-service/src/terminal/reaper/reaper.ts diff --git a/packages/host-service/src/app.ts b/packages/host-service/src/app.ts index 0edf155063..d0cc73709f 100644 --- a/packages/host-service/src/app.ts +++ b/packages/host-service/src/app.ts @@ -17,6 +17,7 @@ import type { GitCredentialProvider } from "./runtime/git"; import { createGitFactory } from "./runtime/git"; import { runMainWorkspaceSweep } from "./runtime/main-workspace-sweep"; import { PullRequestRuntimeManager } from "./runtime/pull-requests"; +import { startTerminalReaper } from "./terminal/reaper"; import { registerWorkspaceTerminalRoute } from "./terminal/terminal"; import { TerminalAgentStore } from "./terminal-agents"; import { appRouter } from "./trpc/router"; @@ -162,6 +163,8 @@ export function createApp(options: CreateAppOptions): CreateAppResult { upgradeWebSocket, }); + const stopTerminalReaper = startTerminalReaper(db); + app.use( "/trpc/*", trpcServer({ @@ -189,6 +192,11 @@ export function createApp(options: CreateAppOptions): CreateAppResult { // Each step is best-effort and isolated: a throw in one cleanup must // 
not skip the others, otherwise a flaky `.stop()` could leak the // open SQLite handle for the rest of the process lifetime. + try { + stopTerminalReaper(); + } catch (err) { + console.warn("[host-service] stopTerminalReaper failed:", err); + } try { pullRequestRuntime.stop(); } catch (err) { diff --git a/packages/host-service/src/terminal/reaper/index.ts b/packages/host-service/src/terminal/reaper/index.ts new file mode 100644 index 0000000000..5ad4dfd303 --- /dev/null +++ b/packages/host-service/src/terminal/reaper/index.ts @@ -0,0 +1,5 @@ +export { + type ReapResult, + reapOrphanedSessions, + startTerminalReaper, +} from "./reaper.ts"; diff --git a/packages/host-service/src/terminal/reaper/reaper.ts b/packages/host-service/src/terminal/reaper/reaper.ts new file mode 100644 index 0000000000..d2b441c4fb --- /dev/null +++ b/packages/host-service/src/terminal/reaper/reaper.ts @@ -0,0 +1,92 @@ +import type { HostDb } from "../../db/index.ts"; +import { terminalSessions } from "../../db/schema.ts"; +import { getDaemonClient } from "../daemon-client-singleton.ts"; +import { disposeSessionAndWait } from "../terminal.ts"; + +export interface ReapResult { + reaped: number; + failed: number; +} + +const REAP_INTERVAL_MS = 5 * 60 * 1000; + +const rowlessSessionsPendingSecondPass = new Set<string>(); + +export function startTerminalReaper(db: HostDb): () => void { + const run = () => { + void reapOrphanedSessions(db) + .then((result) => { + if (result.reaped > 0 || result.failed > 0) { + console.log( + `[host-service] terminal reaper: ${result.reaped} reaped, ${result.failed} failed`, + ); + } + }) + .catch((err) => { + console.warn("[host-service] terminal reaper failed:", err); + }); + }; + run(); + const interval = setInterval(run, REAP_INTERVAL_MS); + interval.unref(); + return () => clearInterval(interval); +} + +export async function reapOrphanedSessions(db: HostDb): Promise<ReapResult> { + const daemon = await getDaemonClient(); + const liveSessions = (await
daemon.list()).filter((session) => session.alive); + const liveIds = new Set(liveSessions.map((session) => session.id)); + + for (const id of rowlessSessionsPendingSecondPass) { + if (!liveIds.has(id)) rowlessSessionsPendingSecondPass.delete(id); + } + + if (liveSessions.length === 0) return { reaped: 0, failed: 0 }; + + const rows = db + .select({ + id: terminalSessions.id, + status: terminalSessions.status, + originWorkspaceId: terminalSessions.originWorkspaceId, + }) + .from(terminalSessions) + .all(); + const rowById = new Map(rows.map((row) => [row.id, row])); + + const orphanIds: string[] = []; + const stillRowless = new Set<string>(); + for (const session of liveSessions) { + const row = rowById.get(session.id); + if (!row) { + if (rowlessSessionsPendingSecondPass.has(session.id)) { + orphanIds.push(session.id); + } else { + stillRowless.add(session.id); + } + continue; + } + if ( + row.status === "disposed" || + row.status === "exited" || + !row.originWorkspaceId + ) { + orphanIds.push(session.id); + } + } + + rowlessSessionsPendingSecondPass.clear(); + for (const id of stillRowless) rowlessSessionsPendingSecondPass.add(id); + + let reaped = 0; + let failed = 0; + for (const id of orphanIds) { + try { + const result = await disposeSessionAndWait(id, db); + if (result.daemonCloseSucceeded) reaped += 1; + else failed += 1; + } catch { + failed += 1; + } + } + return { reaped, failed }; +} From 36d8d3540d6e87407128a195dfc9f231a1cd9611 Mon Sep 17 00:00:00 2001 From: Satya Patel Date: Sat, 6 Jun 2026 17:06:38 -0700 Subject: [PATCH 4/6] fix(host-service): start terminal reaper from the serve entry, not createApp Starting the reaper inside createApp made it eagerly connect the pty-daemon client at construction time, which raced integration tests that configure a custom daemon socket after building the app (teardown terminals never spawned -> timeouts).
Move it to the serve listen callback, alongside connectRelay, so it only runs in the real host process and after daemon bootstrap. Expose db on CreateAppResult to wire it. --- packages/host-service/src/app.ts | 11 ++--------- packages/host-service/src/serve.ts | 5 ++++- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/packages/host-service/src/app.ts b/packages/host-service/src/app.ts index d0cc73709f..c91b9118cb 100644 --- a/packages/host-service/src/app.ts +++ b/packages/host-service/src/app.ts @@ -17,7 +17,6 @@ import type { GitCredentialProvider } from "./runtime/git"; import { createGitFactory } from "./runtime/git"; import { runMainWorkspaceSweep } from "./runtime/main-workspace-sweep"; import { PullRequestRuntimeManager } from "./runtime/pull-requests"; -import { startTerminalReaper } from "./terminal/reaper"; import { registerWorkspaceTerminalRoute } from "./terminal/terminal"; import { TerminalAgentStore } from "./terminal-agents"; import { appRouter } from "./trpc/router"; @@ -61,6 +60,7 @@ export interface CreateAppResult { app: Hono; injectWebSocket: ReturnType["injectWebSocket"]; api: ApiClient; + db: HostDb; dispose: () => Promise; } @@ -163,8 +163,6 @@ export function createApp(options: CreateAppOptions): CreateAppResult { upgradeWebSocket, }); - const stopTerminalReaper = startTerminalReaper(db); - app.use( "/trpc/*", trpcServer({ @@ -192,11 +190,6 @@ export function createApp(options: CreateAppOptions): CreateAppResult { // Each step is best-effort and isolated: a throw in one cleanup must // not skip the others, otherwise a flaky `.stop()` could leak the // open SQLite handle for the rest of the process lifetime. 
- try { - stopTerminalReaper(); - } catch (err) { - console.warn("[host-service] stopTerminalReaper failed:", err); - } try { pullRequestRuntime.stop(); } catch (err) { @@ -221,5 +214,5 @@ export function createApp(options: CreateAppOptions): CreateAppResult { } }; - return { app, injectWebSocket, api, dispose }; + return { app, injectWebSocket, api, db, dispose }; } diff --git a/packages/host-service/src/serve.ts b/packages/host-service/src/serve.ts index 28e7ea1527..26943737fc 100644 --- a/packages/host-service/src/serve.ts +++ b/packages/host-service/src/serve.ts @@ -11,6 +11,7 @@ import { PskHostAuthProvider } from "./providers/host-auth"; import { LocalModelProvider } from "./providers/model-providers"; import { installProcessSafetyNet } from "./safety"; import { initTerminalBaseEnv, resolveTerminalBaseEnv } from "./terminal/env"; +import { startTerminalReaper } from "./terminal/reaper"; import { connectRelay } from "./tunnel"; async function main(): Promise { @@ -45,7 +46,7 @@ async function main(): Promise { apiUrl: env.SUPERSET_API_URL, }); - const { app, injectWebSocket, api } = createApp({ + const { app, injectWebSocket, api, db } = createApp({ config: { organizationId: env.ORGANIZATION_ID, dbPath: env.HOST_DB_PATH, @@ -95,6 +96,8 @@ async function main(): Promise { installProcessSafetyNet(); console.log(`[host-service] listening on http://localhost:${info.port}`); + startTerminalReaper(db); + if (env.RELAY_URL) { void connectRelay({ api, From 34838d0422c976d7bb491b00616600fd4555e9cd Mon Sep 17 00:00:00 2001 From: Satya Patel Date: Sat, 6 Jun 2026 17:06:38 -0700 Subject: [PATCH 5/6] fix(host-service): guard overlapping reaper passes and scope pending state - Skip a pass when one is already in flight, so setInterval can't interleave two passes racing on the pending-second-pass set. - Move that set into the startTerminalReaper closure so it can't leak across instances/restarts. 
- Re-queue a failed second-pass rowless kill so it retries on the next pass instead of restarting its two-pass clock. --- .../host-service/src/terminal/reaper/index.ts | 6 +- .../src/terminal/reaper/reaper.ts | 97 +++++++++++-------- 2 files changed, 56 insertions(+), 47 deletions(-) diff --git a/packages/host-service/src/terminal/reaper/index.ts b/packages/host-service/src/terminal/reaper/index.ts index 5ad4dfd303..ccf4e5921d 100644 --- a/packages/host-service/src/terminal/reaper/index.ts +++ b/packages/host-service/src/terminal/reaper/index.ts @@ -1,5 +1 @@ -export { - type ReapResult, - reapOrphanedSessions, - startTerminalReaper, -} from "./reaper.ts"; +export { startTerminalReaper } from "./reaper.ts"; diff --git a/packages/host-service/src/terminal/reaper/reaper.ts b/packages/host-service/src/terminal/reaper/reaper.ts index d2b441c4fb..67d51fbf29 100644 --- a/packages/host-service/src/terminal/reaper/reaper.ts +++ b/packages/host-service/src/terminal/reaper/reaper.ts @@ -3,46 +3,24 @@ import { terminalSessions } from "../../db/schema.ts"; import { getDaemonClient } from "../daemon-client-singleton.ts"; import { disposeSessionAndWait } from "../terminal.ts"; -export interface ReapResult { +interface ReapResult { reaped: number; failed: number; } const REAP_INTERVAL_MS = 5 * 60 * 1000; -const rowlessSessionsPendingSecondPass = new Set<string>(); - -export function startTerminalReaper(db: HostDb): () => void { - const run = () => { - void reapOrphanedSessions(db) - .then((result) => { - if (result.reaped > 0 || result.failed > 0) { - console.log( - `[host-service] terminal reaper: ${result.reaped} reaped, ${result.failed} failed`, - ); - } - }) - .catch((err) => { - console.warn("[host-service] terminal reaper failed:", err); - }); - }; - run(); - const interval = setInterval(run, REAP_INTERVAL_MS); - interval.unref(); - return () => clearInterval(interval); -} - -export async function reapOrphanedSessions(db: HostDb): Promise<ReapResult> { +async function reapOrphanedSessions( +
db: HostDb, + rowlessPendingSecondPass: Set<string>, +): Promise<ReapResult> { const daemon = await getDaemonClient(); const liveSessions = (await daemon.list()).filter((session) => session.alive); - const liveIds = new Set(liveSessions.map((session) => session.id)); - - for (const id of rowlessSessionsPendingSecondPass) { - if (!liveIds.has(id)) rowlessSessionsPendingSecondPass.delete(id); + if (liveSessions.length === 0) { + rowlessPendingSecondPass.clear(); + return { reaped: 0, failed: 0 }; } - if (liveSessions.length === 0) return { reaped: 0, failed: 0 }; - const rows = db .select({ id: terminalSessions.id, @@ -53,13 +31,13 @@ export async function reapOrphanedSessions(db: HostDb): Promise<ReapResult> { .all(); const rowById = new Map(rows.map((row) => [row.id, row])); - const orphanIds: string[] = []; + const orphans: { id: string; rowless: boolean }[] = []; const stillRowless = new Set<string>(); for (const session of liveSessions) { const row = rowById.get(session.id); if (!row) { - if (rowlessSessionsPendingSecondPass.has(session.id)) { - orphanIds.push(session.id); + if (rowlessPendingSecondPass.has(session.id)) { + orphans.push({ id: session.id, rowless: true }); } else { stillRowless.add(session.id); } @@ -70,23 +48,58 @@ export async function reapOrphanedSessions(db: HostDb): Promise<ReapResult> { row.status === "exited" || !row.originWorkspaceId ) { - orphanIds.push(session.id); + orphans.push({ id: session.id, rowless: false }); } } - rowlessSessionsPendingSecondPass.clear(); - for (const id of stillRowless) rowlessSessionsPendingSecondPass.add(id); - let reaped = 0; let failed = 0; - for (const id of orphanIds) { + for (const orphan of orphans) { try { - const result = await disposeSessionAndWait(id, db); - if (result.daemonCloseSucceeded) reaped += 1; - else failed += 1; + const result = await disposeSessionAndWait(orphan.id, db); + if (result.daemonCloseSucceeded) { + reaped += 1; + continue; + } } catch { - failed += 1; + // fall through to the failure path below } + failed += 1; + // A failed
kill on a confirmed (second-pass) rowless orphan is kept + // pending so the next pass retries it instead of restarting its + // two-pass clock. + if (orphan.rowless) stillRowless.add(orphan.id); } + + rowlessPendingSecondPass.clear(); + for (const id of stillRowless) rowlessPendingSecondPass.add(id); + return { reaped, failed }; } + +export function startTerminalReaper(db: HostDb): () => void { + const rowlessPendingSecondPass = new Set<string>(); + let running = false; + const run = () => { + if (running) return; + running = true; + void reapOrphanedSessions(db, rowlessPendingSecondPass) + .then((result) => { + if (result.reaped > 0 || result.failed > 0) { + console.log( + `[host-service] terminal reaper: ${result.reaped} reaped, ${result.failed} failed`, + ); + } + }) + .catch((err) => { + console.warn("[host-service] terminal reaper failed:", err); + }) + .finally(() => { + running = false; + }); + }; + run(); + const interval = setInterval(run, REAP_INTERVAL_MS); + interval.unref(); + return () => clearInterval(interval); +} From cdb508543f5b3a205e655502cb64383dd1a291f6 Mon Sep 17 00:00:00 2001 From: Satya Patel Date: Sat, 6 Jun 2026 17:06:38 -0700 Subject: [PATCH 6/6] fix(host-service): include workspace id in terminal-row cleanup warning --- .../src/trpc/router/workspace-cleanup/workspace-cleanup.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/host-service/src/trpc/router/workspace-cleanup/workspace-cleanup.ts b/packages/host-service/src/trpc/router/workspace-cleanup/workspace-cleanup.ts index a611c5d2ae..88a4f270dc 100644 --- a/packages/host-service/src/trpc/router/workspace-cleanup/workspace-cleanup.ts +++ b/packages/host-service/src/trpc/router/workspace-cleanup/workspace-cleanup.ts @@ -291,7 +291,9 @@ async function runDestroy( .run(); } catch (err) { const message = err instanceof Error ?
err.message : String(err); - warnings.push(`Failed to clear terminal session rows: ${message}`); + warnings.push( + `Failed to clear terminal session rows for ${input.workspaceId}: ${message}`, + ); } // 2b. Worktree. Double-force unlocks the rare locked-worktree case and