diff --git a/packages/opencode/src/cleanup/index.ts b/packages/opencode/src/cleanup/index.ts new file mode 100644 index 00000000000..03f6b3c2fc6 --- /dev/null +++ b/packages/opencode/src/cleanup/index.ts @@ -0,0 +1,275 @@ +import fs from "fs/promises" +import path from "path" +import { Log } from "../util/log" +import { Database, and, lt, isNotNull, isNull, eq, inArray } from "../storage/db" +import { SessionTable, MessageTable } from "../session/session.sql" +import { ProjectTable } from "../project/project.sql" +import { Storage } from "../storage/storage" +import { Global } from "../global" +import type { Config } from "../config/config" + +type StorageCategory = "session" | "session_diff" | "message" | "part" | "todo" | "project" | "snapshot" + +const ALL_CATEGORIES: StorageCategory[] = [ + "session", + "session_diff", + "message", + "part", + "todo", + "project", + "snapshot", +] + +export namespace Cleanup { + const log = Log.create({ service: "cleanup" }) + + /** Yield to the event loop so the TUI can render */ + const yieldTick = () => new Promise((r) => setTimeout(r, 0)) + + export function run(config: Config.Info["cleanup"]) { + if (config?.enabled === false) return + // Defer cleanup to avoid competing with TUI startup + setTimeout(() => runDeferred(config), 500) + } + + async function runDeferred(config: Config.Info["cleanup"]) { + log.info("cleanup started") + const sessionsDeleted = await sessionCleanup(config?.session).catch((e) => { + log.error("session cleanup failed", { error: e }) + return 0 + }) + const categories = new Set(config?.storage ?? 
ALL_CATEGORIES) + const orphansSwept = await sweepOrphanedStorage(categories).catch((e) => { + log.error("orphan sweep failed", { error: e }) + return 0 + }) + await vacuum(config?.vacuum).catch((e) => + log.error("vacuum failed", { error: e }), + ) + log.info("cleanup complete", { + sessions_deleted: sessionsDeleted, + orphans_swept: orphansSwept, + }) + } + + async function sessionCleanup(config: NonNullable<Config.Info["cleanup"]>["session"]): Promise<number> { + if (!config?.max_age_days) return 0 + const cutoff = Date.now() - config.max_age_days * 86_400_000 + + const conditions = [ + isNotNull(SessionTable.time_updated), + lt(SessionTable.time_updated, cutoff), + isNull(SessionTable.parent_id), // only root sessions; children cascade + ] + if (!config.target || config.target === "archived") { + conditions.push(isNotNull(SessionTable.time_archived)) + } + + const sessions = Database.use((db) => + db + .select({ id: SessionTable.id }) + .from(SessionTable) + .where(and(...conditions)) + .all(), + ) + + if (sessions.length === 0) return 0 + const sessionIDs = sessions.map((s) => s.id) + + // DB first — orphaned storage files are harmless; orphaned DB rows could confuse UI + Database.transaction((db) => { + db.delete(SessionTable).where(inArray(SessionTable.id, sessionIDs)).run() + }) + + // Storage cleanup is best-effort; orphan sweep catches any misses + for (const id of sessionIDs) { + await Storage.remove(["session_diff", id]).catch(() => {}) + } + + log.info("session cleanup", { deleted: sessionIDs.length, target: config.target ?? 
"archived" }) + return sessionIDs.length + } + + // -- Orphan sweep helpers -- + + /** Get all session IDs that exist in the DB */ + function getSessionIDs(): Set<string> { + const rows = Database.use((db) => + db.select({ id: SessionTable.id }).from(SessionTable).all(), + ) + return new Set(rows.map((r) => r.id)) + } + + /** Get all message IDs that exist in the DB */ + function getMessageIDs(): Set<string> { + const rows = Database.use((db) => + db.select({ id: MessageTable.id }).from(MessageTable).all(), + ) + return new Set(rows.map((r) => r.id)) + } + + /** Get all project IDs that exist in the DB */ + function getProjectIDs(): Set<string> { + const rows = Database.use((db) => + db.select({ id: ProjectTable.id }).from(ProjectTable).all(), + ) + return new Set(rows.map((r) => r.id)) + } + + /** + * Sweep a storage prefix, removing files whose ID (extracted from the key) + * is not in the provided valid set. + */ + async function sweepStoragePrefix( + prefix: string, + validIDs: Set<string>, + idIndex: number, + ): Promise<number> { + let count = 0 + const keys = await Storage.list([prefix]) + for (let i = 0; i < keys.length; i++) { + const id = keys[i][idIndex] + if (!id) continue + if (!validIDs.has(id)) { + await Storage.remove(keys[i]).catch(() => {}) + count++ + // Yield every 100 deletions to avoid blocking the event loop + if (count % 100 === 0) await yieldTick() + } + } + return count + } + + /** Recursively remove empty directories under a path (bottom-up) */ + async function pruneEmptyDirs(dir: string) { + let entries: string[] + try { + entries = await fs.readdir(dir) + } catch { + return + } + for (const entry of entries) { + const full = path.join(dir, entry) + try { + const stat = await fs.stat(full) + if (!stat.isDirectory()) continue + await pruneEmptyDirs(full) + const children = await fs.readdir(full) + if (children.length === 0) { + await fs.rmdir(full).catch(() => {}) + } + } catch { + // entry disappeared between readdir and stat — fine + } + } + } + + async function 
sweepOrphanedStorage(categories: Set<StorageCategory>): Promise<number> { + const storageDir = path.join(Global.Path.data, "storage") + let totalSwept = 0 + + // Lazily load ID sets only when needed + let sessionIDs: Set<string> | undefined + let messageIDs: Set<string> | undefined + let projectIDs: Set<string> | undefined + + const ensureSessionIDs = () => (sessionIDs ??= getSessionIDs()) + const ensureMessageIDs = () => (messageIDs ??= getMessageIDs()) + const ensureProjectIDs = () => (projectIDs ??= getProjectIDs()) + + // session_diff: storage/session_diff/<sessionID>.json + if (categories.has("session_diff")) { + const swept = await sweepStoragePrefix("session_diff", ensureSessionIDs(), 1) + if (swept > 0) { + log.info("swept orphaned session_diff files", { count: swept }) + totalSwept += swept + } + } + + // todo: storage/todo/<sessionID>.json + if (categories.has("todo")) { + const swept = await sweepStoragePrefix("todo", ensureSessionIDs(), 1) + if (swept > 0) { + log.info("swept orphaned todo files", { count: swept }) + totalSwept += swept + } + } + + // message: storage/message/<sessionID>/<messageID>.json + if (categories.has("message")) { + const swept = await sweepStoragePrefix("message", ensureSessionIDs(), 1) + if (swept > 0) { + log.info("swept orphaned message files", { count: swept }) + totalSwept += swept + } + } + + // part: storage/part/<messageID>/<partID>.json + if (categories.has("part")) { + const swept = await sweepStoragePrefix("part", ensureMessageIDs(), 1) + if (swept > 0) { + log.info("swept orphaned part files", { count: swept }) + totalSwept += swept + } + } + + // session: storage/session/<projectID>/<sessionID>.json + if (categories.has("session")) { + const swept = await sweepStoragePrefix("session", ensureSessionIDs(), 2) + if (swept > 0) { + log.info("swept orphaned session files", { count: swept }) + totalSwept += swept + } + } + + // project: storage/project/<projectID>.json + if (categories.has("project")) { + const swept = await sweepStoragePrefix("project", ensureProjectIDs(), 1) + if (swept > 0) { + log.info("swept orphaned project files", { count: swept }) + 
totalSwept += swept + } + } + + // snapshot: ~/.local/share/opencode/snapshot// + if (categories.has("snapshot")) { + const snapshotDir = path.join(Global.Path.data, "snapshot") + let swept = 0 + try { + const pids = ensureProjectIDs() + const entries = await fs.readdir(snapshotDir) + for (const entry of entries) { + if (pids.has(entry)) continue + const full = path.join(snapshotDir, entry) + const stat = await fs.stat(full).catch(() => null) + if (!stat?.isDirectory()) continue + await fs.rm(full, { recursive: true, force: true }).catch(() => {}) + swept++ + } + } catch { + // snapshot dir may not exist — that's fine + } + if (swept > 0) { + log.info("swept orphaned snapshot dirs", { count: swept }) + totalSwept += swept + } + } + + // Prune empty subdirectories across all storage categories + await pruneEmptyDirs(storageDir) + + if (totalSwept > 0) { + log.info("orphan sweep complete", { total: totalSwept }) + } + return totalSwept + } + + async function vacuum(config: { enabled?: boolean } | undefined) { + if (config?.enabled === false) return + const start = Date.now() + const client = Database.Client().$client + client.run("PRAGMA wal_checkpoint(TRUNCATE)") + client.run("VACUUM") + log.info("vacuum complete", { duration: Date.now() - start }) + } +} diff --git a/packages/opencode/src/config/config.ts b/packages/opencode/src/config/config.ts index 6b4242a225a..cc5c618485a 100644 --- a/packages/opencode/src/config/config.ts +++ b/packages/opencode/src/config/config.ts @@ -1147,6 +1147,44 @@ export namespace Config { .describe("Token buffer for compaction. 
Leaves enough window to avoid overflow during compaction."), }) .optional(), + cleanup: z + .object({ + enabled: z.boolean().optional().describe("Enable cleanup on startup (default: true)"), + log: z + .object({ + max_count: z + .number() + .int() + .min(1) + .optional() + .describe("Maximum log files to retain (default: 10)"), + }) + .optional(), + session: z + .object({ + max_age_days: z + .number() + .int() + .min(1) + .optional() + .describe("Delete sessions older than N days (disabled by default)"), + target: z + .enum(["archived", "all"]) + .optional() + .describe("Which sessions to consider for cleanup (default: archived)"), + }) + .optional(), + storage: z + .array(z.enum(["session", "session_diff", "message", "part", "todo", "project", "snapshot"])) + .optional() + .describe("Storage categories to sweep for orphaned files (default: all)"), + vacuum: z + .object({ + enabled: z.boolean().optional().describe("Run VACUUM on startup (default: true)"), + }) + .optional(), + }) + .optional(), experimental: z .object({ disable_paste_summary: z.boolean().optional(), diff --git a/packages/opencode/src/index.ts b/packages/opencode/src/index.ts index 4fd5f0e67b3..1dc9cfbd105 100644 --- a/packages/opencode/src/index.ts +++ b/packages/opencode/src/index.ts @@ -33,6 +33,8 @@ import path from "path" import { Global } from "./global" import { JsonMigration } from "./storage/json-migration" import { Database } from "./storage/db" +import { Config } from "./config/config" +import { Cleanup } from "./cleanup" process.on("unhandledRejection", (e) => { Log.Default.error("rejection", { @@ -69,9 +71,12 @@ let cli = yargs(hideBin(process.argv)) choices: ["DEBUG", "INFO", "WARN", "ERROR"], }) .middleware(async (opts) => { + const globalConfig = await Config.global() + await Log.init({ print: process.argv.includes("--print-logs"), dev: Installation.isLocal(), + maxLogFiles: globalConfig?.cleanup?.log?.max_count, level: (() => { if (opts.logLevel) return opts.logLevel as Log.Level if 
(Installation.isLocal()) return "DEBUG" @@ -124,6 +129,8 @@ let cli = yargs(hideBin(process.argv)) } process.stderr.write("Database migration complete." + EOL) } + + Cleanup.run(globalConfig?.cleanup) }) .usage("\n" + UI.logo()) .completion("completion", "generate shell completion script") diff --git a/packages/opencode/src/storage/storage.ts b/packages/opencode/src/storage/storage.ts index a78ff04f43d..228dfa5d596 100644 --- a/packages/opencode/src/storage/storage.ts +++ b/packages/opencode/src/storage/storage.ts @@ -210,7 +210,7 @@ export namespace Storage { const result = await Glob.scan("**/*", { cwd: path.join(dir, ...prefix), include: "file", - }).then((results) => results.map((x) => [...prefix, ...x.slice(0, -5).split(path.sep)])) + }).then((results) => results.map((x) => [...prefix, ...x.slice(0, -5).split("/")])) result.sort() return result } catch { diff --git a/packages/opencode/src/util/log.ts b/packages/opencode/src/util/log.ts index 2ca4c0a3de3..bea9aedefca 100644 --- a/packages/opencode/src/util/log.ts +++ b/packages/opencode/src/util/log.ts @@ -46,6 +46,7 @@ export namespace Log { print: boolean dev?: boolean level?: Level + maxLogFiles?: number } let logpath = "" @@ -59,7 +60,7 @@ export namespace Log { export async function init(options: Options) { if (options.level) level = options.level - cleanup(Global.Path.log) + cleanup(Global.Path.log, options.maxLogFiles) if (options.print) return logpath = path.join( Global.Path.log, @@ -77,15 +78,15 @@ export namespace Log { } } - async function cleanup(dir: string) { + async function cleanup(dir: string, maxCount = 10) { const files = await Glob.scan("????-??-??T??????.log", { cwd: dir, absolute: true, include: "file", }) - if (files.length <= 5) return - - const filesToDelete = files.slice(0, -10) + if (files.length <= maxCount) return + files.sort() + const filesToDelete = files.slice(0, -maxCount) await Promise.all(filesToDelete.map((file) => fs.unlink(file).catch(() => {}))) } diff --git 
a/packages/web/src/content/docs/config.mdx b/packages/web/src/content/docs/config.mdx index 038f253274e..4487255dbba 100644 --- a/packages/web/src/content/docs/config.mdx +++ b/packages/web/src/content/docs/config.mdx @@ -505,6 +505,41 @@ You can control context compaction behavior through the `compaction` option. --- +### Cleanup + +You can control startup cleanup behavior through the `cleanup` option. OpenCode runs cleanup on every startup to manage old sessions, orphaned storage files, and database maintenance. + +```json title="opencode.json" +{ + "$schema": "https://opencode.ai/config.json", + "cleanup": { + "enabled": true, + "log": { + "max_count": 10 + }, + "session": { + "max_age_days": 90, + "target": "archived" + }, + "storage": ["session", "session_diff", "message", "part", "todo", "project", "snapshot"], + "vacuum": { + "enabled": true + } + } +} +``` + +- `enabled` - Enable cleanup on startup (default: `true`). +- `log.max_count` - Maximum log files to retain (default: `10`). +- `session.max_age_days` - Delete sessions older than N days based on last modification time (disabled by default). +- `session.target` - Which sessions to consider: `"archived"` (only archived sessions) or `"all"` (default: `"archived"`). +- `storage` - Storage categories to sweep for orphaned files (default: all categories). Categories: `session`, `session_diff`, `message`, `part`, `todo`, `project`, `snapshot`. +- `vacuum.enabled` - Run SQLite VACUUM on startup (default: `true`). + +Session cleanup is opt-in — it only runs when `max_age_days` is explicitly set. The orphan sweep and vacuum run by default and clean up storage files that no longer have corresponding database records. + +--- + ### Watcher You can configure file watcher ignore patterns through the `watcher` option.