From 17459e48d99b4f67efdb5ea061ddfe9345241a13 Mon Sep 17 00:00:00 2001
From: "vellum-apollo-bot[bot]" <242025090+vellum-apollo-bot[bot]@users.noreply.github.com>
Date: Fri, 29 May 2026 21:49:59 +0000
Subject: [PATCH] feat(cli): add `assistant db repair` with integrity-check step
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduces a step-runner framework for the repair surface so future
remediation passes (conversation backfill, memory consolidation, etc.)
can be appended without restructuring the command. Each step produces a
structured `StepResult` and the runner aggregates them into a
`RepairReport` that renders as plain text or as JSON via `--json`.

First step: `integrity-check` — runs `PRAGMA integrity_check` on a
read-only handle. Full scan (not quick_check) because a user typing
`repair` is opting in to a thorough probe. Healthy DBs report `ok` +
page count. Damaged DBs surface the integrity_check rows verbatim,
capped at 20 lines in human output (full list in --json). Severely
malformed DBs whose pragma throws before yielding rows are normalized
into the same corruption signal, not flagged as a step bug.

Also drops the "(read-only by default)" qualifier from the parent `db`
description per review feedback on #32606 — no flag exists to flip the
default, so the qualifier had no referent.

Gateway risk registry: `db repair` registered as medium. First step is
read-only; later steps will mutate.

Smoke-tested on the live ~4 GB workspace DB (993 829 pages, 5m44s, no
corruption). 11 unit tests pass covering healthy DB, corrupt DB, and
missing DB (each in both human and --json output), plus five runner
behaviors (sequential order, continue-on-error, halt-on-error, throw
capture, durationMs recording).
--- .../cli/commands/db/__tests__/repair.test.ts | 355 ++++++++++++++++++ assistant/src/cli/commands/db/index.ts | 14 +- .../cli/commands/db/repair-step-integrity.ts | 128 +++++++ assistant/src/cli/commands/db/repair-steps.ts | 177 +++++++++ assistant/src/cli/commands/db/repair.ts | 145 +++++++ assistant/src/cli/lib/cli-colors.ts | 30 +- .../command-registry/commands/assistant.ts | 7 + 7 files changed, 844 insertions(+), 12 deletions(-) create mode 100644 assistant/src/cli/commands/db/__tests__/repair.test.ts create mode 100644 assistant/src/cli/commands/db/repair-step-integrity.ts create mode 100644 assistant/src/cli/commands/db/repair-steps.ts create mode 100644 assistant/src/cli/commands/db/repair.ts diff --git a/assistant/src/cli/commands/db/__tests__/repair.test.ts b/assistant/src/cli/commands/db/__tests__/repair.test.ts new file mode 100644 index 00000000000..88103de903d --- /dev/null +++ b/assistant/src/cli/commands/db/__tests__/repair.test.ts @@ -0,0 +1,355 @@ +/** + * Tests for `assistant db repair`. + * + * Covers: + * - integrity check passes on a healthy DB (happy path) + * - integrity check reports errors on a deliberately corrupted DB + * - missing DB exits 1 with a loud error + * - --json shape contains step results + * - the step framework itself (continue-on-error, halt-on-error, throwing + * step captured as a synthetic error result) + * + * Uses real bun:sqlite databases in tmp dirs; the integrity check needs to + * walk actual pages, so mocking would defeat the point. 
+ */ + +import { + closeSync, + mkdirSync, + mkdtempSync, + openSync, + rmSync, + writeSync, +} from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { Database } from "bun:sqlite"; +import { afterEach, beforeEach, describe, expect, test } from "bun:test"; + +import type { RepairStep } from "../repair-steps.js"; +import { runRepairSteps } from "../repair-steps.js"; + +// --------------------------------------------------------------------------- +// Workspace setup +// --------------------------------------------------------------------------- + +let workspaceDir: string; +let dbPath: string; +let originalWorkspaceEnv: string | undefined; + +beforeEach(() => { + workspaceDir = mkdtempSync(join(tmpdir(), "db-repair-test-")); + mkdirSync(join(workspaceDir, "data", "db"), { recursive: true }); + dbPath = join(workspaceDir, "data", "db", "assistant.db"); + originalWorkspaceEnv = process.env.VELLUM_WORKSPACE_DIR; + process.env.VELLUM_WORKSPACE_DIR = workspaceDir; +}); + +afterEach(() => { + if (originalWorkspaceEnv === undefined) { + delete process.env.VELLUM_WORKSPACE_DIR; + } else { + process.env.VELLUM_WORKSPACE_DIR = originalWorkspaceEnv; + } + rmSync(workspaceDir, { recursive: true, force: true }); +}); + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function seedHealthyDb(opts: { walMode?: boolean } = {}): void { + const db = new Database(dbPath); + try { + // Default to WAL to match production. The corrupt-DB seed disables + // WAL so it can trample the main file directly (otherwise data lives + // in the -wal file and the main file is just a 1-page header). + if (opts.walMode ?? 
true) { + db.exec("PRAGMA journal_mode=WAL"); + } else { + db.exec("PRAGMA journal_mode=DELETE"); + } + db.exec(` + CREATE TABLE conversations (id TEXT PRIMARY KEY, title TEXT); + CREATE TABLE messages (id TEXT PRIMARY KEY, content TEXT); + `); + const ins = db.prepare( + "INSERT INTO conversations (id, title) VALUES (?, ?)", + ); + for (let i = 0; i < 50; i++) ins.run(`c-${i}`, `t-${i}`); + } finally { + db.close(); + } +} + +/** + * Build a structurally invalid SQLite file by writing junk bytes over a + * b-tree page in an otherwise-formed DB. PRAGMA integrity_check rejects + * this with concrete error rows. + * + * Uses rollback-journal mode (not WAL) so all data lives in the main + * file — otherwise the main file is just a 1-page header and our writes + * land in unused space the integrity check doesn't validate. + */ +function seedCorruptDb(): void { + seedHealthyDb({ walMode: false }); + const fd = openSync(dbPath, "r+"); + try { + const junk = Buffer.alloc(32 * 1024, 0xff); + // Start at page 2 (header is page 1); the data b-tree pages live in + // the next few pages of a small healthy DB. + writeSync(fd, junk, 0, junk.length, 1 * 4096); + } finally { + closeSync(fd); + } +} + +async function runRepair(args: string[]): Promise<{ + stdout: string; + stderr: string; + exitCode: number; +}> { + const stdoutChunks: string[] = []; + const stderrChunks: string[] = []; + let exitCode = 0; + + const origOut = process.stdout.write.bind(process.stdout); + const origErr = process.stderr.write.bind(process.stderr); + const origExit = process.exit; + + process.stdout.write = ((chunk: string | Uint8Array) => { + stdoutChunks.push( + typeof chunk === "string" ? chunk : new TextDecoder().decode(chunk), + ); + return true; + }) as typeof process.stdout.write; + process.stderr.write = ((chunk: string | Uint8Array) => { + stderrChunks.push( + typeof chunk === "string" ? 
chunk : new TextDecoder().decode(chunk), + ); + return true; + }) as typeof process.stderr.write; + process.exit = ((code?: number) => { + exitCode = code ?? 0; + throw new Error("__test_exit__"); + }) as typeof process.exit; + + try { + const { Command } = await import("commander"); + const { registerDbCommand } = await import("../index.js"); + const program = new Command(); + program.exitOverride(); + registerDbCommand(program); + try { + await program.parseAsync(["node", "assistant", "db", ...args]); + } catch (e) { + if ((e as Error).message !== "__test_exit__") throw e; + } + } finally { + process.stdout.write = origOut; + process.stderr.write = origErr; + process.exit = origExit; + } + return { + stdout: stdoutChunks.join(""), + stderr: stderrChunks.join(""), + exitCode, + }; +} + +// --------------------------------------------------------------------------- +// Integrity check — DB level +// --------------------------------------------------------------------------- + +describe("assistant db repair — healthy DB", () => { + test("integrity check passes and exits 0", async () => { + seedHealthyDb(); + const { stdout, exitCode } = await runRepair(["repair"]); + expect(exitCode).toBe(0); + expect(stdout).toContain("integrity-check"); + expect(stdout).toContain("ok"); + expect(stdout).toContain("no corruption detected"); + expect(stdout).toMatch(/Done\. 
1 step ran: 1 ok, 0 failed/); + }); + + test("--json emits a structured report with the step result", async () => { + seedHealthyDb(); + const { stdout, exitCode } = await runRepair(["--json", "repair"]); + expect(exitCode).toBe(0); + const parsed = JSON.parse(stdout); + expect(parsed.dbPath).toBe(dbPath); + expect(parsed.steps).toHaveLength(1); + expect(parsed.steps[0].name).toBe("integrity-check"); + expect(parsed.steps[0].result.status).toBe("ok"); + expect(parsed.steps[0].result.data.errorCount).toBe(0); + expect(typeof parsed.steps[0].result.durationMs).toBe("number"); + expect(parsed.okCount).toBe(1); + expect(parsed.errorCount).toBe(0); + }); +}); + +describe("assistant db repair — corrupt DB", () => { + test("integrity check surfaces corruption and exits 1", async () => { + seedCorruptDb(); + const { stdout, exitCode } = await runRepair(["repair"]); + expect(exitCode).toBe(1); + expect(stdout).toContain("integrity-check"); + expect(stdout).toContain("error"); + // The seed produces a severely-corrupt DB where PRAGMA integrity_check + // itself throws "database disk image is malformed" before yielding any + // rows. The step normalizes that into a structured corruption signal + // rather than letting the runner mark it as a synthetic bug. + expect(stdout).toMatch( + /(integrity violation|database is too corrupt|database disk image is malformed)/, + ); + expect(stdout).not.toContain("this is a bug"); + expect(stdout).toMatch(/Done\. 
1 step ran: 0 ok, 1 failed/); + }); + + test("--json carries the full error list", async () => { + seedCorruptDb(); + const { stdout, exitCode } = await runRepair(["--json", "repair"]); + expect(exitCode).toBe(1); + const parsed = JSON.parse(stdout); + expect(parsed.steps[0].result.status).toBe("error"); + expect(parsed.steps[0].result.data).toBeDefined(); + expect(Array.isArray(parsed.steps[0].result.data.errors)).toBe(true); + expect(parsed.steps[0].result.data.errors.length).toBeGreaterThan(0); + expect(parsed.errorCount).toBe(1); + }); +}); + +describe("assistant db repair — DB missing", () => { + test("exits 1 with a loud error", async () => { + // No seed + const { stdout, stderr, exitCode } = await runRepair(["repair"]); + expect(exitCode).toBe(1); + expect(stdout).toBe(""); + expect(stderr).toContain("ERROR"); + expect(stderr).toContain("Database not found"); + expect(stderr).toContain(dbPath); + }); + + test("--json missing DB emits structured payload, exits 1", async () => { + const { stdout, exitCode } = await runRepair(["--json", "repair"]); + expect(exitCode).toBe(1); + const parsed = JSON.parse(stdout); + expect(parsed.missing).toBe(true); + expect(parsed.dbPath).toBe(dbPath); + expect(parsed.steps).toHaveLength(0); + }); +}); + +// --------------------------------------------------------------------------- +// Step framework — runner semantics +// --------------------------------------------------------------------------- + +describe("repair step runner", () => { + test("runs steps sequentially in declared order", async () => { + const calls: string[] = []; + const steps: RepairStep[] = [ + { + name: "a", + description: "first", + run: async () => { + calls.push("a"); + return { status: "ok", summary: "" }; + }, + }, + { + name: "b", + description: "second", + run: async () => { + calls.push("b"); + return { status: "ok", summary: "" }; + }, + }, + ]; + const report = await runRepairSteps({ dbPath }, steps); + expect(calls).toEqual(["a", "b"]); + 
expect(report.okCount).toBe(2); + expect(report.errorCount).toBe(0); + }); + + test("continues to the next step on non-halting failure", async () => { + const calls: string[] = []; + const steps: RepairStep[] = [ + { + name: "broken", + description: "fails but does not halt", + run: async () => { + calls.push("broken"); + return { status: "error", summary: "boom" }; + }, + }, + { + name: "later", + description: "still runs", + run: async () => { + calls.push("later"); + return { status: "ok", summary: "" }; + }, + }, + ]; + const report = await runRepairSteps({ dbPath }, steps); + expect(calls).toEqual(["broken", "later"]); + expect(report.errorCount).toBe(1); + expect(report.okCount).toBe(1); + expect(report.halted).toBe(false); + }); + + test("stops the sequence when a step reports halt: true", async () => { + const calls: string[] = []; + const steps: RepairStep[] = [ + { + name: "fatal", + description: "halts", + run: async () => { + calls.push("fatal"); + return { status: "error", summary: "stop now", halt: true }; + }, + }, + { + name: "skipped", + description: "never runs", + run: async () => { + calls.push("skipped"); + return { status: "ok", summary: "" }; + }, + }, + ]; + const report = await runRepairSteps({ dbPath }, steps); + expect(calls).toEqual(["fatal"]); + expect(report.halted).toBe(true); + expect(report.steps).toHaveLength(1); + }); + + test("captures thrown errors as synthetic error results", async () => { + const steps: RepairStep[] = [ + { + name: "thrower", + description: "throws unexpectedly", + run: async () => { + throw new Error("unhandled"); + }, + }, + ]; + const report = await runRepairSteps({ dbPath }, steps); + expect(report.errorCount).toBe(1); + expect(report.steps[0].result.status).toBe("error"); + const detail = report.steps[0].result.detailLines ?? 
[]; + expect(detail.join(" ")).toContain("bug"); + }); + + test("records non-zero durationMs for each step", async () => { + const steps: RepairStep[] = [ + { + name: "timed", + description: "noop", + run: async () => ({ status: "ok", summary: "" }), + }, + ]; + const report = await runRepairSteps({ dbPath }, steps); + expect(report.steps[0].result.durationMs).toBeGreaterThanOrEqual(0); + }); +}); diff --git a/assistant/src/cli/commands/db/index.ts b/assistant/src/cli/commands/db/index.ts index ae2f43a72cf..8f9f0142acd 100644 --- a/assistant/src/cli/commands/db/index.ts +++ b/assistant/src/cli/commands/db/index.ts @@ -1,27 +1,29 @@ /** - * `assistant db` — inspect and (in follow-up PRs) repair the assistant SQLite - * database directly from disk. + * `assistant db` — inspect and repair the assistant SQLite database directly + * from disk. * * Subcommands declare `transport: "local"` so they work when the daemon is * down — which is precisely the failure mode this surface is most useful in. - * Each subcommand opens its own bun:sqlite connection (read-only for `status`) - * and never goes through IPC. + * Each subcommand opens its own bun:sqlite connection (read-only for + * inspection, read-write for repair steps that mutate) and never goes + * through IPC. 
*/ import type { Command } from "commander"; import { registerCommand } from "../../lib/register-command.js"; +import { registerDbRepair } from "./repair.js"; import { registerDbStatus } from "./status.js"; export function registerDbCommand(program: Command): void { registerCommand(program, { name: "db", transport: "local", - description: - "Inspect and repair the assistant SQLite database (read-only by default)", + description: "Inspect and repair the assistant SQLite database", build: (db) => { db.option("--json", "Machine-readable compact JSON output"); registerDbStatus(db); + registerDbRepair(db); }, }); } diff --git a/assistant/src/cli/commands/db/repair-step-integrity.ts b/assistant/src/cli/commands/db/repair-step-integrity.ts new file mode 100644 index 00000000000..5af127b2774 --- /dev/null +++ b/assistant/src/cli/commands/db/repair-step-integrity.ts @@ -0,0 +1,128 @@ +/** + * Repair step: integrity check. + * + * Runs `PRAGMA integrity_check` on a read-only handle. The pragma is the + * authoritative SQLite corruption probe — it walks every page, verifies + * b-tree linkage, checks index ↔ table consistency, and surfaces results + * as one or more rows of text. The canonical "everything ok" response is + * a single row containing the literal string "ok"; any other rows are + * error messages we surface verbatim. + * + * We use the full `integrity_check` rather than `quick_check` because the + * user typing `assistant db repair` is explicitly signing up for a slow + * thorough probe. On the workspace's current ~4 GB DB the full check runs + * in single-digit-minutes; that's acceptable for "the DB might be broken, + * please tell me everything that's wrong". + * + * The step never mutates the database. If corruption is found the step + * returns a non-halting error — subsequent steps (conversation backfill, + * etc.) may still produce useful work even on a partially-corrupt DB. 
+ */ + +import { Database } from "bun:sqlite"; + +import type { RepairContext, RepairStep, StepResult } from "./repair-steps.js"; +import { withDb } from "./repair-steps.js"; + +/** + * Maximum number of corruption error lines to surface in the human output + * before truncating with a "+N more" suffix. The JSON payload always + * carries the full list; this cap only affects the rendered text so a + * massively corrupt DB doesn't drown the terminal. + */ +const MAX_REPORTED_ERROR_LINES = 20; + +async function runIntegrityCheck(ctx: RepairContext): Promise { + return withDb( + () => new Database(ctx.dbPath, { readonly: true }), + async (db) => { + // `PRAGMA integrity_check` returns rows of a single TEXT column also + // named `integrity_check`. When the DB is healthy this is exactly one + // row whose value is the literal "ok"; any other shape means errors. + // + // Severely corrupted DBs (header damaged, b-tree root unreadable) + // can cause the pragma itself to throw "database disk image is + // malformed" before yielding any rows. We catch that and surface it + // as the corruption signal it actually is — it's not a bug in the + // step, it's the DB telling us it's structurally invalid. + let messages: string[]; + try { + const rows = db + .query<{ integrity_check: string }, []>("PRAGMA integrity_check") + .all(); + messages = rows.map((r) => r.integrity_check); + } catch (err) { + const msg = err instanceof Error ? 
err.message : String(err); + const pageCount = safePageCount(db); + return { + status: "error", + summary: "database is too corrupt to complete integrity check", + detailLines: [ + msg, + `page count: ${pageCount.toLocaleString("en-US")}`, + ], + data: { + pageCount, + errorCount: 1, + errors: [msg], + checkFailed: true, + }, + }; + } + + const healthy = messages.length === 1 && messages[0] === "ok"; + const pageCount = safePageCount(db); + + if (healthy) { + return { + status: "ok", + summary: "no corruption detected", + detailLines: [`scanned ${pageCount.toLocaleString("en-US")} pages`], + data: { pageCount, errorCount: 0 }, + }; + } + + const truncated = messages.slice(0, MAX_REPORTED_ERROR_LINES); + const detailLines = + messages.length > MAX_REPORTED_ERROR_LINES + ? [ + ...truncated, + `+ ${messages.length - MAX_REPORTED_ERROR_LINES} more (use --json for full list)`, + ] + : truncated; + + return { + status: "error", + summary: `${messages.length} integrity violation${messages.length === 1 ? "" : "s"} reported`, + detailLines, + data: { + pageCount, + errorCount: messages.length, + errors: messages, + }, + }; + }, + ); +} + +/** + * `PRAGMA page_count` is cheap and works even on damaged DBs (it reads + * from the header), but on truly malformed files it can throw too. Wrap + * it so the integrity step always has a number to report. + */ +function safePageCount(db: import("bun:sqlite").Database): number { + try { + return ( + db.query<{ page_count: number }, []>("PRAGMA page_count").get() + ?.page_count ?? 
0 + ); + } catch { + return 0; + } +} + +export const integrityCheckStep: RepairStep = { + name: "integrity-check", + description: "Walk every database page and verify b-tree consistency", + run: runIntegrityCheck, +}; diff --git a/assistant/src/cli/commands/db/repair-steps.ts b/assistant/src/cli/commands/db/repair-steps.ts new file mode 100644 index 00000000000..03f810fe4b3 --- /dev/null +++ b/assistant/src/cli/commands/db/repair-steps.ts @@ -0,0 +1,177 @@ +/** + * Step framework for `assistant db repair`. + * + * `repair` is conceptually a sequence of discrete remediation passes: + * + * 1. integrity check (this PR) + * 2. conversation backfill (next PR — replay /workspace/conversations + * into SQLite) + * 3. … more to come (memory consolidation, lost-and-found triage, etc.) + * + * Each step is a small unit that: + * - logs a "starting" line when it begins + * - produces a `StepResult` describing what happened + * - logs a single "success" or "error" summary line with details + * + * The runner is intentionally not clever: + * - steps run sequentially (later steps may depend on earlier ones; in + * particular, a step that mutates the DB needs preceding integrity-check + * results to be visible) + * - a failed step does NOT halt the sequence by default. Repair is a + * best-effort surface — a corrupted DB doesn't mean we should skip + * re-deriving conversations from disk. Steps that genuinely cannot + * continue on failure mark themselves `halt: true`. + * - the runner never throws; every error is captured into a `StepResult` + * so callers can render a coherent summary + * + * `RepairContext` holds the per-run state every step shares — the DB path + * and any opened handles. Steps may open their own bun:sqlite connections + * (e.g. integrity check opens read-only) rather than holding one open at + * the context level; future write-side steps will need to open RW + * themselves anyway. 
+ */ + +import type { Database } from "bun:sqlite"; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +export interface RepairContext { + /** Absolute path to the assistant SQLite database file. */ + dbPath: string; +} + +export type StepResult = + | { + status: "ok"; + /** One-line human summary, e.g. "no corruption detected". */ + summary: string; + /** Optional structured payload surfaced in --json mode. */ + data?: Record; + /** Optional secondary lines printed under the summary in human mode. */ + detailLines?: string[]; + /** Wall-clock duration of the step, set by the runner. */ + durationMs?: number; + } + | { + status: "error"; + /** One-line human summary, e.g. "database disk image is malformed". */ + summary: string; + data?: Record; + detailLines?: string[]; + durationMs?: number; + /** When true, the runner stops the remaining sequence. */ + halt?: boolean; + }; + +export interface RepairStep { + /** Short identifier; appears in logs as `[i/N] `. */ + name: string; + /** Human one-liner explaining what the step does. */ + description: string; + run: (ctx: RepairContext) => Promise; +} + +export interface StepRun { + name: string; + description: string; + result: StepResult; +} + +export interface RepairReport { + dbPath: string; + steps: StepRun[]; + /** Convenience counters for renderers; derived from `steps`. */ + okCount: number; + errorCount: number; + /** True when the sequence was cut short by a `halt: true` failure. */ + halted: boolean; +} + +// --------------------------------------------------------------------------- +// Runner +// --------------------------------------------------------------------------- + +export interface RunnerHooks { + /** Called when a step is about to start. */ + onStart?: (idx: number, total: number, step: RepairStep) => void; + /** Called when a step finishes (whether ok or error). 
*/ + onFinish?: ( + idx: number, + total: number, + step: RepairStep, + result: StepResult, + ) => void; +} + +/** + * Run a sequence of repair steps. Never throws — all step failures land in + * the returned `RepairReport`. Errors thrown from a step's `run` are + * captured as a synthetic `status: "error"` result so a bug in one step + * can't take down the whole repair. + */ +export async function runRepairSteps( + ctx: RepairContext, + steps: RepairStep[], + hooks: RunnerHooks = {}, +): Promise { + const runs: StepRun[] = []; + let halted = false; + + for (let i = 0; i < steps.length; i++) { + const step = steps[i]!; + hooks.onStart?.(i + 1, steps.length, step); + + const startedAt = performance.now(); + let result: StepResult; + try { + result = await step.run(ctx); + } catch (err) { + result = { + status: "error", + summary: err instanceof Error ? err.message : String(err), + detailLines: ["step threw an unexpected error — this is a bug"], + }; + } + result.durationMs = performance.now() - startedAt; + + hooks.onFinish?.(i + 1, steps.length, step, result); + runs.push({ name: step.name, description: step.description, result }); + + if (result.status === "error" && result.halt) { + halted = true; + break; + } + } + + const okCount = runs.filter((r) => r.result.status === "ok").length; + const errorCount = runs.filter((r) => r.result.status === "error").length; + return { dbPath: ctx.dbPath, steps: runs, okCount, errorCount, halted }; +} + +// --------------------------------------------------------------------------- +// Helpers used by step implementations +// --------------------------------------------------------------------------- + +/** Convenience: format a duration as `12.3s` or `450ms`. */ +export function formatDurationMs(ms: number): string { + if (ms < 1000) return `${Math.round(ms)}ms`; + return `${(ms / 1000).toFixed(1)}s`; +} + +/** + * Open a connection inside a step, ensuring the handle is closed even when + * the step's body throws. 
The callback may return any `StepResult`. + */ +export async function withDb( + open: () => Database, + fn: (db: Database) => Promise | T, +): Promise { + const db = open(); + try { + return await fn(db); + } finally { + db.close(); + } +} diff --git a/assistant/src/cli/commands/db/repair.ts b/assistant/src/cli/commands/db/repair.ts new file mode 100644 index 00000000000..f08127162e3 --- /dev/null +++ b/assistant/src/cli/commands/db/repair.ts @@ -0,0 +1,145 @@ +/** + * `assistant db repair` — run the database repair sequence. + * + * Composes the step framework in `repair-steps.ts` with the concrete steps + * imported from `repair-step-*.ts` files. Each step logs its own + * starting/success/error lines; the runner aggregates results into a + * `RepairReport` that we render either as plain text or as a single JSON + * payload (`--json`). + * + * This PR ships exactly one step (integrity-check). Subsequent PRs append + * more steps to the `STEPS` array — that's the only edit they need to make + * here. + * + * Transport: `local`. The whole point of this surface is that it works when + * the daemon is down, so it opens the DB file directly and never goes + * through IPC. + */ + +import { existsSync } from "node:fs"; + +import type { Command } from "commander"; + +import { getDbPath } from "../../../util/platform.js"; +import { dim, green, red } from "../../lib/cli-colors.js"; +import { shouldOutputJson, writeOutput } from "../../output.js"; +import { integrityCheckStep } from "./repair-step-integrity.js"; +import type { RepairReport, RepairStep, StepResult } from "./repair-steps.js"; +import { formatDurationMs, runRepairSteps } from "./repair-steps.js"; + +// --------------------------------------------------------------------------- +// Step sequence +// --------------------------------------------------------------------------- + +/** + * Repair steps run in the order listed here. Order matters: + * 1. 
integrity-check FIRST — surfaces structural damage before we attempt + * anything that touches the same pages + * 2. (future) conversation backfill from /workspace/conversations + * 3. (future) memory consolidation, lost-and-found triage, etc. + */ +const STEPS: RepairStep[] = [integrityCheckStep]; + +// --------------------------------------------------------------------------- +// Rendering +// --------------------------------------------------------------------------- + +function renderMissingDb(path: string): string { + return ( + `${red("ERROR")} Database not found at ${path}\n\n` + + `Nothing to repair — the assistant SQLite database is missing. If you\n` + + `have a backup, restore it first:\n` + + ` assistant backup list\n` + ); +} + +function emitStepStart(idx: number, total: number, step: RepairStep): void { + process.stdout.write( + `[${idx}/${total}] ${step.name} — ${dim("starting")}\n` + + ` ${dim(step.description)}\n`, + ); +} + +function emitStepFinish( + idx: number, + total: number, + step: RepairStep, + result: StepResult, +): void { + const duration = formatDurationMs(result.durationMs ?? 0); + if (result.status === "ok") { + process.stdout.write( + `[${idx}/${total}] ${step.name} — ${green("ok")} ` + + `${result.summary} ${dim(`(${duration})`)}\n`, + ); + } else { + process.stdout.write( + `[${idx}/${total}] ${step.name} — ${red("error")} ` + + `${result.summary} ${dim(`(${duration})`)}\n`, + ); + } + for (const line of result.detailLines ?? []) { + process.stdout.write(` ${dim(line)}\n`); + } +} + +function renderSummary(report: RepairReport): string { + const { okCount, errorCount, halted } = report; + const total = report.steps.length; + let line = + `\nDone. ${total} step${total === 1 ? 
"" : "s"} ran: ` + + `${okCount} ok, ${errorCount} failed`; + if (halted) line += " (sequence halted)"; + return line + "\n"; +} + +// --------------------------------------------------------------------------- +// Command wiring +// --------------------------------------------------------------------------- + +export function registerDbRepair(parent: Command): void { + parent + .command("repair") + .description("Run the database repair sequence (integrity check, …)") + .action(async function (this: Command) { + const dbPath = getDbPath(); + + if (!existsSync(dbPath)) { + if (shouldOutputJson(this)) { + writeOutput(this, { + dbPath, + missing: true, + steps: [], + okCount: 0, + errorCount: 0, + halted: false, + }); + } else { + process.stderr.write(renderMissingDb(dbPath)); + } + process.exit(1); + } + + const isJson = shouldOutputJson(this); + + const report = await runRepairSteps( + { dbPath }, + STEPS, + isJson + ? {} + : { + onStart: emitStepStart, + onFinish: emitStepFinish, + }, + ); + + if (isJson) { + writeOutput(this, report); + } else { + process.stdout.write(renderSummary(report)); + } + + // Exit non-zero if any step failed — makes the command scriptable. + if (report.errorCount > 0) process.exit(1); + }); +} diff --git a/assistant/src/cli/lib/cli-colors.ts b/assistant/src/cli/lib/cli-colors.ts index 650a8781c9a..5a894775875 100644 --- a/assistant/src/cli/lib/cli-colors.ts +++ b/assistant/src/cli/lib/cli-colors.ts @@ -1,12 +1,30 @@ /** - * Minimal ANSI red wrapper for CLI error output. Respects `NO_COLOR` - * (https://no-color.org/) and skips coloring when stderr is not a TTY so - * piped/captured output stays clean. + * Minimal ANSI color wrappers for CLI output. Each helper respects `NO_COLOR` + * (https://no-color.org/) and skips coloring when the relevant stream is not + * a TTY so piped/captured output stays clean. 
+ * + * `red` / `green` gate on stderr (error/success lines tend to land there in + * the existing commands), `dim` gates on stdout (used for muted body text). */ + +function colorsDisabled(): boolean { + return process.env.NO_COLOR !== undefined && process.env.NO_COLOR !== ""; +} + export function red(text: string): string { if (!process.stderr.isTTY) return text; - if (process.env.NO_COLOR !== undefined && process.env.NO_COLOR !== "") { - return text; - } + if (colorsDisabled()) return text; return `\x1b[31m${text}\x1b[0m`; } + +export function green(text: string): string { + if (!process.stderr.isTTY) return text; + if (colorsDisabled()) return text; + return `\x1b[32m${text}\x1b[0m`; +} + +export function dim(text: string): string { + if (!process.stdout.isTTY) return text; + if (colorsDisabled()) return text; + return `\x1b[2m${text}\x1b[0m`; +} diff --git a/gateway/src/risk/command-registry/commands/assistant.ts b/gateway/src/risk/command-registry/commands/assistant.ts index 84d4ca6bd42..07f10a740e1 100644 --- a/gateway/src/risk/command-registry/commands/assistant.ts +++ b/gateway/src/risk/command-registry/commands/assistant.ts @@ -126,6 +126,7 @@ const ASSISTANT_SUPPORTED_COMMAND_PATHS = [ "credentials status", "db", "db status", + "db repair", "gateway", "gateway logs", "gateway logs tail", @@ -415,6 +416,12 @@ const riskOverrides: AssistantRiskOverride[] = [ { path: "conversations rename", risk: "low" }, { path: "conversations wake", risk: "low" }, { path: "credential-execution grants revoke", risk: "medium" }, + { + path: "db repair", + risk: "medium", + reason: + "Runs the database repair sequence. First step is a read-only integrity check; future steps will mutate the SQLite database to recover state.", + }, { path: "domain register", risk: "medium" }, { path: "email register", risk: "medium" }, { path: "email unregister", risk: "medium" },