From 3e569ee44ec65ff54d54826773fcfb0af35a20f5 Mon Sep 17 00:00:00 2001 From: Vellum Assistant Date: Mon, 23 Feb 2026 11:26:58 -0500 Subject: [PATCH] feat: add retention cleanup, config, tests, and architecture docs for QA video --- ARCHITECTURE.md | 48 +++- .../src/__tests__/recording-cleanup.test.ts | 237 ++++++++++++++++++ assistant/src/config/defaults.ts | 4 + assistant/src/config/schema.ts | 18 ++ assistant/src/config/types.ts | 1 + assistant/src/daemon/lifecycle.ts | 5 + assistant/src/daemon/recording-cleanup.ts | 100 ++++++++ 7 files changed, 412 insertions(+), 1 deletion(-) create mode 100644 assistant/src/__tests__/recording-cleanup.test.ts create mode 100644 assistant/src/daemon/recording-cleanup.ts diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 0895b35c6d2..acd282244ac 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -4196,6 +4196,51 @@ Keep-alive heartbeats (every 30 s by default): --- +## QA Recording — Automated Video Capture and Retention + +### QA Recording Data Flow + +``` +User asks to test → QA intent detection → CU session created with qaMode + reportToSessionId +→ macOS ScreenRecorder starts → CU action loop executes +→ Session terminates → ScreenRecorder stops → .mp4 saved to ~/Library/Application Support/vellum-assistant/recordings/ +→ cu_session_finalized sent to daemon with recording metadata +→ Daemon handler creates file-backed attachment + assistant message in source chat +→ Client loads video from GET /v1/attachments/:id/content (with Range support) +→ Video playable inline + draggable to Finder +→ Retention cleanup removes expired recordings after configurable period (default 7 days) +``` + +### File-Backed Attachment Storage + +The attachments table supports two storage kinds: + +| `storageKind` | Data location | Use case | +|---------------|---------------|----------| +| `inline_base64` | `dataBase64` column in SQLite | Small attachments (images, documents, up to 20 MB) | +| `file` | On-disk file referenced by `filePath` 
column | Large files (QA recordings, videos) | + +File-backed attachments store only metadata in SQLite (filename, MIME type, size, SHA-256 hash, expiry timestamp). Binary content is served via `GET /v1/attachments/:id/content` with HTTP Range header support for streaming video playback. + +### Retention Cleanup + +A periodic cleanup worker (`recording-cleanup.ts`) runs on a configurable interval (default: every 6 hours, set via `qaRecording.cleanupIntervalMs`). It also runs one pass on daemon startup to catch recordings that expired while the daemon was offline. + +The cleanup pass: +1. Queries `getExpiredFileAttachments()` for file-backed attachments where `expiresAt < now` +2. Deletes the underlying file from disk via `fs.unlinkSync` +3. Removes the DB row via `deleteFileBackedAttachment(id)` +4. Logs a summary of cleaned-up recordings and freed disk space + +| Key files | Purpose | +|-----------|---------| +| `assistant/src/daemon/recording-cleanup.ts` | Cleanup worker (start/stop/runPass) | +| `assistant/src/memory/attachments-store.ts` | `createFileBackedAttachment`, `getExpiredFileAttachments`, `deleteFileBackedAttachment` | +| `assistant/src/config/schema.ts` | `QaRecordingConfigSchema` (retention days, cleanup interval) | +| `assistant/src/daemon/lifecycle.ts` | Wires cleanup worker start/stop into daemon init/shutdown | + +--- + ## Storage Summary | What | Where | Format | ORM/Driver | Retention | @@ -4213,7 +4258,8 @@ Keep-alive heartbeats (every 30 s by default): | Entity graph (entities/relations/item links) | `~/.vellum/workspace/data/db/assistant.db` | SQLite | Drizzle ORM | Permanent, deduped by unique relation edge | | Embeddings | `~/.vellum/workspace/data/db/assistant.db` | JSON float arrays | Drizzle ORM | Permanent | | Async job queue | `~/.vellum/workspace/data/db/assistant.db` | SQLite | Drizzle ORM | Completed jobs persist | -| Attachments | `~/.vellum/workspace/data/db/assistant.db` | Base64 in SQLite | Drizzle ORM | Permanent | +| 
Attachments (inline) | `~/.vellum/workspace/data/db/assistant.db` | Base64 in SQLite | Drizzle ORM | Permanent |
+| Attachments (file-backed) | `~/Library/Application Support/vellum-assistant/recordings/` + metadata in SQLite | Binary on disk, metadata in SQLite | Drizzle ORM + fs | Configurable (`qaRecording.defaultRetentionDays`, default 7 days) |
 | Sandbox filesystem | `~/.vellum/workspace` | Real filesystem tree | Node FS APIs | Persistent across sessions |
 | Tool permission rules | `~/.vellum/protected/trust.json` | JSON | File I/O | Permanent |
 | Web users & assistants | PostgreSQL | Relational | Drizzle ORM (pg) | Permanent |
diff --git a/assistant/src/__tests__/recording-cleanup.test.ts b/assistant/src/__tests__/recording-cleanup.test.ts
new file mode 100644
index 00000000000..9ee5b92ed0c
--- /dev/null
+++ b/assistant/src/__tests__/recording-cleanup.test.ts
@@ -0,0 +1,237 @@
+import { describe, test, expect, beforeEach, afterAll, mock } from 'bun:test';
+import { mkdtempSync, rmSync, writeFileSync, existsSync, mkdirSync } from 'node:fs';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+
+const testDir = mkdtempSync(join(tmpdir(), 'recording-cleanup-test-'));
+
+mock.module('../util/platform.js', () => ({
+  getDataDir: () => testDir,
+  isMacOS: () => process.platform === 'darwin',
+  isLinux: () => process.platform === 'linux',
+  isWindows: () => process.platform === 'win32',
+  getSocketPath: () => join(testDir, 'test.sock'),
+  getPidPath: () => join(testDir, 'test.pid'),
+  getDbPath: () => join(testDir, 'test.db'),
+  getLogPath: () => join(testDir, 'test.log'),
+  ensureDataDir: () => {},
+  getRootDir: () => testDir,
+}));
+
+mock.module('../util/logger.js', () => ({
+  getLogger: () => new Proxy({} as Record<string, unknown>, {
+    get: () => () => {},
+  }),
+}));
+
+mock.module('../config/loader.js', () => ({
+  getConfig: () => ({
+    model: 'test',
+    provider: 'test',
+    apiKeys: {},
+    memory: { enabled: false },
+    rateLimit: { maxRequestsPerMinute: 0, maxTokensPerSession: 0 },
+  }),
+}));
+
+import { initializeDb, getDb, resetDb } from '../memory/db.js';
+import {
+  uploadAttachment,
+  createFileBackedAttachment,
+  getAttachmentById,
+  getExpiredFileAttachments,
+} from '../memory/attachments-store.js';
+import { runCleanupPass } from '../daemon/recording-cleanup.js';
+
+initializeDb();
+
+afterAll(() => {
+  resetDb();
+  try { rmSync(testDir, { recursive: true }); } catch { /* best effort */ }
+});
+
+function resetTables() {
+  const db = getDb();
+  db.run('DELETE FROM message_attachments');
+  db.run('DELETE FROM attachments');
+}
+
+// ---------------------------------------------------------------------------
+// Cleanup pass tests
+// ---------------------------------------------------------------------------
+
+describe('runCleanupPass', () => {
+  beforeEach(resetTables);
+
+  test('deletes expired file-backed attachments and their files', () => {
+    const now = Date.now();
+    const recordingsDir = join(testDir, 'recordings');
+    mkdirSync(recordingsDir, { recursive: true });
+
+    // Create a file on disk
+    const filePath = join(recordingsDir, 'expired-recording.mp4');
+    writeFileSync(filePath, Buffer.alloc(1024, 0)); // 1 KB dummy file
+
+    // Create expired file-backed attachment
+    const expired = createFileBackedAttachment({
+      filename: 'expired-recording.mp4',
+      mimeType: 'video/mp4',
+      sizeBytes: 1024,
+      filePath,
+      expiresAt: now - 10000, // expired 10 seconds ago
+    });
+
+    // Verify file exists before cleanup
+    expect(existsSync(filePath)).toBe(true);
+
+    const result = runCleanupPass();
+
+    expect(result.cleaned).toBe(1);
+    expect(result.bytesFreed).toBe(1024);
+
+    // File should be removed from disk
+    expect(existsSync(filePath)).toBe(false);
+
+    // DB row should be removed
+    expect(getAttachmentById(expired.id)).toBeNull();
+  });
+
+  test('does not touch non-expired file-backed attachments', () => {
+    const now = Date.now();
+    const recordingsDir = join(testDir, 'recordings');
+    mkdirSync(recordingsDir, { recursive: true });
+
+    const filePath = join(recordingsDir, 'fresh-recording.mp4');
+    writeFileSync(filePath, Buffer.alloc(512, 0));
+
+    const fresh = createFileBackedAttachment({
+      filename: 'fresh-recording.mp4',
+      mimeType: 'video/mp4',
+      sizeBytes: 512,
+      filePath,
+      expiresAt: now + 86400000, // expires tomorrow
+    });
+
+    const result = runCleanupPass();
+
+    expect(result.cleaned).toBe(0);
+    expect(result.bytesFreed).toBe(0);
+
+    // File should still exist
+    expect(existsSync(filePath)).toBe(true);
+
+    // DB row should still exist
+    expect(getAttachmentById(fresh.id)).not.toBeNull();
+  });
+
+  test('never touches inline_base64 attachments', () => {
+    // Create an inline base64 attachment
+    const inline = uploadAttachment('chart.png', 'image/png', 'iVBORw0K');
+
+    const result = runCleanupPass();
+
+    expect(result.cleaned).toBe(0);
+
+    // Inline attachment should still exist
+    expect(getAttachmentById(inline.id)).not.toBeNull();
+  });
+
+  test('handles missing files gracefully (file already deleted)', () => {
+    const now = Date.now();
+
+    // Create expired attachment pointing to a non-existent file
+    const expired = createFileBackedAttachment({
+      filename: 'ghost-recording.mp4',
+      mimeType: 'video/mp4',
+      sizeBytes: 2048,
+      filePath: join(testDir, 'nonexistent', 'ghost.mp4'),
+      expiresAt: now - 10000,
+    });
+
+    const result = runCleanupPass();
+
+    // Should still clean up the DB row even if the file is missing
+    expect(result.cleaned).toBe(1);
+    expect(result.bytesFreed).toBe(0); // no file to measure
+
+    expect(getAttachmentById(expired.id)).toBeNull();
+  });
+
+  test('cleans up multiple expired recordings in one pass', () => {
+    const now = Date.now();
+    const recordingsDir = join(testDir, 'recordings-multi');
+    mkdirSync(recordingsDir, { recursive: true });
+
+    const fileA = join(recordingsDir, 'a.mp4');
+    const fileB = join(recordingsDir, 'b.mp4');
+    writeFileSync(fileA, Buffer.alloc(2048, 0));
+    writeFileSync(fileB, Buffer.alloc(4096, 0));
+
+    createFileBackedAttachment({
+      filename: 'a.mp4',
+      mimeType: 'video/mp4',
+      sizeBytes: 2048,
+      filePath: fileA,
+      expiresAt: now - 5000,
+    });
+
+    createFileBackedAttachment({
+      filename: 'b.mp4',
+      mimeType: 'video/mp4',
+      sizeBytes: 4096,
+      filePath: fileB,
+      expiresAt: now - 3000,
+    });
+
+    // Also add a non-expired one
+    const fileC = join(recordingsDir, 'c.mp4');
+    writeFileSync(fileC, Buffer.alloc(1024, 0));
+    createFileBackedAttachment({
+      filename: 'c.mp4',
+      mimeType: 'video/mp4',
+      sizeBytes: 1024,
+      filePath: fileC,
+      expiresAt: now + 86400000,
+    });
+
+    const result = runCleanupPass();
+
+    expect(result.cleaned).toBe(2);
+    expect(result.bytesFreed).toBe(2048 + 4096);
+
+    // Expired files gone
+    expect(existsSync(fileA)).toBe(false);
+    expect(existsSync(fileB)).toBe(false);
+
+    // Non-expired file still present
+    expect(existsSync(fileC)).toBe(true);
+  });
+
+  test('returns zeros when no expired attachments exist', () => {
+    const result = runCleanupPass();
+    expect(result.cleaned).toBe(0);
+    expect(result.bytesFreed).toBe(0);
+  });
+
+  test('file-backed attachments without expiresAt are never cleaned', () => {
+    const recordingsDir = join(testDir, 'recordings-no-expiry');
+    mkdirSync(recordingsDir, { recursive: true });
+
+    const filePath = join(recordingsDir, 'permanent.mp4');
+    writeFileSync(filePath, Buffer.alloc(256, 0));
+
+    const permanent = createFileBackedAttachment({
+      filename: 'permanent.mp4',
+      mimeType: 'video/mp4',
+      sizeBytes: 256,
+      filePath,
+      // No expiresAt — should never be cleaned
+    });
+
+    const result = runCleanupPass();
+
+    expect(result.cleaned).toBe(0);
+    expect(existsSync(filePath)).toBe(true);
+    expect(getAttachmentById(permanent.id)).not.toBeNull();
+  });
+});
diff --git a/assistant/src/config/defaults.ts b/assistant/src/config/defaults.ts
index 4855a1b43f9..888c0b9bb24 100644
--- a/assistant/src/config/defaults.ts
+++ b/assistant/src/config/defaults.ts
@@ -251,6 +251,10 @@ export const DEFAULT_CONFIG: AssistantConfig = {
allowPerCallOverride: true, }, }, + qaRecording: { + defaultRetentionDays: 7, + cleanupIntervalMs: 6 * 60 * 60 * 1000, // 6 hours + }, sms: { enabled: false, provider: 'twilio' as const, diff --git a/assistant/src/config/schema.ts b/assistant/src/config/schema.ts index cbc136da4bc..4c7156c6aa0 100644 --- a/assistant/src/config/schema.ts +++ b/assistant/src/config/schema.ts @@ -1054,6 +1054,19 @@ export const SkillsConfigSchema = z.object({ allowBundled: z.array(z.string()).nullable().default(null), }); +export const QaRecordingConfigSchema = z.object({ + defaultRetentionDays: z + .number({ error: 'qaRecording.defaultRetentionDays must be a number' }) + .int('qaRecording.defaultRetentionDays must be an integer') + .positive('qaRecording.defaultRetentionDays must be a positive integer') + .default(7), + cleanupIntervalMs: z + .number({ error: 'qaRecording.cleanupIntervalMs must be a number' }) + .int('qaRecording.cleanupIntervalMs must be an integer') + .positive('qaRecording.cleanupIntervalMs must be a positive integer') + .default(6 * 60 * 60 * 1000), +}); + export const SmsConfigSchema = z.object({ enabled: z .boolean({ error: 'sms.enabled must be a boolean' }) @@ -1366,6 +1379,10 @@ export const AssistantConfigSchema = z.object({ allowPerCallOverride: true, }, }), + qaRecording: QaRecordingConfigSchema.default({ + defaultRetentionDays: 7, + cleanupIntervalMs: 6 * 60 * 60 * 1000, + }), sms: SmsConfigSchema.default({ enabled: false, provider: 'twilio', @@ -1435,5 +1452,6 @@ export type CallsSafetyConfig = z.infer; export type CallsVoiceConfig = z.infer; export type CallsElevenLabsConfig = z.infer; export type CallerIdentityConfig = z.infer; +export type QaRecordingConfig = z.infer; export type SmsConfig = z.infer; export type IngressConfig = z.infer; diff --git a/assistant/src/config/types.ts b/assistant/src/config/types.ts index 087de1c37c9..4cea1f98f92 100644 --- a/assistant/src/config/types.ts +++ b/assistant/src/config/types.ts @@ -37,6 +37,7 @@ export type { 
CallsVoiceConfig, CallsElevenLabsConfig, CallerIdentityConfig, + QaRecordingConfig, SmsConfig, IngressConfig, } from './schema.js'; diff --git a/assistant/src/daemon/lifecycle.ts b/assistant/src/daemon/lifecycle.ts index 57b6d4c2b21..c09befe2a92 100644 --- a/assistant/src/daemon/lifecycle.ts +++ b/assistant/src/daemon/lifecycle.ts @@ -51,6 +51,7 @@ import { AgentHeartbeatService } from '../agent-heartbeat/agent-heartbeat-servic import { getEnrichmentService } from '../workspace/commit-message-enrichment-service.js'; import { reconcileCallsOnStartup } from '../calls/call-recovery.js'; import { TwilioConversationRelayProvider } from '../calls/twilio-provider.js'; +import { startRecordingCleanup, stopRecordingCleanup } from './recording-cleanup.js'; const log = getLogger('lifecycle'); @@ -484,6 +485,9 @@ export async function runDaemon(): Promise { } } + // Start periodic cleanup of expired file-backed QA recording attachments. + startRecordingCleanup(config.qaRecording.cleanupIntervalMs); + // Start workspace heartbeat service. This periodically checks all // tracked workspaces for uncommitted changes and auto-commits when // thresholds are exceeded (age > 5 min OR > 20 files changed). @@ -558,6 +562,7 @@ export async function runDaemon(): Promise { if (runtimeHttp) await runtimeHttp.stop(); await browserManager.closeAllPages(); + stopRecordingCleanup(); scheduler.stop(); memoryWorker.stop(); await qdrantManager.stop(); diff --git a/assistant/src/daemon/recording-cleanup.ts b/assistant/src/daemon/recording-cleanup.ts new file mode 100644 index 00000000000..4b10323c674 --- /dev/null +++ b/assistant/src/daemon/recording-cleanup.ts @@ -0,0 +1,100 @@ +/** + * Periodic cleanup of expired file-backed QA recording attachments. + * + * Runs on a configurable interval (default: every 6 hours) and also + * executes a single pass on daemon startup to catch recordings that + * expired while the daemon was offline. 
+ */
+
+import { existsSync, statSync, unlinkSync } from 'node:fs';
+import { getExpiredFileAttachments, deleteFileBackedAttachment } from '../memory/attachments-store.js';
+import { getLogger } from '../util/logger.js';
+
+const log = getLogger('recording-cleanup');
+
+/**
+ * Report whether `err` is an error whose `code` is `ENOENT`, i.e. the
+ * path it refers to no longer exists.
+ */
+function isMissingFileError(err: unknown): boolean {
+  return err instanceof Error && 'code' in err && err.code === 'ENOENT';
+}
+
+/**
+ * Run a single cleanup pass: find expired file-backed attachments,
+ * delete their files from disk, and remove the DB rows.
+ *
+ * A file that is already missing (or that disappears while the pass is
+ * running) does not block removal of its DB row; it simply contributes
+ * 0 bytes. Any other failure is logged and the attachment is left in
+ * place so a later pass can retry it.
+ *
+ * @returns The number of attachments whose DB row was deleted and the
+ *   total size in bytes of the files removed for those attachments.
+ */
+export function runCleanupPass(): { cleaned: number; bytesFreed: number } {
+  const expired = getExpiredFileAttachments();
+  if (expired.length === 0) {
+    return { cleaned: 0, bytesFreed: 0 };
+  }
+
+  let cleaned = 0;
+  let bytesFreed = 0;
+
+  for (const { id, filePath } of expired) {
+    try {
+      let fileSize = 0;
+      let removedFromDisk = false;
+
+      if (existsSync(filePath)) {
+        try {
+          fileSize = statSync(filePath).size;
+        } catch {
+          // If we can't stat, still try to delete; the size stays 0.
+        }
+
+        try {
+          unlinkSync(filePath);
+          removedFromDisk = true;
+        } catch (err) {
+          // The file can vanish between existsSync and unlinkSync. That is
+          // the same outcome as "already missing", so keep going and drop
+          // the DB row; every other unlink failure is a real error.
+          if (!isMissingFileError(err)) {
+            throw err;
+          }
+          fileSize = 0;
+        }
+      }
+
+      if (removedFromDisk) {
+        log.info({ attachmentId: id, filePath }, 'Deleted expired recording file');
+      } else {
+        log.debug({ attachmentId: id, filePath }, 'Expired recording file already missing from disk');
+      }
+
+      const result = deleteFileBackedAttachment(id);
+      if (result === 'deleted') {
+        cleaned++;
+        bytesFreed += fileSize;
+      }
+    } catch (err) {
+      log.warn({ err, attachmentId: id, filePath }, 'Failed to clean up expired recording');
+    }
+  }
+
+  if (cleaned > 0) {
+    const mbFreed = (bytesFreed / (1024 * 1024)).toFixed(1);
+    log.info({ count: cleaned, bytesFreed, mbFreed }, `Cleaned up ${cleaned} expired QA recordings, freed ${mbFreed} MB`);
+  }
+
+  return { cleaned, bytesFreed };
+}
+
+// Handle of the active cleanup interval, or null when the worker is stopped.
+let cleanupTimer: ReturnType<typeof setInterval> | null = null;
+
+/**
+ * Start the periodic cleanup worker. Runs one immediate pass,
+ * then schedules recurring passes at the configured interval.
+ *
+ * Calling this while a worker is already running replaces that worker
+ * instead of leaving the earlier interval scheduled.
+ *
+ * @param intervalMs Delay between passes in milliseconds. Values above
+ *   2^31-1 ms (about 24.8 days) are clamped to that maximum, because
+ *   Node.js timers coerce larger delays to 1 ms.
+ */
+export function startRecordingCleanup(intervalMs: number): void {
+  // A repeated start must not orphan the previously scheduled interval.
+  stopRecordingCleanup();
+
+  // Run one pass immediately to catch anything that expired while offline
+  try {
+    runCleanupPass();
+  } catch (err) {
+    log.warn({ err }, 'Initial recording cleanup pass failed');
+  }
+
+  // Largest delay a timer accepts before it is coerced to 1 ms.
+  const maxTimerDelayMs = 2 ** 31 - 1;
+  const effectiveIntervalMs = Math.min(intervalMs, maxTimerDelayMs);
+  if (effectiveIntervalMs !== intervalMs) {
+    log.warn(
+      { intervalMs, effectiveIntervalMs },
+      'Recording cleanup interval exceeds the maximum timer delay; clamping',
+    );
+  }
+
+  cleanupTimer = setInterval(() => {
+    try {
+      runCleanupPass();
+    } catch (err) {
+      log.warn({ err }, 'Periodic recording cleanup pass failed');
+    }
+  }, effectiveIntervalMs);
+
+  // Don't keep the process alive just for cleanup
+  cleanupTimer.unref();
+  log.info({ intervalMs }, 'Recording cleanup worker started');
+}
+
+/**
+ * Stop the periodic cleanup worker.
+ *
+ * Does nothing when the worker is not running, so it is safe to call
+ * more than once.
+ */
+export function stopRecordingCleanup(): void {
+  if (cleanupTimer !== null) {
+    clearInterval(cleanupTimer);
+    cleanupTimer = null;
+    log.info('Recording cleanup worker stopped');
+  }
+}