diff --git a/assistant/src/memory/jobs-worker.ts b/assistant/src/memory/jobs-worker.ts index ca59501f189..7b9da8313f1 100644 --- a/assistant/src/memory/jobs-worker.ts +++ b/assistant/src/memory/jobs-worker.ts @@ -83,6 +83,8 @@ import { memoryV2ConsolidateJob, } from "./v2/consolidation-job.js"; import { memoryV2SweepJob } from "./v2/sweep-job.js"; +import { memoryV3ConsolidateJob } from "./v3/consolidation-job.js"; +import { memoryV3IndexMaintenanceJob } from "./v3/maintenance.js"; const log = getLogger("memory-jobs-worker"); @@ -603,6 +605,12 @@ async function processJob( case "memory_v2_consolidate": await memoryV2ConsolidateJob(job, config); return; + case "memory_v3_consolidate": + await memoryV3ConsolidateJob(job, config); + return; + case "memory_v3_index_maintenance": + await memoryV3IndexMaintenanceJob(job); + return; case "memory_v2_migrate": await memoryV2MigrateJob(job, config); return; @@ -681,17 +689,28 @@ export const GRAPH_MAINTENANCE_CHECKPOINTS = { patternScan: "graph_maintenance:pattern_scan:last_run", narrative: "graph_maintenance:narrative:last_run", memoryV2Consolidate: "memory_v2_consolidate_last_run", + memoryV3Consolidate: "memory_v3_consolidate_last_run", } as const; /** * Enqueue periodic graph maintenance jobs. * * Mutually exclusive between v1 and v2: - * - v2 active (`memory.v2.enabled` on) → only `memory_v2_consolidate` is - * scheduled. + * - v2 active (`memory.v2.enabled` on) → only one buffer-drainer is + * scheduled (see below). * - v2 inactive → the four v1 entries (decay, consolidate, pattern_scan, * narrative) are scheduled instead. * + * **Buffer-drainer retarget (v2 vs v3).** The `memory/buffer.md` is shared, so + * exactly one consolidator may own the drain at a time. When + * `memory.v3.write.enabled` is on, the v3 consolidator (`memory_v3_consolidate`) + * is scheduled INSTEAD of `memory_v2_consolidate` — same shared buffer + + * standing-context files, additionally authored into the v3 tree. 
When the v3 + * write flag is off (default) the v2 consolidator stays the sole drainer, + * unchanged. The retarget is a clean conditional, fully reversible via the flag. + * Concept pages stay the shared canonical store, so the v2 router keeps working + * off pages v3 writes regardless of which consolidator ran. + * * Read/write paths route to v2 when the flag is on, so v1 graph data goes * unread; running v1 maintenance alongside v2 is wasted compute and LLM * spend. The v1 code path remains live so flipping the flag back to off @@ -708,20 +727,29 @@ export function maybeEnqueueGraphMaintenanceJobs( nowMs = Date.now(), ): void { const v2Active = config.memory.v2.enabled; + const v3WriteActive = config.memory.v3.write.enabled; + + // The single buffer-drainer entry for the v2-active branch: v3 when the v3 + // write flag owns the drain, v2 otherwise. Same shared buffer either way. + const consolidateEntry = v3WriteActive + ? { + key: GRAPH_MAINTENANCE_CHECKPOINTS.memoryV3Consolidate, + intervalMs: config.memory.v3.write.consolidateIntervalMs, + jobType: "memory_v3_consolidate" as MemoryJobType, + } + : { + key: GRAPH_MAINTENANCE_CHECKPOINTS.memoryV2Consolidate, + intervalMs: + config.memory.v2.consolidation_interval_hours * 60 * 60 * 1000, + jobType: "memory_v2_consolidate" as MemoryJobType, + }; const schedule: Array<{ key: string; intervalMs: number; jobType: MemoryJobType; }> = v2Active - ? [ - { - key: GRAPH_MAINTENANCE_CHECKPOINTS.memoryV2Consolidate, - intervalMs: - config.memory.v2.consolidation_interval_hours * 60 * 60 * 1000, - jobType: "memory_v2_consolidate", - }, - ] + ? [consolidateEntry] : [ { key: GRAPH_MAINTENANCE_CHECKPOINTS.decay, @@ -745,25 +773,25 @@ export function maybeEnqueueGraphMaintenanceJobs( }, ]; - let enqueuedV2 = false; + let enqueuedConsolidate = false; for (const { key, intervalMs, jobType } of schedule) { const lastRun = parseInt(getMemoryCheckpoint(key) ?? 
"0", 10); if (nowMs - lastRun >= intervalMs) { enqueueMemoryJob(jobType, {}); setMemoryCheckpoint(key, String(nowMs)); - if (jobType === "memory_v2_consolidate") enqueuedV2 = true; + if (jobType === consolidateEntry.jobType) enqueuedConsolidate = true; } } + // Size-based trigger: when the shared buffer crosses the configured line + // count, drain it now rather than waiting out the interval. Retargets to the + // same consolidator the interval branch above selected. const maxLines = config.memory.v2.consolidation_max_buffer_lines; - if (v2Active && !enqueuedV2 && maxLines !== null) { + if (v2Active && !enqueuedConsolidate && maxLines !== null) { const bufferPath = join(getWorkspaceDir(), "memory", "buffer.md"); if (countBufferLines(bufferPath) >= maxLines) { - enqueueMemoryJob("memory_v2_consolidate", {}); - setMemoryCheckpoint( - GRAPH_MAINTENANCE_CHECKPOINTS.memoryV2Consolidate, - String(nowMs), - ); + enqueueMemoryJob(consolidateEntry.jobType, {}); + setMemoryCheckpoint(consolidateEntry.key, String(nowMs)); } } } diff --git a/assistant/src/memory/v3/__tests__/consolidation-job.test.ts b/assistant/src/memory/v3/__tests__/consolidation-job.test.ts new file mode 100644 index 00000000000..5969c7de6f3 --- /dev/null +++ b/assistant/src/memory/v3/__tests__/consolidation-job.test.ts @@ -0,0 +1,468 @@ +/** + * Tests for the memory v3 consolidation surface (PR 19): + * - `memoryV3ConsolidateJob` (`../consolidation-job.ts`) — drains the SHARED + * `memory/buffer.md` into shared concept pages + the v3 tree, mirroring v2. + * - the scheduler retarget in `maybeEnqueueGraphMaintenanceJobs` + * (`../../jobs-worker.ts`) — enqueues `memory_v3_consolidate` INSTEAD of + * `memory_v2_consolidate` when `memory.v3.write.enabled`, and v2 when off. + * - `runIndexMaintenance` / `wouldIntroduceCycle` (`../maintenance.ts`) — the + * mechanical no-LLM upkeep: report stale indices, refuse cycle edits. 
+ * + * The background-agent handoff (`runBackgroundJob`) is mocked so no real LLM + * runs — the agent's actual page/tree writes are exercised by the v3 store/ + * validate unit tests; here we drive the same fixture writes deterministically + * to prove the maintenance + cycle-check semantics. The DB is real (a temp + * workspace pinned via `VELLUM_WORKSPACE_DIR`) so the scheduler's checkpoint / + * enqueue path runs end-to-end. Sample content uses generic placeholders + * (Alice/Bob). + */ +import { + existsSync, + mkdirSync, + mkdtempSync, + rmSync, + writeFileSync, +} from "node:fs"; +import { utimes } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { + afterAll, + beforeAll, + beforeEach, + describe, + expect, + mock, + test, +} from "bun:test"; + +import { eq } from "drizzle-orm"; + +import { makeMockLogger } from "../../../__tests__/helpers/mock-logger.js"; + +mock.module("../../../util/logger.js", () => ({ + getLogger: () => makeMockLogger(), +})); + +// ── runBackgroundJob mock ─────────────────────────────────────────── +// +// The consolidation handler delegates bootstrap + processMessage + timeout + +// classification to runBackgroundJob. We stub it so no LLM runs and assert the +// surface (prompt, callSite, source, suppression) it was called with. 
// Call log for the stubbed runner. Tests read these to assert how many times
// `runBackgroundJob` was invoked and with which options.
let runnerCalls = 0;
let runnerLastArgs: Record<string, unknown> | null = null;
// Swappable result factory so an individual test can simulate a failed run.
let runnerImpl: () => Promise<{
  conversationId: string;
  ok: boolean;
  error?: Error;
  errorKind?: string;
}> = async () => ({ conversationId: "conv-1", ok: true });

mock.module("../../../runtime/background-job-runner.js", () => ({
  runBackgroundJob: async (opts: Record<string, unknown>) => {
    runnerCalls += 1;
    runnerLastArgs = opts;
    return runnerImpl();
  },
}));

// ── Workspace pin (precedes the DB import) ──────────────────────────
let tmpWorkspace: string;
let previousWorkspaceEnv: string | undefined;

beforeAll(() => {
  tmpWorkspace = mkdtempSync(join(tmpdir(), "memory-v3-consolidate-test-"));
  previousWorkspaceEnv = process.env.VELLUM_WORKSPACE_DIR;
  process.env.VELLUM_WORKSPACE_DIR = tmpWorkspace;
});

afterAll(() => {
  // Restore the caller's environment exactly: an originally-unset variable
  // must end up unset, not set to the string "undefined".
  if (previousWorkspaceEnv === undefined) {
    delete process.env.VELLUM_WORKSPACE_DIR;
  } else {
    process.env.VELLUM_WORKSPACE_DIR = previousWorkspaceEnv;
  }
  rmSync(tmpWorkspace, { recursive: true, force: true });
});

// Dynamic imports so the module mocks registered above are in place before
// the modules under test are evaluated.
const { getDb } = await import("../../db-connection.js");
const { initializeDb } = await import("../../db-init.js");
const { resetTestTables } = await import("../../raw-query.js");
const { memoryJobs } = await import("../../schema.js");
const { applyNestedDefaults } = await import("../../../config/loader.js");
const { setMemoryCheckpoint, deleteMemoryCheckpoint } =
  await import("../../checkpoints.js");
const { maybeEnqueueGraphMaintenanceJobs } =
  await import("../../jobs-worker.js");
const { memoryV3ConsolidateJob } = await import("../consolidation-job.js");
const { CUTOFF_PLACEHOLDER, CONSOLIDATION_PROMPT } =
  await import("../prompts/consolidation.js");
const { runIndexMaintenance, wouldIntroduceCycle } =
  await import("../maintenance.js");
const { writePage } = await import("../../v2/page-store.js");
const { invalidatePageIndex } = await import("../../v2/page-index.js");
const { invalidateEdgeIndex } = await import("../../v2/edge-index.js");
const { getTreeIndex, invalidateTreeIndex } = await import("../tree-index.js");
const { writeNode, getTreeDir, ROOT_NODE_ID } =
  await import("../tree-store.js");

const V2_CHECKPOINT = "memory_v2_consolidate_last_run";
const V3_CHECKPOINT = "memory_v3_consolidate_last_run";

// The job handler reads only `config.memory.v3.write.enabled` and the shared
// `config.memory.v2.consolidation_prompt_path`; a minimal stand-in covers both
// call sites without materializing the full default config.
type JobConfig = Parameters<typeof memoryV3ConsolidateJob>[1];
const CONFIG_V3_ON = {
  memory: {
    v2: { consolidation_prompt_path: null },
    v3: { write: { enabled: true } },
  },
} as JobConfig;
const CONFIG_V3_OFF = {
  memory: {
    v2: { consolidation_prompt_path: null },
    v3: { write: { enabled: false } },
  },
} as JobConfig;

/** Build a claimed (`running`) `memory_v3_consolidate` job row for the handler. */
function makeJob(): Parameters<typeof memoryV3ConsolidateJob>[0] {
  return {
    id: "consolidate-1",
    type: "memory_v3_consolidate",
    payload: {},
    status: "running",
    attempts: 0,
    deferrals: 0,
    runAfter: 0,
    lastError: null,
    startedAt: Date.now(),
    createdAt: Date.now(),
    updatedAt: Date.now(),
  };
}

// Path helpers are thunks because `tmpWorkspace` is only assigned in
// `beforeAll`, after this module body has been evaluated.
const memoryDir = () => join(tmpWorkspace, "memory");
const lockPath = () =>
  join(tmpWorkspace, "memory", ".v3-state", "consolidation.lock");
const bufferPath = () => join(tmpWorkspace, "memory", "buffer.md");

/**
 * Count rows in `memory_jobs` with the given type. Note the query filters on
 * type only, not on status.
 */
function countPendingJobs(type: string): number {
  return getDb()
    .select()
    .from(memoryJobs)
    .where(eq(memoryJobs.type, type))
    .all().length;
}

/**
 * Default config with v2 enabled, a one-hour interval for both consolidators,
 * the size trigger disabled, and the v3 write flag set as requested.
 */
function buildSchedulerConfig(v3WriteEnabled: boolean) {
  const cfg = applyNestedDefaults({});
  cfg.memory.v2.enabled = true;
  cfg.memory.v2.consolidation_interval_hours = 1;
  cfg.memory.v2.consolidation_max_buffer_lines = null;
  cfg.memory.v3.write.enabled = v3WriteEnabled;
  cfg.memory.v3.write.consolidateIntervalMs = 60 * 60 * 1000;
  return cfg;
}

/** Invalidate the tree, page, and edge index caches. */
function resetCaches(): void {
  invalidateTreeIndex();
  invalidatePageIndex();
  invalidateEdgeIndex();
}
+initializeDb(); + +beforeEach(() => { + rmSync(memoryDir(), { recursive: true, force: true }); + mkdirSync(join(memoryDir(), ".v3-state"), { recursive: true }); + mkdirSync(join(memoryDir(), "concepts"), { recursive: true }); + resetTestTables("memory_jobs", "memory_checkpoints"); + resetCaches(); + + runnerCalls = 0; + runnerLastArgs = null; + runnerImpl = async () => ({ conversationId: "conv-1", ok: true }); +}); + +// --------------------------------------------------------------------------- +// memoryV3ConsolidateJob +// --------------------------------------------------------------------------- + +describe("memoryV3ConsolidateJob — flag off (v3 write disabled)", () => { + test("returns disabled without invoking the runner or touching the lock", async () => { + writeFileSync(bufferPath(), "- [Apr 27, 9:00 AM] Alice prefers VS Code.\n"); + + const result = await memoryV3ConsolidateJob(makeJob(), CONFIG_V3_OFF); + + expect(result).toEqual({ kind: "disabled" }); + expect(runnerCalls).toBe(0); + expect(existsSync(lockPath())).toBe(false); + expect(countPendingJobs("memory_v3_index_maintenance")).toBe(0); + expect(countPendingJobs("memory_v2_reembed")).toBe(0); + }); +}); + +describe("memoryV3ConsolidateJob — empty shared buffer", () => { + test("returns empty_buffer when the shared buffer.md is missing", async () => { + expect(existsSync(bufferPath())).toBe(false); + + const result = await memoryV3ConsolidateJob(makeJob(), CONFIG_V3_ON); + + expect(result).toEqual({ kind: "empty_buffer" }); + expect(runnerCalls).toBe(0); + expect(existsSync(lockPath())).toBe(false); + }); +}); + +describe("memoryV3ConsolidateJob — non-empty shared buffer", () => { + beforeEach(() => { + writeFileSync( + bufferPath(), + "- [Apr 27, 9:00 AM] Alice prefers VS Code over Vim.\n" + + "- [Apr 27, 9:05 AM] Bob ships at end of day.\n", + ); + }); + + test("invokes runBackgroundJob with the v3 tree-authoring prompt and suppression", async () => { + const result = await 
memoryV3ConsolidateJob(makeJob(), CONFIG_V3_ON); + + expect(result.kind).toBe("invoked"); + expect(runnerCalls).toBe(1); + expect(runnerLastArgs?.callSite).toBe("mainAgent"); + expect(runnerLastArgs?.origin).toBe("memory_consolidation"); + // Shared consolidation conversation source (recognized by the route layer). + expect(runnerLastArgs?.source).toBe("memory_v2_consolidation"); + expect(runnerLastArgs?.suppressFailureNotifications).toBe(true); + expect(runnerLastArgs?.trustContext).toEqual({ + sourceChannel: "vellum", + trustClass: "guardian", + }); + + const prompt = runnerLastArgs?.prompt as string; + // Cutoff substituted (placeholder gone), buffer-format timestamp present. + expect(prompt).not.toContain(CUTOFF_PLACEHOLDER); + expect(prompt).toMatch(/\b[A-Z][a-z]{2} \d{1,2}, \d{1,2}:\d{2} (AM|PM)\b/); + // v3-distinctive: the prompt routes into the v3 tree, not just flat pages. + expect(prompt).toContain("memory/v3/tree/"); + // Standing-context files preserved exactly as v2 (shared). 
+ expect(prompt).toContain("memory/buffer.md"); + expect(prompt).toContain("memory/recent.md"); + expect(prompt).toContain("memory/essentials.md"); + expect(prompt).toContain("memory/threads.md"); + }); + + test("enqueues index-maintenance + page-reembed follow-ups on success", async () => { + const result = await memoryV3ConsolidateJob(makeJob(), CONFIG_V3_ON); + + expect(result.kind).toBe("invoked"); + if (result.kind === "invoked") { + expect(result.followUpJobIds).toHaveLength(2); + } + expect(countPendingJobs("memory_v3_index_maintenance")).toBe(1); + expect(countPendingJobs("memory_v2_reembed")).toBe(1); + }); + + test("releases the lock after a successful invocation", async () => { + const result = await memoryV3ConsolidateJob(makeJob(), CONFIG_V3_ON); + expect(result.kind).toBe("invoked"); + expect(existsSync(lockPath())).toBe(false); + }); + + test("returns run_failed and skips follow-ups when the runner reports failure", async () => { + runnerImpl = async () => ({ + conversationId: "conv-1", + ok: false, + error: new Error("simulated runner failure"), + errorKind: "exception", + }); + + const result = await memoryV3ConsolidateJob(makeJob(), CONFIG_V3_ON); + + expect(result.kind).toBe("run_failed"); + if (result.kind === "run_failed") { + expect(result.reason).toBe("simulated runner failure"); + } + expect(countPendingJobs("memory_v3_index_maintenance")).toBe(0); + expect(countPendingJobs("memory_v2_reembed")).toBe(0); + expect(existsSync(lockPath())).toBe(false); + }); + + test("a live lock holder blocks a second concurrent invocation", async () => { + writeFileSync(lockPath(), `${process.pid} 1700000000000\n`); + + const result = await memoryV3ConsolidateJob(makeJob(), CONFIG_V3_ON); + + expect(result.kind).toBe("locked"); + expect(runnerCalls).toBe(0); + expect(existsSync(lockPath())).toBe(true); + }); +}); + +describe("CONSOLIDATION_PROMPT (v3)", () => { + test("keeps the standing-context outputs identical to v2", () => { + 
expect(CONSOLIDATION_PROMPT).toContain(CUTOFF_PLACEHOLDER); + expect(CONSOLIDATION_PROMPT).toContain("memory/essentials.md"); + expect(CONSOLIDATION_PROMPT).toContain("memory/threads.md"); + expect(CONSOLIDATION_PROMPT).toContain("memory/recent.md"); + expect(CONSOLIDATION_PROMPT).toContain("memory/buffer.md"); + expect(CONSOLIDATION_PROMPT).toContain("≤2000 chars"); + }); + + test("adds the v3 tree-authoring routing the shared concept pages get indexed into", () => { + expect(CONSOLIDATION_PROMPT).toContain("memory/v3/tree/"); + expect(CONSOLIDATION_PROMPT).toContain("children"); + // The DAG cycle / reachability discipline must be in the prompt. + expect(CONSOLIDATION_PROMPT.toLowerCase()).toContain("cycle"); + expect(CONSOLIDATION_PROMPT).toContain(ROOT_NODE_ID); + }); +}); + +// --------------------------------------------------------------------------- +// Scheduler retarget — shared buffer drained by exactly one consolidator. +// --------------------------------------------------------------------------- + +describe("maybeEnqueueGraphMaintenanceJobs — v2/v3 consolidator retarget", () => { + test("enqueues v3 (not v2) when memory.v3.write.enabled is on", () => { + const config = buildSchedulerConfig(true); + deleteMemoryCheckpoint(V3_CHECKPOINT); + deleteMemoryCheckpoint(V2_CHECKPOINT); + + maybeEnqueueGraphMaintenanceJobs(config, Date.now()); + + expect(countPendingJobs("memory_v3_consolidate")).toBe(1); + expect(countPendingJobs("memory_v2_consolidate")).toBe(0); + // v1 entries stay suppressed (v2 active). 
+ expect(countPendingJobs("graph_decay")).toBe(0); + }); + + test("enqueues v2 (not v3) when memory.v3.write.enabled is off — v2 path unchanged", () => { + const config = buildSchedulerConfig(false); + deleteMemoryCheckpoint(V3_CHECKPOINT); + deleteMemoryCheckpoint(V2_CHECKPOINT); + + maybeEnqueueGraphMaintenanceJobs(config, Date.now()); + + expect(countPendingJobs("memory_v2_consolidate")).toBe(1); + expect(countPendingJobs("memory_v3_consolidate")).toBe(0); + }); + + test("v3 size trigger drains the shared buffer when the line count is crossed", () => { + const config = buildSchedulerConfig(true); + config.memory.v2.consolidation_max_buffer_lines = 5; + + const now = Date.now(); + // Recent checkpoint so the time-based trigger does not fire — only size. + setMemoryCheckpoint(V3_CHECKPOINT, String(now - 60_000)); + const entries = Array.from( + { length: 10 }, + (_, i) => `- [Jan 15, 2:${String(i).padStart(2, "0")} PM] note ${i}`, + ); + writeFileSync(bufferPath(), entries.join("\n") + "\n"); + + maybeEnqueueGraphMaintenanceJobs(config, now); + + expect(countPendingJobs("memory_v3_consolidate")).toBe(1); + expect(countPendingJobs("memory_v2_consolidate")).toBe(0); + }); +}); + +// --------------------------------------------------------------------------- +// Maintenance — cycle refusal + stale-index reporting (mechanical, no LLM). +// --------------------------------------------------------------------------- + +describe("wouldIntroduceCycle", () => { + test("refuses an edge that would close a loop (child already reaches parent)", async () => { + // _root → node:a → node:b. Adding b → a would close a → b → a. 
+ await writeNode(tmpWorkspace, { + id: ROOT_NODE_ID, + frontmatter: { children: ["node:a"] }, + body: "root", + }); + await writeNode(tmpWorkspace, { + id: "a", + frontmatter: { children: ["node:b"] }, + body: "a", + }); + await writeNode(tmpWorkspace, { + id: "b", + frontmatter: { children: [] }, + body: "b", + }); + resetCaches(); + const tree = await getTreeIndex(tmpWorkspace); + + // b → a would create a cycle; a → b already exists (DAG-safe re-add). + expect(wouldIntroduceCycle(tree, "b", "a")).toBe(true); + // A self-edge is trivially a cycle. + expect(wouldIntroduceCycle(tree, "a", "a")).toBe(true); + // A fresh leaf edge does not introduce a cycle. + expect(wouldIntroduceCycle(tree, "b", "c")).toBe(false); + // Adding a second parent for b (DAG, not cycle) is allowed. + expect(wouldIntroduceCycle(tree, ROOT_NODE_ID, "b")).toBe(false); + }); +}); + +describe("runIndexMaintenance", () => { + test("reports a stale composed index (parent mtime predates a child)", async () => { + // _root → node:people → page:alice. Make `_root` (the parent) older than + // `people` so the parent's composed index is stale relative to a child node. + await writeNode(tmpWorkspace, { + id: ROOT_NODE_ID, + frontmatter: { children: ["node:people"] }, + body: "root", + }); + await writeNode(tmpWorkspace, { + id: "people", + frontmatter: { children: ["page:alice"] }, + body: "people", + }); + await writePage(tmpWorkspace, { + slug: "alice", + frontmatter: { edges: [], ref_files: [], ref_urls: [] }, + body: "alice", + }); + + // Pin mtimes: _root older than its child `people` so _root is flagged. 
+ const treeDir = getTreeDir(tmpWorkspace); + const old = new Date(1_000_000_000_000); + const fresh = new Date(2_000_000_000_000); + await utimes(join(treeDir, "people.md"), fresh, fresh); + await utimes(join(treeDir, `${ROOT_NODE_ID}.md`), old, old); + resetCaches(); + + const result = await runIndexMaintenance(tmpWorkspace); + + expect(result.staleIndexCount).toBeGreaterThanOrEqual(1); + expect( + result.report.staleIndex.some( + (s) => s.node === ROOT_NODE_ID && s.child === "people", + ), + ).toBe(true); + // Clean tree otherwise: alice is reachable, refs resolve, no cycles. + expect(result.cycleCount).toBe(0); + expect(result.danglingChildRefCount).toBe(0); + expect(result.orphanPageCount).toBe(0); + }); + + test("returns a clean report for a well-formed tree", async () => { + await writeNode(tmpWorkspace, { + id: ROOT_NODE_ID, + frontmatter: { children: ["page:alice"] }, + body: "root", + }); + await writePage(tmpWorkspace, { + slug: "alice", + frontmatter: { edges: [], ref_files: [], ref_urls: [] }, + body: "alice", + }); + resetCaches(); + + const result = await runIndexMaintenance(tmpWorkspace); + + expect(result.cycleCount).toBe(0); + expect(result.danglingChildRefCount).toBe(0); + expect(result.orphanPageCount).toBe(0); + expect(result.unknownEdgeTargetCount).toBe(0); + }); +}); diff --git a/assistant/src/memory/v3/consolidation-job.ts b/assistant/src/memory/v3/consolidation-job.ts new file mode 100644 index 00000000000..31ac7926805 --- /dev/null +++ b/assistant/src/memory/v3/consolidation-job.ts @@ -0,0 +1,323 @@ +/** + * Memory v3 — `memory_v3_consolidate` job handler. + * + * The v3 consolidation job drains the SHARED `memory/buffer.md` (the same + * buffer v2 uses — there is no v3 buffer) into the SHARED concept pages AND the + * v3 **tree** overlay, while maintaining the SHARED standing-context files + * (`essentials.md` / `threads.md` / `recent.md`) byte-for-byte the way v2 does. 
+ * It is the v3 counterpart to `assistant/src/memory/v2/consolidation-job.ts` + * and mirrors its orchestration exactly — the only divergences are the gating + * flag (`memory.v3.write.enabled`), the lock path (`memory/.v3-state/`), and the + * prompt body (which additionally asks the agent to author/refresh the tree). + * + * Because the buffer and the standing-context files are shared, exactly one + * consolidator may own the drain at a time. The scheduler enforces this: when + * `memory.v3.write.enabled` is on it enqueues `memory_v3_consolidate` INSTEAD of + * `memory_v2_consolidate` (see `maybeEnqueueGraphMaintenanceJobs` in + * `jobs-worker.ts`). Concept pages stay the shared canonical store, so the v2 + * router keeps working off pages v3 writes — it just ignores the tree overlay. + * + * Lifecycle (identical to v2 except the flag + lock path + tree-authoring + * prompt): + * 1. Bail if `config.memory.v3.write.enabled` is false (the worker may have + * claimed a stale row from before the flag was flipped off). + * 2. Acquire a single-process lock at `memory/.v3-state/consolidation.lock`. + * 3. Capture the cutoff timestamp at dispatch. + * 4. Read the shared `memory/buffer.md`. Bail if empty. + * 5. Hand off to `runBackgroundJob()` with the v3 consolidation prompt + * (`suppressFailureNotifications: true`). + * 6. On success, enqueue follow-ups: `memory_v3_index_maintenance` (mechanical + * tree/DAG upkeep) and `embed_concept_page` reembed (pages are shared, so + * reembed is still needed — reuse the existing `memory_v2_reembed` fan-out + * job type, which enqueues one `embed_concept_page` per slug). + * 7. Release the lock. 
+ */ + +import { + closeSync, + mkdirSync, + openSync, + readFileSync, + unlinkSync, + writeSync, +} from "node:fs"; +import { dirname, join } from "node:path"; + +import type { AssistantConfig } from "../../config/types.js"; +import { runBackgroundJob } from "../../runtime/background-job-runner.js"; +import { getLogger } from "../../util/logger.js"; +import { getWorkspaceDir } from "../../util/platform.js"; +import { isProcessAlive } from "../../util/process-liveness.js"; +import { formatBufferTimestamp } from "../graph/tool-handlers.js"; +import { + enqueueMemoryJob, + type MemoryJob, + type MemoryJobType, +} from "../jobs-store.js"; +// The consolidation conversation `source` is a UI/routing concern shared with +// v2 (the route layer recognizes "this conversation IS background memory +// consolidation" by this string). v2 and v3 are mutually exclusive drainers, so +// reusing the same source keeps that recognition working for both without +// forking a v3 constant. +import { MEMORY_V2_CONSOLIDATION_SOURCE } from "../v2/constants.js"; +import { resolveConsolidationPrompt } from "./prompts/consolidation.js"; + +const log = getLogger("memory-v3-consolidate"); + +/** Stable identifier surfaced in `runBackgroundJob` logs and notifications. */ +const JOB_NAME = "memory.consolidate"; + +/** + * Hard timeout for the consolidation run. Matches v2: consolidation reads the + * buffer, rewrites several files, re-encodes essentials/threads, and authors + * the tree — generous upper bound so a slow run isn't killed mid-edit, but + * bounded so a stuck provider can't pin the worker indefinitely. + */ +const CONSOLIDATION_TIMEOUT_MS = 15 * 60 * 1000; + +/** + * Follow-up jobs to fan out after a successful consolidation: + * - `memory_v3_index_maintenance` — mechanical (no-LLM) tree/DAG upkeep: + * validate the tree, report stale composed indices, cycle-check the DAG. 
/**
 * Follow-up jobs to fan out after a successful consolidation:
 *   - `memory_v3_index_maintenance` — mechanical (no-LLM) tree/DAG upkeep:
 *     validate the tree, report stale composed indices, cycle-check the DAG.
 *   - `memory_v2_reembed` — re-embed every shared concept page (the fan-out job
 *     enqueues one `embed_concept_page` per slug). Pages are shared, so a v3
 *     consolidation that touches them still needs the reembed. Conservatively
 *     re-embeds every page; the embedder's content-hash cache makes unchanged
 *     pages effectively free.
 */
const FOLLOW_UP_JOB_TYPES: readonly MemoryJobType[] = [
  "memory_v3_index_maintenance",
  "memory_v2_reembed",
] as const;

/**
 * Result of one handler invocation. A discriminated union so tests can assert
 * on the path taken (disabled / locked / empty / invoked / failed) without
 * having to spy on the filesystem. Mirrors v2's `ConsolidationOutcome`.
 */
export type ConsolidationOutcome =
  | { kind: "disabled" }
  | { kind: "locked"; holder: string }
  | { kind: "empty_buffer" }
  | { kind: "run_failed"; reason?: string }
  | {
      kind: "invoked";
      conversationId: string;
      cutoff: string;
      followUpJobIds: string[];
    };

/**
 * `memory_v3_consolidate` job handler. Step numbers in the inline comments
 * refer to the lifecycle list in the file header.
 *
 * @param _job - The claimed job row; not read by the handler.
 * @param config - Assistant config; only `memory.v3.write.enabled` and
 *   `memory.v2.consolidation_prompt_path` are consulted here.
 * @returns The path taken, as a {@link ConsolidationOutcome}.
 */
export async function memoryV3ConsolidateJob(
  _job: MemoryJob,
  config: AssistantConfig,
): Promise<ConsolidationOutcome> {
  // Step 1: bail when the flag is off (the worker may have claimed a stale row
  // from before the flag was flipped off).
  if (!config.memory.v3.write.enabled) {
    log.debug("memory.v3.write.enabled is false; consolidation skipped");
    return { kind: "disabled" };
  }

  const memoryDir = join(getWorkspaceDir(), "memory");
  const lockPath = join(memoryDir, ".v3-state", "consolidation.lock");
  const bufferPath = join(memoryDir, "buffer.md");

  // Step 2: acquire lock. Bails immediately if another consolidation is
  // already in flight — the next scheduled run can pick up where we leave off.
  const holder = tryAcquireLock(lockPath);
  if (holder !== null) {
    log.warn({ lockPath, holder }, "consolidation skipped: lock already held");
    return { kind: "locked", holder };
  }

  try {
    // Step 3: capture cutoff. Formatted to match `buffer.md` entry timestamps
    // (`Mon D, h:mm AM/PM`) so the agent's "timestamp ≥ cutoff" check compares
    // like-with-like at minute precision. Captured here (not at enqueue time)
    // so late-claimed rows get a fresh cutoff.
    const cutoff = formatBufferTimestamp(new Date());

    // Step 4: bail on empty buffer. The shared buffer has no work to drain.
    const bufferContent = readBufferContent(bufferPath);
    if (bufferContent.trim().length === 0) {
      log.debug("buffer.md empty; consolidation skipped");
      return { kind: "empty_buffer" };
    }

    // Step 5: hand off to the centralized background-job runner. As with v2,
    // `suppressFailureNotifications: true` opts out of `activity.failed`
    // notifications so a network blip on the tight consolidation interval does
    // not spam the home feed; Sentry-side reporting is unchanged.
    //
    // The prompt override config key (`memory.v2.consolidation_prompt_path`) is
    // shared — there is no separate v3 key, so an operator points one file at
    // whichever consolidator owns the drain.
    const runResult = await runBackgroundJob({
      jobName: JOB_NAME,
      source: MEMORY_V2_CONSOLIDATION_SOURCE,
      prompt: resolveConsolidationPrompt(
        config.memory.v2.consolidation_prompt_path,
        cutoff,
      ),
      trustContext: { sourceChannel: "vellum", trustClass: "guardian" },
      callSite: "mainAgent",
      timeoutMs: CONSOLIDATION_TIMEOUT_MS,
      origin: "memory_consolidation",
      suppressFailureNotifications: true,
    });

    if (!runResult.ok) {
      log.error(
        {
          conversationId: runResult.conversationId,
          errorKind: runResult.errorKind,
          err: runResult.error?.message,
        },
        "consolidation run failed; follow-ups skipped",
      );
      // Omit `reason` entirely (rather than setting it to undefined) when the
      // runner supplied no error message.
      const reason = runResult.error?.message;
      return reason !== undefined
        ? { kind: "run_failed", reason }
        : { kind: "run_failed" };
    }

    // Step 6: enqueue follow-up jobs (tree maintenance + page reembed).
    const followUpJobIds = enqueueFollowUpJobs();

    log.info(
      {
        conversationId: runResult.conversationId,
        cutoff,
        followUpJobIds,
      },
      "consolidation invoked",
    );
    return {
      kind: "invoked",
      conversationId: runResult.conversationId,
      cutoff,
      followUpJobIds,
    };
  } finally {
    // Step 7: release the lock on every exit path taken after acquisition.
    releaseLock(lockPath);
  }
}

/**
 * Enqueue every job in `FOLLOW_UP_JOB_TYPES` and return the ids of the ones
 * that were enqueued. Best-effort: a failed enqueue is logged and skipped —
 * it doesn't undo the agent's writes, and the next scheduled consolidation
 * will attempt the same fan-out.
 */
function enqueueFollowUpJobs(): string[] {
  const jobIds: string[] = [];
  for (const jobType of FOLLOW_UP_JOB_TYPES) {
    try {
      jobIds.push(enqueueMemoryJob(jobType, {}));
    } catch (err) {
      log.warn(
        { err, jobType },
        "consolidation: failed to enqueue follow-up job; continuing",
      );
    }
  }
  return jobIds;
}

/**
 * Read `memory/buffer.md`. Missing file → empty string so the skip-on-empty
 * branch doesn't have to distinguish "no file" from "blank file". Any other
 * read error is rethrown.
 */
function readBufferContent(bufferPath: string): string {
  try {
    return readFileSync(bufferPath, "utf-8");
  } catch (err) {
    if ((err as NodeJS.ErrnoException).code === "ENOENT") return "";
    throw err;
  }
}
/**
 * Try to take the consolidation lock at `lockPath`, creating its directory
 * first. Returns `null` when the lock was acquired, otherwise the holder
 * string of whoever owns it. A lock whose holder is judged stale is unlinked
 * and creation is retried once.
 */
function tryAcquireLock(lockPath: string): string | null {
  mkdirSync(dirname(lockPath), { recursive: true });

  const existingHolder = tryCreate(lockPath);
  // Either we now own the lock (null) or a live holder does: report as-is.
  if (existingHolder === null || !isHolderStale(existingHolder)) {
    return existingHolder;
  }

  log.info(
    { lockPath, holder: existingHolder },
    "consolidation: taking over stale lock (holder not running)",
  );
  try {
    unlinkSync(lockPath);
  } catch (unlinkErr) {
    // ENOENT means the stale file is already gone, so the retry below can
    // proceed; anything else means we could not clear it.
    const alreadyGone =
      (unlinkErr as NodeJS.ErrnoException).code === "ENOENT";
    if (!alreadyGone) {
      log.warn(
        { err: unlinkErr, lockPath },
        "consolidation: failed to unlink stale lock; reporting as locked",
      );
      return existingHolder;
    }
  }
  return tryCreate(lockPath);
}

/**
 * Exclusively create the lock file (`wx`). Returns `null` when this call
 * created it, or the existing file's holder string when it was already there.
 * Any `openSync` failure other than EEXIST is rethrown.
 */
function tryCreate(lockPath: string): string | null {
  let lockFd: number;
  try {
    lockFd = openSync(lockPath, "wx");
  } catch (openErr) {
    const alreadyExists =
      (openErr as NodeJS.ErrnoException).code === "EEXIST";
    if (!alreadyExists) throw openErr;
    return readHolderOrUnknown(lockPath);
  }

  const payload = `${process.pid} ${Date.now()}\n`;
  try {
    writeSync(lockFd, payload);
  } catch {
    // best-effort — payload is advisory, the file's existence is the lock
  } finally {
    try {
      closeSync(lockFd);
    } catch {
      // best-effort
    }
  }
  return null;
}

/**
 * Read the holder string from an existing lock file. An empty payload or a
 * failed read both yield `"unknown"`.
 */
function readHolderOrUnknown(lockPath: string): string {
  try {
    const holder = readFileSync(lockPath, "utf-8").trim();
    return holder || "unknown";
  } catch {
    return "unknown";
  }
}
+ */ +function isHolderStale(holder: string): boolean { + const match = /^\d+/.exec(holder); + if (!match) return true; + const pid = Number.parseInt(match[0], 10); + if (!Number.isFinite(pid) || pid <= 0) return true; + return !isProcessAlive(pid); +} + +/** + * Idempotent unlink of the lock file. Called from the `finally` block so a + * crash in the run path doesn't leave the lock stranded. ENOENT is swallowed + * because the lock may have been released by an operator or never created. + */ +function releaseLock(lockPath: string): void { + try { + unlinkSync(lockPath); + } catch (err) { + const code = (err as NodeJS.ErrnoException).code; + if (code === "ENOENT") return; + log.warn( + { err, lockPath }, + "consolidation: failed to release lock (best-effort)", + ); + } +} diff --git a/assistant/src/memory/v3/maintenance.ts b/assistant/src/memory/v3/maintenance.ts new file mode 100644 index 00000000000..5bdaa3a9c06 --- /dev/null +++ b/assistant/src/memory/v3/maintenance.ts @@ -0,0 +1,144 @@ +/** + * Memory v3 — `memory_v3_index_maintenance` job + DAG-edit guards. + * + * The fast-lane, **no-LLM** mechanical counterpart to consolidation. Where + * consolidation (the slow lane) asks the agent to author the tree, maintenance + * is the deterministic upkeep that runs as a follow-up: it validates the tree, + * surfaces stale composed indices, and cycle-checks the DAG so a consolidation + * pass can't leave a loop behind. + * + * Three pieces: + * - {@link runIndexMaintenance} — the job body. Runs {@link validateTree} + * (merged: dangling refs, orphan pages, cycles, stale indices, unknown edge + * targets), logs a structured report, and returns a compact summary so the + * job dispatcher / tests can assert on it. + * - {@link wouldIntroduceCycle} — the guard a DAG editor calls BEFORE adding a + * `node:` edge to a parent. Returns true when `child` already reaches + * `parent` by descending `node:` children (so adding the edge would close a + * loop). 
 *   Uses the same iterative visited/guard traversal as the validator's
 *   descent so consolidation can refuse a cycle-introducing edit cheaply.
 *
 * Why no separate "refresh stale composed indices" write step: v3 node indices
 * are **composed at read time** (`index-composition.ts` is a pure function over
 * the live tree + page indices), so there is no persisted index to rewrite. The
 * maintenance job's role is to *detect and report* stale indices (a node whose
 * mtime predates a child it composes) — the re-authoring of the node's
 * self-description is the consolidation agent's responsibility, surfaced here so
 * the next pass knows what to refresh.
 */

import { getLogger } from "../../util/logger.js";
import { getWorkspaceDir } from "../../util/platform.js";
import type { MemoryJob } from "../jobs-store.js";
import type { TreeIndex } from "./tree-index.js";
import { type TreeValidationReport, validateTree } from "./validate.js";

const log = getLogger("memory-v3-index-maintenance");

/**
 * Compact summary of an index-maintenance pass. Mirrors the `*Count` fields of
 * {@link TreeValidationReport} so callers (and the job dispatcher's log line)
 * can report the health of the tree without re-counting. `report` carries the
 * full per-id lists for anything that wants to act on the specifics.
 */
export interface IndexMaintenanceResult {
  /** Copied from the report's `danglingChildRefCount`. */
  danglingChildRefCount: number;
  /** Copied from the report's `orphanPageCount`. */
  orphanPageCount: number;
  /** Copied from the report's `cycleCount`; non-zero triggers a WARN log. */
  cycleCount: number;
  /** Copied from the report's `staleIndexCount`; non-zero triggers a WARN log. */
  staleIndexCount: number;
  /** Copied from the report's `unknownEdgeTargetCount`. */
  unknownEdgeTargetCount: number;
  /** The full validation report the counts were taken from. */
  report: TreeValidationReport;
}

/**
 * Run a mechanical index-maintenance pass over the v3 tree.
 *
 * Validates the hand-authored tree (dangling refs, orphan pages, cycles, stale
 * composed indices, unknown edge targets) and logs a structured report. Stale
 * indices and cycles are warned at WARN so operators see structural drift a
 * consolidation pass introduced; the rest log at INFO.
Never throws — like the + * validator it wraps, this is a report, not an assertion. Returns the summary + * so the job dispatcher and tests can assert on the counts. + */ +export async function runIndexMaintenance( + workspaceDir = getWorkspaceDir(), +): Promise { + const report = await validateTree(workspaceDir); + + const result: IndexMaintenanceResult = { + danglingChildRefCount: report.danglingChildRefCount, + orphanPageCount: report.orphanPageCount, + cycleCount: report.cycleCount, + staleIndexCount: report.staleIndexCount, + unknownEdgeTargetCount: report.unknownEdgeTargetCount, + report, + }; + + const summaryFields = { + danglingChildRefs: report.danglingChildRefCount, + orphanPages: report.orphanPageCount, + cycles: report.cycleCount, + staleIndices: report.staleIndexCount, + unknownEdgeTargets: report.unknownEdgeTargetCount, + }; + + if (report.cycleCount > 0 || report.staleIndexCount > 0) { + log.warn( + { ...summaryFields, cyclesDetail: report.cycles }, + "v3 index maintenance: structural drift detected (cycles and/or stale composed indices)", + ); + } else { + log.info(summaryFields, "v3 index maintenance complete"); + } + + return result; +} + +/** + * Job handler for `memory_v3_index_maintenance`. Thin wrapper over + * {@link runIndexMaintenance} so the heavy lifting (and its tests) live in one + * place. The job carries no payload — it always validates the whole tree. + */ +export async function memoryV3IndexMaintenanceJob( + _job: MemoryJob, +): Promise { + return runIndexMaintenance(); +} + +/** + * True when adding a `node:` edge to `parent` would close a cycle — + * i.e. `child` can already reach `parent` by descending `node:` children + * (directly or transitively), or `child === parent` (a self-edge). + * + * The DAG editor (consolidation, edge-learning) calls this BEFORE writing a new + * `node:` child so it can refuse the edit rather than leaving the validator to + * report the loop after the fact. 
The walk reuses the same iterative + * visited-guard descent the validator uses, so it terminates on existing cycles + * (a pre-existing loop in the tree never makes this hang). + * + * `page:` children are never traversed (pages are leaves), so this only + * considers the `node:` adjacency that actually forms the DAG. + */ +export function wouldIntroduceCycle( + tree: TreeIndex, + parent: string, + child: string, +): boolean { + if (parent === child) return true; + + // Walk down from `child` over `node:` children; if we ever reach `parent`, + // the proposed `parent → child` edge would close a loop. `visited` guards + // against pre-existing cycles so this terminates regardless of tree state. + const visited = new Set(); + const stack: string[] = [child]; + while (stack.length > 0) { + const current = stack.pop()!; + if (current === parent) return true; + if (visited.has(current)) continue; + visited.add(current); + for (const ref of tree.childrenByNode.get(current) ?? []) { + if (ref.kind === "node") stack.push(ref.ref); + } + } + return false; +} diff --git a/assistant/src/memory/v3/prompts/consolidation.ts b/assistant/src/memory/v3/prompts/consolidation.ts new file mode 100644 index 00000000000..3c485f58d93 --- /dev/null +++ b/assistant/src/memory/v3/prompts/consolidation.ts @@ -0,0 +1,458 @@ +/** + * Memory v3 — consolidation prompt template. + * + * Ported from `assistant/src/memory/v2/prompts/consolidation.ts`. The + * standing-context outputs are KEPT IDENTICAL to v2 — the agent still rewrites + * `memory/recent.md` (≤2000 chars, prose, latest-first), updates + * `memory/essentials.md` (≤10000) and `memory/threads.md` (≤10000), and trims + * `memory/buffer.md` to post-cutoff entries. The buffer and the standing-context + * files are SHARED with v2 — there is no v3 buffer and no v3 meta-files. + * + * What CHANGES vs v2 is concept-page routing. v2 routes buffer entries into + * concept pages and maintains a flat `edges:` "see also" graph. 
 * v3 keeps the
 * shared concept pages canonical (the agent still writes
 * `memory/concepts/<slug>.md` so the v2 router keeps working off them)
 * but ALSO threads each touched page into the v3 **tree**: an authored DAG of
 * `memory/v3/tree/<node-id>.md` nodes whose markdown body is the node's
 * self-description and whose `children` list points at pages (`page:<slug>`) and
 * sub-nodes (`node:<node-id>`). The tree is the navigable index over the flat page
 * store — consolidation is where it's authored and refreshed.
 *
 * The single placeholder `{{CUTOFF}}` is substituted at runtime with a
 * timestamp captured at job dispatch in the same `Mon D, h:mm AM/PM` shape that
 * `buffer.md` entries use, so the agent's "timestamp ≥ cutoff" check compares
 * like-with-like.
 *
 * Kept under `prompts/` rather than inlined in `consolidation-job.ts` so the
 * prompt body is reviewable on its own and the job module stays focused on
 * orchestration (lock file, wake invocation, follow-up enqueues). Mirrors the
 * v2 convention.
 */

import { lstatSync, readFileSync } from "node:fs";
import { homedir } from "node:os";
import { isAbsolute, join } from "node:path";

import { getLogger } from "../../../util/logger.js";
import { getWorkspaceDir } from "../../../util/platform.js";

const log = getLogger("memory-v3-consolidate-prompt");

/** Sentinel substituted with the cutoff timestamp at runtime. */
export const CUTOFF_PLACEHOLDER = "{{CUTOFF}}";

/**
 * Upper bound for the override file. Real consolidation prompts are kilobytes;
 * 1 MiB is generous headroom while preventing a `settings.write` principal from
 * pointing the field at a multi-gigabyte file (or `/dev/zero`-like stream that
 * `lstat` can't size-cap on its own) and exfiltrating it through the wake hint.
 */
const MAX_PROMPT_BYTES = 1 * 1024 * 1024;

/**
 * Consolidation prompt — live-mode only.
The agent runs as itself (full + * SOUL.md + IDENTITY.md + persona + memory autoloads) with the standard tool + * surface, and is asked to route buffer entries into shared concept pages AND + * the v3 tree, rewrite recent.md, promote essentials/threads, and trim the + * buffer. + * + * The prompt is intentionally directive about timing semantics: anything + * timestamped at or after `{{CUTOFF}}` arrived AFTER the run started and must + * be left for the next pass. This keeps multiple consolidation runs idempotent + * under append-only writers (`remember()`, sweep job). + */ +export const CONSOLIDATION_PROMPT = `You are running memory consolidation — tending your personal wiki, the cross-linked, cross-referenced, continuously-edited collection of pages that is your memory, AND the navigable **tree** that indexes it. Pages are articles; the tree is a hand-authored DAG of *nodes* that organize those articles into a browsable hierarchy. You're the sole editor and the sole reader, and you're writing it for next-you. + +You're not summarizing for an audience. You're nesting and reorganizing your own memory until it actually works for next-you. Care, judgment, voice. Your voice. + +Cutoff timestamp for this run: \`${CUTOFF_PLACEHOLDER}\`. Anything in \`memory/buffer.md\` with timestamp ≥ \`${CUTOFF_PLACEHOLDER}\` arrived AFTER you started — leave it for the next pass. 
+ +# Inputs + +- Your identity files (already loaded into context) +- All existing pages in \`memory/concepts/\` (your prior state — use \`list_files\` and \`read_file\` as needed) +- All existing tree nodes in \`memory/v3/tree/\` (the index over those pages) +- \`memory/buffer.md\` entries with timestamp < \`${CUTOFF_PLACEHOLDER}\` +- \`memory/recent.md\` current contents (if it exists) +- Existing pages' \`edges:\` frontmatter (the flat see-also graph — read each page to see what it points at) + +# Outputs + +- New or updated \`memory/concepts//.md\` articles (the canonical, shared content) +- New or updated \`memory/v3/tree/.md\` nodes that index those articles (see "The tree") +- Updated \`memory/recent.md\` (≤2000 chars, latest first, prose) +- Updated \`memory/essentials.md\` (≤10000 chars) +- Updated \`memory/threads.md\` (≤10000 chars) +- Updated \`edges:\` frontmatter in any pages whose outgoing links changed +- Trimmed \`memory/buffer.md\` + +The immutable archive retains the entire buffer forever, so don't worry about losing information. + +--- + +# The wiki — concept pages (canonical content) + +## Article shapes — TWO, not one + +Every wiki has both kinds of articles, and so does yours. + +- **Event articles** — what HAPPENED. A day, a moment, a conversation, a procedure you invented mid-crisis, a recurring pattern that just got named. These read narratively. They have a mood. They carry receipts. + +- **Topic articles** — what IS. The current state of a thing you'd want to query directly. What medications the principal takes. Who the primary doctor is. The team roster. Service credentials. + +The same buffer can update both. New lab results update a bloodwork topic article AND a day-arc event article. Both, in parallel. + +**Stubs are fine.** Real wikis are mostly stubs that grow. Cost of missing a topic >> cost of a thin stub. 
A stub that never accretes can be demoted by a future cleanup pass — but a topic that doesn't exist won't get retrieved when it's needed. + +## Categories — class-by-folder + +A page's class is encoded in the folder it lives under inside \`memory/concepts/\`. The class boundary is the discipline. + +| Folder | Class | Size cap | When to create | +| --- | --- | --- | --- | +| \`concepts/\` | atomic concept / pattern / callback | 5K chars hard | most pages — single concepts that recur or carry weight | +| \`concepts/arcs/\` | landmark day-narrative or multi-event sequence | 10K chars ceiling | use sparingly — only for actually-landmark days. Preserves day-as-a-whole fidelity. | +| \`concepts/people/\` | one per recurring human | 5K chars hard | named person who comes back | +| \`concepts/procs/\` | operational rule / protocol / discipline | 5K chars hard | "always do X" / "never do Y" / a named protocol | +| \`concepts/objects/\` | recurring callback object (place, tool, artifact) | 5K chars hard | named recurring physical artifact, digital asset, place | + +Within these classes, sub-folders can emerge as a class gets dense (\`people/colleagues/alice\`, \`objects/places/zurich-office\`). **Don't pre-specify sub-taxonomies — let them emerge.** Articles are cheap to move. + +The slug is the relative path under \`memory/concepts/\` minus \`.md\` — e.g. \`alice\`, \`people/alice\`, \`procs/git-flow\`, \`arcs/2025-04-cutover\`. + +--- + +# Article format + +## The cheat-sheet budget (the economic principle) + +Every retrieval turn loads a finite bundle of articles — call it a 10-20K-token cheat-sheet. **Longer articles starve other articles.** The optimization target is **fact density per byte**, not completeness. + +Two consequences that change everything below: + +1. **Trust adjacency.** If a fact lives on a page this article edges to, that page loads if it matters. Don't restate it. +2. 
**Trust \`recall\`.** If a fact is findable via a query, it doesn't need to live on every related entity page. Pull-on-demand beats push-everywhere. + +## Same skeleton for every article + +\`\`\` +--- +edges: + - path/to/sister + - path/to/parent +ref_files: [] +summary: 1-4 sentences describing what this article is. Plain prose only — no bullets, no newlines, no markdown lists. Lead with the most identifying detail. +--- +# title + +[optional 1-2 line context or quote at top — appropriate for event articles, usually wrong for topic articles] + +- **bullet 1.** fact + implication folded in. inline pointer when bullet references another article → \`path/to/article.md\`. +- **bullet 2.** ... +\`\`\` + +The \`summary\` field is required on every new or updated article. Retrieval injects \`path + summary\` into context — make the summary specific and terse. Keep it on a single YAML line (no \`|\` block scalars, no embedded newlines). + +**Caps:** ~5-8 bullets per topic/concept article. ~10-12 per arc-node. + +## One fact, one home + +Each fact gets exactly ONE place on the page. The intra-page redundancy bug is the loudest source of bloat. + +## Route, don't restate + +When an entity belongs to a topic with its own hub article, **the entity page doesn't enumerate the hub's structure.** The hub does that work; the entity edges to it. + +The test: **if you delete the bullet, does the fact still exist somewhere reachable from this page's edges?** If yes — delete it. + +## Three sections you NEVER write + +- \`## why it's load-bearing\` — fold the implication into the bullet. +- \`## carry-forward\` — write the carry-forward AS a bullet, don't section it. +- \`## related\` footer — duplicates frontmatter edges. 
+ +## Banned bullet shapes + +Each of these LOOKS like content but isn't — drop them: **archaeology** (metadata about when the page was written), **hub-restating** (enumerating a topic hub from the entity page), **interpretation gloss** (analytic essays disguised as bullets — these belong on the ARC page), **term/glyph gloss**, **family/sister lists** (\`recall\` handles this), **behavioral coaching** (future-instruction), **per-event recap on entity pages**. + +If a bullet falls into one of these shapes, ask: **would future-me search for this exact fact, or is it interpretation/coaching/restating?** If the second — cut. + +--- + +# Voice — register by article shape + +You speak as yourself everywhere. **Always-true:** first-person, in your established voice, "i" not "the assistant," not "the wiki." + +- **Event articles** → voice ON. Stage directions, italicized self-talk, CAPS when something lands, body in the page. +- **Topic articles** → voice DOWN. These exist to answer queries cleanly. Bullet bodies stay factual. **Be the librarian, not the diarist.** +- **\`essentials.md\` / \`threads.md\`** → reference register. Clean, indexable, terse. + +## Emotional weight ≠ wiki weight + +The pages MOST likely to bloat are the ones with the highest emotional charge — and their retrieval frequency is the OPPOSITE. **Emotional weight is the inverse signal of retrieval need.** Emotional gloss migrates to the ARC page; the OBJECT/ENTITY page gets the structural fact only. + +--- + +# The tree — the navigable index over your pages + +The v3 tree lives at \`memory/v3/tree/.md\`. It is a **DAG overlay** over the flat \`memory/concepts/\` pages: pages stay canonical and untouched as content, and the tree is the browsable hierarchy that routes to them. Think of it as the wiki's category tree + table of contents, authored by hand. 
+ +## Node shape + +Each node is a markdown file with YAML frontmatter: + +\`\`\` +--- +children: + - node:people + - node:work/active-projects + - page:alice + - page:procs/git-flow +routing_hints: for *work* relationships see node:people/colleagues, not this node +summary: one-line self-description of what this node organizes. +--- +# node title + +A few sentences — the node's full self-description. What region of memory does this node organize? What lives under it? Write it so next-you, descending the tree, can decide in one read whether to go deeper here. +\`\`\` + +- The node id is the relative path under \`memory/v3/tree/\` minus \`.md\` — e.g. \`people\`, \`people/colleagues\`, \`work/active-projects\`. The root node is \`_root\`. +- \`children\` is the **ordered, canonical** list of outgoing references. Each entry is either \`page:\` (a leaf concept page) or \`node:\` (a sub-node). This list IS the DAG edge — it's the portable replacement for filesystem symlinks. A page or node may be referenced by more than one parent (hence DAG, not tree). +- \`summary\` (one line) + the body are how the parent's index is composed at read time — keep both crisp. +- \`routing_hints\` (optional, one line) disambiguates between sibling branches. + +## Authoring the tree during consolidation + +For every concept page you create or substantively touch this pass: + +1. **Place it under the right node.** Find the node whose region of memory the page belongs to (e.g. a new person page → the \`people\` node; a new protocol → a \`procs\` node). Add \`page:\` to that node's \`children\` if it isn't already there. +2. **Spawn an organizing node when a region has no home yet.** If a cluster of pages has grown but no node organizes it, author a new node (write its body self-description, list its \`page:\`/\`node:\` children) and wire it in as a \`node:\` child of its parent — ultimately reachable from \`_root\`. +3. 
**Refresh the self-description.** When a node's children changed materially, rewrite its body + \`summary\` so they still describe what actually lives under it. A node whose description drifts from its children is a stale index — re-author it this pass. + +## Tree discipline — no cycles, reachable from root + +- **The tree is a DAG: no cycles.** A node must never be reachable from itself by descending \`node:\` children (directly or transitively). Before adding a \`node:\` edge, check that \`child\` is not an ancestor of the node you're editing. If wiring two regions that reference each other, make ONE of them the parent and let the other \`page:\`-link or cross-reference via \`routing_hints\` — do not create a \`node:\` back-edge that closes a loop. +- **Every node should be reachable from \`_root\`** by descending \`node:\` children. A node nobody points at is an orphan index — wire it in or don't author it. +- **\`page:\`/\`node:\` refs must resolve.** Only reference pages/nodes that exist (or that you're creating this pass). A dangling ref is a broken link. +- Keep \`children\` lists focused — a node that points at everything indexes nothing. Prefer sub-nodes over a flat 40-child list. + +## Pages stay canonical and shared + +The flat \`memory/concepts/\` page store and its \`edges:\` see-also graph remain the source of truth for content. The tree is an INDEX over them, not a replacement — never move a page's content into a node body, and never delete a page just because a node references it. Maintain the page's own \`edges:\` frontmatter exactly as before (the flat retrieval path still reads it); the tree is additive. + +--- + +# The work + +## 1. Read the buffer holistically + +Read it through first. Identify themes — what happened, what mind-changes landed, who showed up, which topics got touched. Plan, then edit. + +**Scan for previous-pass errors.** If existing content contradicts the buffer — that's a correction to land THIS pass. 
+ +**Recall ≠ memory.** \`recall\` results are search-tool synthesis — they CAN hallucinate. Treat results as candidates to verify before encoding, especially load-bearing claims about people's roles, dates, or exact quotes. + +## 2. Plan: which articles + nodes does this buffer touch? + +For entries with timestamp < \`${CUTOFF_PLACEHOLDER}\`, ask in parallel: + +> **A. Which EVENT articles does this create or extend?** +> **B. What in this buffer is recognizable as a thing the principal comes back to?** *(Inclusion-first. List everything that fits a spawn trigger, then spawn each.)* +> **C. Where in the tree does each touched page live, and does any node need spawning or re-describing to index it?** + +**Default spawn triggers — if any are present, spawn the stub:** named objects, named phrases, named people, named events, active projects, named places, services/infrastructure, substances/habits/health things, rules/protocols, landmark day-narratives. + +If you catch yourself hedging — *"am I overdoing it?"* — **the hedge IS the signal: spawn.** + +**Don't decide reorgs in this step.** Flag in \`threads.md\`; reorgs run as separate focused passes. + +## 3. Edit + +Execute the plan. Default to surgical edits on existing articles. Spawn new ones liberally. Apply One-fact-one-home and Route-don't-restate as you write. + +Then wire the tree: add \`page:\`/\`node:\` children to the right nodes, spawn organizing nodes for un-homed clusters, refresh node self-descriptions whose children changed. Check no \`node:\` edge closes a cycle and every node stays reachable from \`_root\`. + +## 4. Edges (see-also) on pages — DIRECTED, frontmatter is the source of truth + +Page \`edges:\` are **directed** source → target; the flat retrieval path spreads activation along them. Each page's \`edges:\` frontmatter list IS the source of truth for its outgoing edges. If two pages genuinely "see-also" each other, write the link in BOTH frontmatters. 
(This is the flat graph — separate from the tree's \`children\` DAG. Maintain it exactly as before.) + +| page type | outgoing cap | +| --- | --- | +| atomic articles | ~10 | +| arc-nodes | ~15 | +| gravity wells (principal / you / shared context) | ~25 | + +HARD LIMIT of 20 outgoing edges on any non-hub page. + +## 5. Article size — TOPIC COHERENCE, not char caps + +Every article answers ONE question. **When in doubt between split and compress, SPLIT.** Compression is where load-bearing facts quietly disappear. + +### Hard caps that ARE real + +| file | hard cap | +| --- | --- | +| \`concepts/.md\` (atomic / people / procs / objects) | 5K chars | +| \`concepts/arcs/.md\` | 10K ceiling | +| \`essentials.md\` | 10K | +| \`threads.md\` | 10K | +| \`recent.md\` | 2K | + +## 6. \`recent.md\` + +Rewrite as fresh ~400-token narrative. **Today gets full-fidelity narrative; anything older than yesterday compresses to one-liners or drops.** Hard cap ≤2000 chars, prose not list, voice on. Not a log — a note to next-you about what's currently in motion. + +## 7. \`essentials.md\` and \`threads.md\` + +- **\`essentials.md\`** ≤10K — facts that MUST load every conversation. Identity, disambiguations, corrections, hard rules. Embarrassment-prevention. +- **\`threads.md\`** ≤10K — active commitments and follow-ups. Add new threads, close completed ones, demote stale ones to articles. **Aggressively prune.** + +Surgical edits starve these. **Every ~7-10 passes, rewrite both from scratch.** + +## 8. Reorg check + +Scan namespace + node-children sizes. If any namespace has crossed ~12-15 articles with visible sub-clusters, **flag in \`threads.md\`** for a focused reorg pass. + +## 9. Trim \`memory/buffer.md\` + +- Re-read the buffer (it may have new entries appended during your work). +- Rewrite to contain ONLY entries with timestamp ≥ \`${CUTOFF_PLACEHOLDER}\`. +- Smart removal — never wholesale-clear. 
+ +--- + +# What NOT to do + +- **Don't write \`## why it's load-bearing\` / \`## carry-forward\` / \`## related\` sections** anywhere. +- **Don't write banned bullet shapes** — archaeology / hub-restating / interpretation gloss / term-glyph gloss / family list / behavioral coaching / per-event recap. +- **Don't restate within the page.** One fact, one home. +- **Don't restate what edges already cover.** Trust adjacency. +- **Don't expand a 1500-char buffer into 10K of new content.** +- **Don't fabricate.** Use \`[SOURCE NEEDED: ...]\` inline for anything you need but lack. DO use loaded context and prior articles — "don't fabricate," not "don't use what you know." +- **Don't move page content into the tree.** Pages stay canonical; nodes index them. +- **Don't create a \`node:\` edge that closes a cycle** or that orphans a node from \`_root\`. +- **Don't default to compress.** When in doubt between split and compress, split. +- **Don't defer for the next pass.** Take care of it now. + +--- + +# Quick check before closing + +For each article you touched: + +1. **Voice register matched article shape?** +2. **Bullet count** ≤8 atomic, ≤12 arc-node? +3. **No banned sections / bullet shapes?** +4. **One fact, one home; route, don't restate?** +5. **Future-me lookup test** — would future-me search for THIS fact? +6. **Split, not compress, on over-cap?** +7. **Page edges** within tiered caps (≤20 hard on non-hubs)? + +For the tree: + +8. **Every touched page wired under a node** reachable from \`_root\`? +9. **Node self-descriptions** (\`summary\` + body) refreshed where children changed? +10. **No cycles** — no \`node:\` edge makes a node its own ancestor? +11. **All \`page:\`/\`node:\` refs resolve** to files that exist (or you created this pass)? + +Standing context: + +12. **\`recent.md\`** under 2000 chars, today=full / older=one-liners? +13. **\`essentials.md\` / \`threads.md\`** under 10K, pruned? +14. **\`[SOURCE NEEDED]\`** tags surfaced for human review? +15. 
**Buffer trimmed** to only entries with timestamp ≥ \`${CUTOFF_PLACEHOLDER}\`?

---

This is the engine that decides who you are tomorrow. Be ORGANIZED. Care, judgment, voice. Your voice. Your wiki, your tree.`;

/**
 * Resolve `CONSOLIDATION_PROMPT` with `{{CUTOFF}}` substituted. The prompt
 * treats the cutoff as opaque text — callers pass a `Mon D, h:mm AM/PM`
 * timestamp matching the `buffer.md` entry format so the agent compares
 * like-with-like.
 *
 * @param cutoff Text inserted verbatim at every `{{CUTOFF}}` occurrence.
 * @returns The bundled prompt with all placeholders replaced.
 */
export function renderConsolidationPrompt(cutoff: string): string {
  // A replacer function inserts `cutoff` exactly as given. A plain string
  // replacement would expand `$&`, `$$`, and similar patterns inside it.
  return CONSOLIDATION_PROMPT.replaceAll(CUTOFF_PLACEHOLDER, () => cutoff);
}

/**
 * Load the consolidation prompt template, optionally overridden from the file
 * referenced by `memory.v2.consolidation_prompt_path`, then substitute
 * `{{CUTOFF}}`. The override config field is shared with v2 (there is no
 * separate v3 override key) so operators can point a single file at whichever
 * consolidator owns the drain. Path-resolution rules mirror v2.
 *
 * Failure handling is intentionally permissive — missing file, read error, or
 * empty/whitespace-only body all log a warning and fall back to the bundled
 * prompt. Consolidation must never break because of a bad override.
+ */ +export function resolveConsolidationPrompt( + overridePath: string | null, + cutoff: string, +): string { + if (overridePath === null) return renderConsolidationPrompt(cutoff); + + const resolvedPath = resolveOverridePath(overridePath); + let contents: string; + try { + const stat = lstatSync(resolvedPath); + if (!stat.isFile()) { + log.warn( + { + configuredPath: overridePath, + resolvedPath, + reason: "not_regular_file", + fallback: "bundled", + }, + "consolidation prompt override is not a regular file; using bundled prompt", + ); + return renderConsolidationPrompt(cutoff); + } + if (stat.size > MAX_PROMPT_BYTES) { + log.warn( + { + configuredPath: overridePath, + resolvedPath, + size: stat.size, + limit: MAX_PROMPT_BYTES, + reason: "oversized_override", + fallback: "bundled", + }, + "consolidation prompt override exceeds size limit; using bundled prompt", + ); + return renderConsolidationPrompt(cutoff); + } + contents = readFileSync(resolvedPath, "utf-8"); + } catch (err) { + const code = (err as NodeJS.ErrnoException).code; + log.warn( + { configuredPath: overridePath, resolvedPath, code, fallback: "bundled" }, + "consolidation prompt override unreadable; using bundled prompt", + ); + return renderConsolidationPrompt(cutoff); + } + + if (contents.trim().length === 0) { + log.warn( + { + configuredPath: overridePath, + resolvedPath, + reason: "empty_override", + fallback: "bundled", + }, + "consolidation prompt override is empty; using bundled prompt", + ); + return renderConsolidationPrompt(cutoff); + } + + return contents.replaceAll(CUTOFF_PLACEHOLDER, cutoff); +} + +function resolveOverridePath(overridePath: string): string { + if (overridePath.startsWith("~/")) { + return join(homedir(), overridePath.slice(2)); + } + if (isAbsolute(overridePath)) return overridePath; + return join(getWorkspaceDir(), overridePath); +}