From 682588f3e26289716d5fe7d6075a3a2b9457ad68 Mon Sep 17 00:00:00 2001 From: Staxed Date: Thu, 16 Apr 2026 20:32:49 -0400 Subject: [PATCH 1/3] feat(knowledge): add global knowledge base tier and correction workflow - Add scope classification (project/global/both) to knowledge extraction - Route global entries to ~/.archon/knowledge/logs/ with source attribution - Add codebase-agnostic global synthesis prompt with contradiction detection - Create archon-knowledge-correct workflow with approval gate - Add scope field to knowledge_extract DAG node schema - 20 new tests covering scope routing, parsing, and global synthesis Implements PRD Phase 3.5 (Global Knowledge Base Tier) --- .../ralph/llm-knowledge-base-system/prd.md | 7 + .../defaults/archon-knowledge-correct.yaml | 132 +++++++++++ .../src/services/knowledge-capture.test.ts | 7 + .../core/src/services/knowledge-capture.ts | 140 +++++++++-- .../src/services/knowledge-extract.test.ts | 221 +++++++++++++++++- .../core/src/services/knowledge-flush.test.ts | 49 ++++ packages/core/src/services/knowledge-flush.ts | 61 ++++- packages/core/src/workflows/store-adapter.ts | 3 +- packages/workflows/package.json | 2 +- packages/workflows/src/dag-executor.ts | 11 +- .../src/defaults/bundled-defaults.test.ts | 3 +- .../src/defaults/bundled-defaults.ts | 4 +- packages/workflows/src/deps.ts | 3 +- .../workflows/src/schemas/dag-node.test.ts | 51 ++++ packages/workflows/src/schemas/dag-node.ts | 1 + 15 files changed, 660 insertions(+), 35 deletions(-) create mode 100644 .archon/workflows/defaults/archon-knowledge-correct.yaml create mode 100644 packages/workflows/src/schemas/dag-node.test.ts diff --git a/.archon/ralph/llm-knowledge-base-system/prd.md b/.archon/ralph/llm-knowledge-base-system/prd.md index 4dac4f66ed..0239c15edf 100644 --- a/.archon/ralph/llm-knowledge-base-system/prd.md +++ b/.archon/ralph/llm-knowledge-base-system/prd.md @@ -201,6 +201,10 @@ knowledge/ | US-018 | Update default workflows with KB awareness | 11 | 
US-007, US-017 | | US-019 | Update workflow builder for KB awareness | 12 | US-018 | | US-020 | Add explicit knowledge-extract node type | 12 | US-017 | +| US-021 | Add scope field to knowledge-extract nodes (project/global/both) | 13 | US-020, US-013 | +| US-022 | Scoped extraction routing (project log, global log, or both) | 13 | US-021 | +| US-023 | Global synthesis prompt (codebase-agnostic, Sources, contradictions) | 13 | US-013 | +| US-024 | Knowledge correction workflow (archon-knowledge-correct) | 13 | US-013 | ### Dependency Graph ``` @@ -248,6 +252,9 @@ Every story must pass: | File-based flush lock | `knowledge/meta/flush.lock` | Simplest option for single-developer, single-machine use case. | | Capture triggers | `conversation-closed` + `reset-requested` | NOT `isolation-changed`. Reset is ending a line of work. | | KB not in git | `~/.archon/` directory | User-specific, not project-specific. Avoids polluting repo. | +| Scope classification in extraction | AI classifies as PROJECT/GLOBAL with project fallback | Conservative default prevents low-quality global entries. | +| Global synthesis prompt | Codebase-agnostic with Sources + Contradictions sections | Global articles must generalize; contradiction detection surfaces conflicting claims across projects. | +| Knowledge correction workflow | AI-mediated with approval gate | User review before destructive operations (delete/merge) — matches interactive-prd pattern. | | Flush atomicity | Write to temp files, atomic rename | Crash-safe, idempotent — next flush re-runs from scratch. | | Obsidian compatibility | Standard markdown with `[[wikilinks]]` | KB browsable in Obsidian with graph view. No special tooling needed. 
| diff --git a/.archon/workflows/defaults/archon-knowledge-correct.yaml b/.archon/workflows/defaults/archon-knowledge-correct.yaml new file mode 100644 index 0000000000..17e9539d17 --- /dev/null +++ b/.archon/workflows/defaults/archon-knowledge-correct.yaml @@ -0,0 +1,132 @@ +name: archon-knowledge-correct +description: | + Use when: User wants to correct, update, or delete a knowledge base article. + Triggers: "correct knowledge", "fix kb article", "update knowledge", "knowledge correct", + "delete knowledge article", "kb correction", "fix article". + NOT for: Browsing or reading knowledge (use the $KNOWLEDGE variable in prompts). + + AI-mediated knowledge base correction workflow with approval gate: + 1. Analyze the correction request and find affected articles + 2. Propose edits (update, merge, or delete) with diff preview + 3. Wait for user approval + 4. Apply approved changes to the knowledge base + +provider: claude +interactive: true + +nodes: + # ═══════════════════════════════════════════════════════════════ + # PHASE 1: ANALYZE — Find and assess affected articles + # ═══════════════════════════════════════════════════════════════ + + - id: analyze + prompt: | + You are a knowledge base editor. The user wants to make a correction to the knowledge base. + + **Correction request**: $ARGUMENTS + + $KNOWLEDGE + + Your task: + 1. Search the knowledge base directories for articles that match the correction request. + - Project KB: Look under the project's `knowledge/domains/` directory + - Global KB: Look under `~/.archon/knowledge/domains/` + 2. Read the affected article(s) fully. + 3. 
Determine the type of correction needed: + - **UPDATE**: Modify content within an existing article + - **MERGE**: Combine two overlapping articles into one + - **DELETE**: Remove an article that is incorrect or obsolete + - **RECLASSIFY**: Move an article to a different domain + + Present your findings: + + **Affected Articles:** + - `{domain}/{concept}.md` — {brief description of current content} + + **Proposed Changes:** + For each article, show a clear before/after or describe the change: + - What will change and why + - If deleting: why the article should be removed + - If merging: which articles combine and what the merged result looks like + + **Impact Assessment:** + - Are there [[wikilinks]] from other articles pointing to affected articles? + - Will any cross-references break? + + Keep the proposal concise and actionable. + + # ═══════════════════════════════════════════════════════════════ + # GATE: User approves the proposed changes + # ═══════════════════════════════════════════════════════════════ + + - id: approval-gate + approval: + message: "Review the proposed knowledge base changes above. Approve to apply them, or provide feedback to adjust the proposal." + depends_on: [analyze] + + # ═══════════════════════════════════════════════════════════════ + # PHASE 2: APPLY — Execute approved changes + # ═══════════════════════════════════════════════════════════════ + + - id: apply + prompt: | + You are a knowledge base editor applying approved corrections. + + **Original request**: $ARGUMENTS + **Approved changes**: $approval-gate.output + + Apply the approved changes now: + + 1. **Read** each affected article file before modifying it + 2. **Edit** articles as approved — use precise edits, not full rewrites + 3. **Update wikilinks** in other articles if any cross-references changed + 4. 
**Update domain _index.md** files if articles were added, removed, or moved + + For deletions: + - Delete the article file + - Remove its entry from the domain's _index.md + - Fix broken [[wikilinks]] in other articles that referenced it + + For merges: + - Write the merged article to the target location + - Delete the source article + - Update all [[wikilinks]] that pointed to the source + + After applying all changes, output a summary: + + **Changes Applied:** + - {action}: `{domain}/{concept}.md` — {what changed} + + **Wikilinks Updated:** {count} cross-references fixed (or "None needed") + depends_on: [approval-gate] + + # ═══════════════════════════════════════════════════════════════ + # PHASE 3: VERIFY — Confirm changes are consistent + # ═══════════════════════════════════════════════════════════════ + + - id: verify + bash: | + echo "=== Knowledge Base Verification ===" + # Check for broken wikilinks in all domain articles + broken=0 + for f in $(find knowledge/domains/ ~/.archon/knowledge/domains/ -name "*.md" 2>/dev/null); do + # Extract wikilinks and check if targets exist + links=$(grep -oP '\[\[([^\]|]+)' "$f" 2>/dev/null | sed 's/\[\[//' || true) + for link in $links; do + # Skip _index links + echo "$link" | grep -q "_index" && continue + # Normalize: strip domains/ prefix + normalized=$(echo "$link" | sed 's|^domains/||') + # Check if article exists in any KB + if ! find knowledge/domains/ ~/.archon/knowledge/domains/ -path "*/${normalized}.md" -print -quit 2>/dev/null | grep -q .; then + echo "BROKEN: $f -> [[${link}]]" + broken=$((broken + 1)) + fi + done + done + if [ "$broken" -eq 0 ]; then + echo "All wikilinks valid." + else + echo "WARNING: ${broken} broken wikilink(s) found." 
+ fi + depends_on: [apply] diff --git a/packages/core/src/services/knowledge-capture.test.ts b/packages/core/src/services/knowledge-capture.test.ts index 40ef5d33b9..a57eb24ee7 100644 --- a/packages/core/src/services/knowledge-capture.test.ts +++ b/packages/core/src/services/knowledge-capture.test.ts @@ -74,6 +74,13 @@ mock.module('../config/config-loader', () => ({ const mockInitKnowledgeDir = mock(async () => undefined); mock.module('./knowledge-init', () => ({ initKnowledgeDir: mockInitKnowledgeDir, + initGlobalKnowledgeDir: mock(async () => undefined), +})); + +// Mock knowledge-scheduler (imported by capture module for global flush) +mock.module('./knowledge-scheduler', () => ({ + scheduleFlush: mock(async () => undefined), + scheduleGlobalFlush: mock(async () => undefined), })); // Mock AI client diff --git a/packages/core/src/services/knowledge-capture.ts b/packages/core/src/services/knowledge-capture.ts index 4ec3ae4fea..4a8c50f5d8 100644 --- a/packages/core/src/services/knowledge-capture.ts +++ b/packages/core/src/services/knowledge-capture.ts @@ -4,14 +4,14 @@ */ import { appendFile, mkdir } from 'node:fs/promises'; import { join } from 'node:path'; -import { getProjectKnowledgePath, parseOwnerRepo } from '@archon/paths'; +import { getProjectKnowledgePath, getGlobalKnowledgePath, parseOwnerRepo } from '@archon/paths'; import { createLogger } from '@archon/paths'; import { getAssistantClient } from '../clients/factory'; import * as messageDb from '../db/messages'; import * as codebaseDb from '../db/codebases'; import { loadConfig } from '../config/config-loader'; -import { initKnowledgeDir } from './knowledge-init'; -import { scheduleFlush } from './knowledge-scheduler'; +import { initKnowledgeDir, initGlobalKnowledgeDir } from './knowledge-init'; +import { scheduleFlush, scheduleGlobalFlush } from './knowledge-scheduler'; import type { MergedConfig } from '../config/config-types'; import type { MessageRow } from '../db/messages'; @@ -220,6 +220,58 @@ 
async function appendToDailyLog( return logFile; } +/** Scope addendum appended to extraction prompts when scope is 'both' */ +const SCOPE_CLASSIFICATION_ADDENDUM = ` + +## Scope Classification + +Classify each extracted item as PROJECT-scoped or GLOBAL-scoped: + +- **PROJECT**: Knowledge specific to this repository — file paths, internal APIs, project-specific conventions, repo-specific decisions. +- **GLOBAL**: Knowledge applicable across any codebase — general engineering patterns, language idioms, tool usage tips, debugging techniques, universal best practices. + +When in doubt, classify as PROJECT (conservative default). + +Format your response with two clearly separated sections: + +## PROJECT + +{project-scoped knowledge items here} + +## GLOBAL + +{global-scoped knowledge items here} + +If all items belong to one scope, include only that section. +`; + +/** + * Parse scoped extraction output into project and global sections. + * Handles: both blocks present, only one block, and malformed (fallback to project). + */ +export function parseScopedOutput( + content: string, + scope: 'project' | 'global' | 'both' +): { project: string; global: string } { + // Single-scope modes: all content goes to the requested scope + if (scope === 'project') return { project: content, global: '' }; + if (scope === 'global') return { project: '', global: content }; + + // 'both' scope: parse ## PROJECT and ## GLOBAL blocks + const projectMatch = /## PROJECT\s*\n([\s\S]*?)(?=## GLOBAL|$)/i.exec(content); + const globalMatch = /## GLOBAL\s*\n([\s\S]*?)$/i.exec(content); + + const projectContent = projectMatch?.[1]?.trim() ?? ''; + const globalContent = globalMatch?.[1]?.trim() ?? 
''; + + // If neither block was found, fall back to project (conservative default) + if (!projectContent && !globalContent) { + return { project: content.trim(), global: '' }; + } + + return { project: projectContent, global: globalContent }; +} + /** * Extract knowledge using a custom prompt and context. * Used by knowledge-extract workflow nodes for targeted extraction. @@ -228,13 +280,15 @@ async function appendToDailyLog( * @param context - Upstream context (e.g. workflow node outputs) * @param cwd - Working directory (used to resolve owner/repo via git remote) * @param metadata - Workflow run and node identifiers for log entries + * @param scope - Where to route extracted knowledge: 'project', 'global', or 'both' (default) * @returns Extracted knowledge content */ export async function extractKnowledgeFromContext( prompt: string, context: string, cwd: string, - metadata: { workflowRunId: string; nodeId: string } + metadata: { workflowRunId: string; nodeId: string }, + scope: 'project' | 'global' | 'both' = 'both' ): Promise { const log = getLog(); @@ -258,10 +312,11 @@ export async function extractKnowledgeFromContext( return ''; } - log.info({ owner, repo, nodeId: metadata.nodeId }, 'knowledge.extract_started'); + log.info({ owner, repo, nodeId: metadata.nodeId, scope }, 'knowledge.extract_started'); - // Call AI with the custom prompt + context - const fullPrompt = `${prompt}\n\n---\n\nCONTEXT:\n${context}`; + // Build prompt — append scope classification instructions for 'both' scope + const scopeAddendum = scope === 'both' ? SCOPE_CLASSIFICATION_ADDENDUM : ''; + const fullPrompt = `${prompt}${scopeAddendum}\n\n---\n\nCONTEXT:\n${context}`; const client = getAssistantClient(mergedConfig.knowledge.captureProvider ?? 
'claude'); const chunks: string[] = []; const generator = client.sendQuery(fullPrompt, cwd, undefined, { @@ -281,28 +336,71 @@ export async function extractKnowledgeFromContext( return ''; } - // Append to daily log with workflow metadata - await initKnowledgeDir(owner, repo); - const knowledgePath = getProjectKnowledgePath(owner, repo); - const logsDir = join(knowledgePath, 'logs'); + // Parse scoped output + const scoped = parseScopedOutput(extracted, scope); + + // Write project-scoped entries to project daily log + if (scoped.project) { + await initKnowledgeDir(owner, repo); + const knowledgePath = getProjectKnowledgePath(owner, repo); + const logsDir = join(knowledgePath, 'logs'); + await mkdir(logsDir, { recursive: true }); + + const today = new Date().toISOString().slice(0, 10); + const logFile = join(logsDir, `${today}.md`); + const timestamp = new Date().toISOString(); + const entry = `\n---\n\n### Knowledge Extract: ${timestamp}\n**Workflow Run**: ${metadata.workflowRunId}\n**Node**: ${metadata.nodeId}\n\n${scoped.project}\n`; + + await appendFile(logFile, entry); + + log.info( + { owner, repo, nodeId: metadata.nodeId, logFile, contentLength: scoped.project.length }, + 'knowledge.extract_project_completed' + ); + + // Schedule debounced flush after project extraction + await scheduleFlush(owner, repo); + } + + // Write global-scoped entries to global daily log + if (scoped.global) { + await initGlobalKnowledgeDir(); + await appendToGlobalDailyLog(owner, repo, metadata, scoped.global); + + log.info( + { nodeId: metadata.nodeId, contentLength: scoped.global.length }, + 'knowledge.extract_global_completed' + ); + + // Schedule debounced global flush + await scheduleGlobalFlush(); + } + + return extracted; +} + +/** + * Append extracted global knowledge to the global daily log. + * Includes source attribution (owner/repo) for traceability. 
+ */ +async function appendToGlobalDailyLog( + owner: string, + repo: string, + metadata: { workflowRunId: string; nodeId: string }, + content: string +): Promise { + const globalKnowledgePath = getGlobalKnowledgePath(); + const logsDir = join(globalKnowledgePath, 'logs'); await mkdir(logsDir, { recursive: true }); const today = new Date().toISOString().slice(0, 10); const logFile = join(logsDir, `${today}.md`); const timestamp = new Date().toISOString(); - const entry = `\n---\n\n### Knowledge Extract: ${timestamp}\n**Workflow Run**: ${metadata.workflowRunId}\n**Node**: ${metadata.nodeId}\n\n${extracted}\n`; + const entry = `\n---\n\n### Knowledge Extract: ${timestamp}\n**Source**: ${owner}/${repo}\n**Workflow Run**: ${metadata.workflowRunId}\n**Node**: ${metadata.nodeId}\n\n${content}\n`; await appendFile(logFile, entry); - log.info( - { owner, repo, nodeId: metadata.nodeId, logFile, contentLength: extracted.length }, - 'knowledge.extract_completed' - ); - - // Schedule debounced flush after extraction - await scheduleFlush(owner, repo); - - return extracted; + return logFile; } /** diff --git a/packages/core/src/services/knowledge-extract.test.ts b/packages/core/src/services/knowledge-extract.test.ts index a97c968220..f15ee5d35f 100644 --- a/packages/core/src/services/knowledge-extract.test.ts +++ b/packages/core/src/services/knowledge-extract.test.ts @@ -80,14 +80,18 @@ mock.module('../config/config-loader', () => ({ // Mock knowledge-init const mockInitKnowledgeDir = mock(async () => undefined); +const mockInitGlobalKnowledgeDir = mock(async () => undefined); mock.module('./knowledge-init', () => ({ initKnowledgeDir: mockInitKnowledgeDir, + initGlobalKnowledgeDir: mockInitGlobalKnowledgeDir, })); // Mock knowledge-scheduler const mockScheduleFlush = mock(async () => undefined); +const mockScheduleGlobalFlush = mock(async () => undefined); mock.module('./knowledge-scheduler', () => ({ scheduleFlush: mockScheduleFlush, + scheduleGlobalFlush: 
mockScheduleGlobalFlush, })); // Mock AI client @@ -104,7 +108,7 @@ mock.module('../clients/factory', () => ({ })), })); -import { extractKnowledgeFromContext } from './knowledge-capture'; +import { extractKnowledgeFromContext, parseScopedOutput } from './knowledge-capture'; describe('extractKnowledgeFromContext', () => { beforeEach(() => { @@ -114,7 +118,9 @@ describe('extractKnowledgeFromContext', () => { mockMkdir.mockClear(); mockLoadConfig.mockClear(); mockInitKnowledgeDir.mockClear(); + mockInitGlobalKnowledgeDir.mockClear(); mockScheduleFlush.mockClear(); + mockScheduleGlobalFlush.mockClear(); mockSendQuery.mockClear(); Object.values(mockLogger).forEach(fn => fn.mockClear()); mockSendQueryChunks = []; @@ -206,4 +212,217 @@ describe('extractKnowledgeFromContext', () => { expect(callArgs[0]).toContain('Focus on security patterns'); expect(callArgs[0]).toContain('Auth uses bcrypt for hashing'); }); + + test('scope=project writes only to project log, not global', async () => { + mockSendQueryChunks = [ + { type: 'assistant', content: '## Decisions\n- Project-specific decision' }, + { type: 'result' }, + ]; + + await extractKnowledgeFromContext( + 'Extract decisions', + 'Context', + '/tmp/repo', + { workflowRunId: 'run-123', nodeId: 'extract' }, + 'project' + ); + + expect(mockInitKnowledgeDir).toHaveBeenCalledWith('acme', 'widget'); + expect(mockInitGlobalKnowledgeDir).not.toHaveBeenCalled(); + expect(mockScheduleFlush).toHaveBeenCalledWith('acme', 'widget'); + expect(mockScheduleGlobalFlush).not.toHaveBeenCalled(); + + // Only one appendFile call (project log) + expect(appendFileCalls.length).toBe(1); + expect(appendFileCalls[0].path).toContain('/workspaces/acme/widget/knowledge/logs/'); + }); + + test('scope=global writes only to global log, not project', async () => { + mockSendQueryChunks = [ + { type: 'assistant', content: '## Patterns\n- Universal pattern' }, + { type: 'result' }, + ]; + + await extractKnowledgeFromContext( + 'Extract patterns', + 
'Context', + '/tmp/repo', + { workflowRunId: 'run-123', nodeId: 'extract' }, + 'global' + ); + + expect(mockInitKnowledgeDir).not.toHaveBeenCalled(); + expect(mockInitGlobalKnowledgeDir).toHaveBeenCalled(); + expect(mockScheduleFlush).not.toHaveBeenCalled(); + expect(mockScheduleGlobalFlush).toHaveBeenCalled(); + + // Only one appendFile call (global log) + expect(appendFileCalls.length).toBe(1); + expect(appendFileCalls[0].path).toContain('/home/test/.archon/knowledge/logs/'); + }); + + test('scope=both with both sections writes to both logs', async () => { + mockSendQueryChunks = [ + { + type: 'assistant', + content: + '## PROJECT\n\n- Repo-specific pattern\n\n## GLOBAL\n\n- Universal debugging technique', + }, + { type: 'result' }, + ]; + + await extractKnowledgeFromContext( + 'Extract knowledge', + 'Context', + '/tmp/repo', + { workflowRunId: 'run-123', nodeId: 'extract' }, + 'both' + ); + + expect(mockInitKnowledgeDir).toHaveBeenCalledWith('acme', 'widget'); + expect(mockInitGlobalKnowledgeDir).toHaveBeenCalled(); + expect(mockScheduleFlush).toHaveBeenCalledWith('acme', 'widget'); + expect(mockScheduleGlobalFlush).toHaveBeenCalled(); + + // Two appendFile calls (project + global) + expect(appendFileCalls.length).toBe(2); + const projectCall = appendFileCalls.find(c => + c.path.includes('/workspaces/acme/widget/knowledge/logs/') + ); + const globalCall = appendFileCalls.find(c => + c.path.includes('/home/test/.archon/knowledge/logs/') + ); + expect(projectCall).toBeDefined(); + expect(globalCall).toBeDefined(); + expect(projectCall!.content).toContain('Repo-specific pattern'); + expect(globalCall!.content).toContain('Universal debugging technique'); + }); + + test('scope=both with malformed output falls back to project', async () => { + mockSendQueryChunks = [ + { type: 'assistant', content: '## Decisions\n- Some decision without scope blocks' }, + { type: 'result' }, + ]; + + await extractKnowledgeFromContext( + 'Extract decisions', + 'Context', + '/tmp/repo', 
+ { workflowRunId: 'run-123', nodeId: 'extract' }, + 'both' + ); + + // Fallback: all content goes to project + expect(mockInitKnowledgeDir).toHaveBeenCalledWith('acme', 'widget'); + expect(mockInitGlobalKnowledgeDir).not.toHaveBeenCalled(); + expect(appendFileCalls.length).toBe(1); + expect(appendFileCalls[0].path).toContain('/workspaces/acme/widget/knowledge/logs/'); + }); + + test('global log entries include source attribution', async () => { + mockSendQueryChunks = [ + { + type: 'assistant', + content: '## GLOBAL\n\n- Universal pattern', + }, + { type: 'result' }, + ]; + + await extractKnowledgeFromContext( + 'Extract patterns', + 'Context', + '/tmp/repo', + { workflowRunId: 'run-123', nodeId: 'extract' }, + 'both' + ); + + const globalCall = appendFileCalls.find(c => + c.path.includes('/home/test/.archon/knowledge/logs/') + ); + expect(globalCall).toBeDefined(); + expect(globalCall!.content).toContain('**Source**: acme/widget'); + }); + + test('scope=both appends scope classification addendum to prompt', async () => { + mockSendQueryChunks = [{ type: 'assistant', content: 'Content' }, { type: 'result' }]; + + await extractKnowledgeFromContext( + 'Extract knowledge', + 'Context', + '/tmp/repo', + { workflowRunId: 'run-123', nodeId: 'extract' }, + 'both' + ); + + const callArgs = mockSendQuery.mock.calls[0]; + const prompt = callArgs[0] as string; + expect(prompt).toContain('Scope Classification'); + expect(prompt).toContain('PROJECT'); + expect(prompt).toContain('GLOBAL'); + }); + + test('scope=project does not append scope classification addendum', async () => { + mockSendQueryChunks = [{ type: 'assistant', content: 'Content' }, { type: 'result' }]; + + await extractKnowledgeFromContext( + 'Extract knowledge', + 'Context', + '/tmp/repo', + { workflowRunId: 'run-123', nodeId: 'extract' }, + 'project' + ); + + const callArgs = mockSendQuery.mock.calls[0]; + const prompt = callArgs[0] as string; + expect(prompt).not.toContain('Scope Classification'); + }); +}); + 
+describe('parseScopedOutput', () => { + test('scope=project returns all content as project', () => { + const result = parseScopedOutput('Some content', 'project'); + expect(result.project).toBe('Some content'); + expect(result.global).toBe(''); + }); + + test('scope=global returns all content as global', () => { + const result = parseScopedOutput('Some content', 'global'); + expect(result.project).toBe(''); + expect(result.global).toBe('Some content'); + }); + + test('scope=both parses both blocks', () => { + const content = '## PROJECT\n\nProject stuff\n\n## GLOBAL\n\nGlobal stuff'; + const result = parseScopedOutput(content, 'both'); + expect(result.project).toBe('Project stuff'); + expect(result.global).toBe('Global stuff'); + }); + + test('scope=both with only project block', () => { + const content = '## PROJECT\n\nOnly project content'; + const result = parseScopedOutput(content, 'both'); + expect(result.project).toBe('Only project content'); + expect(result.global).toBe(''); + }); + + test('scope=both with only global block', () => { + const content = '## GLOBAL\n\nOnly global content'; + const result = parseScopedOutput(content, 'both'); + expect(result.project).toBe(''); + expect(result.global).toBe('Only global content'); + }); + + test('scope=both with malformed output falls back to project', () => { + const content = '## Decisions\n- Some decision without scope markers'; + const result = parseScopedOutput(content, 'both'); + expect(result.project).toBe('## Decisions\n- Some decision without scope markers'); + expect(result.global).toBe(''); + }); + + test('scope=both with empty content falls back to project', () => { + const result = parseScopedOutput(' ', 'both'); + // Trimmed empty content still falls back to project + expect(result.project).toBe(''); + expect(result.global).toBe(''); + }); }); diff --git a/packages/core/src/services/knowledge-flush.test.ts b/packages/core/src/services/knowledge-flush.test.ts index be6de38dd5..30617b4d38 100644 --- 
a/packages/core/src/services/knowledge-flush.test.ts +++ b/packages/core/src/services/knowledge-flush.test.ts @@ -1148,6 +1148,55 @@ describe('knowledge-flush', () => { expect(mockSendQuery).not.toHaveBeenCalled(); }); + test('flushGlobalKnowledge uses codebase-agnostic synthesis prompt with contradiction detection', async () => { + directories[`${GLOBAL_KB_PATH}/logs`] = ['2026-04-11.md']; + directories[`${GLOBAL_KB_PATH}/domains`] = []; + + fileSystem[`${GLOBAL_KB_PATH}/logs/2026-04-11.md`] = '## Global pattern\n- Some lesson\n'; + + mockSendQueryChunks = [ + { + type: 'assistant', + content: JSON.stringify({ articles: [], domainSummaries: {}, indexSummary: '' }), + }, + ]; + + await flushGlobalKnowledge(); + + const prompt = mockSendQuery.mock.calls[0]![0] as string; + // Global prompt should contain codebase-agnostic rules + expect(prompt).toContain('GLOBAL knowledge base'); + expect(prompt).toContain('codebase-agnostic'); + // Should contain Sources footnotes requirement + expect(prompt).toContain('## Sources'); + // Should contain contradiction detection + expect(prompt).toContain('## Contradictions'); + expect(prompt).toContain('contradictory'); + }); + + test('flushKnowledge (project) does NOT use global synthesis prompt', async () => { + directories[`${KB_PATH}/logs`] = ['2026-04-11.md']; + directories[`${KB_PATH}/domains`] = []; + + fileSystem[`${KB_PATH}/logs/2026-04-11.md`] = '## Content\n'; + + mockSendQueryChunks = [ + { + type: 'assistant', + content: JSON.stringify({ articles: [], domainSummaries: {}, indexSummary: '' }), + }, + ]; + + await flushKnowledge('acme', 'widget'); + + const prompt = mockSendQuery.mock.calls[0]![0] as string; + // Project prompt should NOT contain global-specific instructions + expect(prompt).not.toContain('GLOBAL knowledge base'); + expect(prompt).not.toContain('contradictory'); + // But should still contain standard synthesis rules + expect(prompt).toContain('knowledge base compiler'); + }); + // --- AI JSON parse 
failure tests --- test('throws on malformed JSON from AI synthesis', async () => { diff --git a/packages/core/src/services/knowledge-flush.ts b/packages/core/src/services/knowledge-flush.ts index fb2411f958..e528814685 100644 --- a/packages/core/src/services/knowledge-flush.ts +++ b/packages/core/src/services/knowledge-flush.ts @@ -158,6 +158,47 @@ You MUST respond with a JSON object (no markdown fences, no explanation, just JS `; +/** Global-tier synthesis prompt — codebase-agnostic with source attribution and contradiction detection */ +const GLOBAL_SYNTHESIS_PROMPT = `You are a knowledge base compiler for a GLOBAL knowledge base that spans multiple projects. +Articles must be codebase-agnostic — they should describe general engineering knowledge, patterns, and lessons that apply universally. Do NOT reference specific file paths, repository structures, or project-internal APIs. + +## Output Format + +You MUST respond with a JSON object (no markdown fences, no explanation, just JSON) with this exact structure: + +{ + "articles": [ + { + "domain": "architecture|decisions|patterns|lessons|connections|", + "concept": "kebab-case-concept-name", + "content": "Full markdown article content with [[wikilink]] backlinks to related concepts" + } + ], + "domainSummaries": { + "architecture": "One-line summary of architecture knowledge", + "decisions": "One-line summary of decisions" + }, + "indexSummary": "Brief overview of all domains for the top-level index" +} + +## Rules +- Each article should be a focused concept (e.g., "retry-backoff-patterns", "structured-logging-conventions") +- Articles MUST be codebase-agnostic — generalize project-specific knowledge into universal principles +- Use [[wikilinks]] to cross-reference related articles (e.g., [[patterns/retry-backoff-patterns]]) +- Domain names are lowercase kebab-case +- Concept filenames are lowercase kebab-case +- Merge new knowledge with existing articles when the concept overlaps (prefer updating over creating 
duplicates) +- You may create new domains beyond the starting set if the knowledge doesn't fit existing domains +- Every article should start with a level-1 heading matching the concept name in title case +- Include a "Related" section at the end of each article with [[wikilinks]] +- **Sources**: End each article with a \`## Sources\` section listing the source projects that contributed the knowledge (e.g., "- Observed in owner/repo during workflow run xyz") +- **Contradiction detection**: If new log entries contradict existing article content, add a \`## Contradictions\` section with a \`⚠ contradictory\` marker explaining both claims and which sources support each side. Do NOT silently overwrite — surface the conflict. +- If no meaningful articles can be produced, return {"articles":[],"domainSummaries":{},"indexSummary":""} + +--- + +`; + /** Options for the shared flush core logic */ interface FlushCoreOptions { /** Label for logging (e.g., "acme/widget" or "global") */ @@ -170,6 +211,8 @@ interface FlushCoreOptions { git?: { owner: string; repo: string }; /** Init function to call before flushing (ensures KB directory exists) */ init: () => Promise; + /** Override synthesis prompt (defaults to project-tier SYNTHESIS_PROMPT) */ + synthesisPrompt?: string; } /** @@ -177,7 +220,14 @@ interface FlushCoreOptions { */ async function flushKnowledgeCore(options: FlushCoreOptions): Promise { const log = getLog(); - const { label, knowledgePath, config: mergedConfig, git: gitInfo, init } = options; + const { + label, + knowledgePath, + config: mergedConfig, + git: gitInfo, + init, + synthesisPrompt, + } = options; if (!mergedConfig.knowledge.enabled) { log.debug({ label }, 'knowledge.flush_skipped_disabled'); @@ -243,7 +293,8 @@ async function flushKnowledgeCore(options: FlushCoreOptions): Promise initGlobalKnowledgeDir(), + synthesisPrompt: GLOBAL_SYNTHESIS_PROMPT, }); } @@ -457,11 +509,12 @@ async function synthesizeLogs( logContents: string, existingArticles: string, 
compileModel: string, - compileProvider: string + compileProvider: string, + promptOverride?: string ): Promise { const client = getAssistantClient(compileProvider); - const contextParts = [SYNTHESIS_PROMPT]; + const contextParts = [promptOverride ?? SYNTHESIS_PROMPT]; if (existingArticles) { contextParts.push(existingArticles); } diff --git a/packages/core/src/workflows/store-adapter.ts b/packages/core/src/workflows/store-adapter.ts index 2e1b10e188..0bfc260790 100644 --- a/packages/core/src/workflows/store-adapter.ts +++ b/packages/core/src/workflows/store-adapter.ts @@ -154,7 +154,8 @@ export function createWorkflowDeps(): WorkflowDeps { store: createWorkflowStore(), getAssistantClient, loadConfig: loadMergedConfig, - extractKnowledge: extractKnowledgeFromContext, + extractKnowledge: (prompt, context, cwd, metadata, scope) => + extractKnowledgeFromContext(prompt, context, cwd, metadata, scope), loadKnowledgeContext, }; } diff --git a/packages/workflows/package.json b/packages/workflows/package.json index d4b538c384..9c2cfc3ce6 100644 --- a/packages/workflows/package.json +++ b/packages/workflows/package.json @@ -18,7 +18,7 @@ "./test-utils": "./src/test-utils.ts" }, "scripts": { - "test": "bun test src/dag-executor.test.ts && bun test src/loader.test.ts && bun test src/logger.test.ts && bun test src/condition-evaluator.test.ts && bun test src/event-emitter.test.ts && bun test src/executor-shared.test.ts && bun test src/executor.test.ts && bun test src/executor-preamble.test.ts && bun test src/defaults/ src/model-validation.test.ts src/router.test.ts src/utils/ src/hooks.test.ts && bun test src/validation-parser.test.ts src/schemas.test.ts src/command-validation.test.ts && bun test src/validator.test.ts && bun test src/knowledge-extract-node.test.ts", + "test": "bun test src/dag-executor.test.ts && bun test src/loader.test.ts && bun test src/logger.test.ts && bun test src/condition-evaluator.test.ts && bun test src/event-emitter.test.ts && bun test 
src/executor-shared.test.ts && bun test src/executor.test.ts && bun test src/executor-preamble.test.ts && bun test src/defaults/ src/model-validation.test.ts src/router.test.ts src/utils/ src/hooks.test.ts && bun test src/validation-parser.test.ts src/schemas.test.ts src/schemas/dag-node.test.ts src/command-validation.test.ts && bun test src/validator.test.ts && bun test src/knowledge-extract-node.test.ts", "type-check": "bun x tsc --noEmit" }, "dependencies": { diff --git a/packages/workflows/src/dag-executor.ts b/packages/workflows/src/dag-executor.ts index 4a6eb190b5..923ec0f08e 100644 --- a/packages/workflows/src/dag-executor.ts +++ b/packages/workflows/src/dag-executor.ts @@ -1579,10 +1579,13 @@ async function executeKnowledgeExtractNode( } const context = contextParts.join('\n\n'); - const extracted = await deps.extractKnowledge(finalPrompt, context, cwd, { - workflowRunId: workflowRun.id, - nodeId: node.id, - }); + const extracted = await deps.extractKnowledge( + finalPrompt, + context, + cwd, + { workflowRunId: workflowRun.id, nodeId: node.id }, + node.scope ?? 
'both' + ); const duration = Date.now() - startTime; await logNodeComplete(logDir, workflowRun.id, node.id, 'knowledge-extract', { diff --git a/packages/workflows/src/defaults/bundled-defaults.test.ts b/packages/workflows/src/defaults/bundled-defaults.test.ts index 00124a4ee6..992ed8a9a9 100644 --- a/packages/workflows/src/defaults/bundled-defaults.test.ts +++ b/packages/workflows/src/defaults/bundled-defaults.test.ts @@ -118,13 +118,14 @@ describe('bundled-defaults', () => { 'archon-piv-loop', 'archon-adversarial-dev', 'archon-workflow-builder', + 'archon-knowledge-correct', ]; for (const wf of expectedWorkflows) { expect(BUNDLED_WORKFLOWS).toHaveProperty(wf); } - expect(Object.keys(BUNDLED_WORKFLOWS)).toHaveLength(13); + expect(Object.keys(BUNDLED_WORKFLOWS)).toHaveLength(14); }); it('should have non-empty content for all workflows', () => { diff --git a/packages/workflows/src/defaults/bundled-defaults.ts b/packages/workflows/src/defaults/bundled-defaults.ts index 6caf6e7ae0..603df43492 100644 --- a/packages/workflows/src/defaults/bundled-defaults.ts +++ b/packages/workflows/src/defaults/bundled-defaults.ts @@ -35,7 +35,7 @@ import archonValidatePrE2eMainCmd from '../../../../.archon/commands/defaults/ar import archonValidatePrReportCmd from '../../../../.archon/commands/defaults/archon-validate-pr-report.md' with { type: 'text' }; // ============================================================================= -// Default Workflows (13 total) +// Default Workflows (14 total) // ============================================================================= import archonAssistWf from '../../../../.archon/workflows/defaults/archon-assist.yaml' with { type: 'text' }; @@ -51,6 +51,7 @@ import archonInteractivePrdWf from '../../../../.archon/workflows/defaults/archo import archonPivLoopWf from '../../../../.archon/workflows/defaults/archon-piv-loop.yaml' with { type: 'text' }; import archonAdversarialDevWf from 
'../../../../.archon/workflows/defaults/archon-adversarial-dev.yaml' with { type: 'text' }; import archonWorkflowBuilderWf from '../../../../.archon/workflows/defaults/archon-workflow-builder.yaml' with { type: 'text' }; +import archonKnowledgeCorrectWf from '../../../../.archon/workflows/defaults/archon-knowledge-correct.yaml' with { type: 'text' }; // ============================================================================= // Exports @@ -100,6 +101,7 @@ export const BUNDLED_WORKFLOWS: Record = { 'archon-piv-loop': archonPivLoopWf, 'archon-adversarial-dev': archonAdversarialDevWf, 'archon-workflow-builder': archonWorkflowBuilderWf, + 'archon-knowledge-correct': archonKnowledgeCorrectWf, }; /** diff --git a/packages/workflows/src/deps.ts b/packages/workflows/src/deps.ts index 3813af69ba..3ac5520076 100644 --- a/packages/workflows/src/deps.ts +++ b/packages/workflows/src/deps.ts @@ -313,7 +313,8 @@ export type KnowledgeExtractFn = ( prompt: string, context: string, cwd: string, - metadata: { workflowRunId: string; nodeId: string } + metadata: { workflowRunId: string; nodeId: string }, + scope?: 'project' | 'global' | 'both' ) => Promise; /** diff --git a/packages/workflows/src/schemas/dag-node.test.ts b/packages/workflows/src/schemas/dag-node.test.ts new file mode 100644 index 0000000000..ff21ada25f --- /dev/null +++ b/packages/workflows/src/schemas/dag-node.test.ts @@ -0,0 +1,51 @@ +import { describe, test, expect } from 'bun:test'; +import { knowledgeExtractNodeSchema } from './dag-node'; + +describe('knowledgeExtractNodeSchema', () => { + const baseNode = { + id: 'extract-node', + knowledge_extract: 'Extract patterns from the conversation', + }; + + test('accepts node without scope (defaults to both)', () => { + const result = knowledgeExtractNodeSchema.safeParse(baseNode); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.scope).toBe('both'); + } + }); + + test('accepts scope=project', () => { + const result = 
knowledgeExtractNodeSchema.safeParse({ ...baseNode, scope: 'project' }); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.scope).toBe('project'); + } + }); + + test('accepts scope=global', () => { + const result = knowledgeExtractNodeSchema.safeParse({ ...baseNode, scope: 'global' }); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.scope).toBe('global'); + } + }); + + test('accepts scope=both', () => { + const result = knowledgeExtractNodeSchema.safeParse({ ...baseNode, scope: 'both' }); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.scope).toBe('both'); + } + }); + + test('rejects invalid scope value', () => { + const result = knowledgeExtractNodeSchema.safeParse({ ...baseNode, scope: 'invalid' }); + expect(result.success).toBe(false); + }); + + test('rejects empty knowledge_extract prompt', () => { + const result = knowledgeExtractNodeSchema.safeParse({ id: 'node', knowledge_extract: '' }); + expect(result.success).toBe(false); + }); +}); diff --git a/packages/workflows/src/schemas/dag-node.ts b/packages/workflows/src/schemas/dag-node.ts index 769be54d63..8d0772c2a1 100644 --- a/packages/workflows/src/schemas/dag-node.ts +++ b/packages/workflows/src/schemas/dag-node.ts @@ -267,6 +267,7 @@ export type CancelNode = z.infer & { */ export const knowledgeExtractNodeSchema = dagNodeBaseSchema.extend({ knowledge_extract: z.string().min(1, "'knowledge_extract' prompt must not be empty"), + scope: z.enum(['project', 'global', 'both']).default('both'), }); /** DAG node that runs targeted knowledge extraction and appends to the daily log */ From 61eba38cd0e1f09de1060d3e0a2bd298cfa32411 Mon Sep 17 00:00:00 2001 From: Staxed Date: Thu, 16 Apr 2026 20:46:58 -0400 Subject: [PATCH 2/3] fix: address review findings (MEDIUM/LOW) for knowledge base PR - Fix globalMatch regex to use lookahead (prevents wrong capture on reversed AI output) - Simplify store-adapter extractKnowledge to 
direct reference - Add scope fallback logging in parseScopedOutput - Add @param scope to KnowledgeExtractFn JSDoc - Fix "daily log" singular to "daily log(s)" in JSDoc - Document knowledge-extract scope field in CLAUDE.md - Add 2 tests for DAG executor scope forwarding (default + explicit) --- CLAUDE.md | 2 +- .../core/src/services/knowledge-capture.ts | 3 +- packages/core/src/workflows/store-adapter.ts | 3 +- packages/workflows/src/deps.ts | 3 +- .../src/knowledge-extract-node.test.ts | 89 +++++++++++++++++++ packages/workflows/src/schemas/dag-node.ts | 2 +- 6 files changed, 96 insertions(+), 6 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 253a536f61..c053c5e3fa 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -718,7 +718,7 @@ async function createSession(conversationId: string, codebaseId: string) { 2. **Workflows** (YAML-based): - Stored in `.archon/workflows/` (searched recursively) - Multi-step AI execution chains, discovered at runtime - - **`nodes:` (DAG format)**: Nodes with explicit `depends_on` edges; independent nodes in the same topological layer run concurrently. Node types: `command:` (named command file), `prompt:` (inline prompt), `bash:` (shell script, stdout captured as `$nodeId.output`, no AI), `loop:` (iterative AI prompt until completion signal), `knowledge-extract:` (targeted knowledge extraction from workflow context, appends to daily log) . 
Supports `when:` conditions, `trigger_rule` join semantics, `$nodeId.output` substitution, `output_format` for structured JSON output (Claude and Codex), `allowed_tools`/`denied_tools` for per-node tool restrictions (Claude only), `hooks` for per-node SDK hook callbacks (Claude only), `mcp` for per-node MCP server config files (Claude only, env vars expanded at execution time), and `skills` for per-node skill preloading via AgentDefinition wrapping (Claude only), and `effort`/`thinking`/`maxBudgetUsd`/`systemPrompt`/`fallbackModel`/`betas`/`sandbox` for Claude SDK advanced options (Claude only, also settable at workflow level) + - **`nodes:` (DAG format)**: Nodes with explicit `depends_on` edges; independent nodes in the same topological layer run concurrently. Node types: `command:` (named command file), `prompt:` (inline prompt), `bash:` (shell script, stdout captured as `$nodeId.output`, no AI), `loop:` (iterative AI prompt until completion signal), `knowledge_extract:` (targeted knowledge extraction from workflow context, appends to daily log; `scope` field routes to project, global, or both logs — default `'both'`). 
Supports `when:` conditions, `trigger_rule` join semantics, `$nodeId.output` substitution, `output_format` for structured JSON output (Claude and Codex), `allowed_tools`/`denied_tools` for per-node tool restrictions (Claude only), `hooks` for per-node SDK hook callbacks (Claude only), `mcp` for per-node MCP server config files (Claude only, env vars expanded at execution time), and `skills` for per-node skill preloading via AgentDefinition wrapping (Claude only), and `effort`/`thinking`/`maxBudgetUsd`/`systemPrompt`/`fallbackModel`/`betas`/`sandbox` for Claude SDK advanced options (Claude only, also settable at workflow level) - Provider inherited from `.archon/config.yaml` unless explicitly set; per-node `provider` and `model` overrides supported - Model and options can be set per workflow or inherited from config defaults - `interactive: true` at the workflow level forces foreground execution on web (required for approval-gate workflows in the web UI) diff --git a/packages/core/src/services/knowledge-capture.ts b/packages/core/src/services/knowledge-capture.ts index 4a8c50f5d8..da24128545 100644 --- a/packages/core/src/services/knowledge-capture.ts +++ b/packages/core/src/services/knowledge-capture.ts @@ -259,13 +259,14 @@ export function parseScopedOutput( // 'both' scope: parse ## PROJECT and ## GLOBAL blocks const projectMatch = /## PROJECT\s*\n([\s\S]*?)(?=## GLOBAL|$)/i.exec(content); - const globalMatch = /## GLOBAL\s*\n([\s\S]*?)$/i.exec(content); + const globalMatch = /## GLOBAL\s*\n([\s\S]*?)(?=## PROJECT|$)/i.exec(content); const projectContent = projectMatch?.[1]?.trim() ?? ''; const globalContent = globalMatch?.[1]?.trim() ?? 
''; // If neither block was found, fall back to project (conservative default) if (!projectContent && !globalContent) { + getLog().info('knowledge.parse_scope_fallback_project'); return { project: content.trim(), global: '' }; } diff --git a/packages/core/src/workflows/store-adapter.ts b/packages/core/src/workflows/store-adapter.ts index 0bfc260790..2e1b10e188 100644 --- a/packages/core/src/workflows/store-adapter.ts +++ b/packages/core/src/workflows/store-adapter.ts @@ -154,8 +154,7 @@ export function createWorkflowDeps(): WorkflowDeps { store: createWorkflowStore(), getAssistantClient, loadConfig: loadMergedConfig, - extractKnowledge: (prompt, context, cwd, metadata, scope) => - extractKnowledgeFromContext(prompt, context, cwd, metadata, scope), + extractKnowledge: extractKnowledgeFromContext, loadKnowledgeContext, }; } diff --git a/packages/workflows/src/deps.ts b/packages/workflows/src/deps.ts index 3ac5520076..89c791cb57 100644 --- a/packages/workflows/src/deps.ts +++ b/packages/workflows/src/deps.ts @@ -301,12 +301,13 @@ export interface WorkflowConfig { /** * Callback for knowledge-extract DAG nodes. * Calls AI (capture model) with a custom prompt + context, appends extracted knowledge - * to the daily log, and returns the extracted content as node output. + * to the appropriate daily log(s), and returns the extracted content as node output. 
* * @param prompt - Extraction prompt describing what knowledge to extract * @param context - Upstream node outputs and workflow context * @param cwd - Working directory (used to resolve owner/repo) * @param metadata - Workflow run and node identifiers for log entries + * @param scope - Where to route extracted knowledge: 'project', 'global', or 'both' (default) * @returns Extracted knowledge content */ export type KnowledgeExtractFn = ( diff --git a/packages/workflows/src/knowledge-extract-node.test.ts b/packages/workflows/src/knowledge-extract-node.test.ts index 2617dd1740..136df558de 100644 --- a/packages/workflows/src/knowledge-extract-node.test.ts +++ b/packages/workflows/src/knowledge-extract-node.test.ts @@ -314,6 +314,95 @@ describe('knowledge-extract node', () => { expect(store.completeWorkflowRun).toHaveBeenCalled(); }); + test('knowledge-extract node forwards default scope "both" to extractKnowledge', async () => { + const nodes: DagNode[] = [ + { + id: 'extract', + knowledge_extract: 'Extract patterns', + } as KnowledgeExtractNode, + ]; + + const deps: WorkflowDeps = { + store, + getAssistantClient: () => ({ + sendQuery: async function* () { + /* noop */ + }, + getType: () => 'claude', + }), + loadConfig: async () => config, + extractKnowledge: mockExtractKnowledge, + }; + + const workflowRun = createWorkflowRun(); + (store.getWorkflowRunStatus as Mock<() => Promise>).mockResolvedValue('running'); + + await executeDagWorkflow( + deps, + platform, + 'conv-123', + '/tmp/test', + { name: 'test', nodes }, + workflowRun, + 'claude', + 'sonnet', + '/tmp/artifacts', + '/tmp/logs', + 'main', + 'docs/', + config + ); + + expect(mockExtractKnowledge).toHaveBeenCalledTimes(1); + const call = mockExtractKnowledge.mock.calls[0]; + expect(call[4]).toBe('both'); // scope defaults to 'both' + }); + + test('knowledge-extract node forwards explicit scope "project" to extractKnowledge', async () => { + const nodes: DagNode[] = [ + { + id: 'extract', + knowledge_extract: 
'Extract patterns', + scope: 'project', + } as KnowledgeExtractNode, + ]; + + const deps: WorkflowDeps = { + store, + getAssistantClient: () => ({ + sendQuery: async function* () { + /* noop */ + }, + getType: () => 'claude', + }), + loadConfig: async () => config, + extractKnowledge: mockExtractKnowledge, + }; + + const workflowRun = createWorkflowRun(); + (store.getWorkflowRunStatus as Mock<() => Promise>).mockResolvedValue('running'); + + await executeDagWorkflow( + deps, + platform, + 'conv-123', + '/tmp/test', + { name: 'test', nodes }, + workflowRun, + 'claude', + 'sonnet', + '/tmp/artifacts', + '/tmp/logs', + 'main', + 'docs/', + config + ); + + expect(mockExtractKnowledge).toHaveBeenCalledTimes(1); + const call = mockExtractKnowledge.mock.calls[0]; + expect(call[4]).toBe('project'); // explicit scope forwarded + }); + test('existing workflows without knowledge-extract nodes work unchanged', async () => { const nodes: DagNode[] = [{ id: 'build', bash: 'echo "built"' } as DagNode]; diff --git a/packages/workflows/src/schemas/dag-node.ts b/packages/workflows/src/schemas/dag-node.ts index 8d0772c2a1..d3cdd11c84 100644 --- a/packages/workflows/src/schemas/dag-node.ts +++ b/packages/workflows/src/schemas/dag-node.ts @@ -270,7 +270,7 @@ export const knowledgeExtractNodeSchema = dagNodeBaseSchema.extend({ scope: z.enum(['project', 'global', 'both']).default('both'), }); -/** DAG node that runs targeted knowledge extraction and appends to the daily log */ +/** DAG node that runs targeted knowledge extraction and appends to the appropriate daily log(s) */ export type KnowledgeExtractNode = z.infer & { command?: never; prompt?: never; From 500c636517b79fcce27f806606c09eef84805a35 Mon Sep 17 00:00:00 2001 From: Staxed Date: Thu, 16 Apr 2026 20:59:39 -0400 Subject: [PATCH 3/3] fix(knowledge): wire captureKnowledge to route global-scoped entries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Completes plan Tasks 1-3 that the 
initial implementation skipped: 1. EXTRACTION_PROMPT now instructs the capture model to produce ## PROJECT and ## GLOBAL blocks with a conservative bar (project is the default; global only for codebase-independent knowledge). 2. captureKnowledge parses scoped output via parseScopedOutput and routes each block to its tier's daily log. 3. Global entries trigger scheduleGlobalFlush(); global log entries include **Source**: owner/repo for attribution. Also refactored the duplicated global log writer into a shared writeGlobalLogEntry() helper (replacing the workflow-specific appendToGlobalDailyLog). Why this matters: captureKnowledge is the primary automatic capture path (session close, /reset, workflow completion, CLI post-workflow). Without this wiring, ~/.archon/knowledge/ would stay empty in normal use — only workflows with explicit knowledge_extract nodes would populate it, and zero default workflows have them. Tests: added 4 scope-routing cases to knowledge-capture.test.ts covering both-block, project-only, global-only, and malformed (fallback-to-project) outputs. Full validate passes. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../src/services/knowledge-capture.test.ts | 117 +++++++++++++++++- .../core/src/services/knowledge-capture.ts | 117 +++++++++++++----- 2 files changed, 199 insertions(+), 35 deletions(-) diff --git a/packages/core/src/services/knowledge-capture.test.ts b/packages/core/src/services/knowledge-capture.test.ts index a57eb24ee7..34713adeda 100644 --- a/packages/core/src/services/knowledge-capture.test.ts +++ b/packages/core/src/services/knowledge-capture.test.ts @@ -72,15 +72,18 @@ mock.module('../config/config-loader', () => ({ // Mock knowledge-init const mockInitKnowledgeDir = mock(async () => undefined); +const mockInitGlobalKnowledgeDir = mock(async () => undefined); mock.module('./knowledge-init', () => ({ initKnowledgeDir: mockInitKnowledgeDir, - initGlobalKnowledgeDir: mock(async () => undefined), + initGlobalKnowledgeDir: mockInitGlobalKnowledgeDir, })); // Mock knowledge-scheduler (imported by capture module for global flush) +const mockScheduleFlush = mock(async () => undefined); +const mockScheduleGlobalFlush = mock(async () => undefined); mock.module('./knowledge-scheduler', () => ({ - scheduleFlush: mock(async () => undefined), - scheduleGlobalFlush: mock(async () => undefined), + scheduleFlush: mockScheduleFlush, + scheduleGlobalFlush: mockScheduleGlobalFlush, })); // Mock AI client @@ -109,6 +112,9 @@ describe('knowledge-capture', () => { mockListMessages.mockClear(); mockLoadConfig.mockClear(); mockInitKnowledgeDir.mockClear(); + mockInitGlobalKnowledgeDir.mockClear(); + mockScheduleFlush.mockClear(); + mockScheduleGlobalFlush.mockClear(); mockSendQuery.mockClear(); mockGetAssistantClient.mockClear(); Object.values(mockLogger).forEach(fn => fn.mockClear()); @@ -434,4 +440,109 @@ describe('knowledge-capture', () => { expect(result.extractedContent).toBe('## Decisions\n- use X\n'); }); + + describe('scope routing', () => { + /** Seed a minimal conversation so capture proceeds to extraction. 
*/ + function seedConversation(): void { + mockListMessages.mockResolvedValueOnce([ + { + id: 'msg-1', + conversation_id: 'conv-123', + role: 'user' as const, + content: 'test', + metadata: '{}', + created_at: '2026-04-11T10:00:00Z', + }, + ]); + } + + test('writes BOTH logs when extraction contains project and global blocks', async () => { + seedConversation(); + mockSendQueryChunks = [ + { + type: 'assistant', + content: + '## PROJECT\n### Decisions\n- Use Drizzle ORM\n\n## GLOBAL\n### Lessons\n- Bun mock.module is process-global\n', + }, + ]; + + const result = await captureKnowledge('conv-123', 'acme', 'widget'); + + expect(result.skipped).toBe(false); + + // Two writes: one to project log, one to global log + expect(appendFileCalls).toHaveLength(2); + const projectWrite = appendFileCalls.find(c => c.path.includes('/workspaces/acme/widget/')); + const globalWrite = appendFileCalls.find(c => c.path.includes('/.archon/knowledge/')); + expect(projectWrite).toBeDefined(); + expect(globalWrite).toBeDefined(); + + // Project log contains only the PROJECT block content + expect(projectWrite!.content).toContain('Use Drizzle ORM'); + expect(projectWrite!.content).not.toContain('Bun mock.module'); + + // Global log contains only the GLOBAL block content + source attribution + expect(globalWrite!.content).toContain('Bun mock.module'); + expect(globalWrite!.content).not.toContain('Use Drizzle ORM'); + expect(globalWrite!.content).toContain('**Source**: acme/widget'); + expect(globalWrite!.content).toContain('**Conversation**: conv-123'); + + // Global flush must be scheduled + expect(mockScheduleGlobalFlush).toHaveBeenCalledTimes(1); + expect(mockInitGlobalKnowledgeDir).toHaveBeenCalledTimes(1); + }); + + test('writes ONLY project log when extraction is project-only', async () => { + seedConversation(); + mockSendQueryChunks = [ + { + type: 'assistant', + content: '## PROJECT\n### Decisions\n- Store conversations in DB\n', + }, + ]; + + const result = await 
captureKnowledge('conv-123', 'acme', 'widget'); + + expect(result.skipped).toBe(false); + expect(appendFileCalls).toHaveLength(1); + expect(appendFileCalls[0]!.path).toContain('/workspaces/acme/widget/'); + expect(mockScheduleGlobalFlush).not.toHaveBeenCalled(); + expect(mockInitGlobalKnowledgeDir).not.toHaveBeenCalled(); + }); + + test('writes ONLY global log when extraction is global-only', async () => { + seedConversation(); + mockSendQueryChunks = [ + { + type: 'assistant', + content: '## GLOBAL\n### Lessons\n- Prefer structured logging\n', + }, + ]; + + const result = await captureKnowledge('conv-123', 'acme', 'widget'); + + expect(result.skipped).toBe(false); + expect(appendFileCalls).toHaveLength(1); + expect(appendFileCalls[0]!.path).toContain('/.archon/knowledge/'); + expect(mockInitKnowledgeDir).not.toHaveBeenCalled(); + expect(mockScheduleGlobalFlush).toHaveBeenCalledTimes(1); + expect(mockInitGlobalKnowledgeDir).toHaveBeenCalledTimes(1); + }); + + test('falls back to project log when extraction lacks scope tags (malformed)', async () => { + seedConversation(); + mockSendQueryChunks = [ + { type: 'assistant', content: '## Decisions\n- Legacy unscoped output\n' }, + ]; + + const result = await captureKnowledge('conv-123', 'acme', 'widget'); + + expect(result.skipped).toBe(false); + // Fallback routes the whole content to project + expect(appendFileCalls).toHaveLength(1); + expect(appendFileCalls[0]!.path).toContain('/workspaces/acme/widget/'); + expect(appendFileCalls[0]!.content).toContain('Legacy unscoped output'); + expect(mockScheduleGlobalFlush).not.toHaveBeenCalled(); + }); + }); }); diff --git a/packages/core/src/services/knowledge-capture.ts b/packages/core/src/services/knowledge-capture.ts index da24128545..307c191e09 100644 --- a/packages/core/src/services/knowledge-capture.ts +++ b/packages/core/src/services/knowledge-capture.ts @@ -23,26 +23,57 @@ function getLog(): ReturnType { } /** Extraction prompt sent to the capture model to extract 
structured knowledge from a transcript */ -const EXTRACTION_PROMPT = `You are a knowledge extraction agent. Analyze the following conversation transcript and extract any valuable knowledge into these categories: +const EXTRACTION_PROMPT = `You are a knowledge extraction agent. Analyze the following conversation transcript and extract any valuable knowledge. -## Decisions -Architectural or design decisions made, with rationale. +Organize output into two scope blocks; within each block, group items by category. -## Patterns -Recurring code patterns, conventions, or best practices discovered or applied. +## Scope Classification + +- **PROJECT**: Knowledge specific to this repository — file paths, internal APIs, project-specific conventions, repo-specific decisions. +- **GLOBAL**: Knowledge applicable across any codebase — general engineering patterns, language idioms, tool/SDK gotchas, debugging techniques, universal best practices, meta-lessons about working with AI agents. + +Test: "If I opened a brand-new repo tomorrow in a different language, would this still be true?" If yes → GLOBAL. If no → PROJECT. + +When in doubt, classify as PROJECT (conservative default — leaking a global-worthy item into project is cheap; polluting global with project noise ruins its cross-project value). + +## Output Format + +## PROJECT + +### Decisions +{architectural or design decisions specific to this codebase, with rationale} + +### Patterns +{code patterns or conventions specific to this codebase} -## Lessons -Mistakes encountered, debugging insights, gotchas, or constraints learned. +### Lessons +{mistakes, gotchas, or constraints specific to this codebase} -## Connections -Cross-component dependencies, system relationships, or integration points discovered. 
+### Connections +{cross-component dependencies or system relationships specific to this codebase} -Rules: -- Only include items that would be valuable for a future session on this project +## GLOBAL + +### Decisions +{codebase-agnostic engineering decisions} + +### Patterns +{universal patterns, idioms, or best practices} + +### Lessons +{cross-project tooling/ecosystem gotchas, AI-agent meta-lessons, language quirks} + +### Connections +{general architectural patterns that apply across codebases} + +## Rules + +- Only include items that would be valuable for a future session - Skip trivial or obvious items - Use bullet points with concise descriptions - Include the "why" for decisions and lessons -- If no items exist for a category, omit that category entirely +- If a category has no items within a scope, omit that category entirely +- If a scope has no items at all, omit the entire scope block - If the transcript contains no extractable knowledge, respond with "No knowledge to extract." 
--- @@ -126,19 +157,45 @@ export async function captureKnowledge( }; } - // Ensure KB directory exists - await initKnowledgeDir(owner, repo); + // Parse scoped output — project entries go to project KB, global entries go to global KB + const scoped = parseScopedOutput(extractedContent, 'both'); - // Append to daily log - const logFile = await appendToDailyLog(owner, repo, conversationId, extractedContent); + let projectLogFile = ''; - log.info( - { conversationId, logFile, contentLength: extractedContent.length }, - 'knowledge.capture_completed' - ); + // Write project-scoped entries to project daily log + if (scoped.project) { + await initKnowledgeDir(owner, repo); + projectLogFile = await appendToDailyLog(owner, repo, conversationId, scoped.project); + log.info( + { conversationId, logFile: projectLogFile, contentLength: scoped.project.length }, + 'knowledge.capture_project_completed' + ); + } + + // Write global-scoped entries to global daily log and schedule global flush + if (scoped.global) { + await initGlobalKnowledgeDir(); + const globalHeader = `### Capture: ${new Date().toISOString()}\n**Source**: ${owner}/${repo}\n**Conversation**: ${conversationId}`; + await writeGlobalLogEntry(globalHeader, scoped.global); + await scheduleGlobalFlush(); + log.info( + { conversationId, contentLength: scoped.global.length }, + 'knowledge.capture_global_completed' + ); + } + + if (!scoped.project && !scoped.global) { + log.info({ conversationId }, 'knowledge.capture_completed_nothing'); + return { + logFile: '', + extractedContent: '', + skipped: true, + skipReason: 'Parsed output contained no entries in either scope', + }; + } return { - logFile, + logFile: projectLogFile, extractedContent, skipped: false, }; @@ -366,7 +423,8 @@ export async function extractKnowledgeFromContext( // Write global-scoped entries to global daily log if (scoped.global) { await initGlobalKnowledgeDir(); - await appendToGlobalDailyLog(owner, repo, metadata, scoped.global); + const globalHeader 
= `### Knowledge Extract: ${new Date().toISOString()}\n**Source**: ${owner}/${repo}\n**Workflow Run**: ${metadata.workflowRunId}\n**Node**: ${metadata.nodeId}`; + await writeGlobalLogEntry(globalHeader, scoped.global); log.info( { nodeId: metadata.nodeId, contentLength: scoped.global.length }, @@ -381,23 +439,18 @@ export async function extractKnowledgeFromContext( } /** - * Append extracted global knowledge to the global daily log. - * Includes source attribution (owner/repo) for traceability. + * Append a pre-formatted entry to the global daily log (~/.archon/knowledge/logs/YYYY-MM-DD.md). + * The caller provides the header (source/metadata lines) so this helper is reusable across + * conversation-captured and workflow-extracted knowledge. */ -async function appendToGlobalDailyLog( - owner: string, - repo: string, - metadata: { workflowRunId: string; nodeId: string }, - content: string -): Promise { +async function writeGlobalLogEntry(sourceHeader: string, content: string): Promise { const globalKnowledgePath = getGlobalKnowledgePath(); const logsDir = join(globalKnowledgePath, 'logs'); await mkdir(logsDir, { recursive: true }); const today = new Date().toISOString().slice(0, 10); const logFile = join(logsDir, `${today}.md`); - const timestamp = new Date().toISOString(); - const entry = `\n---\n\n### Knowledge Extract: ${timestamp}\n**Source**: ${owner}/${repo}\n**Workflow Run**: ${metadata.workflowRunId}\n**Node**: ${metadata.nodeId}\n\n${content}\n`; + const entry = `\n---\n\n${sourceHeader}\n\n${content}\n`; await appendFile(logFile, entry);