From 5c1a02cb5d3bad82c0f04671e7cc08944e67d8f1 Mon Sep 17 00:00:00 2001 From: Gabe Campbell Date: Wed, 25 Mar 2026 22:47:03 -0500 Subject: [PATCH 1/6] feat: added skip-agents-md cli flag --- README.md | 13 +- gitnexus/README.md | 11 +- gitnexus/src/cli/ai-context.ts | 68 ++-- gitnexus/src/cli/analyze.ts | 447 ++++++++++++++++-------- gitnexus/src/cli/index.ts | 1 + gitnexus/test/unit/ai-context.test.ts | 28 ++ gitnexus/test/unit/skip-git-cli.test.ts | 3 +- 7 files changed, 371 insertions(+), 200 deletions(-) diff --git a/README.md b/README.md index 5616b6a818..56b4d1b3a6 100644 --- a/README.md +++ b/README.md @@ -165,13 +165,14 @@ args = ["-y", "gitnexus@latest", "mcp"] ### CLI Commands ```bash -gitnexus setup # Configure MCP for your editors (one-time) -gitnexus analyze [path] # Index a repository (or update stale index) -gitnexus analyze --force # Force full re-index -gitnexus analyze --skills # Generate repo-specific skill files from detected communities +gitnexus setup # Configure MCP for your editors (one-time) +gitnexus analyze [path] # Index a repository (or update stale index) +gitnexus analyze --force # Force full re-index +gitnexus analyze --skills # Generate repo-specific skill files from detected communities gitnexus analyze --skip-embeddings # Skip embedding generation (faster) -gitnexus analyze --embeddings # Enable embedding generation (slower, better search) -gitnexus analyze --verbose # Log skipped files when parsers are unavailable +gitnexus analyze --skip-agents-md # Preserve custom AGENTS.md/CLAUDE.md gitnexus section edits +gitnexus analyze --embeddings # Enable embedding generation (slower, better search) +gitnexus analyze --verbose # Log skipped files when parsers are unavailable gitnexus mcp # Start MCP server (stdio) — serves all indexed repos gitnexus serve # Start local HTTP server (multi-repo) for web UI connection gitnexus list # List all indexed repositories diff --git a/gitnexus/README.md b/gitnexus/README.md index 312a465391..b56e6c2192 100644 --- a/gitnexus/README.md +++ b/gitnexus/README.md @@ -149,11 +149,12 @@ Your AI agent gets these tools automatically: ## CLI Commands ```bash -gitnexus setup # Configure MCP for your editors (one-time) -gitnexus analyze [path] # Index a repository (or update stale index) -gitnexus analyze --force # Force full re-index -gitnexus analyze --embeddings # Enable embedding generation (slower, better search) -gitnexus analyze --verbose # Log skipped files when parsers are unavailable +gitnexus setup # Configure MCP for your editors (one-time) +gitnexus analyze [path] # Index a repository (or update stale index) +gitnexus analyze --force # Force full re-index +gitnexus analyze --embeddings # Enable embedding generation (slower, better search) +gitnexus analyze --skip-agents-md # Preserve custom AGENTS.md/CLAUDE.md gitnexus section edits +gitnexus analyze --verbose # Log skipped files when parsers are unavailable gitnexus mcp # Start MCP server (stdio) — serves all indexed repos gitnexus serve # Start local HTTP server (multi-repo) for web UI gitnexus index # Register an existing .gitnexus/ folder into the global registry diff --git a/gitnexus/src/cli/ai-context.ts b/gitnexus/src/cli/ai-context.ts index d1e6b7ba84..ec022bcb82 100644 --- a/gitnexus/src/cli/ai-context.ts +++ b/gitnexus/src/cli/ai-context.ts @@ -1,6 +1,6 @@ /** * AI Context Generator - * + * * Creates AGENTS.md and CLAUDE.md with full inline GitNexus context. * AGENTS.md is the standard read by Cursor, Windsurf, OpenCode, Codex, Cline, etc. * CLAUDE.md is for Claude Code which only reads that file. @@ -20,10 +20,14 @@ interface RepoStats { nodes?: number; edges?: number; communities?: number; - clusters?: number; // Aggregated cluster count (what tools show) + clusters?: number; // Aggregated cluster count (what tools show) processes?: number; } +interface AIContextOptions { + skipAgentsMd?: boolean; +} + const GITNEXUS_START_MARKER = ''; const GITNEXUS_END_MARKER = ''; @@ -38,20 +42,12 @@ const GITNEXUS_END_MARKER = ''; * - Exact tool commands with parameters — vague directives get ignored * - Self-review checklist — forces model to verify its own work */ -function generateGitNexusContent( - projectName: string, - stats: RepoStats, - generatedSkills?: GeneratedSkillInfo[], -): string { - const generatedRows = - generatedSkills && generatedSkills.length > 0 - ? generatedSkills - .map( - (s) => - `| Work in the ${s.label} area (${s.symbolCount} symbols) | \`.claude/skills/generated/${s.name}/SKILL.md\` |`, - ) - .join('\n') - : ''; +function generateGitNexusContent(projectName: string, stats: RepoStats, generatedSkills?: GeneratedSkillInfo[]): string { + const generatedRows = (generatedSkills && generatedSkills.length > 0) + ? generatedSkills.map(s => + `| Work in the ${s.label} area (${s.symbolCount} symbols) | \`.claude/skills/generated/${s.name}/SKILL.md\` |` + ).join('\n') + : ''; const skillsTable = `| Task | Read this skill file | |------|---------------------| @@ -158,6 +154,7 @@ ${skillsTable} ${GITNEXUS_END_MARKER}`; } + /** * Check if a file exists */ @@ -178,7 +175,7 @@ async function fileExists(filePath: string): Promise { */ async function upsertGitNexusSection( filePath: string, - content: string, + content: string ): Promise<'created' | 'updated' | 'appended'> { const exists = await fileExists(filePath); @@ -220,33 +217,27 @@ async function installSkills(repoPath: string): Promise { const skills = [ { name: 'gitnexus-exploring', - description: - 'Use when the user asks how code works, wants to understand architecture, trace execution flows, or explore unfamiliar parts of the codebase. Examples: "How does X work?", "What calls this function?", "Show me the auth flow"', + description: 'Use when the user asks how code works, wants to understand architecture, trace execution flows, or explore unfamiliar parts of the codebase. Examples: "How does X work?", "What calls this function?", "Show me the auth flow"', }, { name: 'gitnexus-debugging', - description: - 'Use when the user is debugging a bug, tracing an error, or asking why something fails. Examples: "Why is X failing?", "Where does this error come from?", "Trace this bug"', + description: 'Use when the user is debugging a bug, tracing an error, or asking why something fails. Examples: "Why is X failing?", "Where does this error come from?", "Trace this bug"', }, { name: 'gitnexus-impact-analysis', - description: - 'Use when the user wants to know what will break if they change something, or needs safety analysis before editing code. Examples: "Is it safe to change X?", "What depends on this?", "What will break?"', + description: 'Use when the user wants to know what will break if they change something, or needs safety analysis before editing code. Examples: "Is it safe to change X?", "What depends on this?", "What will break?"', }, { name: 'gitnexus-refactoring', - description: - 'Use when the user wants to rename, extract, split, move, or restructure code safely. Examples: "Rename this function", "Extract this into a module", "Refactor this class", "Move this to a separate file"', + description: 'Use when the user wants to rename, extract, split, move, or restructure code safely. Examples: "Rename this function", "Extract this into a module", "Refactor this class", "Move this to a separate file"', }, { name: 'gitnexus-guide', - description: - 'Use when the user asks about GitNexus itself — available tools, how to query the knowledge graph, MCP resources, graph schema, or workflow reference. Examples: "What GitNexus tools are available?", "How do I use GitNexus?"', + description: 'Use when the user asks about GitNexus itself — available tools, how to query the knowledge graph, MCP resources, graph schema, or workflow reference. Examples: "What GitNexus tools are available?", "How do I use GitNexus?"', }, { name: 'gitnexus-cli', - description: - 'Use when the user needs to run GitNexus CLI commands like analyze/index a repo, check status, clean the index, generate a wiki, or list indexed repos. Examples: "Index this repo", "Reanalyze the codebase", "Generate a wiki"', + description: 'Use when the user needs to run GitNexus CLI commands like analyze/index a repo, check status, clean the index, generate a wiki, or list indexed repos. Examples: "Index this repo", "Reanalyze the codebase", "Generate a wiki"', }, ]; @@ -299,19 +290,22 @@ export async function generateAIContextFiles( projectName: string, stats: RepoStats, generatedSkills?: GeneratedSkillInfo[], + options?: AIContextOptions ): Promise<{ files: string[] }> { const content = generateGitNexusContent(projectName, stats, generatedSkills); const createdFiles: string[] = []; - // Create AGENTS.md (standard for Cursor, Windsurf, OpenCode, Cline, etc.) - const agentsPath = path.join(repoPath, 'AGENTS.md'); - const agentsResult = await upsertGitNexusSection(agentsPath, content); - createdFiles.push(`AGENTS.md (${agentsResult})`); + if (!options?.skipAgentsMd) { + // Create AGENTS.md (standard for Cursor, Windsurf, OpenCode, Cline, etc.) + const agentsPath = path.join(repoPath, 'AGENTS.md'); + const agentsResult = await upsertGitNexusSection(agentsPath, content); + createdFiles.push(`AGENTS.md (${agentsResult})`); - // Create CLAUDE.md (for Claude Code) - const claudePath = path.join(repoPath, 'CLAUDE.md'); - const claudeResult = await upsertGitNexusSection(claudePath, content); - createdFiles.push(`CLAUDE.md (${claudeResult})`); + // Create CLAUDE.md (for Claude Code) + const claudePath = path.join(repoPath, 'CLAUDE.md'); + const claudeResult = await upsertGitNexusSection(claudePath, content); + createdFiles.push(`CLAUDE.md (${claudeResult})`); + } // Install skills to .claude/skills/gitnexus/ const installedSkills = await installSkills(repoPath); diff --git a/gitnexus/src/cli/analyze.ts b/gitnexus/src/cli/analyze.ts index 8dcb19e469..75c0d029f6 100644 --- a/gitnexus/src/cli/analyze.ts +++ b/gitnexus/src/cli/analyze.ts @@ -2,22 +2,25 @@ * Analyze Command * * Indexes a repository and stores the knowledge graph in .gitnexus/ - * - * Delegates core analysis to the shared runFullAnalysis orchestrator. - * This CLI wrapper handles: heap management, progress bar, SIGINT, - * skill generation (--skills), summary output, and process.exit(). */ import path from 'path'; import { execFileSync } from 'child_process'; import v8 from 'v8'; import cliProgress from 'cli-progress'; -import { closeLbug } from '../core/lbug/lbug-adapter.js'; -import { getStoragePaths, getGlobalRegistryPath } from '../storage/repo-manager.js'; -import { getGitRoot, hasGitDir } from '../storage/git.js'; -import { runFullAnalysis } from '../core/run-analyze.js'; +import { runPipelineFromRepo } from '../core/ingestion/pipeline.js'; +import { initLbug, loadGraphToLbug, getLbugStats, executeQuery, executeWithReusedStatement, closeLbug, createFTSIndex, loadCachedEmbeddings } from '../core/lbug/lbug-adapter.js'; +// Embedding imports are lazy (dynamic import) so onnxruntime-node is never +// loaded when embeddings are not requested. This avoids crashes on Node +// versions whose ABI is not yet supported by the native binary (#89). +// disposeEmbedder intentionally not called — ONNX Runtime segfaults on cleanup (see #38) +import { getStoragePaths, saveMeta, loadMeta, addToGitignore, registerRepo, getGlobalRegistryPath, cleanupOldKuzuFiles } from '../storage/repo-manager.js'; +import { getCurrentCommit, getGitRoot, hasGitDir } from '../storage/git.js'; +import { generateAIContextFiles } from './ai-context.js'; +import { generateSkillFiles, type GeneratedSkillInfo } from './skill-gen.js'; import fs from 'fs/promises'; + const HEAP_MB = 8192; const HEAP_FLAG = `--max-old-space-size=${HEAP_MB}`; @@ -45,11 +48,35 @@ export interface AnalyzeOptions { embeddings?: boolean; skills?: boolean; verbose?: boolean; + /** Skip AGENTS.md and CLAUDE.md gitnexus block updates. */ + skipAgentsMd?: boolean; /** Index the folder even when no .git directory is present. */ skipGit?: boolean; } -export const analyzeCommand = async (inputPath?: string, options?: AnalyzeOptions) => { +/** Threshold: auto-skip embeddings for repos with more nodes than this */ +const EMBEDDING_NODE_LIMIT = 50_000; + +const PHASE_LABELS: Record = { + extracting: 'Scanning files', + structure: 'Building structure', + parsing: 'Parsing code', + imports: 'Resolving imports', + calls: 'Tracing calls', + heritage: 'Extracting inheritance', + communities: 'Detecting communities', + processes: 'Detecting processes', + complete: 'Pipeline complete', + lbug: 'Loading into LadybugDB', + fts: 'Creating search indexes', + embeddings: 'Generating embeddings', + done: 'Done', +}; + +export const analyzeCommand = async ( + inputPath?: string, + options?: AnalyzeOptions +) => { if (ensureHeap()) return; if (options?.verbose) { @@ -65,9 +92,7 @@ export const analyzeCommand = async (inputPath?: string, options?: AnalyzeOption const gitRoot = getGitRoot(process.cwd()); if (!gitRoot) { if (!options?.skipGit) { - console.log( - ' Not inside a git repository.\n Tip: pass --skip-git to index any folder without a .git directory.\n', - ); + console.log(' Not inside a git repository.\n Tip: pass --skip-git to index any folder without a .git directory.\n'); process.exitCode = 1; return; } @@ -80,83 +105,93 @@ export const analyzeCommand = async (inputPath?: string, options?: AnalyzeOption const repoHasGit = hasGitDir(repoPath); if (!repoHasGit && !options?.skipGit) { - console.log( - ' Not a git repository.\n Tip: pass --skip-git to index any folder without a .git directory.\n', - ); + console.log(' Not a git repository.\n Tip: pass --skip-git to index any folder without a .git directory.\n'); process.exitCode = 1; return; } if (!repoHasGit) { - console.log( - ' Warning: no .git directory found \u2014 commit-tracking and incremental updates disabled.\n', - ); + console.log(' Warning: no .git directory found \u2014 commit-tracking and incremental updates disabled.\n'); + } + + const { storagePath, lbugPath } = getStoragePaths(repoPath); + + // Clean up stale KuzuDB files from before the LadybugDB migration. + // If kuzu existed but lbug doesn't, we're doing a migration re-index — say so. + const kuzuResult = await cleanupOldKuzuFiles(storagePath); + if (kuzuResult.found && kuzuResult.needsReindex) { + console.log(' Migrating from KuzuDB to LadybugDB — rebuilding index...\n'); } - // KuzuDB migration cleanup is handled by runFullAnalysis internally. - // Note: --skills is handled after runFullAnalysis using the returned pipelineResult. + const currentCommit = repoHasGit ? getCurrentCommit(repoPath) : ''; + const existingMeta = await loadMeta(storagePath); + + if (existingMeta && !options?.force && !options?.skills && existingMeta.lastCommit === currentCommit) { + // Non-git folders have currentCommit = '' — always rebuild since we can't detect changes + if (currentCommit !== '') { + console.log(' Already up to date\n'); + return; + } + } if (process.env.GITNEXUS_NO_GITIGNORE) { - console.log( - ' GITNEXUS_NO_GITIGNORE is set — skipping .gitignore (still reading .gitnexusignore)\n', - ); + console.log(' GITNEXUS_NO_GITIGNORE is set — skipping .gitignore (still reading .gitnexusignore)\n'); } - // ── CLI progress bar setup ───────────────────────────────────────── - const bar = new cliProgress.SingleBar( - { - format: ' {bar} {percentage}% | {phase}', - barCompleteChar: '\u2588', - barIncompleteChar: '\u2591', - hideCursor: true, - barGlue: '', - autopadding: true, - clearOnComplete: false, - stopOnComplete: false, - }, - cliProgress.Presets.shades_grey, - ); + // Single progress bar for entire pipeline + const bar = new cliProgress.SingleBar({ + format: ' {bar} {percentage}% | {phase}', + barCompleteChar: '\u2588', + barIncompleteChar: '\u2591', + hideCursor: true, + barGlue: '', + autopadding: true, + clearOnComplete: false, + stopOnComplete: false, + }, cliProgress.Presets.shades_grey); bar.start(100, 0, { phase: 'Initializing...' }); - // Graceful SIGINT handling + // Graceful SIGINT handling — clean up resources and exit let aborted = false; const sigintHandler = () => { - if (aborted) process.exit(1); + if (aborted) process.exit(1); // Second Ctrl-C: force exit aborted = true; bar.stop(); console.log('\n Interrupted — cleaning up...'); - closeLbug() - .catch(() => {}) - .finally(() => process.exit(130)); + closeLbug().catch(() => {}).finally(() => process.exit(130)); }; process.on('SIGINT', sigintHandler); - // Route console output through bar.log() to prevent progress bar corruption + // Route all console output through bar.log() so the bar doesn't stamp itself + // multiple times when other code writes to stdout/stderr mid-render. const origLog = console.log.bind(console); const origWarn = console.warn.bind(console); const origError = console.error.bind(console); const barLog = (...args: any[]) => { + // Clear the bar line, print the message, then let the next bar.update redraw process.stdout.write('\x1b[2K\r'); - origLog(args.map((a) => (typeof a === 'string' ? a : String(a))).join(' ')); + origLog(args.map(a => (typeof a === 'string' ? a : String(a))).join(' ')); }; console.log = barLog; console.warn = barLog; console.error = barLog; - // Track elapsed time per phase + // Track elapsed time per phase — both updateBar and the interval use the + // same format so they don't flicker against each other. let lastPhaseLabel = 'Initializing...'; let phaseStart = Date.now(); + /** Update bar with phase label + elapsed seconds (shown after 3s). */ const updateBar = (value: number, phaseLabel: string) => { - if (phaseLabel !== lastPhaseLabel) { - lastPhaseLabel = phaseLabel; - phaseStart = Date.now(); - } + if (phaseLabel !== lastPhaseLabel) { lastPhaseLabel = phaseLabel; phaseStart = Date.now(); } const elapsed = Math.round((Date.now() - phaseStart) / 1000); const display = elapsed >= 3 ? `${phaseLabel} (${elapsed}s)` : phaseLabel; bar.update(value, { phase: display }); }; + // Tick elapsed seconds for phases with infrequent progress callbacks + // (e.g. CSV streaming, FTS indexing). Uses the same display format as + // updateBar so there's no flickering. const elapsedTimer = setInterval(() => { const elapsed = Math.round((Date.now() - phaseStart) / 1000); if (elapsed >= 3) { @@ -164,125 +199,235 @@ export const analyzeCommand = async (inputPath?: string, options?: AnalyzeOption } }, 1000); - const t0 = Date.now(); + const t0Global = Date.now(); - // ── Run shared analysis orchestrator ─────────────────────────────── - try { - const result = await runFullAnalysis( - repoPath, - { - force: options?.force || options?.skills, - embeddings: options?.embeddings, - skipGit: options?.skipGit, - }, - { - onProgress: (_phase, percent, message) => { - updateBar(percent, message); - }, - onLog: barLog, - }, - ); + // ── Cache embeddings from existing index before rebuild ──────────── + let cachedEmbeddingNodeIds = new Set(); + let cachedEmbeddings: Array<{ nodeId: string; embedding: number[] }> = []; - if (result.alreadyUpToDate) { - clearInterval(elapsedTimer); - process.removeListener('SIGINT', sigintHandler); - console.log = origLog; - console.warn = origWarn; - console.error = origError; - bar.stop(); - console.log(' Already up to date\n'); - // Safe to return without process.exit(0) — the early-return path in - // runFullAnalysis never opens LadybugDB, so no native handles prevent exit. - return; + if (options?.embeddings && existingMeta && !options?.force) { + try { + updateBar(0, 'Caching embeddings...'); + await initLbug(lbugPath); + const cached = await loadCachedEmbeddings(); + cachedEmbeddingNodeIds = cached.embeddingNodeIds; + cachedEmbeddings = cached.embeddings; + await closeLbug(); + } catch { + try { await closeLbug(); } catch {} } + } + + // ── Phase 1: Full Pipeline (0–60%) ───────────────────────────────── + const pipelineResult = await runPipelineFromRepo(repoPath, (progress) => { + const phaseLabel = PHASE_LABELS[progress.phase] || progress.phase; + const scaled = Math.round(progress.percent * 0.6); + updateBar(scaled, phaseLabel); + }); + + // ── Phase 2: LadybugDB (60–85%) ────────────────────────────────────── + updateBar(60, 'Loading into LadybugDB...'); - // Skill generation (CLI-only, uses pipeline result from analysis) - if (options?.skills && result.pipelineResult) { - updateBar(99, 'Generating skill files...'); - try { - const { generateSkillFiles } = await import('./skill-gen.js'); - const { generateAIContextFiles } = await import('./ai-context.js'); - const skillResult = await generateSkillFiles( - repoPath, - result.repoName, - result.pipelineResult, - ); - if (skillResult.skills.length > 0) { - barLog(` Generated ${skillResult.skills.length} skill files`); - // Re-generate AI context files now that we have skill info - const s = result.stats; - const communityResult = result.pipelineResult?.communityResult; - let aggregatedClusterCount = 0; - if (communityResult?.communities) { - const groups = new Map(); - for (const c of communityResult.communities) { - const label = c.heuristicLabel || c.label || 'Unknown'; - groups.set(label, (groups.get(label) || 0) + c.symbolCount); - } - aggregatedClusterCount = Array.from(groups.values()).filter( - (count: number) => count >= 5, - ).length; - } - const { storagePath: sp } = getStoragePaths(repoPath); - await generateAIContextFiles( - repoPath, - sp, - result.repoName, - { - files: s.files ?? 0, - nodes: s.nodes ?? 0, - edges: s.edges ?? 0, - communities: s.communities, - clusters: aggregatedClusterCount, - processes: s.processes, - }, - skillResult.skills, + await closeLbug(); + const lbugFiles = [lbugPath, `${lbugPath}.wal`, `${lbugPath}.lock`]; + for (const f of lbugFiles) { + try { await fs.rm(f, { recursive: true, force: true }); } catch {} + } + + const t0Lbug = Date.now(); + await initLbug(lbugPath); + let lbugMsgCount = 0; + const lbugResult = await loadGraphToLbug(pipelineResult.graph, pipelineResult.repoPath, storagePath, (msg) => { + lbugMsgCount++; + const progress = Math.min(84, 60 + Math.round((lbugMsgCount / (lbugMsgCount + 10)) * 24)); + updateBar(progress, msg); + }); + const lbugTime = ((Date.now() - t0Lbug) / 1000).toFixed(1); + const lbugWarnings = lbugResult.warnings; + + // ── Phase 3: FTS (85–90%) ───────────────────────────────────────── + updateBar(85, 'Creating search indexes...'); + + const t0Fts = Date.now(); + try { + await createFTSIndex('File', 'file_fts', ['name', 'content']); + await createFTSIndex('Function', 'function_fts', ['name', 'content']); + await createFTSIndex('Class', 'class_fts', ['name', 'content']); + await createFTSIndex('Method', 'method_fts', ['name', 'content']); + await createFTSIndex('Interface', 'interface_fts', ['name', 'content']); + } catch (e: any) { + // Non-fatal — FTS is best-effort + } + const ftsTime = ((Date.now() - t0Fts) / 1000).toFixed(1); + + // ── Phase 3.5: Re-insert cached embeddings ──────────────────────── + if (cachedEmbeddings.length > 0) { + // Check if cached embedding dimensions match current schema + const cachedDims = cachedEmbeddings[0].embedding.length; + const { EMBEDDING_DIMS } = await import('../core/lbug/schema.js'); + if (cachedDims !== EMBEDDING_DIMS) { + // Dimensions changed (e.g. switched embedding model) — discard cache and re-embed all + console.error(`⚠️ Embedding dimensions changed (${cachedDims}d → ${EMBEDDING_DIMS}d), discarding cache`); + cachedEmbeddings = []; + cachedEmbeddingNodeIds = new Set(); + } else { + updateBar(88, `Restoring ${cachedEmbeddings.length} cached embeddings...`); + const EMBED_BATCH = 200; + for (let i = 0; i < cachedEmbeddings.length; i += EMBED_BATCH) { + const batch = cachedEmbeddings.slice(i, i + EMBED_BATCH); + const paramsList = batch.map(e => ({ nodeId: e.nodeId, embedding: e.embedding })); + try { + await executeWithReusedStatement( + `CREATE (e:CodeEmbedding {nodeId: $nodeId, embedding: $embedding})`, + paramsList, ); - } - } catch { - /* best-effort */ + } catch { /* some may fail if node was removed, that's fine */ } } } + } - const totalTime = ((Date.now() - t0) / 1000).toFixed(1); + // ── Phase 4: Embeddings (90–98%) ────────────────────────────────── + const stats = await getLbugStats(); + let embeddingTime = '0.0'; + let embeddingSkipped = true; + let embeddingSkipReason = 'off (use --embeddings to enable)'; - clearInterval(elapsedTimer); - process.removeListener('SIGINT', sigintHandler); + if (options?.embeddings) { + if (stats.nodes > EMBEDDING_NODE_LIMIT) { + embeddingSkipReason = `skipped (${stats.nodes.toLocaleString()} nodes > ${EMBEDDING_NODE_LIMIT.toLocaleString()} limit)`; + } else { + embeddingSkipped = false; + } + } - console.log = origLog; - console.warn = origWarn; - console.error = origError; + if (!embeddingSkipped) { + const { isHttpMode } = await import('../core/embeddings/http-client.js'); + const httpMode = isHttpMode(); + updateBar(90, httpMode ? 'Connecting to embedding endpoint...' : 'Loading embedding model...'); + const t0Emb = Date.now(); + const { runEmbeddingPipeline } = await import('../core/embeddings/embedding-pipeline.js'); + await runEmbeddingPipeline( + executeQuery, + executeWithReusedStatement, + (progress) => { + const scaled = 90 + Math.round((progress.percent / 100) * 8); + const label = progress.phase === 'loading-model' + ? (httpMode ? 'Connecting to embedding endpoint...' : 'Loading embedding model...') + : `Embedding ${progress.nodesProcessed || 0}/${progress.totalNodes || '?'}`; + updateBar(scaled, label); + }, + {}, + cachedEmbeddingNodeIds.size > 0 ? cachedEmbeddingNodeIds : undefined, + ); + embeddingTime = ((Date.now() - t0Emb) / 1000).toFixed(1); + } - bar.update(100, { phase: 'Done' }); - bar.stop(); + // ── Phase 5: Finalize (98–100%) ─────────────────────────────────── + updateBar(98, 'Saving metadata...'); - // ── Summary ──────────────────────────────────────────────────── - const s = result.stats; - console.log(`\n Repository indexed successfully (${totalTime}s)\n`); - console.log( - ` ${(s.nodes ?? 0).toLocaleString()} nodes | ${(s.edges ?? 0).toLocaleString()} edges | ${s.communities ?? 0} clusters | ${s.processes ?? 0} flows`, - ); - console.log(` ${repoPath}`); + // Count embeddings in the index (cached + newly generated) + let embeddingCount = 0; + try { + const embResult = await executeQuery(`MATCH (e:CodeEmbedding) RETURN count(e) AS cnt`); + embeddingCount = embResult?.[0]?.cnt ?? 0; + } catch { /* table may not exist if embeddings never ran */ } + + const meta = { + repoPath, + lastCommit: currentCommit, + indexedAt: new Date().toISOString(), + stats: { + files: pipelineResult.totalFileCount, + nodes: stats.nodes, + edges: stats.edges, + communities: pipelineResult.communityResult?.stats.totalCommunities, + processes: pipelineResult.processResult?.stats.totalProcesses, + embeddings: embeddingCount, + }, + }; + await saveMeta(storagePath, meta); + await registerRepo(repoPath, meta); + // Only attempt to update .gitignore when a .git directory is present. + // Use hasGitDir (filesystem check) rather than git CLI subprocess + // so we skip correctly for --skip-git folders even if git CLI is available. + if (hasGitDir(repoPath)) { + await addToGitignore(repoPath); + } - try { - await fs.access(getGlobalRegistryPath()); - } catch { - console.log('\n Tip: Run `gitnexus setup` to configure MCP for your editor.'); + const projectName = path.basename(repoPath); + let aggregatedClusterCount = 0; + if (pipelineResult.communityResult?.communities) { + const groups = new Map(); + for (const c of pipelineResult.communityResult.communities) { + const label = c.heuristicLabel || c.label || 'Unknown'; + groups.set(label, (groups.get(label) || 0) + c.symbolCount); } + aggregatedClusterCount = Array.from(groups.values()).filter(count => count >= 5).length; + } - console.log(''); - } catch (err: any) { - clearInterval(elapsedTimer); - process.removeListener('SIGINT', sigintHandler); - console.log = origLog; - console.warn = origWarn; - console.error = origError; - bar.stop(); - console.error(`\n Analysis failed: ${err.message}\n`); - process.exitCode = 1; - return; + let generatedSkills: GeneratedSkillInfo[] = []; + if (options?.skills && pipelineResult.communityResult) { + updateBar(99, 'Generating skill files...'); + const skillResult = await generateSkillFiles(repoPath, projectName, pipelineResult); + generatedSkills = skillResult.skills; } + const aiContext = await generateAIContextFiles(repoPath, storagePath, projectName, { + files: pipelineResult.totalFileCount, + nodes: stats.nodes, + edges: stats.edges, + communities: pipelineResult.communityResult?.stats.totalCommunities, + clusters: aggregatedClusterCount, + processes: pipelineResult.processResult?.stats.totalProcesses, + }, generatedSkills, { + skipAgentsMd: options?.skipAgentsMd, + }); + + await closeLbug(); + // Note: we intentionally do NOT call disposeEmbedder() here. + // ONNX Runtime's native cleanup segfaults on macOS and some Linux configs. + // Since the process exits immediately after, Node.js reclaims everything. + + const totalTime = ((Date.now() - t0Global) / 1000).toFixed(1); + + clearInterval(elapsedTimer); + process.removeListener('SIGINT', sigintHandler); + + console.log = origLog; + console.warn = origWarn; + console.error = origError; + + bar.update(100, { phase: 'Done' }); + bar.stop(); + + // ── Summary ─────────────────────────────────────────────────────── + const embeddingsCached = cachedEmbeddings.length > 0; + console.log(`\n Repository indexed successfully (${totalTime}s)${embeddingsCached ? ` [${cachedEmbeddings.length} embeddings cached]` : ''}\n`); + console.log(` ${stats.nodes.toLocaleString()} nodes | ${stats.edges.toLocaleString()} edges | ${pipelineResult.communityResult?.stats.totalCommunities || 0} clusters | ${pipelineResult.processResult?.stats.totalProcesses || 0} flows`); + console.log(` LadybugDB ${lbugTime}s | FTS ${ftsTime}s | Embeddings ${embeddingSkipped ? embeddingSkipReason : embeddingTime + 's'}`); + console.log(` ${repoPath}`); + + if (aiContext.files.length > 0) { + console.log(` Context: ${aiContext.files.join(', ')}`); + } + + // Show a quiet summary if some edge types needed fallback insertion + if (lbugWarnings.length > 0) { + const totalFallback = lbugWarnings.reduce((sum, w) => { + const m = w.match(/\((\d+) edges\)/); + return sum + (m ? parseInt(m[1]) : 0); + }, 0); + console.log(` Note: ${totalFallback} edges across ${lbugWarnings.length} types inserted via fallback (schema will be updated in next release)`); + } + + try { + await fs.access(getGlobalRegistryPath()); + } catch { + console.log('\n Tip: Run `gitnexus setup` to configure MCP for your editor.'); + } + + console.log(''); + // LadybugDB's native module holds open handles that prevent Node from exiting. // ONNX Runtime also registers native atexit hooks that segfault on some // platforms (#38, #40). Force-exit to ensure clean termination. diff --git a/gitnexus/src/cli/index.ts b/gitnexus/src/cli/index.ts index 7ccdf16de1..0cb338ad11 100644 --- a/gitnexus/src/cli/index.ts +++ b/gitnexus/src/cli/index.ts @@ -24,6 +24,7 @@ program .option('-f, --force', 'Force full re-index even if up to date') .option('--embeddings', 'Enable embedding generation for semantic search (off by default)') .option('--skills', 'Generate repo-specific skill files from detected communities') + .option('--skip-agents-md', 'Skip updating the gitnexus section in AGENTS.md and CLAUDE.md') .option('--skip-git', 'Index a folder without requiring a .git directory') .option('-v, --verbose', 'Enable verbose ingestion warnings (default: false)') .addHelpText( diff --git a/gitnexus/test/unit/ai-context.test.ts b/gitnexus/test/unit/ai-context.test.ts index 489eac9a1e..0a9f52a687 100644 --- a/gitnexus/test/unit/ai-context.test.ts +++ b/gitnexus/test/unit/ai-context.test.ts @@ -79,4 +79,32 @@ describe('generateAIContextFiles', () => { // Skills dir may not be created if skills source doesn't exist in test context } }); + + it('preserves manual AGENTS.md and CLAUDE.md edits when skipAgentsMd is enabled', async () => { + const stats = { nodes: 42, edges: 84, processes: 3 }; + const agentsPath = path.join(tmpDir, 'AGENTS.md'); + const claudePath = path.join(tmpDir, 'CLAUDE.md'); + const agentsContent = '# AGENTS\n\nCustom manual instructions only\n'; + const claudeContent = '# CLAUDE\n\nCustom manual instructions only\n'; + + await fs.writeFile(agentsPath, agentsContent, 'utf-8'); + await fs.writeFile(claudePath, claudeContent, 'utf-8'); + + const result = await generateAIContextFiles( + tmpDir, + storagePath, + 'TestProject', + stats, + undefined, + { skipAgentsMd: true }, + ); + + expect(result.files).toContain('AGENTS.md (skipped via --skip-agents-md)'); + expect(result.files).toContain('CLAUDE.md (skipped via --skip-agents-md)'); + + const agentsAfter = await fs.readFile(agentsPath, 'utf-8'); + const claudeAfter = await fs.readFile(claudePath, 'utf-8'); + expect(agentsAfter).toBe(agentsContent); + expect(claudeAfter).toBe(claudeContent); + }); }); diff --git a/gitnexus/test/unit/skip-git-cli.test.ts b/gitnexus/test/unit/skip-git-cli.test.ts index 73707fe61b..740c8265a7 100644 --- a/gitnexus/test/unit/skip-git-cli.test.ts +++ b/gitnexus/test/unit/skip-git-cli.test.ts @@ -6,7 +6,7 @@ import fs from 'fs'; describe('--skip-git CLI flag', () => { it('Commander maps --skip-git to options.skipGit (not --no-git inversion)', () => { - // Verify the CLI defines --skip-git, not --no-git + // Verify the CLI defines --skip-git and --skip-agents-md in analyze help. const helpOutput = execSync('node dist/cli/index.js analyze --help', { cwd: path.resolve(__dirname, '../..'), encoding: 'utf8', @@ -14,6 +14,7 @@ describe('--skip-git CLI flag', () => { }); expect(helpOutput).toContain('--skip-git'); + expect(helpOutput).toContain('--skip-agents-md'); expect(helpOutput).not.toContain('--no-git'); }); From fddb24db1f05ff1de7b933e01ded8b8df7a10ae7 Mon Sep 17 00:00:00 2001 From: Gabe Campbell Date: Sat, 28 Mar 2026 12:43:36 -0500 Subject: [PATCH 2/6] fix: apply prettier formatting --- gitnexus/src/cli/ai-context.ts | 47 +++++---- gitnexus/src/cli/analyze.ts | 175 +++++++++++++++++++++++---------- 2 files changed, 151 insertions(+), 71 deletions(-) diff --git a/gitnexus/src/cli/ai-context.ts b/gitnexus/src/cli/ai-context.ts index ec022bcb82..e9d8accdd6 100644 --- a/gitnexus/src/cli/ai-context.ts +++ b/gitnexus/src/cli/ai-context.ts @@ -1,6 +1,6 @@ /** * AI Context Generator - * + * * Creates AGENTS.md and CLAUDE.md with full inline GitNexus context. * AGENTS.md is the standard read by Cursor, Windsurf, OpenCode, Codex, Cline, etc. * CLAUDE.md is for Claude Code which only reads that file. @@ -20,7 +20,7 @@ interface RepoStats { nodes?: number; edges?: number; communities?: number; - clusters?: number; // Aggregated cluster count (what tools show) + clusters?: number; // Aggregated cluster count (what tools show) processes?: number; } @@ -42,12 +42,20 @@ const GITNEXUS_END_MARKER = ''; * - Exact tool commands with parameters — vague directives get ignored * - Self-review checklist — forces model to verify its own work */ -function generateGitNexusContent(projectName: string, stats: RepoStats, generatedSkills?: GeneratedSkillInfo[]): string { - const generatedRows = (generatedSkills && generatedSkills.length > 0) - ? generatedSkills.map(s => - `| Work in the ${s.label} area (${s.symbolCount} symbols) | \`.claude/skills/generated/${s.name}/SKILL.md\` |` - ).join('\n') - : ''; +function generateGitNexusContent( + projectName: string, + stats: RepoStats, + generatedSkills?: GeneratedSkillInfo[], +): string { + const generatedRows = + generatedSkills && generatedSkills.length > 0 + ? generatedSkills + .map( + (s) => + `| Work in the ${s.label} area (${s.symbolCount} symbols) | \`.claude/skills/generated/${s.name}/SKILL.md\` |`, + ) + .join('\n') + : ''; const skillsTable = `| Task | Read this skill file | |------|---------------------| @@ -154,7 +162,6 @@ ${skillsTable} ${GITNEXUS_END_MARKER}`; } - /** * Check if a file exists */ @@ -175,7 +182,7 @@ async function fileExists(filePath: string): Promise { */ async function upsertGitNexusSection( filePath: string, - content: string + content: string, ): Promise<'created' | 'updated' | 'appended'> { const exists = await fileExists(filePath); @@ -217,27 +224,33 @@ async function installSkills(repoPath: string): Promise { const skills = [ { name: 'gitnexus-exploring', - description: 'Use when the user asks how code works, wants to understand architecture, trace execution flows, or explore unfamiliar parts of the codebase. Examples: "How does X work?", "What calls this function?", "Show me the auth flow"', + description: + 'Use when the user asks how code works, wants to understand architecture, trace execution flows, or explore unfamiliar parts of the codebase. Examples: "How does X work?", "What calls this function?", "Show me the auth flow"', }, { name: 'gitnexus-debugging', - description: 'Use when the user is debugging a bug, tracing an error, or asking why something fails. Examples: "Why is X failing?", "Where does this error come from?", "Trace this bug"', + description: + 'Use when the user is debugging a bug, tracing an error, or asking why something fails. Examples: "Why is X failing?", "Where does this error come from?", "Trace this bug"', }, { name: 'gitnexus-impact-analysis', - description: 'Use when the user wants to know what will break if they change something, or needs safety analysis before editing code. Examples: "Is it safe to change X?", "What depends on this?", "What will break?"', + description: + 'Use when the user wants to know what will break if they change something, or needs safety analysis before editing code. Examples: "Is it safe to change X?", "What depends on this?", "What will break?"', }, { name: 'gitnexus-refactoring', - description: 'Use when the user wants to rename, extract, split, move, or restructure code safely. Examples: "Rename this function", "Extract this into a module", "Refactor this class", "Move this to a separate file"', + description: + 'Use when the user wants to rename, extract, split, move, or restructure code safely. Examples: "Rename this function", "Extract this into a module", "Refactor this class", "Move this to a separate file"', }, { name: 'gitnexus-guide', - description: 'Use when the user asks about GitNexus itself — available tools, how to query the knowledge graph, MCP resources, graph schema, or workflow reference. Examples: "What GitNexus tools are available?", "How do I use GitNexus?"', + description: + 'Use when the user asks about GitNexus itself — available tools, how to query the knowledge graph, MCP resources, graph schema, or workflow reference. Examples: "What GitNexus tools are available?", "How do I use GitNexus?"', }, { name: 'gitnexus-cli', - description: 'Use when the user needs to run GitNexus CLI commands like analyze/index a repo, check status, clean the index, generate a wiki, or list indexed repos. Examples: "Index this repo", "Reanalyze the codebase", "Generate a wiki"', + description: + 'Use when the user needs to run GitNexus CLI commands like analyze/index a repo, check status, clean the index, generate a wiki, or list indexed repos. Examples: "Index this repo", "Reanalyze the codebase", "Generate a wiki"', }, ]; @@ -290,7 +303,7 @@ export async function generateAIContextFiles( projectName: string, stats: RepoStats, generatedSkills?: GeneratedSkillInfo[], - options?: AIContextOptions + options?: AIContextOptions, ): Promise<{ files: string[] }> { const content = generateGitNexusContent(projectName, stats, generatedSkills); const createdFiles: string[] = []; diff --git a/gitnexus/src/cli/analyze.ts b/gitnexus/src/cli/analyze.ts index 75c0d029f6..5fdad0df77 100644 --- a/gitnexus/src/cli/analyze.ts +++ b/gitnexus/src/cli/analyze.ts @@ -9,18 +9,34 @@ import { execFileSync } from 'child_process'; import v8 from 'v8'; import cliProgress from 'cli-progress'; import { runPipelineFromRepo } from '../core/ingestion/pipeline.js'; -import { initLbug, loadGraphToLbug, getLbugStats, executeQuery, executeWithReusedStatement, closeLbug, createFTSIndex, loadCachedEmbeddings } from '../core/lbug/lbug-adapter.js'; +import { + initLbug, + loadGraphToLbug, + getLbugStats, + executeQuery, + executeWithReusedStatement, + closeLbug, + createFTSIndex, + loadCachedEmbeddings, +} from '../core/lbug/lbug-adapter.js'; // Embedding imports are lazy (dynamic import) so onnxruntime-node is never // loaded when embeddings are not requested. This avoids crashes on Node // versions whose ABI is not yet supported by the native binary (#89). // disposeEmbedder intentionally not called — ONNX Runtime segfaults on cleanup (see #38) -import { getStoragePaths, saveMeta, loadMeta, addToGitignore, registerRepo, getGlobalRegistryPath, cleanupOldKuzuFiles } from '../storage/repo-manager.js'; +import { + getStoragePaths, + saveMeta, + loadMeta, + addToGitignore, + registerRepo, + getGlobalRegistryPath, + cleanupOldKuzuFiles, +} from '../storage/repo-manager.js'; import { getCurrentCommit, getGitRoot, hasGitDir } from '../storage/git.js'; import { generateAIContextFiles } from './ai-context.js'; import { generateSkillFiles, type GeneratedSkillInfo } from './skill-gen.js'; import fs from 'fs/promises'; - const HEAP_MB = 8192; const HEAP_FLAG = `--max-old-space-size=${HEAP_MB}`; @@ -73,10 +89,7 @@ const PHASE_LABELS: Record = { done: 'Done', }; -export const analyzeCommand = async ( - inputPath?: string, - options?: AnalyzeOptions -) => { +export const analyzeCommand = async (inputPath?: string, options?: AnalyzeOptions) => { if (ensureHeap()) return; if (options?.verbose) { @@ -92,7 +105,9 @@ export const analyzeCommand = async ( const gitRoot = getGitRoot(process.cwd()); if (!gitRoot) { if (!options?.skipGit) { - console.log(' Not inside a git repository.\n Tip: pass --skip-git to index any folder without a .git directory.\n'); + console.log( + ' Not inside a git repository.\n Tip: pass --skip-git to index any folder without a .git directory.\n', + ); process.exitCode = 1; return; } @@ -105,12 +120,16 @@ export const analyzeCommand = async ( const repoHasGit = hasGitDir(repoPath); if (!repoHasGit && !options?.skipGit) { - console.log(' Not a git repository.\n Tip: pass --skip-git to index any folder without a .git directory.\n'); + console.log( + ' Not a git repository.\n Tip: pass --skip-git to index any folder without a .git directory.\n', + ); process.exitCode = 1; return; } if (!repoHasGit) { - console.log(' Warning: no .git directory found \u2014 commit-tracking and incremental updates disabled.\n'); + console.log( + ' Warning: no .git directory found \u2014 commit-tracking and incremental updates disabled.\n', + ); } const { storagePath, lbugPath } = getStoragePaths(repoPath); @@ -125,7 +144,12 @@ export const analyzeCommand = async ( const currentCommit = repoHasGit ? getCurrentCommit(repoPath) : ''; const existingMeta = await loadMeta(storagePath); - if (existingMeta && !options?.force && !options?.skills && existingMeta.lastCommit === currentCommit) { + if ( + existingMeta && + !options?.force && + !options?.skills && + existingMeta.lastCommit === currentCommit + ) { // Non-git folders have currentCommit = '' — always rebuild since we can't detect changes if (currentCommit !== '') { console.log(' Already up to date\n'); @@ -134,20 +158,25 @@ export const analyzeCommand = async ( } if (process.env.GITNEXUS_NO_GITIGNORE) { - console.log(' GITNEXUS_NO_GITIGNORE is set — skipping .gitignore (still reading .gitnexusignore)\n'); + console.log( + ' GITNEXUS_NO_GITIGNORE is set — skipping .gitignore (still reading .gitnexusignore)\n', + ); } // Single progress bar for entire pipeline - const bar = new cliProgress.SingleBar({ - format: ' {bar} {percentage}% | {phase}', - barCompleteChar: '\u2588', - barIncompleteChar: '\u2591', - hideCursor: true, - barGlue: '', - autopadding: true, - clearOnComplete: false, - stopOnComplete: false, - }, cliProgress.Presets.shades_grey); + const bar = new cliProgress.SingleBar( + { + format: ' {bar} {percentage}% | {phase}', + barCompleteChar: '\u2588', + barIncompleteChar: '\u2591', + hideCursor: true, + barGlue: '', + autopadding: true, + clearOnComplete: false, + stopOnComplete: false, + }, + cliProgress.Presets.shades_grey, + ); bar.start(100, 0, { phase: 'Initializing...' }); @@ -158,7 +187,9 @@ export const analyzeCommand = async ( aborted = true; bar.stop(); console.log('\n Interrupted — cleaning up...'); - closeLbug().catch(() => {}).finally(() => process.exit(130)); + closeLbug() + .catch(() => {}) + .finally(() => process.exit(130)); }; process.on('SIGINT', sigintHandler); @@ -170,7 +201,7 @@ export const analyzeCommand = async ( const barLog = (...args: any[]) => { // Clear the bar line, print the message, then let the next bar.update redraw process.stdout.write('\x1b[2K\r'); - origLog(args.map(a => (typeof a === 'string' ? a : String(a))).join(' ')); + origLog(args.map((a) => (typeof a === 'string' ? a : String(a))).join(' ')); }; console.log = barLog; console.warn = barLog; @@ -183,7 +214,10 @@ export const analyzeCommand = async ( /** Update bar with phase label + elapsed seconds (shown after 3s). */ const updateBar = (value: number, phaseLabel: string) => { - if (phaseLabel !== lastPhaseLabel) { lastPhaseLabel = phaseLabel; phaseStart = Date.now(); } + if (phaseLabel !== lastPhaseLabel) { + lastPhaseLabel = phaseLabel; + phaseStart = Date.now(); + } const elapsed = Math.round((Date.now() - phaseStart) / 1000); const display = elapsed >= 3 ? `${phaseLabel} (${elapsed}s)` : phaseLabel; bar.update(value, { phase: display }); @@ -214,7 +248,9 @@ export const analyzeCommand = async ( cachedEmbeddings = cached.embeddings; await closeLbug(); } catch { - try { await closeLbug(); } catch {} + try { + await closeLbug(); + } catch {} } } @@ -231,17 +267,24 @@ export const analyzeCommand = async ( await closeLbug(); const lbugFiles = [lbugPath, `${lbugPath}.wal`, `${lbugPath}.lock`]; for (const f of lbugFiles) { - try { await fs.rm(f, { recursive: true, force: true }); } catch {} + try { + await fs.rm(f, { recursive: true, force: true }); + } catch {} } const t0Lbug = Date.now(); await initLbug(lbugPath); let lbugMsgCount = 0; - const lbugResult = await loadGraphToLbug(pipelineResult.graph, pipelineResult.repoPath, storagePath, (msg) => { - lbugMsgCount++; - const progress = Math.min(84, 60 + Math.round((lbugMsgCount / (lbugMsgCount + 10)) * 24)); - updateBar(progress, msg); - }); + const lbugResult = await loadGraphToLbug( + pipelineResult.graph, + pipelineResult.repoPath, + storagePath, + (msg) => { + lbugMsgCount++; + const progress = Math.min(84, 60 + Math.round((lbugMsgCount / (lbugMsgCount + 10)) * 24)); + updateBar(progress, msg); + }, + ); const lbugTime = ((Date.now() - t0Lbug) / 1000).toFixed(1); const lbugWarnings = lbugResult.warnings; @@ -267,7 +310,9 @@ export const analyzeCommand = async ( const { EMBEDDING_DIMS } = await import('../core/lbug/schema.js'); if (cachedDims !== EMBEDDING_DIMS) { // Dimensions changed (e.g. switched embedding model) — discard cache and re-embed all - console.error(`⚠️ Embedding dimensions changed (${cachedDims}d → ${EMBEDDING_DIMS}d), discarding cache`); + console.error( + `⚠️ Embedding dimensions changed (${cachedDims}d → ${EMBEDDING_DIMS}d), discarding cache`, + ); cachedEmbeddings = []; cachedEmbeddingNodeIds = new Set(); } else { @@ -275,13 +320,15 @@ export const analyzeCommand = async ( const EMBED_BATCH = 200; for (let i = 0; i < cachedEmbeddings.length; i += EMBED_BATCH) { const batch = cachedEmbeddings.slice(i, i + EMBED_BATCH); - const paramsList = batch.map(e => ({ nodeId: e.nodeId, embedding: e.embedding })); + const paramsList = batch.map((e) => ({ nodeId: e.nodeId, embedding: e.embedding })); try { await executeWithReusedStatement( `CREATE (e:CodeEmbedding {nodeId: $nodeId, embedding: $embedding})`, paramsList, ); - } catch { /* some may fail if node was removed, that's fine */ } + } catch { + /* some may fail if node was removed, that's fine */ + } } } } @@ -311,9 +358,12 @@ export const analyzeCommand = async ( executeWithReusedStatement, (progress) => { const scaled = 90 + Math.round((progress.percent / 100) * 8); - const label = progress.phase === 'loading-model' - ? (httpMode ? 'Connecting to embedding endpoint...' : 'Loading embedding model...') - : `Embedding ${progress.nodesProcessed || 0}/${progress.totalNodes || '?'}`; + const label = + progress.phase === 'loading-model' + ? httpMode + ? 'Connecting to embedding endpoint...' + : 'Loading embedding model...' + : `Embedding ${progress.nodesProcessed || 0}/${progress.totalNodes || '?'}`; updateBar(scaled, label); }, {}, @@ -330,7 +380,9 @@ export const analyzeCommand = async ( try { const embResult = await executeQuery(`MATCH (e:CodeEmbedding) RETURN count(e) AS cnt`); embeddingCount = embResult?.[0]?.cnt ?? 0; - } catch { /* table may not exist if embeddings never ran */ } + } catch { + /* table may not exist if embeddings never ran */ + } const meta = { repoPath, @@ -362,7 +414,7 @@ export const analyzeCommand = async ( const label = c.heuristicLabel || c.label || 'Unknown'; groups.set(label, (groups.get(label) || 0) + c.symbolCount); } - aggregatedClusterCount = Array.from(groups.values()).filter(count => count >= 5).length; + aggregatedClusterCount = Array.from(groups.values()).filter((count) => count >= 5).length; } let generatedSkills: GeneratedSkillInfo[] = []; @@ -372,16 +424,23 @@ export const analyzeCommand = async ( generatedSkills = skillResult.skills; } - const aiContext = await generateAIContextFiles(repoPath, storagePath, projectName, { - files: pipelineResult.totalFileCount, - nodes: stats.nodes, - edges: stats.edges, - communities: pipelineResult.communityResult?.stats.totalCommunities, - clusters: aggregatedClusterCount, - processes: pipelineResult.processResult?.stats.totalProcesses, - }, generatedSkills, { - skipAgentsMd: options?.skipAgentsMd, - }); + const aiContext = await generateAIContextFiles( + repoPath, + storagePath, + projectName, + { + files: pipelineResult.totalFileCount, + nodes: stats.nodes, + edges: stats.edges, + communities: pipelineResult.communityResult?.stats.totalCommunities, + clusters: aggregatedClusterCount, + processes: pipelineResult.processResult?.stats.totalProcesses, + }, + generatedSkills, + { + skipAgentsMd: options?.skipAgentsMd, + }, + ); await closeLbug(); // Note: we intentionally do NOT call disposeEmbedder() here. @@ -402,9 +461,15 @@ export const analyzeCommand = async ( // ── Summary ─────────────────────────────────────────────────────── const embeddingsCached = cachedEmbeddings.length > 0; - console.log(`\n Repository indexed successfully (${totalTime}s)${embeddingsCached ? ` [${cachedEmbeddings.length} embeddings cached]` : ''}\n`); - console.log(` ${stats.nodes.toLocaleString()} nodes | ${stats.edges.toLocaleString()} edges | ${pipelineResult.communityResult?.stats.totalCommunities || 0} clusters | ${pipelineResult.processResult?.stats.totalProcesses || 0} flows`); - console.log(` LadybugDB ${lbugTime}s | FTS ${ftsTime}s | Embeddings ${embeddingSkipped ? embeddingSkipReason : embeddingTime + 's'}`); + console.log( + `\n Repository indexed successfully (${totalTime}s)${embeddingsCached ? ` [${cachedEmbeddings.length} embeddings cached]` : ''}\n`, + ); + console.log( + ` ${stats.nodes.toLocaleString()} nodes | ${stats.edges.toLocaleString()} edges | ${pipelineResult.communityResult?.stats.totalCommunities || 0} clusters | ${pipelineResult.processResult?.stats.totalProcesses || 0} flows`, + ); + console.log( + ` LadybugDB ${lbugTime}s | FTS ${ftsTime}s | Embeddings ${embeddingSkipped ? embeddingSkipReason : embeddingTime + 's'}`, + ); console.log(` ${repoPath}`); if (aiContext.files.length > 0) { @@ -417,7 +482,9 @@ export const analyzeCommand = async ( const m = w.match(/\((\d+) edges\)/); return sum + (m ? parseInt(m[1]) : 0); }, 0); - console.log(` Note: ${totalFallback} edges across ${lbugWarnings.length} types inserted via fallback (schema will be updated in next release)`); + console.log( + ` Note: ${totalFallback} edges across ${lbugWarnings.length} types inserted via fallback (schema will be updated in next release)`, + ); } try { From 71fa32ad78355c8405a358e908963e3eefffaaa3 Mon Sep 17 00:00:00 2001 From: Gabe Campbell Date: Wed, 25 Mar 2026 22:47:03 -0500 Subject: [PATCH 3/6] feat: added skip-agents-md cli flag --- README.md | 13 +- gitnexus/README.md | 11 +- gitnexus/src/cli/ai-context.ts | 68 ++-- gitnexus/src/cli/analyze.ts | 447 ++++++++++++++++-------- gitnexus/src/cli/index.ts | 1 + gitnexus/test/unit/ai-context.test.ts | 28 ++ gitnexus/test/unit/skip-git-cli.test.ts | 3 +- 7 files changed, 371 insertions(+), 200 deletions(-) diff --git a/README.md b/README.md index 5616b6a818..56b4d1b3a6 100644 --- a/README.md +++ b/README.md @@ -165,13 +165,14 @@ args = ["-y", "gitnexus@latest", "mcp"] ### CLI Commands ```bash -gitnexus setup # Configure MCP for your editors (one-time) -gitnexus analyze [path] # Index a repository (or update stale index) -gitnexus analyze --force # Force full re-index -gitnexus analyze --skills # Generate repo-specific skill files from detected communities +gitnexus setup # Configure MCP for your editors (one-time) +gitnexus analyze [path] # Index a repository (or update stale index) +gitnexus analyze --force # Force full re-index +gitnexus analyze --skills # Generate repo-specific skill files from detected communities gitnexus analyze --skip-embeddings # Skip embedding generation (faster) -gitnexus analyze --embeddings # Enable embedding generation (slower, better search) -gitnexus analyze --verbose # Log skipped files when parsers are unavailable +gitnexus analyze --skip-agents-md # Preserve custom AGENTS.md/CLAUDE.md gitnexus section edits +gitnexus analyze --embeddings # Enable embedding generation (slower, better search) +gitnexus analyze --verbose # Log skipped files when parsers are unavailable gitnexus mcp # Start MCP server (stdio) — serves all indexed repos gitnexus serve # Start local HTTP server (multi-repo) for web UI connection gitnexus list # List all indexed repositories diff --git a/gitnexus/README.md b/gitnexus/README.md index 312a465391..b56e6c2192 100644 --- a/gitnexus/README.md +++ b/gitnexus/README.md @@ -149,11 +149,12 @@ Your AI agent gets these tools automatically: ## CLI Commands ```bash -gitnexus setup # Configure MCP for your editors (one-time) -gitnexus analyze [path] # Index a repository (or update stale index) -gitnexus analyze --force # Force full re-index -gitnexus analyze --embeddings # Enable embedding generation (slower, better search) -gitnexus analyze --verbose # Log skipped files when parsers are unavailable +gitnexus setup # Configure MCP for your editors (one-time) +gitnexus analyze [path] # Index a repository (or update stale index) +gitnexus analyze --force # Force full re-index +gitnexus analyze --embeddings # Enable embedding generation (slower, better search) +gitnexus analyze --skip-agents-md # Preserve custom AGENTS.md/CLAUDE.md gitnexus section edits +gitnexus analyze --verbose # Log skipped files when parsers are unavailable gitnexus mcp # Start MCP server (stdio) — serves all indexed repos gitnexus serve # Start local HTTP server (multi-repo) for web UI gitnexus index # Register an existing .gitnexus/ folder into the global registry diff --git a/gitnexus/src/cli/ai-context.ts b/gitnexus/src/cli/ai-context.ts index d1e6b7ba84..ec022bcb82 100644 --- a/gitnexus/src/cli/ai-context.ts +++ b/gitnexus/src/cli/ai-context.ts @@ -1,6 +1,6 @@ /** * AI Context Generator - * + * * Creates AGENTS.md and CLAUDE.md with full inline GitNexus context. * AGENTS.md is the standard read by Cursor, Windsurf, OpenCode, Codex, Cline, etc. * CLAUDE.md is for Claude Code which only reads that file. @@ -20,10 +20,14 @@ interface RepoStats { nodes?: number; edges?: number; communities?: number; - clusters?: number; // Aggregated cluster count (what tools show) + clusters?: number; // Aggregated cluster count (what tools show) processes?: number; } +interface AIContextOptions { + skipAgentsMd?: boolean; +} + const GITNEXUS_START_MARKER = ''; const GITNEXUS_END_MARKER = ''; @@ -38,20 +42,12 @@ const GITNEXUS_END_MARKER = ''; * - Exact tool commands with parameters — vague directives get ignored * - Self-review checklist — forces model to verify its own work */ -function generateGitNexusContent( - projectName: string, - stats: RepoStats, - generatedSkills?: GeneratedSkillInfo[], -): string { - const generatedRows = - generatedSkills && generatedSkills.length > 0 - ? generatedSkills - .map( - (s) => - `| Work in the ${s.label} area (${s.symbolCount} symbols) | \`.claude/skills/generated/${s.name}/SKILL.md\` |`, - ) - .join('\n') - : ''; +function generateGitNexusContent(projectName: string, stats: RepoStats, generatedSkills?: GeneratedSkillInfo[]): string { + const generatedRows = (generatedSkills && generatedSkills.length > 0) + ? generatedSkills.map(s => + `| Work in the ${s.label} area (${s.symbolCount} symbols) | \`.claude/skills/generated/${s.name}/SKILL.md\` |` + ).join('\n') + : ''; const skillsTable = `| Task | Read this skill file | |------|---------------------| @@ -158,6 +154,7 @@ ${skillsTable} ${GITNEXUS_END_MARKER}`; } + /** * Check if a file exists */ @@ -178,7 +175,7 @@ async function fileExists(filePath: string): Promise { */ async function upsertGitNexusSection( filePath: string, - content: string, + content: string ): Promise<'created' | 'updated' | 'appended'> { const exists = await fileExists(filePath); @@ -220,33 +217,27 @@ async function installSkills(repoPath: string): Promise { const skills = [ { name: 'gitnexus-exploring', - description: - 'Use when the user asks how code works, wants to understand architecture, trace execution flows, or explore unfamiliar parts of the codebase. Examples: "How does X work?", "What calls this function?", "Show me the auth flow"', + description: 'Use when the user asks how code works, wants to understand architecture, trace execution flows, or explore unfamiliar parts of the codebase. Examples: "How does X work?", "What calls this function?", "Show me the auth flow"', }, { name: 'gitnexus-debugging', - description: - 'Use when the user is debugging a bug, tracing an error, or asking why something fails. Examples: "Why is X failing?", "Where does this error come from?", "Trace this bug"', + description: 'Use when the user is debugging a bug, tracing an error, or asking why something fails. Examples: "Why is X failing?", "Where does this error come from?", "Trace this bug"', }, { name: 'gitnexus-impact-analysis', - description: - 'Use when the user wants to know what will break if they change something, or needs safety analysis before editing code. Examples: "Is it safe to change X?", "What depends on this?", "What will break?"', + description: 'Use when the user wants to know what will break if they change something, or needs safety analysis before editing code. Examples: "Is it safe to change X?", "What depends on this?", "What will break?"', }, { name: 'gitnexus-refactoring', - description: - 'Use when the user wants to rename, extract, split, move, or restructure code safely. Examples: "Rename this function", "Extract this into a module", "Refactor this class", "Move this to a separate file"', + description: 'Use when the user wants to rename, extract, split, move, or restructure code safely. Examples: "Rename this function", "Extract this into a module", "Refactor this class", "Move this to a separate file"', }, { name: 'gitnexus-guide', - description: - 'Use when the user asks about GitNexus itself — available tools, how to query the knowledge graph, MCP resources, graph schema, or workflow reference. Examples: "What GitNexus tools are available?", "How do I use GitNexus?"', + description: 'Use when the user asks about GitNexus itself — available tools, how to query the knowledge graph, MCP resources, graph schema, or workflow reference. Examples: "What GitNexus tools are available?", "How do I use GitNexus?"', }, { name: 'gitnexus-cli', - description: - 'Use when the user needs to run GitNexus CLI commands like analyze/index a repo, check status, clean the index, generate a wiki, or list indexed repos. Examples: "Index this repo", "Reanalyze the codebase", "Generate a wiki"', + description: 'Use when the user needs to run GitNexus CLI commands like analyze/index a repo, check status, clean the index, generate a wiki, or list indexed repos. Examples: "Index this repo", "Reanalyze the codebase", "Generate a wiki"', }, ]; @@ -299,19 +290,22 @@ export async function generateAIContextFiles( projectName: string, stats: RepoStats, generatedSkills?: GeneratedSkillInfo[], + options?: AIContextOptions ): Promise<{ files: string[] }> { const content = generateGitNexusContent(projectName, stats, generatedSkills); const createdFiles: string[] = []; - // Create AGENTS.md (standard for Cursor, Windsurf, OpenCode, Cline, etc.) - const agentsPath = path.join(repoPath, 'AGENTS.md'); - const agentsResult = await upsertGitNexusSection(agentsPath, content); - createdFiles.push(`AGENTS.md (${agentsResult})`); + if (!options?.skipAgentsMd) { + // Create AGENTS.md (standard for Cursor, Windsurf, OpenCode, Cline, etc.) + const agentsPath = path.join(repoPath, 'AGENTS.md'); + const agentsResult = await upsertGitNexusSection(agentsPath, content); + createdFiles.push(`AGENTS.md (${agentsResult})`); - // Create CLAUDE.md (for Claude Code) - const claudePath = path.join(repoPath, 'CLAUDE.md'); - const claudeResult = await upsertGitNexusSection(claudePath, content); - createdFiles.push(`CLAUDE.md (${claudeResult})`); + // Create CLAUDE.md (for Claude Code) + const claudePath = path.join(repoPath, 'CLAUDE.md'); + const claudeResult = await upsertGitNexusSection(claudePath, content); + createdFiles.push(`CLAUDE.md (${claudeResult})`); + } // Install skills to .claude/skills/gitnexus/ const installedSkills = await installSkills(repoPath); diff --git a/gitnexus/src/cli/analyze.ts b/gitnexus/src/cli/analyze.ts index 8dcb19e469..75c0d029f6 100644 --- a/gitnexus/src/cli/analyze.ts +++ b/gitnexus/src/cli/analyze.ts @@ -2,22 +2,25 @@ * Analyze Command * * Indexes a repository and stores the knowledge graph in .gitnexus/ - * - * Delegates core analysis to the shared runFullAnalysis orchestrator. - * This CLI wrapper handles: heap management, progress bar, SIGINT, - * skill generation (--skills), summary output, and process.exit(). */ import path from 'path'; import { execFileSync } from 'child_process'; import v8 from 'v8'; import cliProgress from 'cli-progress'; -import { closeLbug } from '../core/lbug/lbug-adapter.js'; -import { getStoragePaths, getGlobalRegistryPath } from '../storage/repo-manager.js'; -import { getGitRoot, hasGitDir } from '../storage/git.js'; -import { runFullAnalysis } from '../core/run-analyze.js'; +import { runPipelineFromRepo } from '../core/ingestion/pipeline.js'; +import { initLbug, loadGraphToLbug, getLbugStats, executeQuery, executeWithReusedStatement, closeLbug, createFTSIndex, loadCachedEmbeddings } from '../core/lbug/lbug-adapter.js'; +// Embedding imports are lazy (dynamic import) so onnxruntime-node is never +// loaded when embeddings are not requested. This avoids crashes on Node +// versions whose ABI is not yet supported by the native binary (#89). +// disposeEmbedder intentionally not called — ONNX Runtime segfaults on cleanup (see #38) +import { getStoragePaths, saveMeta, loadMeta, addToGitignore, registerRepo, getGlobalRegistryPath, cleanupOldKuzuFiles } from '../storage/repo-manager.js'; +import { getCurrentCommit, getGitRoot, hasGitDir } from '../storage/git.js'; +import { generateAIContextFiles } from './ai-context.js'; +import { generateSkillFiles, type GeneratedSkillInfo } from './skill-gen.js'; import fs from 'fs/promises'; + const HEAP_MB = 8192; const HEAP_FLAG = `--max-old-space-size=${HEAP_MB}`; @@ -45,11 +48,35 @@ export interface AnalyzeOptions { embeddings?: boolean; skills?: boolean; verbose?: boolean; + /** Skip AGENTS.md and CLAUDE.md gitnexus block updates. */ + skipAgentsMd?: boolean; /** Index the folder even when no .git directory is present. */ skipGit?: boolean; } -export const analyzeCommand = async (inputPath?: string, options?: AnalyzeOptions) => { +/** Threshold: auto-skip embeddings for repos with more nodes than this */ +const EMBEDDING_NODE_LIMIT = 50_000; + +const PHASE_LABELS: Record = { + extracting: 'Scanning files', + structure: 'Building structure', + parsing: 'Parsing code', + imports: 'Resolving imports', + calls: 'Tracing calls', + heritage: 'Extracting inheritance', + communities: 'Detecting communities', + processes: 'Detecting processes', + complete: 'Pipeline complete', + lbug: 'Loading into LadybugDB', + fts: 'Creating search indexes', + embeddings: 'Generating embeddings', + done: 'Done', +}; + +export const analyzeCommand = async ( + inputPath?: string, + options?: AnalyzeOptions +) => { if (ensureHeap()) return; if (options?.verbose) { @@ -65,9 +92,7 @@ export const analyzeCommand = async (inputPath?: string, options?: AnalyzeOption const gitRoot = getGitRoot(process.cwd()); if (!gitRoot) { if (!options?.skipGit) { - console.log( - ' Not inside a git repository.\n Tip: pass --skip-git to index any folder without a .git directory.\n', - ); + console.log(' Not inside a git repository.\n Tip: pass --skip-git to index any folder without a .git directory.\n'); process.exitCode = 1; return; } @@ -80,83 +105,93 @@ export const analyzeCommand = async (inputPath?: string, options?: AnalyzeOption const repoHasGit = hasGitDir(repoPath); if (!repoHasGit && !options?.skipGit) { - console.log( - ' Not a git repository.\n Tip: pass --skip-git to index any folder without a .git directory.\n', - ); + console.log(' Not a git repository.\n Tip: pass --skip-git to index any folder without a .git directory.\n'); process.exitCode = 1; return; } if (!repoHasGit) { - console.log( - ' Warning: no .git directory found \u2014 commit-tracking and incremental updates disabled.\n', - ); + console.log(' Warning: no .git directory found \u2014 commit-tracking and incremental updates disabled.\n'); + } + + const { storagePath, lbugPath } = getStoragePaths(repoPath); + + // Clean up stale KuzuDB files from before the LadybugDB migration. + // If kuzu existed but lbug doesn't, we're doing a migration re-index — say so. + const kuzuResult = await cleanupOldKuzuFiles(storagePath); + if (kuzuResult.found && kuzuResult.needsReindex) { + console.log(' Migrating from KuzuDB to LadybugDB — rebuilding index...\n'); } - // KuzuDB migration cleanup is handled by runFullAnalysis internally. - // Note: --skills is handled after runFullAnalysis using the returned pipelineResult. + const currentCommit = repoHasGit ? getCurrentCommit(repoPath) : ''; + const existingMeta = await loadMeta(storagePath); + + if (existingMeta && !options?.force && !options?.skills && existingMeta.lastCommit === currentCommit) { + // Non-git folders have currentCommit = '' — always rebuild since we can't detect changes + if (currentCommit !== '') { + console.log(' Already up to date\n'); + return; + } + } if (process.env.GITNEXUS_NO_GITIGNORE) { - console.log( - ' GITNEXUS_NO_GITIGNORE is set — skipping .gitignore (still reading .gitnexusignore)\n', - ); + console.log(' GITNEXUS_NO_GITIGNORE is set — skipping .gitignore (still reading .gitnexusignore)\n'); } - // ── CLI progress bar setup ───────────────────────────────────────── - const bar = new cliProgress.SingleBar( - { - format: ' {bar} {percentage}% | {phase}', - barCompleteChar: '\u2588', - barIncompleteChar: '\u2591', - hideCursor: true, - barGlue: '', - autopadding: true, - clearOnComplete: false, - stopOnComplete: false, - }, - cliProgress.Presets.shades_grey, - ); + // Single progress bar for entire pipeline + const bar = new cliProgress.SingleBar({ + format: ' {bar} {percentage}% | {phase}', + barCompleteChar: '\u2588', + barIncompleteChar: '\u2591', + hideCursor: true, + barGlue: '', + autopadding: true, + clearOnComplete: false, + stopOnComplete: false, + }, cliProgress.Presets.shades_grey); bar.start(100, 0, { phase: 'Initializing...' }); - // Graceful SIGINT handling + // Graceful SIGINT handling — clean up resources and exit let aborted = false; const sigintHandler = () => { - if (aborted) process.exit(1); + if (aborted) process.exit(1); // Second Ctrl-C: force exit aborted = true; bar.stop(); console.log('\n Interrupted — cleaning up...'); - closeLbug() - .catch(() => {}) - .finally(() => process.exit(130)); + closeLbug().catch(() => {}).finally(() => process.exit(130)); }; process.on('SIGINT', sigintHandler); - // Route console output through bar.log() to prevent progress bar corruption + // Route all console output through bar.log() so the bar doesn't stamp itself + // multiple times when other code writes to stdout/stderr mid-render. const origLog = console.log.bind(console); const origWarn = console.warn.bind(console); const origError = console.error.bind(console); const barLog = (...args: any[]) => { + // Clear the bar line, print the message, then let the next bar.update redraw process.stdout.write('\x1b[2K\r'); - origLog(args.map((a) => (typeof a === 'string' ? a : String(a))).join(' ')); + origLog(args.map(a => (typeof a === 'string' ? a : String(a))).join(' ')); }; console.log = barLog; console.warn = barLog; console.error = barLog; - // Track elapsed time per phase + // Track elapsed time per phase — both updateBar and the interval use the + // same format so they don't flicker against each other. let lastPhaseLabel = 'Initializing...'; let phaseStart = Date.now(); + /** Update bar with phase label + elapsed seconds (shown after 3s). */ const updateBar = (value: number, phaseLabel: string) => { - if (phaseLabel !== lastPhaseLabel) { - lastPhaseLabel = phaseLabel; - phaseStart = Date.now(); - } + if (phaseLabel !== lastPhaseLabel) { lastPhaseLabel = phaseLabel; phaseStart = Date.now(); } const elapsed = Math.round((Date.now() - phaseStart) / 1000); const display = elapsed >= 3 ? `${phaseLabel} (${elapsed}s)` : phaseLabel; bar.update(value, { phase: display }); }; + // Tick elapsed seconds for phases with infrequent progress callbacks + // (e.g. CSV streaming, FTS indexing). Uses the same display format as + // updateBar so there's no flickering. const elapsedTimer = setInterval(() => { const elapsed = Math.round((Date.now() - phaseStart) / 1000); if (elapsed >= 3) { @@ -164,125 +199,235 @@ export const analyzeCommand = async (inputPath?: string, options?: AnalyzeOption } }, 1000); - const t0 = Date.now(); + const t0Global = Date.now(); - // ── Run shared analysis orchestrator ─────────────────────────────── - try { - const result = await runFullAnalysis( - repoPath, - { - force: options?.force || options?.skills, - embeddings: options?.embeddings, - skipGit: options?.skipGit, - }, - { - onProgress: (_phase, percent, message) => { - updateBar(percent, message); - }, - onLog: barLog, - }, - ); + // ── Cache embeddings from existing index before rebuild ──────────── + let cachedEmbeddingNodeIds = new Set(); + let cachedEmbeddings: Array<{ nodeId: string; embedding: number[] }> = []; - if (result.alreadyUpToDate) { - clearInterval(elapsedTimer); - process.removeListener('SIGINT', sigintHandler); - console.log = origLog; - console.warn = origWarn; - console.error = origError; - bar.stop(); - console.log(' Already up to date\n'); - // Safe to return without process.exit(0) — the early-return path in - // runFullAnalysis never opens LadybugDB, so no native handles prevent exit. - return; + if (options?.embeddings && existingMeta && !options?.force) { + try { + updateBar(0, 'Caching embeddings...'); + await initLbug(lbugPath); + const cached = await loadCachedEmbeddings(); + cachedEmbeddingNodeIds = cached.embeddingNodeIds; + cachedEmbeddings = cached.embeddings; + await closeLbug(); + } catch { + try { await closeLbug(); } catch {} } + } + + // ── Phase 1: Full Pipeline (0–60%) ───────────────────────────────── + const pipelineResult = await runPipelineFromRepo(repoPath, (progress) => { + const phaseLabel = PHASE_LABELS[progress.phase] || progress.phase; + const scaled = Math.round(progress.percent * 0.6); + updateBar(scaled, phaseLabel); + }); + + // ── Phase 2: LadybugDB (60–85%) ────────────────────────────────────── + updateBar(60, 'Loading into LadybugDB...'); - // Skill generation (CLI-only, uses pipeline result from analysis) - if (options?.skills && result.pipelineResult) { - updateBar(99, 'Generating skill files...'); - try { - const { generateSkillFiles } = await import('./skill-gen.js'); - const { generateAIContextFiles } = await import('./ai-context.js'); - const skillResult = await generateSkillFiles( - repoPath, - result.repoName, - result.pipelineResult, - ); - if (skillResult.skills.length > 0) { - barLog(` Generated ${skillResult.skills.length} skill files`); - // Re-generate AI context files now that we have skill info - const s = result.stats; - const communityResult = result.pipelineResult?.communityResult; - let aggregatedClusterCount = 0; - if (communityResult?.communities) { - const groups = new Map(); - for (const c of communityResult.communities) { - const label = c.heuristicLabel || c.label || 'Unknown'; - groups.set(label, (groups.get(label) || 0) + c.symbolCount); - } - aggregatedClusterCount = Array.from(groups.values()).filter( - (count: number) => count >= 5, - ).length; - } - const { storagePath: sp } = getStoragePaths(repoPath); - await generateAIContextFiles( - repoPath, - sp, - result.repoName, - { - files: s.files ?? 0, - nodes: s.nodes ?? 0, - edges: s.edges ?? 0, - communities: s.communities, - clusters: aggregatedClusterCount, - processes: s.processes, - }, - skillResult.skills, + await closeLbug(); + const lbugFiles = [lbugPath, `${lbugPath}.wal`, `${lbugPath}.lock`]; + for (const f of lbugFiles) { + try { await fs.rm(f, { recursive: true, force: true }); } catch {} + } + + const t0Lbug = Date.now(); + await initLbug(lbugPath); + let lbugMsgCount = 0; + const lbugResult = await loadGraphToLbug(pipelineResult.graph, pipelineResult.repoPath, storagePath, (msg) => { + lbugMsgCount++; + const progress = Math.min(84, 60 + Math.round((lbugMsgCount / (lbugMsgCount + 10)) * 24)); + updateBar(progress, msg); + }); + const lbugTime = ((Date.now() - t0Lbug) / 1000).toFixed(1); + const lbugWarnings = lbugResult.warnings; + + // ── Phase 3: FTS (85–90%) ───────────────────────────────────────── + updateBar(85, 'Creating search indexes...'); + + const t0Fts = Date.now(); + try { + await createFTSIndex('File', 'file_fts', ['name', 'content']); + await createFTSIndex('Function', 'function_fts', ['name', 'content']); + await createFTSIndex('Class', 'class_fts', ['name', 'content']); + await createFTSIndex('Method', 'method_fts', ['name', 'content']); + await createFTSIndex('Interface', 'interface_fts', ['name', 'content']); + } catch (e: any) { + // Non-fatal — FTS is best-effort + } + const ftsTime = ((Date.now() - t0Fts) / 1000).toFixed(1); + + // ── Phase 3.5: Re-insert cached embeddings ──────────────────────── + if (cachedEmbeddings.length > 0) { + // Check if cached embedding dimensions match current schema + const cachedDims = cachedEmbeddings[0].embedding.length; + const { EMBEDDING_DIMS } = await import('../core/lbug/schema.js'); + if (cachedDims !== EMBEDDING_DIMS) { + // Dimensions changed (e.g. switched embedding model) — discard cache and re-embed all + console.error(`⚠️ Embedding dimensions changed (${cachedDims}d → ${EMBEDDING_DIMS}d), discarding cache`); + cachedEmbeddings = []; + cachedEmbeddingNodeIds = new Set(); + } else { + updateBar(88, `Restoring ${cachedEmbeddings.length} cached embeddings...`); + const EMBED_BATCH = 200; + for (let i = 0; i < cachedEmbeddings.length; i += EMBED_BATCH) { + const batch = cachedEmbeddings.slice(i, i + EMBED_BATCH); + const paramsList = batch.map(e => ({ nodeId: e.nodeId, embedding: e.embedding })); + try { + await executeWithReusedStatement( + `CREATE (e:CodeEmbedding {nodeId: $nodeId, embedding: $embedding})`, + paramsList, ); - } - } catch { - /* best-effort */ + } catch { /* some may fail if node was removed, that's fine */ } } } + } - const totalTime = ((Date.now() - t0) / 1000).toFixed(1); + // ── Phase 4: Embeddings (90–98%) ────────────────────────────────── + const stats = await getLbugStats(); + let embeddingTime = '0.0'; + let embeddingSkipped = true; + let embeddingSkipReason = 'off (use --embeddings to enable)'; - clearInterval(elapsedTimer); - process.removeListener('SIGINT', sigintHandler); + if (options?.embeddings) { + if (stats.nodes > EMBEDDING_NODE_LIMIT) { + embeddingSkipReason = `skipped (${stats.nodes.toLocaleString()} nodes > ${EMBEDDING_NODE_LIMIT.toLocaleString()} limit)`; + } else { + embeddingSkipped = false; + } + } - console.log = origLog; - console.warn = origWarn; - console.error = origError; + if (!embeddingSkipped) { + const { isHttpMode } = await import('../core/embeddings/http-client.js'); + const httpMode = isHttpMode(); + updateBar(90, httpMode ? 'Connecting to embedding endpoint...' : 'Loading embedding model...'); + const t0Emb = Date.now(); + const { runEmbeddingPipeline } = await import('../core/embeddings/embedding-pipeline.js'); + await runEmbeddingPipeline( + executeQuery, + executeWithReusedStatement, + (progress) => { + const scaled = 90 + Math.round((progress.percent / 100) * 8); + const label = progress.phase === 'loading-model' + ? (httpMode ? 'Connecting to embedding endpoint...' : 'Loading embedding model...') + : `Embedding ${progress.nodesProcessed || 0}/${progress.totalNodes || '?'}`; + updateBar(scaled, label); + }, + {}, + cachedEmbeddingNodeIds.size > 0 ? cachedEmbeddingNodeIds : undefined, + ); + embeddingTime = ((Date.now() - t0Emb) / 1000).toFixed(1); + } - bar.update(100, { phase: 'Done' }); - bar.stop(); + // ── Phase 5: Finalize (98–100%) ─────────────────────────────────── + updateBar(98, 'Saving metadata...'); - // ── Summary ──────────────────────────────────────────────────── - const s = result.stats; - console.log(`\n Repository indexed successfully (${totalTime}s)\n`); - console.log( - ` ${(s.nodes ?? 0).toLocaleString()} nodes | ${(s.edges ?? 0).toLocaleString()} edges | ${s.communities ?? 0} clusters | ${s.processes ?? 0} flows`, - ); - console.log(` ${repoPath}`); + // Count embeddings in the index (cached + newly generated) + let embeddingCount = 0; + try { + const embResult = await executeQuery(`MATCH (e:CodeEmbedding) RETURN count(e) AS cnt`); + embeddingCount = embResult?.[0]?.cnt ?? 0; + } catch { /* table may not exist if embeddings never ran */ } + + const meta = { + repoPath, + lastCommit: currentCommit, + indexedAt: new Date().toISOString(), + stats: { + files: pipelineResult.totalFileCount, + nodes: stats.nodes, + edges: stats.edges, + communities: pipelineResult.communityResult?.stats.totalCommunities, + processes: pipelineResult.processResult?.stats.totalProcesses, + embeddings: embeddingCount, + }, + }; + await saveMeta(storagePath, meta); + await registerRepo(repoPath, meta); + // Only attempt to update .gitignore when a .git directory is present. + // Use hasGitDir (filesystem check) rather than git CLI subprocess + // so we skip correctly for --skip-git folders even if git CLI is available. + if (hasGitDir(repoPath)) { + await addToGitignore(repoPath); + } - try { - await fs.access(getGlobalRegistryPath()); - } catch { - console.log('\n Tip: Run `gitnexus setup` to configure MCP for your editor.'); + const projectName = path.basename(repoPath); + let aggregatedClusterCount = 0; + if (pipelineResult.communityResult?.communities) { + const groups = new Map(); + for (const c of pipelineResult.communityResult.communities) { + const label = c.heuristicLabel || c.label || 'Unknown'; + groups.set(label, (groups.get(label) || 0) + c.symbolCount); } + aggregatedClusterCount = Array.from(groups.values()).filter(count => count >= 5).length; + } - console.log(''); - } catch (err: any) { - clearInterval(elapsedTimer); - process.removeListener('SIGINT', sigintHandler); - console.log = origLog; - console.warn = origWarn; - console.error = origError; - bar.stop(); - console.error(`\n Analysis failed: ${err.message}\n`); - process.exitCode = 1; - return; + let generatedSkills: GeneratedSkillInfo[] = []; + if (options?.skills && pipelineResult.communityResult) { + updateBar(99, 'Generating skill files...'); + const skillResult = await generateSkillFiles(repoPath, projectName, pipelineResult); + generatedSkills = skillResult.skills; } + const aiContext = await generateAIContextFiles(repoPath, storagePath, projectName, { + files: pipelineResult.totalFileCount, + nodes: stats.nodes, + edges: stats.edges, + communities: pipelineResult.communityResult?.stats.totalCommunities, + clusters: aggregatedClusterCount, + processes: pipelineResult.processResult?.stats.totalProcesses, + }, generatedSkills, { + skipAgentsMd: options?.skipAgentsMd, + }); + + await closeLbug(); + // Note: we intentionally do NOT call disposeEmbedder() here. + // ONNX Runtime's native cleanup segfaults on macOS and some Linux configs. + // Since the process exits immediately after, Node.js reclaims everything. + + const totalTime = ((Date.now() - t0Global) / 1000).toFixed(1); + + clearInterval(elapsedTimer); + process.removeListener('SIGINT', sigintHandler); + + console.log = origLog; + console.warn = origWarn; + console.error = origError; + + bar.update(100, { phase: 'Done' }); + bar.stop(); + + // ── Summary ─────────────────────────────────────────────────────── + const embeddingsCached = cachedEmbeddings.length > 0; + console.log(`\n Repository indexed successfully (${totalTime}s)${embeddingsCached ? ` [${cachedEmbeddings.length} embeddings cached]` : ''}\n`); + console.log(` ${stats.nodes.toLocaleString()} nodes | ${stats.edges.toLocaleString()} edges | ${pipelineResult.communityResult?.stats.totalCommunities || 0} clusters | ${pipelineResult.processResult?.stats.totalProcesses || 0} flows`); + console.log(` LadybugDB ${lbugTime}s | FTS ${ftsTime}s | Embeddings ${embeddingSkipped ? embeddingSkipReason : embeddingTime + 's'}`); + console.log(` ${repoPath}`); + + if (aiContext.files.length > 0) { + console.log(` Context: ${aiContext.files.join(', ')}`); + } + + // Show a quiet summary if some edge types needed fallback insertion + if (lbugWarnings.length > 0) { + const totalFallback = lbugWarnings.reduce((sum, w) => { + const m = w.match(/\((\d+) edges\)/); + return sum + (m ? parseInt(m[1]) : 0); + }, 0); + console.log(` Note: ${totalFallback} edges across ${lbugWarnings.length} types inserted via fallback (schema will be updated in next release)`); + } + + try { + await fs.access(getGlobalRegistryPath()); + } catch { + console.log('\n Tip: Run `gitnexus setup` to configure MCP for your editor.'); + } + + console.log(''); + // LadybugDB's native module holds open handles that prevent Node from exiting. // ONNX Runtime also registers native atexit hooks that segfault on some // platforms (#38, #40). Force-exit to ensure clean termination. diff --git a/gitnexus/src/cli/index.ts b/gitnexus/src/cli/index.ts index 7ccdf16de1..0cb338ad11 100644 --- a/gitnexus/src/cli/index.ts +++ b/gitnexus/src/cli/index.ts @@ -24,6 +24,7 @@ program .option('-f, --force', 'Force full re-index even if up to date') .option('--embeddings', 'Enable embedding generation for semantic search (off by default)') .option('--skills', 'Generate repo-specific skill files from detected communities') + .option('--skip-agents-md', 'Skip updating the gitnexus section in AGENTS.md and CLAUDE.md') .option('--skip-git', 'Index a folder without requiring a .git directory') .option('-v, --verbose', 'Enable verbose ingestion warnings (default: false)') .addHelpText( diff --git a/gitnexus/test/unit/ai-context.test.ts b/gitnexus/test/unit/ai-context.test.ts index 489eac9a1e..0a9f52a687 100644 --- a/gitnexus/test/unit/ai-context.test.ts +++ b/gitnexus/test/unit/ai-context.test.ts @@ -79,4 +79,32 @@ describe('generateAIContextFiles', () => { // Skills dir may not be created if skills source doesn't exist in test context } }); + + it('preserves manual AGENTS.md and CLAUDE.md edits when skipAgentsMd is enabled', async () => { + const stats = { nodes: 42, edges: 84, processes: 3 }; + const agentsPath = path.join(tmpDir, 'AGENTS.md'); + const claudePath = path.join(tmpDir, 'CLAUDE.md'); + const agentsContent = '# AGENTS\n\nCustom manual instructions only\n'; + const claudeContent = '# CLAUDE\n\nCustom manual instructions only\n'; + + await fs.writeFile(agentsPath, agentsContent, 'utf-8'); + await fs.writeFile(claudePath, claudeContent, 'utf-8'); + + const result = await generateAIContextFiles( + tmpDir, + storagePath, + 'TestProject', + stats, + undefined, + { skipAgentsMd: true }, + ); + + expect(result.files).toContain('AGENTS.md (skipped via --skip-agents-md)'); + expect(result.files).toContain('CLAUDE.md (skipped via --skip-agents-md)'); + + const agentsAfter = await fs.readFile(agentsPath, 'utf-8'); + const claudeAfter = await fs.readFile(claudePath, 'utf-8'); + expect(agentsAfter).toBe(agentsContent); + expect(claudeAfter).toBe(claudeContent); + }); }); diff --git a/gitnexus/test/unit/skip-git-cli.test.ts b/gitnexus/test/unit/skip-git-cli.test.ts index 73707fe61b..740c8265a7 100644 --- a/gitnexus/test/unit/skip-git-cli.test.ts +++ b/gitnexus/test/unit/skip-git-cli.test.ts @@ -6,7 +6,7 @@ import fs from 'fs'; describe('--skip-git CLI flag', () => { it('Commander maps --skip-git to options.skipGit (not --no-git inversion)', () => { - // Verify the CLI defines --skip-git, not --no-git + // Verify the CLI defines --skip-git and --skip-agents-md in analyze help. const helpOutput = execSync('node dist/cli/index.js analyze --help', { cwd: path.resolve(__dirname, '../..'), encoding: 'utf8', @@ -14,6 +14,7 @@ describe('--skip-git CLI flag', () => { }); expect(helpOutput).toContain('--skip-git'); + expect(helpOutput).toContain('--skip-agents-md'); expect(helpOutput).not.toContain('--no-git'); }); From e055b5cc97c2208d057484c1a3c2c9772dcc3a45 Mon Sep 17 00:00:00 2001 From: Gabe Campbell Date: Sat, 28 Mar 2026 12:43:36 -0500 Subject: [PATCH 4/6] fix: apply prettier formatting --- gitnexus/src/cli/ai-context.ts | 47 +++++---- gitnexus/src/cli/analyze.ts | 175 +++++++++++++++++++++++---------- 2 files changed, 151 insertions(+), 71 deletions(-) diff --git a/gitnexus/src/cli/ai-context.ts b/gitnexus/src/cli/ai-context.ts index ec022bcb82..e9d8accdd6 100644 --- a/gitnexus/src/cli/ai-context.ts +++ b/gitnexus/src/cli/ai-context.ts @@ -1,6 +1,6 @@ /** * AI Context Generator - * + * * Creates AGENTS.md and CLAUDE.md with full inline GitNexus context. * AGENTS.md is the standard read by Cursor, Windsurf, OpenCode, Codex, Cline, etc. * CLAUDE.md is for Claude Code which only reads that file. @@ -20,7 +20,7 @@ interface RepoStats { nodes?: number; edges?: number; communities?: number; - clusters?: number; // Aggregated cluster count (what tools show) + clusters?: number; // Aggregated cluster count (what tools show) processes?: number; } @@ -42,12 +42,20 @@ const GITNEXUS_END_MARKER = ''; * - Exact tool commands with parameters — vague directives get ignored * - Self-review checklist — forces model to verify its own work */ -function generateGitNexusContent(projectName: string, stats: RepoStats, generatedSkills?: GeneratedSkillInfo[]): string { - const generatedRows = (generatedSkills && generatedSkills.length > 0) - ? generatedSkills.map(s => - `| Work in the ${s.label} area (${s.symbolCount} symbols) | \`.claude/skills/generated/${s.name}/SKILL.md\` |` - ).join('\n') - : ''; +function generateGitNexusContent( + projectName: string, + stats: RepoStats, + generatedSkills?: GeneratedSkillInfo[], +): string { + const generatedRows = + generatedSkills && generatedSkills.length > 0 + ? generatedSkills + .map( + (s) => + `| Work in the ${s.label} area (${s.symbolCount} symbols) | \`.claude/skills/generated/${s.name}/SKILL.md\` |`, + ) + .join('\n') + : ''; const skillsTable = `| Task | Read this skill file | |------|---------------------| @@ -154,7 +162,6 @@ ${skillsTable} ${GITNEXUS_END_MARKER}`; } - /** * Check if a file exists */ @@ -175,7 +182,7 @@ async function fileExists(filePath: string): Promise { */ async function upsertGitNexusSection( filePath: string, - content: string + content: string, ): Promise<'created' | 'updated' | 'appended'> { const exists = await fileExists(filePath); @@ -217,27 +224,33 @@ async function installSkills(repoPath: string): Promise { const skills = [ { name: 'gitnexus-exploring', - description: 'Use when the user asks how code works, wants to understand architecture, trace execution flows, or explore unfamiliar parts of the codebase. Examples: "How does X work?", "What calls this function?", "Show me the auth flow"', + description: + 'Use when the user asks how code works, wants to understand architecture, trace execution flows, or explore unfamiliar parts of the codebase. Examples: "How does X work?", "What calls this function?", "Show me the auth flow"', }, { name: 'gitnexus-debugging', - description: 'Use when the user is debugging a bug, tracing an error, or asking why something fails. Examples: "Why is X failing?", "Where does this error come from?", "Trace this bug"', + description: + 'Use when the user is debugging a bug, tracing an error, or asking why something fails. Examples: "Why is X failing?", "Where does this error come from?", "Trace this bug"', }, { name: 'gitnexus-impact-analysis', - description: 'Use when the user wants to know what will break if they change something, or needs safety analysis before editing code. Examples: "Is it safe to change X?", "What depends on this?", "What will break?"', + description: + 'Use when the user wants to know what will break if they change something, or needs safety analysis before editing code. Examples: "Is it safe to change X?", "What depends on this?", "What will break?"', }, { name: 'gitnexus-refactoring', - description: 'Use when the user wants to rename, extract, split, move, or restructure code safely. Examples: "Rename this function", "Extract this into a module", "Refactor this class", "Move this to a separate file"', + description: + 'Use when the user wants to rename, extract, split, move, or restructure code safely. Examples: "Rename this function", "Extract this into a module", "Refactor this class", "Move this to a separate file"', }, { name: 'gitnexus-guide', - description: 'Use when the user asks about GitNexus itself — available tools, how to query the knowledge graph, MCP resources, graph schema, or workflow reference. Examples: "What GitNexus tools are available?", "How do I use GitNexus?"', + description: + 'Use when the user asks about GitNexus itself — available tools, how to query the knowledge graph, MCP resources, graph schema, or workflow reference. Examples: "What GitNexus tools are available?", "How do I use GitNexus?"', }, { name: 'gitnexus-cli', - description: 'Use when the user needs to run GitNexus CLI commands like analyze/index a repo, check status, clean the index, generate a wiki, or list indexed repos. Examples: "Index this repo", "Reanalyze the codebase", "Generate a wiki"', + description: + 'Use when the user needs to run GitNexus CLI commands like analyze/index a repo, check status, clean the index, generate a wiki, or list indexed repos. Examples: "Index this repo", "Reanalyze the codebase", "Generate a wiki"', }, ]; @@ -290,7 +303,7 @@ export async function generateAIContextFiles( projectName: string, stats: RepoStats, generatedSkills?: GeneratedSkillInfo[], - options?: AIContextOptions + options?: AIContextOptions, ): Promise<{ files: string[] }> { const content = generateGitNexusContent(projectName, stats, generatedSkills); const createdFiles: string[] = []; diff --git a/gitnexus/src/cli/analyze.ts b/gitnexus/src/cli/analyze.ts index 75c0d029f6..5fdad0df77 100644 --- a/gitnexus/src/cli/analyze.ts +++ b/gitnexus/src/cli/analyze.ts @@ -9,18 +9,34 @@ import { execFileSync } from 'child_process'; import v8 from 'v8'; import cliProgress from 'cli-progress'; import { runPipelineFromRepo } from '../core/ingestion/pipeline.js'; -import { initLbug, loadGraphToLbug, getLbugStats, executeQuery, executeWithReusedStatement, closeLbug, createFTSIndex, loadCachedEmbeddings } from '../core/lbug/lbug-adapter.js'; +import { + initLbug, + loadGraphToLbug, + getLbugStats, + executeQuery, + executeWithReusedStatement, + closeLbug, + createFTSIndex, + loadCachedEmbeddings, +} from '../core/lbug/lbug-adapter.js'; // Embedding imports are lazy (dynamic import) so onnxruntime-node is never // loaded when embeddings are not requested. This avoids crashes on Node // versions whose ABI is not yet supported by the native binary (#89). // disposeEmbedder intentionally not called — ONNX Runtime segfaults on cleanup (see #38) -import { getStoragePaths, saveMeta, loadMeta, addToGitignore, registerRepo, getGlobalRegistryPath, cleanupOldKuzuFiles } from '../storage/repo-manager.js'; +import { + getStoragePaths, + saveMeta, + loadMeta, + addToGitignore, + registerRepo, + getGlobalRegistryPath, + cleanupOldKuzuFiles, +} from '../storage/repo-manager.js'; import { getCurrentCommit, getGitRoot, hasGitDir } from '../storage/git.js'; import { generateAIContextFiles } from './ai-context.js'; import { generateSkillFiles, type GeneratedSkillInfo } from './skill-gen.js'; import fs from 'fs/promises'; - const HEAP_MB = 8192; const HEAP_FLAG = `--max-old-space-size=${HEAP_MB}`; @@ -73,10 +89,7 @@ const PHASE_LABELS: Record = { done: 'Done', }; -export const analyzeCommand = async ( - inputPath?: string, - options?: AnalyzeOptions -) => { +export const analyzeCommand = async (inputPath?: string, options?: AnalyzeOptions) => { if (ensureHeap()) return; if (options?.verbose) { @@ -92,7 +105,9 @@ export const analyzeCommand = async ( const gitRoot = getGitRoot(process.cwd()); if (!gitRoot) { if (!options?.skipGit) { - console.log(' Not inside a git repository.\n Tip: pass --skip-git to index any folder without a .git directory.\n'); + console.log( + ' Not inside a git repository.\n Tip: pass --skip-git to index any folder without a .git directory.\n', + ); process.exitCode = 1; return; } @@ -105,12 +120,16 @@ export const analyzeCommand = async ( const repoHasGit = hasGitDir(repoPath); if (!repoHasGit && !options?.skipGit) { - console.log(' Not a git repository.\n Tip: pass --skip-git to index any folder without a .git directory.\n'); + console.log( + ' Not a git repository.\n Tip: pass --skip-git to index any folder without a .git directory.\n', + ); process.exitCode = 1; return; } if (!repoHasGit) { - console.log(' Warning: no .git directory found \u2014 commit-tracking and incremental updates disabled.\n'); + console.log( + ' Warning: no .git directory found \u2014 commit-tracking and incremental updates disabled.\n', + ); } const { storagePath, lbugPath } = getStoragePaths(repoPath); @@ -125,7 +144,12 @@ export const analyzeCommand = async ( const currentCommit = repoHasGit ? getCurrentCommit(repoPath) : ''; const existingMeta = await loadMeta(storagePath); - if (existingMeta && !options?.force && !options?.skills && existingMeta.lastCommit === currentCommit) { + if ( + existingMeta && + !options?.force && + !options?.skills && + existingMeta.lastCommit === currentCommit + ) { // Non-git folders have currentCommit = '' — always rebuild since we can't detect changes if (currentCommit !== '') { console.log(' Already up to date\n'); @@ -134,20 +158,25 @@ export const analyzeCommand = async ( } if (process.env.GITNEXUS_NO_GITIGNORE) { - console.log(' GITNEXUS_NO_GITIGNORE is set — skipping .gitignore (still reading .gitnexusignore)\n'); + console.log( + ' GITNEXUS_NO_GITIGNORE is set — skipping .gitignore (still reading .gitnexusignore)\n', + ); } // Single progress bar for entire pipeline - const bar = new cliProgress.SingleBar({ - format: ' {bar} {percentage}% | {phase}', - barCompleteChar: '\u2588', - barIncompleteChar: '\u2591', - hideCursor: true, - barGlue: '', - autopadding: true, - clearOnComplete: false, - stopOnComplete: false, - }, cliProgress.Presets.shades_grey); + const bar = new cliProgress.SingleBar( + { + format: ' {bar} {percentage}% | {phase}', + barCompleteChar: '\u2588', + barIncompleteChar: '\u2591', + hideCursor: true, + barGlue: '', + autopadding: true, + clearOnComplete: false, + stopOnComplete: false, + }, + cliProgress.Presets.shades_grey, + ); bar.start(100, 0, { phase: 'Initializing...' }); @@ -158,7 +187,9 @@ export const analyzeCommand = async ( aborted = true; bar.stop(); console.log('\n Interrupted — cleaning up...'); - closeLbug().catch(() => {}).finally(() => process.exit(130)); + closeLbug() + .catch(() => {}) + .finally(() => process.exit(130)); }; process.on('SIGINT', sigintHandler); @@ -170,7 +201,7 @@ export const analyzeCommand = async ( const barLog = (...args: any[]) => { // Clear the bar line, print the message, then let the next bar.update redraw process.stdout.write('\x1b[2K\r'); - origLog(args.map(a => (typeof a === 'string' ? a : String(a))).join(' ')); + origLog(args.map((a) => (typeof a === 'string' ? a : String(a))).join(' ')); }; console.log = barLog; console.warn = barLog; @@ -183,7 +214,10 @@ export const analyzeCommand = async ( /** Update bar with phase label + elapsed seconds (shown after 3s). */ const updateBar = (value: number, phaseLabel: string) => { - if (phaseLabel !== lastPhaseLabel) { lastPhaseLabel = phaseLabel; phaseStart = Date.now(); } + if (phaseLabel !== lastPhaseLabel) { + lastPhaseLabel = phaseLabel; + phaseStart = Date.now(); + } const elapsed = Math.round((Date.now() - phaseStart) / 1000); const display = elapsed >= 3 ? `${phaseLabel} (${elapsed}s)` : phaseLabel; bar.update(value, { phase: display }); @@ -214,7 +248,9 @@ export const analyzeCommand = async ( cachedEmbeddings = cached.embeddings; await closeLbug(); } catch { - try { await closeLbug(); } catch {} + try { + await closeLbug(); + } catch {} } } @@ -231,17 +267,24 @@ export const analyzeCommand = async ( await closeLbug(); const lbugFiles = [lbugPath, `${lbugPath}.wal`, `${lbugPath}.lock`]; for (const f of lbugFiles) { - try { await fs.rm(f, { recursive: true, force: true }); } catch {} + try { + await fs.rm(f, { recursive: true, force: true }); + } catch {} } const t0Lbug = Date.now(); await initLbug(lbugPath); let lbugMsgCount = 0; - const lbugResult = await loadGraphToLbug(pipelineResult.graph, pipelineResult.repoPath, storagePath, (msg) => { - lbugMsgCount++; - const progress = Math.min(84, 60 + Math.round((lbugMsgCount / (lbugMsgCount + 10)) * 24)); - updateBar(progress, msg); - }); + const lbugResult = await loadGraphToLbug( + pipelineResult.graph, + pipelineResult.repoPath, + storagePath, + (msg) => { + lbugMsgCount++; + const progress = Math.min(84, 60 + Math.round((lbugMsgCount / (lbugMsgCount + 10)) * 24)); + updateBar(progress, msg); + }, + ); const lbugTime = ((Date.now() - t0Lbug) / 1000).toFixed(1); const lbugWarnings = lbugResult.warnings; @@ -267,7 +310,9 @@ export const analyzeCommand = async ( const { EMBEDDING_DIMS } = await import('../core/lbug/schema.js'); if (cachedDims !== EMBEDDING_DIMS) { // Dimensions changed (e.g. switched embedding model) — discard cache and re-embed all - console.error(`⚠️ Embedding dimensions changed (${cachedDims}d → ${EMBEDDING_DIMS}d), discarding cache`); + console.error( + `⚠️ Embedding dimensions changed (${cachedDims}d → ${EMBEDDING_DIMS}d), discarding cache`, + ); cachedEmbeddings = []; cachedEmbeddingNodeIds = new Set(); } else { @@ -275,13 +320,15 @@ export const analyzeCommand = async ( const EMBED_BATCH = 200; for (let i = 0; i < cachedEmbeddings.length; i += EMBED_BATCH) { const batch = cachedEmbeddings.slice(i, i + EMBED_BATCH); - const paramsList = batch.map(e => ({ nodeId: e.nodeId, embedding: e.embedding })); + const paramsList = batch.map((e) => ({ nodeId: e.nodeId, embedding: e.embedding })); try { await executeWithReusedStatement( `CREATE (e:CodeEmbedding {nodeId: $nodeId, embedding: $embedding})`, paramsList, ); - } catch { /* some may fail if node was removed, that's fine */ } + } catch { + /* some may fail if node was removed, that's fine */ + } } } } @@ -311,9 +358,12 @@ export const analyzeCommand = async ( executeWithReusedStatement, (progress) => { const scaled = 90 + Math.round((progress.percent / 100) * 8); - const label = progress.phase === 'loading-model' - ? (httpMode ? 'Connecting to embedding endpoint...' : 'Loading embedding model...') - : `Embedding ${progress.nodesProcessed || 0}/${progress.totalNodes || '?'}`; + const label = + progress.phase === 'loading-model' + ? httpMode + ? 'Connecting to embedding endpoint...' + : 'Loading embedding model...' + : `Embedding ${progress.nodesProcessed || 0}/${progress.totalNodes || '?'}`; updateBar(scaled, label); }, {}, @@ -330,7 +380,9 @@ export const analyzeCommand = async ( try { const embResult = await executeQuery(`MATCH (e:CodeEmbedding) RETURN count(e) AS cnt`); embeddingCount = embResult?.[0]?.cnt ?? 0; - } catch { /* table may not exist if embeddings never ran */ } + } catch { + /* table may not exist if embeddings never ran */ + } const meta = { repoPath, @@ -362,7 +414,7 @@ export const analyzeCommand = async ( const label = c.heuristicLabel || c.label || 'Unknown'; groups.set(label, (groups.get(label) || 0) + c.symbolCount); } - aggregatedClusterCount = Array.from(groups.values()).filter(count => count >= 5).length; + aggregatedClusterCount = Array.from(groups.values()).filter((count) => count >= 5).length; } let generatedSkills: GeneratedSkillInfo[] = []; @@ -372,16 +424,23 @@ export const analyzeCommand = async ( generatedSkills = skillResult.skills; } - const aiContext = await generateAIContextFiles(repoPath, storagePath, projectName, { - files: pipelineResult.totalFileCount, - nodes: stats.nodes, - edges: stats.edges, - communities: pipelineResult.communityResult?.stats.totalCommunities, - clusters: aggregatedClusterCount, - processes: pipelineResult.processResult?.stats.totalProcesses, - }, generatedSkills, { - skipAgentsMd: options?.skipAgentsMd, - }); + const aiContext = await generateAIContextFiles( + repoPath, + storagePath, + projectName, + { + files: pipelineResult.totalFileCount, + nodes: stats.nodes, + edges: stats.edges, + communities: pipelineResult.communityResult?.stats.totalCommunities, + clusters: aggregatedClusterCount, + processes: pipelineResult.processResult?.stats.totalProcesses, + }, + generatedSkills, + { + skipAgentsMd: options?.skipAgentsMd, + }, + ); await closeLbug(); // Note: we intentionally do NOT call disposeEmbedder() here. @@ -402,9 +461,15 @@ export const analyzeCommand = async ( // ── Summary ─────────────────────────────────────────────────────── const embeddingsCached = cachedEmbeddings.length > 0; - console.log(`\n Repository indexed successfully (${totalTime}s)${embeddingsCached ? ` [${cachedEmbeddings.length} embeddings cached]` : ''}\n`); - console.log(` ${stats.nodes.toLocaleString()} nodes | ${stats.edges.toLocaleString()} edges | ${pipelineResult.communityResult?.stats.totalCommunities || 0} clusters | ${pipelineResult.processResult?.stats.totalProcesses || 0} flows`); - console.log(` LadybugDB ${lbugTime}s | FTS ${ftsTime}s | Embeddings ${embeddingSkipped ? embeddingSkipReason : embeddingTime + 's'}`); + console.log( + `\n Repository indexed successfully (${totalTime}s)${embeddingsCached ? ` [${cachedEmbeddings.length} embeddings cached]` : ''}\n`, + ); + console.log( + ` ${stats.nodes.toLocaleString()} nodes | ${stats.edges.toLocaleString()} edges | ${pipelineResult.communityResult?.stats.totalCommunities || 0} clusters | ${pipelineResult.processResult?.stats.totalProcesses || 0} flows`, + ); + console.log( + ` LadybugDB ${lbugTime}s | FTS ${ftsTime}s | Embeddings ${embeddingSkipped ? embeddingSkipReason : embeddingTime + 's'}`, + ); console.log(` ${repoPath}`); if (aiContext.files.length > 0) { @@ -417,7 +482,9 @@ export const analyzeCommand = async ( const m = w.match(/\((\d+) edges\)/); return sum + (m ? parseInt(m[1]) : 0); }, 0); - console.log(` Note: ${totalFallback} edges across ${lbugWarnings.length} types inserted via fallback (schema will be updated in next release)`); + console.log( + ` Note: ${totalFallback} edges across ${lbugWarnings.length} types inserted via fallback (schema will be updated in next release)`, + ); } try { From d1d5dacc2d9878db97168c6d005ba38fd570571c Mon Sep 17 00:00:00 2001 From: Gabe Campbell Date: Sat, 28 Mar 2026 13:53:56 -0500 Subject: [PATCH 5/6] feat: add skipAgentsMd option to skip AGENTS.md and CLAUDE.md updates --- gitnexus/src/cli/ai-context.ts | 5 +- gitnexus/src/cli/analyze.ts | 444 ++++++++++++++++--------------- gitnexus/src/core/run-analyze.ts | 25 +- 3 files changed, 252 insertions(+), 222 deletions(-) diff --git a/gitnexus/src/cli/ai-context.ts b/gitnexus/src/cli/ai-context.ts index e9d8accdd6..81aaa496d8 100644 --- a/gitnexus/src/cli/ai-context.ts +++ b/gitnexus/src/cli/ai-context.ts @@ -24,7 +24,7 @@ interface RepoStats { processes?: number; } -interface AIContextOptions { +export interface AIContextOptions { skipAgentsMd?: boolean; } @@ -318,6 +318,9 @@ export async function generateAIContextFiles( const claudePath = path.join(repoPath, 'CLAUDE.md'); const claudeResult = await upsertGitNexusSection(claudePath, content); createdFiles.push(`CLAUDE.md (${claudeResult})`); + } else { + createdFiles.push('AGENTS.md (skipped via --skip-agents-md)'); + createdFiles.push('CLAUDE.md (skipped via --skip-agents-md)'); } // Install skills to .claude/skills/gitnexus/ diff --git a/gitnexus/src/cli/analyze.ts b/gitnexus/src/cli/analyze.ts index 5fdad0df77..4652fd36f0 100644 --- a/gitnexus/src/cli/analyze.ts +++ b/gitnexus/src/cli/analyze.ts @@ -254,247 +254,265 @@ export const analyzeCommand = async (inputPath?: string, options?: AnalyzeOption } } - // ── Phase 1: Full Pipeline (0–60%) ───────────────────────────────── - const pipelineResult = await runPipelineFromRepo(repoPath, (progress) => { - const phaseLabel = PHASE_LABELS[progress.phase] || progress.phase; - const scaled = Math.round(progress.percent * 0.6); - updateBar(scaled, phaseLabel); - }); - - // ── Phase 2: LadybugDB (60–85%) ────────────────────────────────────── - updateBar(60, 'Loading into LadybugDB...'); - - await closeLbug(); - const lbugFiles = [lbugPath, `${lbugPath}.wal`, `${lbugPath}.lock`]; - for (const f of lbugFiles) { - try { - await fs.rm(f, { recursive: true, force: true }); - } catch {} - } + try { + // ── Phase 1: Full Pipeline (0–60%) ───────────────────────────────── + const pipelineResult = await runPipelineFromRepo(repoPath, (progress) => { + const phaseLabel = PHASE_LABELS[progress.phase] || progress.phase; + const scaled = Math.round(progress.percent * 0.6); + updateBar(scaled, phaseLabel); + }); - const t0Lbug = Date.now(); - await initLbug(lbugPath); - let lbugMsgCount = 0; - const lbugResult = await loadGraphToLbug( - pipelineResult.graph, - pipelineResult.repoPath, - storagePath, - (msg) => { - lbugMsgCount++; - const progress = Math.min(84, 60 + Math.round((lbugMsgCount / (lbugMsgCount + 10)) * 24)); - updateBar(progress, msg); - }, - ); - const lbugTime = ((Date.now() - t0Lbug) / 1000).toFixed(1); - const lbugWarnings = lbugResult.warnings; + // ── Phase 2: LadybugDB (60–85%) ────────────────────────────────────── + updateBar(60, 'Loading into LadybugDB...'); - // ── Phase 3: FTS (85–90%) ───────────────────────────────────────── - updateBar(85, 'Creating search indexes...'); + await closeLbug(); + const lbugFiles = [lbugPath, `${lbugPath}.wal`, `${lbugPath}.lock`]; + for (const f of lbugFiles) { + try { + await fs.rm(f, { recursive: true, force: true }); + } catch {} + } - const t0Fts = Date.now(); - try { - await createFTSIndex('File', 'file_fts', ['name', 'content']); - await createFTSIndex('Function', 'function_fts', ['name', 'content']); - await createFTSIndex('Class', 'class_fts', ['name', 'content']); - await createFTSIndex('Method', 'method_fts', ['name', 'content']); - await createFTSIndex('Interface', 'interface_fts', ['name', 'content']); - } catch (e: any) { - // Non-fatal — FTS is best-effort - } - const ftsTime = ((Date.now() - t0Fts) / 1000).toFixed(1); - - // ── Phase 3.5: Re-insert cached embeddings ──────────────────────── - if (cachedEmbeddings.length > 0) { - // Check if cached embedding dimensions match current schema - const cachedDims = cachedEmbeddings[0].embedding.length; - const { EMBEDDING_DIMS } = await import('../core/lbug/schema.js'); - if (cachedDims !== EMBEDDING_DIMS) { - // Dimensions changed (e.g. switched embedding model) — discard cache and re-embed all - console.error( - `⚠️ Embedding dimensions changed (${cachedDims}d → ${EMBEDDING_DIMS}d), discarding cache`, - ); - cachedEmbeddings = []; - cachedEmbeddingNodeIds = new Set(); - } else { - updateBar(88, `Restoring ${cachedEmbeddings.length} cached embeddings...`); - const EMBED_BATCH = 200; - for (let i = 0; i < cachedEmbeddings.length; i += EMBED_BATCH) { - const batch = cachedEmbeddings.slice(i, i + EMBED_BATCH); - const paramsList = batch.map((e) => ({ nodeId: e.nodeId, embedding: e.embedding })); - try { - await executeWithReusedStatement( - `CREATE (e:CodeEmbedding {nodeId: $nodeId, embedding: $embedding})`, - paramsList, - ); - } catch { - /* some may fail if node was removed, that's fine */ + const t0Lbug = Date.now(); + await initLbug(lbugPath); + let lbugMsgCount = 0; + const lbugResult = await loadGraphToLbug( + pipelineResult.graph, + pipelineResult.repoPath, + storagePath, + (msg) => { + lbugMsgCount++; + const progress = Math.min(84, 60 + Math.round((lbugMsgCount / (lbugMsgCount + 10)) * 24)); + updateBar(progress, msg); + }, + ); + const lbugTime = ((Date.now() - t0Lbug) / 1000).toFixed(1); + const lbugWarnings = lbugResult.warnings; + + // ── Phase 3: FTS (85–90%) ───────────────────────────────────────── + updateBar(85, 'Creating search indexes...'); + + const t0Fts = Date.now(); + try { + await createFTSIndex('File', 'file_fts', ['name', 'content']); + await createFTSIndex('Function', 'function_fts', ['name', 'content']); + await createFTSIndex('Class', 'class_fts', ['name', 'content']); + await createFTSIndex('Method', 'method_fts', ['name', 'content']); + await createFTSIndex('Interface', 'interface_fts', ['name', 'content']); + } catch (e: any) { + // Non-fatal — FTS is best-effort + } + const ftsTime = ((Date.now() - t0Fts) / 1000).toFixed(1); + + // ── Phase 3.5: Re-insert cached embeddings ──────────────────────── + if (cachedEmbeddings.length > 0) { + // Check if cached embedding dimensions match current schema + const cachedDims = cachedEmbeddings[0].embedding.length; + const { EMBEDDING_DIMS } = await import('../core/lbug/schema.js'); + if (cachedDims !== EMBEDDING_DIMS) { + // Dimensions changed (e.g. switched embedding model) — discard cache and re-embed all + console.error( + `⚠️ Embedding dimensions changed (${cachedDims}d → ${EMBEDDING_DIMS}d), discarding cache`, + ); + cachedEmbeddings = []; + cachedEmbeddingNodeIds = new Set(); + } else { + updateBar(88, `Restoring ${cachedEmbeddings.length} cached embeddings...`); + const EMBED_BATCH = 200; + for (let i = 0; i < cachedEmbeddings.length; i += EMBED_BATCH) { + const batch = cachedEmbeddings.slice(i, i + EMBED_BATCH); + const paramsList = batch.map((e) => ({ nodeId: e.nodeId, embedding: e.embedding })); + try { + await executeWithReusedStatement( + `CREATE (e:CodeEmbedding {nodeId: $nodeId, embedding: $embedding})`, + paramsList, + ); + } catch { + /* some may fail if node was removed, that's fine */ + } } } } - } - // ── Phase 4: Embeddings (90–98%) ────────────────────────────────── - const stats = await getLbugStats(); - let embeddingTime = '0.0'; - let embeddingSkipped = true; - let embeddingSkipReason = 'off (use --embeddings to enable)'; + // ── Phase 4: Embeddings (90–98%) ────────────────────────────────── + const stats = await getLbugStats(); + let embeddingTime = '0.0'; + let embeddingSkipped = true; + let embeddingSkipReason = 'off (use --embeddings to enable)'; + + if (options?.embeddings) { + if (stats.nodes > EMBEDDING_NODE_LIMIT) { + embeddingSkipReason = `skipped (${stats.nodes.toLocaleString()} nodes > ${EMBEDDING_NODE_LIMIT.toLocaleString()} limit)`; + } else { + embeddingSkipped = false; + } + } - if (options?.embeddings) { - if (stats.nodes > EMBEDDING_NODE_LIMIT) { - embeddingSkipReason = `skipped (${stats.nodes.toLocaleString()} nodes > ${EMBEDDING_NODE_LIMIT.toLocaleString()} limit)`; - } else { - embeddingSkipped = false; + if (!embeddingSkipped) { + const { isHttpMode } = await import('../core/embeddings/http-client.js'); + const httpMode = isHttpMode(); + updateBar( + 90, + httpMode ? 'Connecting to embedding endpoint...' : 'Loading embedding model...', + ); + const t0Emb = Date.now(); + const { runEmbeddingPipeline } = await import('../core/embeddings/embedding-pipeline.js'); + await runEmbeddingPipeline( + executeQuery, + executeWithReusedStatement, + (progress) => { + const scaled = 90 + Math.round((progress.percent / 100) * 8); + const label = + progress.phase === 'loading-model' + ? httpMode + ? 'Connecting to embedding endpoint...' + : 'Loading embedding model...' + : `Embedding ${progress.nodesProcessed || 0}/${progress.totalNodes || '?'}`; + updateBar(scaled, label); + }, + {}, + cachedEmbeddingNodeIds.size > 0 ? cachedEmbeddingNodeIds : undefined, + ); + embeddingTime = ((Date.now() - t0Emb) / 1000).toFixed(1); } - } - if (!embeddingSkipped) { - const { isHttpMode } = await import('../core/embeddings/http-client.js'); - const httpMode = isHttpMode(); - updateBar(90, httpMode ? 'Connecting to embedding endpoint...' : 'Loading embedding model...'); - const t0Emb = Date.now(); - const { runEmbeddingPipeline } = await import('../core/embeddings/embedding-pipeline.js'); - await runEmbeddingPipeline( - executeQuery, - executeWithReusedStatement, - (progress) => { - const scaled = 90 + Math.round((progress.percent / 100) * 8); - const label = - progress.phase === 'loading-model' - ? httpMode - ? 'Connecting to embedding endpoint...' - : 'Loading embedding model...' - : `Embedding ${progress.nodesProcessed || 0}/${progress.totalNodes || '?'}`; - updateBar(scaled, label); - }, - {}, - cachedEmbeddingNodeIds.size > 0 ? cachedEmbeddingNodeIds : undefined, - ); - embeddingTime = ((Date.now() - t0Emb) / 1000).toFixed(1); - } + // ── Phase 5: Finalize (98–100%) ─────────────────────────────────── + updateBar(98, 'Saving metadata...'); - // ── Phase 5: Finalize (98–100%) ─────────────────────────────────── - updateBar(98, 'Saving metadata...'); + // Count embeddings in the index (cached + newly generated) + let embeddingCount = 0; + try { + const embResult = await executeQuery(`MATCH (e:CodeEmbedding) RETURN count(e) AS cnt`); + embeddingCount = embResult?.[0]?.cnt ?? 0; + } catch { + /* table may not exist if embeddings never ran */ + } - // Count embeddings in the index (cached + newly generated) - let embeddingCount = 0; - try { - const embResult = await executeQuery(`MATCH (e:CodeEmbedding) RETURN count(e) AS cnt`); - embeddingCount = embResult?.[0]?.cnt ?? 0; - } catch { - /* table may not exist if embeddings never ran */ - } + const meta = { + repoPath, + lastCommit: currentCommit, + indexedAt: new Date().toISOString(), + stats: { + files: pipelineResult.totalFileCount, + nodes: stats.nodes, + edges: stats.edges, + communities: pipelineResult.communityResult?.stats.totalCommunities, + processes: pipelineResult.processResult?.stats.totalProcesses, + embeddings: embeddingCount, + }, + }; + await saveMeta(storagePath, meta); + await registerRepo(repoPath, meta); + // Only attempt to update .gitignore when a .git directory is present. + // Use hasGitDir (filesystem check) rather than git CLI subprocess + // so we skip correctly for --skip-git folders even if git CLI is available. + if (hasGitDir(repoPath)) { + await addToGitignore(repoPath); + } - const meta = { - repoPath, - lastCommit: currentCommit, - indexedAt: new Date().toISOString(), - stats: { - files: pipelineResult.totalFileCount, - nodes: stats.nodes, - edges: stats.edges, - communities: pipelineResult.communityResult?.stats.totalCommunities, - processes: pipelineResult.processResult?.stats.totalProcesses, - embeddings: embeddingCount, - }, - }; - await saveMeta(storagePath, meta); - await registerRepo(repoPath, meta); - // Only attempt to update .gitignore when a .git directory is present. - // Use hasGitDir (filesystem check) rather than git CLI subprocess - // so we skip correctly for --skip-git folders even if git CLI is available. - if (hasGitDir(repoPath)) { - await addToGitignore(repoPath); - } + const projectName = path.basename(repoPath); + let aggregatedClusterCount = 0; + if (pipelineResult.communityResult?.communities) { + const groups = new Map(); + for (const c of pipelineResult.communityResult.communities) { + const label = c.heuristicLabel || c.label || 'Unknown'; + groups.set(label, (groups.get(label) || 0) + c.symbolCount); + } + aggregatedClusterCount = Array.from(groups.values()).filter((count) => count >= 5).length; + } - const projectName = path.basename(repoPath); - let aggregatedClusterCount = 0; - if (pipelineResult.communityResult?.communities) { - const groups = new Map(); - for (const c of pipelineResult.communityResult.communities) { - const label = c.heuristicLabel || c.label || 'Unknown'; - groups.set(label, (groups.get(label) || 0) + c.symbolCount); + let generatedSkills: GeneratedSkillInfo[] = []; + if (options?.skills && pipelineResult.communityResult) { + updateBar(99, 'Generating skill files...'); + const skillResult = await generateSkillFiles(repoPath, projectName, pipelineResult); + generatedSkills = skillResult.skills; } - aggregatedClusterCount = Array.from(groups.values()).filter((count) => count >= 5).length; - } - let generatedSkills: GeneratedSkillInfo[] = []; - if (options?.skills && pipelineResult.communityResult) { - updateBar(99, 'Generating skill files...'); - const skillResult = await generateSkillFiles(repoPath, projectName, pipelineResult); - generatedSkills = skillResult.skills; - } + const aiContext = await generateAIContextFiles( + repoPath, + storagePath, + projectName, + { + files: pipelineResult.totalFileCount, + nodes: stats.nodes, + edges: stats.edges, + communities: pipelineResult.communityResult?.stats.totalCommunities, + clusters: aggregatedClusterCount, + processes: pipelineResult.processResult?.stats.totalProcesses, + }, + generatedSkills, + { + skipAgentsMd: options?.skipAgentsMd, + }, + ); - const aiContext = await generateAIContextFiles( - repoPath, - storagePath, - projectName, - { - files: pipelineResult.totalFileCount, - nodes: stats.nodes, - edges: stats.edges, - communities: pipelineResult.communityResult?.stats.totalCommunities, - clusters: aggregatedClusterCount, - processes: pipelineResult.processResult?.stats.totalProcesses, - }, - generatedSkills, - { - skipAgentsMd: options?.skipAgentsMd, - }, - ); + await closeLbug(); + // Note: we intentionally do NOT call disposeEmbedder() here. + // ONNX Runtime's native cleanup segfaults on macOS and some Linux configs. + // Since the process exits immediately after, Node.js reclaims everything. - await closeLbug(); - // Note: we intentionally do NOT call disposeEmbedder() here. - // ONNX Runtime's native cleanup segfaults on macOS and some Linux configs. - // Since the process exits immediately after, Node.js reclaims everything. + const totalTime = ((Date.now() - t0Global) / 1000).toFixed(1); - const totalTime = ((Date.now() - t0Global) / 1000).toFixed(1); + clearInterval(elapsedTimer); + process.removeListener('SIGINT', sigintHandler); - clearInterval(elapsedTimer); - process.removeListener('SIGINT', sigintHandler); + console.log = origLog; + console.warn = origWarn; + console.error = origError; - console.log = origLog; - console.warn = origWarn; - console.error = origError; + bar.update(100, { phase: 'Done' }); + bar.stop(); - bar.update(100, { phase: 'Done' }); - bar.stop(); + // ── Summary ─────────────────────────────────────────────────────── + const embeddingsCached = cachedEmbeddings.length > 0; + console.log( + `\n Repository indexed successfully (${totalTime}s)${embeddingsCached ? ` [${cachedEmbeddings.length} embeddings cached]` : ''}\n`, + ); + console.log( + ` ${stats.nodes.toLocaleString()} nodes | ${stats.edges.toLocaleString()} edges | ${pipelineResult.communityResult?.stats.totalCommunities || 0} clusters | ${pipelineResult.processResult?.stats.totalProcesses || 0} flows`, + ); + console.log( + ` LadybugDB ${lbugTime}s | FTS ${ftsTime}s | Embeddings ${embeddingSkipped ? embeddingSkipReason : embeddingTime + 's'}`, + ); + console.log(` ${repoPath}`); - // ── Summary ─────────────────────────────────────────────────────── - const embeddingsCached = cachedEmbeddings.length > 0; - console.log( - `\n Repository indexed successfully (${totalTime}s)${embeddingsCached ? ` [${cachedEmbeddings.length} embeddings cached]` : ''}\n`, - ); - console.log( - ` ${stats.nodes.toLocaleString()} nodes | ${stats.edges.toLocaleString()} edges | ${pipelineResult.communityResult?.stats.totalCommunities || 0} clusters | ${pipelineResult.processResult?.stats.totalProcesses || 0} flows`, - ); - console.log( - ` LadybugDB ${lbugTime}s | FTS ${ftsTime}s | Embeddings ${embeddingSkipped ? embeddingSkipReason : embeddingTime + 's'}`, - ); - console.log(` ${repoPath}`); + if (aiContext.files.length > 0) { + console.log(` Context: ${aiContext.files.join(', ')}`); + } - if (aiContext.files.length > 0) { - console.log(` Context: ${aiContext.files.join(', ')}`); - } + // Show a quiet summary if some edge types needed fallback insertion + if (lbugWarnings.length > 0) { + const totalFallback = lbugWarnings.reduce((sum, w) => { + const m = w.match(/\((\d+) edges\)/); + return sum + (m ? parseInt(m[1]) : 0); + }, 0); + console.log( + ` Note: ${totalFallback} edges across ${lbugWarnings.length} types inserted via fallback (schema will be updated in next release)`, + ); + } - // Show a quiet summary if some edge types needed fallback insertion - if (lbugWarnings.length > 0) { - const totalFallback = lbugWarnings.reduce((sum, w) => { - const m = w.match(/\((\d+) edges\)/); - return sum + (m ? parseInt(m[1]) : 0); - }, 0); - console.log( - ` Note: ${totalFallback} edges across ${lbugWarnings.length} types inserted via fallback (schema will be updated in next release)`, - ); - } + try { + await fs.access(getGlobalRegistryPath()); + } catch { + console.log('\n Tip: Run `gitnexus setup` to configure MCP for your editor.'); + } - try { - await fs.access(getGlobalRegistryPath()); - } catch { - console.log('\n Tip: Run `gitnexus setup` to configure MCP for your editor.'); + console.log(''); + } catch (err: any) { + clearInterval(elapsedTimer); + process.removeListener('SIGINT', sigintHandler); + console.log = origLog; + console.warn = origWarn; + console.error = origError; + bar.stop(); + try { + await closeLbug(); + } catch {} + console.error(`\n Analysis failed: ${err.message}\n`); + process.exitCode = 1; + return; } - console.log(''); - // LadybugDB's native module holds open handles that prevent Node from exiting. // ONNX Runtime also registers native atexit hooks that segfault on some // platforms (#38, #40). Force-exit to ensure clean termination. diff --git a/gitnexus/src/core/run-analyze.ts b/gitnexus/src/core/run-analyze.ts index 3a6425ba79..e8a108c716 100644 --- a/gitnexus/src/core/run-analyze.ts +++ b/gitnexus/src/core/run-analyze.ts @@ -46,6 +46,8 @@ export interface AnalyzeOptions { force?: boolean; embeddings?: boolean; skipGit?: boolean; + /** Skip AGENTS.md and CLAUDE.md gitnexus block updates. */ + skipAgentsMd?: boolean; } export interface AnalyzeResult { @@ -312,14 +314,21 @@ export async function runFullAnalysis( } try { - await generateAIContextFiles(repoPath, storagePath, projectName, { - files: pipelineResult.totalFileCount, - nodes: stats.nodes, - edges: stats.edges, - communities: pipelineResult.communityResult?.stats.totalCommunities, - clusters: aggregatedClusterCount, - processes: pipelineResult.processResult?.stats.totalProcesses, - }); + await generateAIContextFiles( + repoPath, + storagePath, + projectName, + { + files: pipelineResult.totalFileCount, + nodes: stats.nodes, + edges: stats.edges, + communities: pipelineResult.communityResult?.stats.totalCommunities, + clusters: aggregatedClusterCount, + processes: pipelineResult.processResult?.stats.totalProcesses, + }, + undefined, + { skipAgentsMd: options.skipAgentsMd }, + ); } catch { // Best-effort — don't fail the entire analysis for context file issues } From 1e921659793562fcf5118ecea40a3fbadab413c8 Mon Sep 17 00:00:00 2001 From: Gabe Campbell Date: Sat, 28 Mar 2026 14:09:40 -0500 Subject: [PATCH 6/6] fixed bad merge --- gitnexus/src/cli/analyze.ts | 406 ++++++++---------------------------- 1 file changed, 90 insertions(+), 316 deletions(-) diff --git a/gitnexus/src/cli/analyze.ts b/gitnexus/src/cli/analyze.ts index 4652fd36f0..c77903de0b 100644 --- a/gitnexus/src/cli/analyze.ts +++ b/gitnexus/src/cli/analyze.ts @@ -2,39 +2,20 @@ * Analyze Command * * Indexes a repository and stores the knowledge graph in .gitnexus/ + * + * Delegates core analysis to the shared runFullAnalysis orchestrator. + * This CLI wrapper handles: heap management, progress bar, SIGINT, + * skill generation (--skills), summary output, and process.exit(). */ import path from 'path'; import { execFileSync } from 'child_process'; import v8 from 'v8'; import cliProgress from 'cli-progress'; -import { runPipelineFromRepo } from '../core/ingestion/pipeline.js'; -import { - initLbug, - loadGraphToLbug, - getLbugStats, - executeQuery, - executeWithReusedStatement, - closeLbug, - createFTSIndex, - loadCachedEmbeddings, -} from '../core/lbug/lbug-adapter.js'; -// Embedding imports are lazy (dynamic import) so onnxruntime-node is never -// loaded when embeddings are not requested. This avoids crashes on Node -// versions whose ABI is not yet supported by the native binary (#89). -// disposeEmbedder intentionally not called — ONNX Runtime segfaults on cleanup (see #38) -import { - getStoragePaths, - saveMeta, - loadMeta, - addToGitignore, - registerRepo, - getGlobalRegistryPath, - cleanupOldKuzuFiles, -} from '../storage/repo-manager.js'; -import { getCurrentCommit, getGitRoot, hasGitDir } from '../storage/git.js'; -import { generateAIContextFiles } from './ai-context.js'; -import { generateSkillFiles, type GeneratedSkillInfo } from './skill-gen.js'; +import { closeLbug } from '../core/lbug/lbug-adapter.js'; +import { getStoragePaths, getGlobalRegistryPath } from '../storage/repo-manager.js'; +import { getGitRoot, hasGitDir } from '../storage/git.js'; +import { runFullAnalysis } from '../core/run-analyze.js'; import fs from 'fs/promises'; const HEAP_MB = 8192; @@ -70,25 +51,6 @@ export interface AnalyzeOptions { skipGit?: boolean; } -/** Threshold: auto-skip embeddings for repos with more nodes than this */ -const EMBEDDING_NODE_LIMIT = 50_000; - -const PHASE_LABELS: Record = { - extracting: 'Scanning files', - structure: 'Building structure', - parsing: 'Parsing code', - imports: 'Resolving imports', - calls: 'Tracing calls', - heritage: 'Extracting inheritance', - communities: 'Detecting communities', - processes: 'Detecting processes', - complete: 'Pipeline complete', - lbug: 'Loading into LadybugDB', - fts: 'Creating search indexes', - embeddings: 'Generating embeddings', - done: 'Done', -}; - export const analyzeCommand = async (inputPath?: string, options?: AnalyzeOptions) => { if (ensureHeap()) return; @@ -132,30 +94,8 @@ export const analyzeCommand = async (inputPath?: string, options?: AnalyzeOption ); } - const { storagePath, lbugPath } = getStoragePaths(repoPath); - - // Clean up stale KuzuDB files from before the LadybugDB migration. - // If kuzu existed but lbug doesn't, we're doing a migration re-index — say so. - const kuzuResult = await cleanupOldKuzuFiles(storagePath); - if (kuzuResult.found && kuzuResult.needsReindex) { - console.log(' Migrating from KuzuDB to LadybugDB — rebuilding index...\n'); - } - - const currentCommit = repoHasGit ? getCurrentCommit(repoPath) : ''; - const existingMeta = await loadMeta(storagePath); - - if ( - existingMeta && - !options?.force && - !options?.skills && - existingMeta.lastCommit === currentCommit - ) { - // Non-git folders have currentCommit = '' — always rebuild since we can't detect changes - if (currentCommit !== '') { - console.log(' Already up to date\n'); - return; - } - } + // KuzuDB migration cleanup is handled by runFullAnalysis internally. + // Note: --skills is handled after runFullAnalysis using the returned pipelineResult. if (process.env.GITNEXUS_NO_GITIGNORE) { console.log( @@ -163,7 +103,7 @@ export const analyzeCommand = async (inputPath?: string, options?: AnalyzeOption ); } - // Single progress bar for entire pipeline + // ── CLI progress bar setup ───────────────────────────────────────── const bar = new cliProgress.SingleBar( { format: ' {bar} {percentage}% | {phase}', @@ -180,10 +120,10 @@ export const analyzeCommand = async (inputPath?: string, options?: AnalyzeOption bar.start(100, 0, { phase: 'Initializing...' }); - // Graceful SIGINT handling — clean up resources and exit + // Graceful SIGINT handling let aborted = false; const sigintHandler = () => { - if (aborted) process.exit(1); // Second Ctrl-C: force exit + if (aborted) process.exit(1); aborted = true; bar.stop(); console.log('\n Interrupted — cleaning up...'); @@ -193,13 +133,11 @@ export const analyzeCommand = async (inputPath?: string, options?: AnalyzeOption }; process.on('SIGINT', sigintHandler); - // Route all console output through bar.log() so the bar doesn't stamp itself - // multiple times when other code writes to stdout/stderr mid-render. + // Route console output through bar.log() to prevent progress bar corruption const origLog = console.log.bind(console); const origWarn = console.warn.bind(console); const origError = console.error.bind(console); const barLog = (...args: any[]) => { - // Clear the bar line, print the message, then let the next bar.update redraw process.stdout.write('\x1b[2K\r'); origLog(args.map((a) => (typeof a === 'string' ? a : String(a))).join(' ')); }; @@ -207,12 +145,10 @@ export const analyzeCommand = async (inputPath?: string, options?: AnalyzeOption console.warn = barLog; console.error = barLog; - // Track elapsed time per phase — both updateBar and the interval use the - // same format so they don't flicker against each other. + // Track elapsed time per phase let lastPhaseLabel = 'Initializing...'; let phaseStart = Date.now(); - /** Update bar with phase label + elapsed seconds (shown after 3s). */ const updateBar = (value: number, phaseLabel: string) => { if (phaseLabel !== lastPhaseLabel) { lastPhaseLabel = phaseLabel; @@ -223,9 +159,6 @@ export const analyzeCommand = async (inputPath?: string, options?: AnalyzeOption bar.update(value, { phase: display }); }; - // Tick elapsed seconds for phases with infrequent progress callbacks - // (e.g. CSV streaming, FTS indexing). Uses the same display format as - // updateBar so there's no flickering. const elapsedTimer = setInterval(() => { const elapsed = Math.round((Date.now() - phaseStart) / 1000); if (elapsed >= 3) { @@ -233,225 +166,89 @@ export const analyzeCommand = async (inputPath?: string, options?: AnalyzeOption } }, 1000); - const t0Global = Date.now(); - - // ── Cache embeddings from existing index before rebuild ──────────── - let cachedEmbeddingNodeIds = new Set(); - let cachedEmbeddings: Array<{ nodeId: string; embedding: number[] }> = []; - - if (options?.embeddings && existingMeta && !options?.force) { - try { - updateBar(0, 'Caching embeddings...'); - await initLbug(lbugPath); - const cached = await loadCachedEmbeddings(); - cachedEmbeddingNodeIds = cached.embeddingNodeIds; - cachedEmbeddings = cached.embeddings; - await closeLbug(); - } catch { - try { - await closeLbug(); - } catch {} - } - } + const t0 = Date.now(); + // ── Run shared analysis orchestrator ─────────────────────────────── try { - // ── Phase 1: Full Pipeline (0–60%) ───────────────────────────────── - const pipelineResult = await runPipelineFromRepo(repoPath, (progress) => { - const phaseLabel = PHASE_LABELS[progress.phase] || progress.phase; - const scaled = Math.round(progress.percent * 0.6); - updateBar(scaled, phaseLabel); - }); - - // ── Phase 2: LadybugDB (60–85%) ────────────────────────────────────── - updateBar(60, 'Loading into LadybugDB...'); - - await closeLbug(); - const lbugFiles = [lbugPath, `${lbugPath}.wal`, `${lbugPath}.lock`]; - for (const f of lbugFiles) { - try { - await fs.rm(f, { recursive: true, force: true }); - } catch {} - } - - const t0Lbug = Date.now(); - await initLbug(lbugPath); - let lbugMsgCount = 0; - const lbugResult = await loadGraphToLbug( - pipelineResult.graph, - pipelineResult.repoPath, - storagePath, - (msg) => { - lbugMsgCount++; - const progress = Math.min(84, 60 + Math.round((lbugMsgCount / (lbugMsgCount + 10)) * 24)); - updateBar(progress, msg); + const result = await runFullAnalysis( + repoPath, + { + force: options?.force || options?.skills, + embeddings: options?.embeddings, + skipGit: options?.skipGit, + skipAgentsMd: options?.skipAgentsMd, + }, + { + onProgress: (_phase, percent, message) => { + updateBar(percent, message); + }, + onLog: barLog, }, ); - const lbugTime = ((Date.now() - t0Lbug) / 1000).toFixed(1); - const lbugWarnings = lbugResult.warnings; - // ── Phase 3: FTS (85–90%) ───────────────────────────────────────── - updateBar(85, 'Creating search indexes...'); - - const t0Fts = Date.now(); - try { - await createFTSIndex('File', 'file_fts', ['name', 'content']); - await createFTSIndex('Function', 'function_fts', ['name', 'content']); - await createFTSIndex('Class', 'class_fts', ['name', 'content']); - await createFTSIndex('Method', 'method_fts', ['name', 'content']); - await createFTSIndex('Interface', 'interface_fts', ['name', 'content']); - } catch (e: any) { - // Non-fatal — FTS is best-effort + if (result.alreadyUpToDate) { + clearInterval(elapsedTimer); + process.removeListener('SIGINT', sigintHandler); + console.log = origLog; + console.warn = origWarn; + console.error = origError; + bar.stop(); + console.log(' Already up to date\n'); + // Safe to return without process.exit(0) — the early-return path in + // runFullAnalysis never opens LadybugDB, so no native handles prevent exit. + return; } - const ftsTime = ((Date.now() - t0Fts) / 1000).toFixed(1); - - // ── Phase 3.5: Re-insert cached embeddings ──────────────────────── - if (cachedEmbeddings.length > 0) { - // Check if cached embedding dimensions match current schema - const cachedDims = cachedEmbeddings[0].embedding.length; - const { EMBEDDING_DIMS } = await import('../core/lbug/schema.js'); - if (cachedDims !== EMBEDDING_DIMS) { - // Dimensions changed (e.g. switched embedding model) — discard cache and re-embed all - console.error( - `⚠️ Embedding dimensions changed (${cachedDims}d → ${EMBEDDING_DIMS}d), discarding cache`, + + // Skill generation (CLI-only, uses pipeline result from analysis) + if (options?.skills && result.pipelineResult) { + updateBar(99, 'Generating skill files...'); + try { + const { generateSkillFiles } = await import('./skill-gen.js'); + const { generateAIContextFiles } = await import('./ai-context.js'); + const skillResult = await generateSkillFiles( + repoPath, + result.repoName, + result.pipelineResult, ); - cachedEmbeddings = []; - cachedEmbeddingNodeIds = new Set(); - } else { - updateBar(88, `Restoring ${cachedEmbeddings.length} cached embeddings...`); - const EMBED_BATCH = 200; - for (let i = 0; i < cachedEmbeddings.length; i += EMBED_BATCH) { - const batch = cachedEmbeddings.slice(i, i + EMBED_BATCH); - const paramsList = batch.map((e) => ({ nodeId: e.nodeId, embedding: e.embedding })); - try { - await executeWithReusedStatement( - `CREATE (e:CodeEmbedding {nodeId: $nodeId, embedding: $embedding})`, - paramsList, - ); - } catch { - /* some may fail if node was removed, that's fine */ + if (skillResult.skills.length > 0) { + barLog(` Generated ${skillResult.skills.length} skill files`); + // Re-generate AI context files now that we have skill info + const s = result.stats; + const communityResult = result.pipelineResult?.communityResult; + let aggregatedClusterCount = 0; + if (communityResult?.communities) { + const groups = new Map(); + for (const c of communityResult.communities) { + const label = c.heuristicLabel || c.label || 'Unknown'; + groups.set(label, (groups.get(label) || 0) + c.symbolCount); + } + aggregatedClusterCount = Array.from(groups.values()).filter( + (count: number) => count >= 5, + ).length; } + const { storagePath: sp } = getStoragePaths(repoPath); + await generateAIContextFiles( + repoPath, + sp, + result.repoName, + { + files: s.files ?? 0, + nodes: s.nodes ?? 0, + edges: s.edges ?? 0, + communities: s.communities, + clusters: aggregatedClusterCount, + processes: s.processes, + }, + skillResult.skills, + { skipAgentsMd: options?.skipAgentsMd }, + ); } + } catch { + /* best-effort */ } } - // ── Phase 4: Embeddings (90–98%) ────────────────────────────────── - const stats = await getLbugStats(); - let embeddingTime = '0.0'; - let embeddingSkipped = true; - let embeddingSkipReason = 'off (use --embeddings to enable)'; - - if (options?.embeddings) { - if (stats.nodes > EMBEDDING_NODE_LIMIT) { - embeddingSkipReason = `skipped (${stats.nodes.toLocaleString()} nodes > ${EMBEDDING_NODE_LIMIT.toLocaleString()} limit)`; - } else { - embeddingSkipped = false; - } - } - - if (!embeddingSkipped) { - const { isHttpMode } = await import('../core/embeddings/http-client.js'); - const httpMode = isHttpMode(); - updateBar( - 90, - httpMode ? 'Connecting to embedding endpoint...' : 'Loading embedding model...', - ); - const t0Emb = Date.now(); - const { runEmbeddingPipeline } = await import('../core/embeddings/embedding-pipeline.js'); - await runEmbeddingPipeline( - executeQuery, - executeWithReusedStatement, - (progress) => { - const scaled = 90 + Math.round((progress.percent / 100) * 8); - const label = - progress.phase === 'loading-model' - ? httpMode - ? 'Connecting to embedding endpoint...' - : 'Loading embedding model...' - : `Embedding ${progress.nodesProcessed || 0}/${progress.totalNodes || '?'}`; - updateBar(scaled, label); - }, - {}, - cachedEmbeddingNodeIds.size > 0 ? cachedEmbeddingNodeIds : undefined, - ); - embeddingTime = ((Date.now() - t0Emb) / 1000).toFixed(1); - } - - // ── Phase 5: Finalize (98–100%) ─────────────────────────────────── - updateBar(98, 'Saving metadata...'); - - // Count embeddings in the index (cached + newly generated) - let embeddingCount = 0; - try { - const embResult = await executeQuery(`MATCH (e:CodeEmbedding) RETURN count(e) AS cnt`); - embeddingCount = embResult?.[0]?.cnt ?? 0; - } catch { - /* table may not exist if embeddings never ran */ - } - - const meta = { - repoPath, - lastCommit: currentCommit, - indexedAt: new Date().toISOString(), - stats: { - files: pipelineResult.totalFileCount, - nodes: stats.nodes, - edges: stats.edges, - communities: pipelineResult.communityResult?.stats.totalCommunities, - processes: pipelineResult.processResult?.stats.totalProcesses, - embeddings: embeddingCount, - }, - }; - await saveMeta(storagePath, meta); - await registerRepo(repoPath, meta); - // Only attempt to update .gitignore when a .git directory is present. - // Use hasGitDir (filesystem check) rather than git CLI subprocess - // so we skip correctly for --skip-git folders even if git CLI is available. - if (hasGitDir(repoPath)) { - await addToGitignore(repoPath); - } - - const projectName = path.basename(repoPath); - let aggregatedClusterCount = 0; - if (pipelineResult.communityResult?.communities) { - const groups = new Map(); - for (const c of pipelineResult.communityResult.communities) { - const label = c.heuristicLabel || c.label || 'Unknown'; - groups.set(label, (groups.get(label) || 0) + c.symbolCount); - } - aggregatedClusterCount = Array.from(groups.values()).filter((count) => count >= 5).length; - } - - let generatedSkills: GeneratedSkillInfo[] = []; - if (options?.skills && pipelineResult.communityResult) { - updateBar(99, 'Generating skill files...'); - const skillResult = await generateSkillFiles(repoPath, projectName, pipelineResult); - generatedSkills = skillResult.skills; - } - - const aiContext = await generateAIContextFiles( - repoPath, - storagePath, - projectName, - { - files: pipelineResult.totalFileCount, - nodes: stats.nodes, - edges: stats.edges, - communities: pipelineResult.communityResult?.stats.totalCommunities, - clusters: aggregatedClusterCount, - processes: pipelineResult.processResult?.stats.totalProcesses, - }, - generatedSkills, - { - skipAgentsMd: options?.skipAgentsMd, - }, - ); - - await closeLbug(); - // Note: we intentionally do NOT call disposeEmbedder() here. - // ONNX Runtime's native cleanup segfaults on macOS and some Linux configs. - // Since the process exits immediately after, Node.js reclaims everything. - - const totalTime = ((Date.now() - t0Global) / 1000).toFixed(1); + const totalTime = ((Date.now() - t0) / 1000).toFixed(1); clearInterval(elapsedTimer); process.removeListener('SIGINT', sigintHandler); @@ -463,34 +260,14 @@ export const analyzeCommand = async (inputPath?: string, options?: AnalyzeOption bar.update(100, { phase: 'Done' }); bar.stop(); - // ── Summary ─────────────────────────────────────────────────────── - const embeddingsCached = cachedEmbeddings.length > 0; + // ── Summary ──────────────────────────────────────────────────── + const s = result.stats; + console.log(`\n Repository indexed successfully (${totalTime}s)\n`); console.log( - `\n Repository indexed successfully (${totalTime}s)${embeddingsCached ? ` [${cachedEmbeddings.length} embeddings cached]` : ''}\n`, - ); - console.log( - ` ${stats.nodes.toLocaleString()} nodes | ${stats.edges.toLocaleString()} edges | ${pipelineResult.communityResult?.stats.totalCommunities || 0} clusters | ${pipelineResult.processResult?.stats.totalProcesses || 0} flows`, - ); - console.log( - ` LadybugDB ${lbugTime}s | FTS ${ftsTime}s | Embeddings ${embeddingSkipped ? embeddingSkipReason : embeddingTime + 's'}`, + ` ${(s.nodes ?? 0).toLocaleString()} nodes | ${(s.edges ?? 0).toLocaleString()} edges | ${s.communities ?? 0} clusters | ${s.processes ?? 0} flows`, ); console.log(` ${repoPath}`); - if (aiContext.files.length > 0) { - console.log(` Context: ${aiContext.files.join(', ')}`); - } - - // Show a quiet summary if some edge types needed fallback insertion - if (lbugWarnings.length > 0) { - const totalFallback = lbugWarnings.reduce((sum, w) => { - const m = w.match(/\((\d+) edges\)/); - return sum + (m ? parseInt(m[1]) : 0); - }, 0); - console.log( - ` Note: ${totalFallback} edges across ${lbugWarnings.length} types inserted via fallback (schema will be updated in next release)`, - ); - } - try { await fs.access(getGlobalRegistryPath()); } catch { @@ -505,9 +282,6 @@ export const analyzeCommand = async (inputPath?: string, options?: AnalyzeOption console.warn = origWarn; console.error = origError; bar.stop(); - try { - await closeLbug(); - } catch {} console.error(`\n Analysis failed: ${err.message}\n`); process.exitCode = 1; return;