Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
167036a
feat: track parser coverage and expose unsupported file stats (#1076)
ZakAnun May 7, 2026
9b0046f
Merge branch 'abhigyanpatwari:main' into fix/1076-parser-coverage
ZakAnun May 7, 2026
30b8182
feat(cli): add --json analyze output and parserCoverage regression test
ZakAnun May 8, 2026
8976f58
Merge branch 'main' into fix/1076-parser-coverage
magyargergo May 8, 2026
0a54d07
Merge branch 'main' into fix/1076-parser-coverage
magyargergo May 8, 2026
ab9c64f
style: format pipeline.ts for Prettier CI check
ZakAnun May 8, 2026
2ab7e9b
Merge branch 'main' into fix/1076-parser-coverage
magyargergo May 8, 2026
9d65078
Merge branch 'main' into fix/1076-parser-coverage
magyargergo May 9, 2026
2d68fa8
Merge branch 'main' into fix/1076-parser-coverage
magyargergo May 9, 2026
571f185
Merge branch 'main' into fix/1076-parser-coverage
magyargergo May 9, 2026
7af3a79
fix(parser): use logger.warn for unsupported extension skip message
ZakAnun May 10, 2026
a93a71e
Merge branch 'main' into fix/1076-parser-coverage
ZakAnun May 11, 2026
292972e
merge upstream/main to resolve PR conflicts
ZakAnun May 12, 2026
8e36bf4
Merge origin/fix/1076-parser-coverage into resolved branch
ZakAnun May 12, 2026
5ae00b1
Merge branch 'main' into fix/1076-parser-coverage
ZakAnun May 12, 2026
3ae6c7a
Merge branch 'main' into fix/1076-parser-coverage
ZakAnun May 31, 2026
cfa0bb3
chore(autofix): apply prettier + eslint fixes via /autofix command
github-actions[bot] Jun 1, 2026
983f23e
merge main into fix/1076-parser-coverage (sync for PR #1428)
ZakAnun Jun 1, 2026
dbabdfa
merge upstream/main into fix/1076-parser-coverage
ZakAnun Jun 10, 2026
6322565
Merge branch 'main' into fix/1076-parser-coverage
magyargergo Jun 10, 2026
52f9e8e
merge upstream/main into fix/1076-parser-coverage
ZakAnun Jun 11, 2026
e0f17bc
Merge branch 'main' into fix/1076-parser-coverage
ZakAnun Jun 11, 2026
7d1447a
Merge branch 'main' into fix/1076-parser-coverage
ZakAnun Jun 12, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 19 additions & 5 deletions gitnexus/src/cli/analyze.ts
Original file line number Diff line number Diff line change
Expand Up @@ -669,6 +669,7 @@ export interface AnalyzeOptions {
embeddingBatchSize?: string;
embeddingSubBatchSize?: string;
embeddingDevice?: string;
json?: boolean;
/**
* Extra fetch-wrapper function names to treat as HTTP consumers (#1589/#1852
* residual). Supplied via `.gitnexusrc` `fetchWrappers: [...]`. Threaded into
Expand Down Expand Up @@ -1310,11 +1311,24 @@ const analyzeCommandImpl = async (

// ── Summary ────────────────────────────────────────────────────
const s = result.stats;
console.log(`\n Repository indexed successfully (${totalTime}s)\n`);
console.log(
` ${(s.nodes ?? 0).toLocaleString()} nodes | ${(s.edges ?? 0).toLocaleString()} edges | ${s.communities ?? 0} clusters | ${s.processes ?? 0} flows`,
);
console.log(` ${repoPath}`);
if (options?.json) {
  // Machine-readable mode: a single pretty-printed JSON document holding the
  // repo path, the elapsed time and every field of the stats object.
  const payload = { repoPath, totalTime, ...s };
  console.log(JSON.stringify(payload, null, 2));
} else {
  console.log(`\n Repository indexed successfully (${totalTime}s)\n`);

  // Headline counters, joined with the same " | " separator on one line.
  const counters = [
    `${(s.nodes ?? 0).toLocaleString()} nodes`,
    `${(s.edges ?? 0).toLocaleString()} edges`,
    `${s.communities ?? 0} clusters`,
    `${s.processes ?? 0} flows`,
  ].join(' | ');
  console.log(` ${counters}`);

  // Only mention skipped files when at least one unsupported file was seen.
  const coverage = s.parserCoverage;
  if (coverage && coverage.unsupportedFiles > 0) {
    const { unsupportedFiles, unsupportedByExtension } = coverage;
    // Show at most five extensions; flag the truncation with a trailing ", ...".
    const listed = unsupportedByExtension
      .slice(0, 5)
      .map(({ extension, count }) => `${extension}: ${count}`)
      .join(', ');
    const overflow = unsupportedByExtension.length > 5 ? ', ...' : '';
    console.log(
      ` Skipped ${unsupportedFiles} files with unsupported extensions (${listed}${overflow})`,
    );
  }

  console.log(` ${repoPath}`);
}

// Persistent (non-scrolling) warning when FTS indexing was skipped — the
// progress-bar log() that fired mid-run has already scrolled away, so the
Expand Down
1 change: 1 addition & 0 deletions gitnexus/src/cli/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ program
.option('--embedding-batch-size <n>', 'Number of nodes per embedding batch')
.option('--embedding-sub-batch-size <n>', 'Number of chunks per embedding model call')
.option('--embedding-device <device>', 'Embedding device: auto, cpu, dml, cuda, or wasm')
.option('--json', 'Output analysis result as JSON (includes parserCoverage stats)')
.addHelpText('after', () => t('help.analyze.environment'))
.action(createLbugLazyAction(() => import('./analyze.js'), 'analyzeCommand'));

Expand Down
38 changes: 38 additions & 0 deletions gitnexus/src/core/ingestion/pipeline-phases/parse-impl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,13 @@ export async function runChunkedParseAndResolve(
* files. There is no sequential parser — the pool is the sole parse path
* whenever a chunk misses the cache. */
usedWorkerPool: boolean;
/** Parser coverage — which files were parsed vs skipped */
parserCoverage: {
totalFiles: number;
supportedFiles: number;
unsupportedFiles: number;
unsupportedByExtension: Array<{ extension: string; count: number }>;
};
/** Worker-produced ParsedFile artifacts aggregated across chunks.
* Threaded into scope-resolution as a re-extract cache so the warm-
* cache analyze run can skip the dominant `extractParsedFile` cost
Expand All @@ -270,6 +277,28 @@ export async function runChunkedParseAndResolve(
return lang && isLanguageAvailable(lang);
});

// ── Parser coverage stats ──────────────────────────────────────────
// Bucket every scanned file whose name maps to no language by its
// lower-cased extension. Files with no extension share one
// '(no extension)' bucket.
const unsupportedExtCounts = new Map<string, number>();
for (const { path: filePath } of scannedFiles) {
  if (getLanguageFromFilename(filePath)) continue;
  const rawExt = path.extname(filePath).toLowerCase();
  const bucket = rawExt || '(no extension)';
  unsupportedExtCounts.set(bucket, (unsupportedExtCounts.get(bucket) ?? 0) + 1);
}

// Largest buckets first. The comparator only looks at counts, so equal
// counts keep the order in which their extensions were first encountered.
const unsupportedByExtension = Array.from(unsupportedExtCounts, ([extension, count]) => ({
  extension,
  count,
}));
unsupportedByExtension.sort((left, right) => right.count - left.count);

const unsupportedFiles = unsupportedByExtension.reduce((total, entry) => total + entry.count, 0);
// "Supported" means the file made it into the parseable set.
const supportedFiles = parseableScanned.length;

const parserCoverage = {
  totalFiles: scannedFiles.length,
  supportedFiles,
  unsupportedFiles,
  unsupportedByExtension,
};

// Warn about files skipped due to unavailable parsers
const skippedByLang = new Map<string, number>();
for (const f of scannedFiles) {
Expand All @@ -293,6 +322,14 @@ export async function runChunkedParseAndResolve(
}
}

// Emit one warning summarising files that had no matching grammar at all,
// listing up to five extensions (the list is already ordered by count).
if (unsupportedFiles > 0) {
  const listed = unsupportedByExtension
    .slice(0, 5)
    .map(({ extension, count }) => `${extension}: ${count}`)
    .join(', ');
  const overflow = unsupportedByExtension.length > 5 ? ', ...' : '';
  logger.warn(
    `Skipped ${unsupportedFiles} files with unsupported extensions (${listed}${overflow})`,
  );
}

// Sort parseableScanned alphabetically for stable chunk membership
// across runs (Finding 4). Without this, filesystem-scan order can
// shift between runs (notably on macOS APFS where directory entry
Expand Down Expand Up @@ -1162,6 +1199,7 @@ export async function runChunkedParseAndResolve(
// no pool was needed: a warm all-cache-hit run replays cached worker output
// without spawning workers, or there were no parseable files.
usedWorkerPool: workerPool !== undefined,
parserCoverage,
// Per-file ParsedFile artifacts produced by workers' calls to
// `extractParsedFile`. Consumed by scope-resolution as a re-extraction
// cache: when the file's ParsedFile is here, scope-resolution skips its own
Expand Down
3 changes: 3 additions & 0 deletions gitnexus/src/core/ingestion/pipeline-phases/parse.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import type {
} from '../workers/parse-worker.js';
import { runChunkedParseAndResolve } from './parse-impl.js';
import type { MutableSemanticModel } from '../model/index.js';
import type { ParserCoverage } from '../../../types/pipeline.js';

export interface ParseOutput {
/**
Expand Down Expand Up @@ -77,6 +78,8 @@ export interface ParseOutput {
* costing ~58s on a 1000-file repo).
*/
readonly parsedFiles: readonly ParsedFile[];
/** Parser coverage — which files were parsed vs skipped */
readonly parserCoverage: ParserCoverage;
}

export const parsePhase: PipelinePhase<ParseOutput> = {
Expand Down
10 changes: 6 additions & 4 deletions gitnexus/src/core/ingestion/pipeline.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ import {
type PipelinePhase,
type CommunitiesOutput,
type ProcessesOutput,
type ParseOutput,
} from './pipeline-phases/index.js';

export interface PipelineOptions {
Expand Down Expand Up @@ -237,10 +238,10 @@ export const runPipelineFromRepo = async (
});

// Extract final results for the PipelineResult contract
const { totalFiles, usedWorkerPool } = getPhaseOutput<{
totalFiles: number;
usedWorkerPool: boolean;
}>(results, 'parse');
const { totalFiles, usedWorkerPool, parserCoverage } = getPhaseOutput<ParseOutput>(
results,
'parse',
);

let communityResult: CommunitiesOutput['communityResult'] | undefined;
let processResult: ProcessesOutput['processResult'] | undefined;
Expand Down Expand Up @@ -276,5 +277,6 @@ export const runPipelineFromRepo = async (
processResult,
resolutionOutcomes,
usedWorkerPool,
parserCoverage,
};
};
7 changes: 7 additions & 0 deletions gitnexus/src/core/run-analyze.ts
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,12 @@ export interface AnalyzeResult {
communities?: number;
processes?: number;
embeddings?: number;
parserCoverage?: {
totalFiles: number;
supportedFiles: number;
unsupportedFiles: number;
unsupportedByExtension: Array<{ extension: string; count: number }>;
};
};
alreadyUpToDate?: boolean;
/** The raw pipeline result — only populated when needed by callers (e.g. skill generation). */
Expand Down Expand Up @@ -1221,6 +1227,7 @@ export async function runFullAnalysis(
communities: pipelineResult.communityResult?.stats.totalCommunities,
processes: pipelineResult.processResult?.stats.totalProcesses,
embeddings: embeddingCount,
parserCoverage: pipelineResult.parserCoverage,
},
capabilities: {
graph: { provider: 'ladybugdb', status: runtimeCapabilities.graph },
Expand Down
7 changes: 7 additions & 0 deletions gitnexus/src/mcp/local/local-backend.ts
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,12 @@ export interface CodebaseContext {
communityCount: number;
processCount: number;
};
parserCoverage?: {
totalFiles: number;
supportedFiles: number;
unsupportedFiles: number;
unsupportedByExtension: Array<{ extension: string; count: number }>;
};
}

interface RepoHandle {
Expand Down Expand Up @@ -601,6 +607,7 @@ export class LocalBackend {
communityCount: s.communities || 0,
processCount: s.processes || 0,
},
parserCoverage: s.parserCoverage,
});
}

Expand Down
18 changes: 18 additions & 0 deletions gitnexus/src/mcp/resources.ts
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,9 @@ async function getReposResource(backend: LocalBackend): Promise<string> {
lines.push(` files: ${repo.stats.files || 0}`);
lines.push(` symbols: ${repo.stats.nodes || 0}`);
lines.push(` processes: ${repo.stats.processes || 0}`);
// Add the line only when a positive unsupported-file count is recorded;
// a missing parserCoverage or a count of 0 leaves the listing unchanged.
const unsupportedFileCount = repo.stats.parserCoverage?.unsupportedFiles;
if (unsupportedFileCount) {
  lines.push(` unsupported_files: ${unsupportedFileCount}`);
}
}
}

Expand Down Expand Up @@ -330,6 +333,21 @@ async function getContextResource(backend: LocalBackend, repoName?: string): Pro
lines.push(` files: ${context.stats.fileCount}`);
lines.push(` symbols: ${context.stats.functionCount}`);
lines.push(` processes: ${context.stats.processCount}`);

// Append a parser_coverage section, but only when something was skipped.
if (context.parserCoverage && context.parserCoverage.unsupportedFiles > 0) {
  const { totalFiles, supportedFiles, unsupportedFiles, unsupportedByExtension } =
    context.parserCoverage;
  lines.push(
    '',
    'parser_coverage:',
    ` total_files: ${totalFiles}`,
    ` supported: ${supportedFiles}`,
    ` unsupported: ${unsupportedFiles}`,
    ' unsupported_by_extension:',
  );
  // Cap the per-extension breakdown at the first ten entries.
  unsupportedByExtension.slice(0, 10).forEach(({ extension, count }) => {
    lines.push(` - extension: "${extension}"`, ` count: ${count}`);
  });
}

lines.push('');
lines.push('tools_available:');
lines.push(' - query: Process-grouped code intelligence (execution flows related to a concept)');
Expand Down
6 changes: 6 additions & 0 deletions gitnexus/src/storage/repo-manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,12 @@ export interface RepoMeta {
communities?: number;
processes?: number;
embeddings?: number;
parserCoverage?: {
totalFiles: number;
supportedFiles: number;
unsupportedFiles: number;
unsupportedByExtension: Array<{ extension: string; count: number }>;
};
};
/**
* Bumped whenever incremental-indexing invariants change in an
Expand Down
20 changes: 20 additions & 0 deletions gitnexus/src/types/pipeline.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,24 @@ import { CommunityDetectionResult } from '../core/ingestion/community-processor.
import { ProcessDetectionResult } from '../core/ingestion/process-processor.js';
import type { ResolutionOutcome } from '../core/ingestion/scope-resolution/resolution-outcome.js';

/**
 * One row of the unsupported-file breakdown: a file extension together with
 * the number of files that carried it.
 */
export interface UnsupportedExtension {
/**
 * Extension label. As produced by the parse phase this is the lower-cased
 * result of `path.extname` (leading dot included), or the literal
 * `'(no extension)'` when the file name has no extension.
 */
extension: string;
/** Number of scanned files with this extension for which no language was detected. */
count: number;
}

/**
 * Parser coverage stats — tracks which files were parsed vs skipped.
 *
 * NOTE(review): `supportedFiles + unsupportedFiles` is not guaranteed to equal
 * `totalFiles`. In the parse phase, `unsupportedFiles` counts only files with
 * no detected language, while `supportedFiles` is the size of the parseable
 * set; a file whose language is detected but whose parser is unavailable
 * appears to land in neither bucket — confirm against the parse phase.
 */
export interface ParserCoverage {
/** Total source files in repo (before language filtering) */
totalFiles: number;
/** Files with supported extensions that entered the parse pipeline */
supportedFiles: number;
/** Files with unsupported extensions (no grammar defined) */
unsupportedFiles: number;
/**
 * Per-extension breakdown of unsupported files, sorted by count desc.
 * The counts in this list sum to `unsupportedFiles`.
 */
unsupportedByExtension: UnsupportedExtension[];
}

// CLI-specific: in-memory result with graph + detection results
export interface PipelineResult {
graph: KnowledgeGraph;
Expand All @@ -27,4 +45,6 @@ export interface PipelineResult {
* affordance so regression suites can prove the pool engaged.
*/
usedWorkerPool: boolean;
/** Parser coverage stats — which files were parsed vs skipped */
parserCoverage?: ParserCoverage;
}
38 changes: 38 additions & 0 deletions gitnexus/test/unit/language-availability-skip.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -86,4 +86,42 @@ describe('native parser availability — unavailable language is skipped, not cr
);
expect(warned).toBe(true);
});

it('reports parser coverage for unsupported extensions without spawning a pool', async () => {
  // Two fixture files whose extensions the test expects to be reported as
  // unsupported. Order matters: both have count 1, and the expected breakdown
  // below lists them in this same order.
  const fixtures: Array<[relPath: string, content: string]> = [
    ['scripts/bootstrap.sh', '#!/usr/bin/env bash\necho hi\n'],
    ['data/report.csv', 'a,b,c\n'],
  ];
  const relPaths = fixtures.map(([relPath]) => relPath);

  // Materialise the fixtures on disk under the test repo directory.
  fixtures.forEach(([relPath, content]) => {
    const absPath = path.join(repoDir, relPath);
    fs.mkdirSync(path.dirname(absPath), { recursive: true });
    fs.writeFileSync(absPath, content);
  });

  // Scanner-style entries: repo-relative path plus on-disk size.
  const scanned = relPaths.map((relPath) => {
    const { size } = fs.statSync(path.join(repoDir, relPath));
    return { path: relPath, size };
  });

  const noop = () => {};
  const { parserCoverage, usedWorkerPool } = await runChunkedParseAndResolve(
    createKnowledgeGraph(),
    scanned,
    relPaths,
    relPaths.length,
    repoDir,
    Date.now(),
    noop,
  );

  expect(parserCoverage).toEqual({
    totalFiles: 2,
    supportedFiles: 0,
    unsupportedFiles: 2,
    unsupportedByExtension: [
      { extension: '.sh', count: 1 },
      { extension: '.csv', count: 1 },
    ],
  });
  // Nothing was parseable, so no worker pool should have been created.
  expect(usedWorkerPool).toBe(false);
});
});
Loading