Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion gitnexus/src/core/ingestion/call-processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -766,6 +766,15 @@ export const processCalls = async (
importedRawReturnTypesMap?: ReadonlyMap<string, ReadonlyMap<string, string>>,
heritageMap?: HeritageMap,
bindingAccumulator?: BindingAccumulator,
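/**
* Optional cache of compiled `Parser.Query` objects, keyed by language.
* When provided, a query is compiled at most once per language and then
* reused, instead of being re-compiled from the query string for every file.
* Callers that invoke `processCalls` many times with single-file batches
* (e.g. the cross-file propagation phase) should pass a long-lived map here
* so compilation happens once per language rather than once per file.
*
* NOTE(review): the lookup uses the language alone, so a cache hit is
* returned without comparing the current query string or grammar. Confirm
* that each language maps to exactly one query string and grammar before
* sharing a map across calls.
*/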
compiledQueryCache?: Map<SupportedLanguages, Parser.Query>,
): Promise<ExtractedHeritage[]> => {
const parser = await loadParser();
const collectedHeritage: ExtractedHeritage[] = [];
Expand Down Expand Up @@ -843,7 +852,11 @@ export const processCalls = async (
let matches;
try {
const lang = parser.getLanguage();
const query = new Parser.Query(lang, queryStr);
let query = compiledQueryCache?.get(language);
if (!query) {
query = new Parser.Query(lang, queryStr);
compiledQueryCache?.set(language, query);
}
matches = query.matches(tree.rootNode);
} catch (queryError) {
logger.warn({ queryError }, `Query error for ${file.path}:`);
Expand Down
58 changes: 57 additions & 1 deletion gitnexus/src/core/ingestion/pipeline-phases/cross-file-impl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,18 @@ import {
} from '../call-processor.js';
import type { createResolutionContext } from '../model/resolution-context.js';
import { createASTCache } from '../ast-cache.js';
import { type PipelineProgress, getLanguageFromFilename } from 'gitnexus-shared';
import {
type PipelineProgress,
getLanguageFromFilename,
type SupportedLanguages,
} from 'gitnexus-shared';
import { readFileContents } from '../filesystem-walker.js';
import { isLanguageAvailable } from '../../tree-sitter/parser-loader.js';
import { isRegistryPrimary } from '../registry-primary-flag.js';
import { topologicalLevelSort } from '../utils/graph-sort.js';
import type { KnowledgeGraph } from '../../graph/types.js';
import { isDev } from '../utils/env.js';
import type Parser from 'tree-sitter';

import { logger } from '../../logger.js';
/** Max AST trees to keep in LRU cache for cross-file binding propagation. */
Expand Down Expand Up @@ -114,6 +120,36 @@ export async function runCrossFileBindingPropagation(
let crossFileResolved = 0;
const crossFileStart = Date.now();
const astCache = createASTCache(AST_CACHE_CAP);
// Compiled query objects keyed by language name. Shared across all processCalls
// invocations in this phase so the same tree-sitter query string is only
// compiled once per language instead of once per file (O(1) vs O(N)).
const compiledQueryCache = new Map<SupportedLanguages, Parser.Query>();

// Snapshot total topological candidates for progress math. We walk the
// levels once more here (fast — no I/O) so we can report meaningful
// percentages rather than a frozen display.
let totalCandidates = 0;
for (const level of levels) {
for (const filePath of level) {
if (totalCandidates >= MAX_CROSS_FILE_REPROCESS) break;
const imports = ctx.namedImportMap.get(filePath);
if (!imports) continue;
if (!allPathSet.has(filePath)) continue;
const lang = getLanguageFromFilename(filePath);
if (!lang || !isLanguageAvailable(lang)) continue;
// Registry-primary languages have their call resolution handled by the
// scope-resolution pipeline — processCalls skips them immediately. Skip
// here too so we avoid the I/O cost (readFileContents) and map-building
// overhead for files that would be no-ops anyway.
if (isRegistryPrimary(lang)) continue;
totalCandidates++;
}
if (totalCandidates >= MAX_CROSS_FILE_REPROCESS) break;
}
const cappedTotal = Math.min(totalCandidates, MAX_CROSS_FILE_REPROCESS);

/** Emit a progress event every PROGRESS_INTERVAL files so the UI stays alive. */
const PROGRESS_INTERVAL = 25;

for (const level of levels) {
const levelCandidates: {
Expand Down Expand Up @@ -151,6 +187,10 @@ export async function runCrossFileBindingPropagation(

const lang = getLanguageFromFilename(filePath);
if (!lang || !isLanguageAvailable(lang)) continue;
// Registry-primary languages have their call resolution handled by the
// scope-resolution pipeline — processCalls skips them immediately. Skip
// here to avoid readFileContents I/O and map-building for no-op files.
if (isRegistryPrimary(lang)) continue;

levelCandidates.push({ filePath, seeded, importedReturns, importedRawReturns });
}
Expand Down Expand Up @@ -188,8 +228,24 @@ export async function runCrossFileBindingPropagation(
bindings.size > 0 ? bindings : undefined,
importedReturnTypesMap.size > 0 ? importedReturnTypesMap : undefined,
importedRawReturnTypesMap.size > 0 ? importedRawReturnTypesMap : undefined,
undefined,
undefined,
compiledQueryCache,
);
crossFileResolved++;

// Emit progress every PROGRESS_INTERVAL files so the UI shows real
// movement instead of a frozen display (cross-file can take minutes
// on large repos with many cross-file imports).
if (crossFileResolved % PROGRESS_INTERVAL === 0 || crossFileResolved === cappedTotal) {
const pct = cappedTotal > 0 ? Math.round((crossFileResolved / cappedTotal) * 8) : 0;
onProgress({
phase: 'parsing',
percent: 82 + pct,
message: `Cross-file type propagation (${crossFileResolved}/${cappedTotal} files)...`,
stats: { filesProcessed: crossFileResolved, totalFiles, nodesCreated: graph.nodeCount },
});
}
}

if (crossFileResolved >= MAX_CROSS_FILE_REPROCESS) {
Expand Down
159 changes: 159 additions & 0 deletions gitnexus/test/unit/cross-file-impl.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,17 +49,36 @@ vi.mock('../../src/core/tree-sitter/parser-loader.js', async (importOriginal) =>
};
});

// Replace the registry-primary-flag module with a mock whose
// `isRegistryPrimary` returns false by default, so existing tests (which use
// .ts files) are not affected by the isRegistryPrimary guard added in
// cross-file-impl. Tests that verify the skip behavior can override this with
// mockReturnValue(true).
vi.mock('../../src/core/ingestion/registry-primary-flag.js', () => ({
isRegistryPrimary: vi.fn(() => false),
}));

import { runCrossFileBindingPropagation } from '../../src/core/ingestion/pipeline-phases/cross-file-impl.js';
import { processCalls } from '../../src/core/ingestion/call-processor.js';
import { isRegistryPrimary } from '../../src/core/ingestion/registry-primary-flag.js';
import { createResolutionContext } from '../../src/core/ingestion/model/resolution-context.js';
import { createKnowledgeGraph } from '../../src/core/graph/graph.js';
import type { ExportedTypeMap } from '../../src/core/ingestion/call-processor.js';

// Typed handles on the imported functions (via `vi.mocked`) so the tests can
// inspect recorded calls and override return values.
const processCallsMock = vi.mocked(processCalls);
const isRegistryPrimaryMock = vi.mocked(isRegistryPrimary);

/**
* Index of the `compiledQueryCache` parameter in the `processCalls` signature.
* graph(0), files(1), astCache(2), ctx(3), onProgress?(4), exportedTypeMap?(5),
* importedBindingsMap?(6), importedReturnTypesMap?(7),
* importedRawReturnTypesMap?(8), heritageMap?(9), bindingAccumulator?(10),
* compiledQueryCache?(11).
*
* Must be updated if parameters are inserted before `compiledQueryCache`.
*/
const COMPILED_QUERY_CACHE_ARG_INDEX = 11;

describe('runCrossFileBindingPropagation', () => {
// Runs before every test in this suite: drop the calls recorded on the
// processCalls mock and restore the isRegistryPrimary default, so one test's
// overrides do not affect the next.
beforeEach(() => {
processCallsMock.mockClear();
isRegistryPrimaryMock.mockReturnValue(false); // reset to non-primary before each test
});

it('returns 0 immediately when namedImportMap is empty', async () => {
Expand Down Expand Up @@ -162,6 +181,103 @@ describe('runCrossFileBindingPropagation', () => {
}
});

it('passes the same compiledQueryCache Map instance to every processCalls call', async () => {
  // Wiring check for the compiled-query cache: the propagation phase must hand
  // one and the same Map to each processCalls invocation, otherwise nothing
  // can be reused between files.
  const UPSTREAM = 'upstream.ts';
  const DOWNSTREAM_COUNT = 3;

  const graph = createKnowledgeGraph();
  const ctx = createResolutionContext();
  const exportedTypeMap: ExportedTypeMap = new Map([[UPSTREAM, new Map([['User', 'User']])]]);

  // The upstream file imports nothing; each downstream file imports `User` from it.
  ctx.importMap.set(UPSTREAM, new Set());
  const allPaths = [UPSTREAM];
  for (let n = 0; n < DOWNSTREAM_COUNT; n += 1) {
    const downstream = `downstream${n}.ts`;
    const namedBindings = new Map();
    namedBindings.set('User', { sourcePath: UPSTREAM, exportedName: 'User' });
    ctx.namedImportMap.set(downstream, namedBindings);
    ctx.importMap.set(downstream, new Set([UPSTREAM]));
    allPaths.push(downstream);
  }

  const noopProgress = () => {};
  await runCrossFileBindingPropagation(
    graph,
    ctx,
    exportedTypeMap,
    new Set(allPaths),
    allPaths.length,
    '/repo',
    Date.now(),
    noopProgress,
  );

  expect(processCallsMock).toHaveBeenCalledTimes(3);

  // Pull the cache argument out of every recorded call.
  const caches = processCallsMock.mock.calls.map((args) => args[COMPILED_QUERY_CACHE_ARG_INDEX]);

  // Each invocation must have been given an actual Map, never undefined.
  caches.forEach((cache) => {
    expect(cache).toBeDefined();
    expect(cache).toBeInstanceOf(Map);
  });

  // Identity, not just equality: every later call reuses the first call's Map.
  const [shared, ...others] = caches;
  for (const other of others) {
    expect(other).toBe(shared);
  }
});

it('emits live onProgress events every 25 files with N/M format', async () => {
  // Verifies that the processing loop reports progress while it runs:
  // onProgress must be called from inside the loop (not just once at phase
  // start), the messages must carry an "N/M files" counter, and the reported
  // percent must advance with the number of files processed.
  const graph = createKnowledgeGraph();
  const ctx = createResolutionContext();

  const exportedTypeMap: ExportedTypeMap = new Map([
    ['upstream.ts', new Map([['User', 'User']])],
  ]);
  ctx.importMap.set('upstream.ts', new Set());

  // 50 downstream files => exactly two loop events with an interval of 25.
  const allPaths = ['upstream.ts'];
  for (let i = 0; i < 50; i++) {
    const file = `downstream${i}.ts`;
    allPaths.push(file);
    const bindings = new Map();
    bindings.set('User', { sourcePath: 'upstream.ts', exportedName: 'User' });
    ctx.namedImportMap.set(file, bindings);
    ctx.importMap.set(file, new Set(['upstream.ts']));
  }

  // Record percent alongside the message so both can be asserted.
  const progressEvents: { percent: number; message: string }[] = [];
  const onProgress = vi.fn((p: { phase: string; percent: number; message: string }) => {
    progressEvents.push({ percent: p.percent, message: p.message });
  });

  await runCrossFileBindingPropagation(
    graph,
    ctx,
    exportedTypeMap,
    new Set(allPaths),
    allPaths.length,
    '/repo',
    Date.now(),
    onProgress,
  );

  // 1 initial call at phase start + 2 loop calls (at 25 and 50 files).
  expect(onProgress).toHaveBeenCalledTimes(3);

  // Loop events are the ones whose message carries the "N/M files" counter.
  const loopEvents = progressEvents.filter((e) => /\(\d+\/\d+ files\)/.test(e.message));
  expect(loopEvents).toHaveLength(2);

  const loopMessages = loopEvents.map((e) => e.message);
  expect(loopMessages[0]).toContain('(25/50 files)');
  expect(loopMessages[1]).toContain('(50/50 files)');

  // The loop reports 82 + round(resolved / total * 8): 25/50 -> 86, 50/50 -> 90.
  // Pinning the values catches a regression where the counter moves but the
  // percentage bar stays frozen.
  expect(loopEvents.map((e) => e.percent)).toEqual([86, 90]);
});

it('caps processing at MAX_CROSS_FILE_REPROCESS (2000)', async () => {
const graph = createKnowledgeGraph();
const ctx = createResolutionContext();
Expand Down Expand Up @@ -203,4 +319,47 @@ describe('runCrossFileBindingPropagation', () => {
expect(result).toBe(2000);
expect(processCallsMock).toHaveBeenCalledTimes(2000);
});

it('skips registry-primary language files without calling processCalls', async () => {
  // When isRegistryPrimary reports true for a file's language, the
  // propagation phase must drop the file from its candidate list instead of
  // forwarding it to processCalls. With every file flagged that way, nothing
  // is processed and the function reports zero files.
  const UPSTREAM = 'upstream.ts';
  const DOWNSTREAM_COUNT = 5;

  const graph = createKnowledgeGraph();
  const ctx = createResolutionContext();
  const exportedTypeMap: ExportedTypeMap = new Map([[UPSTREAM, new Map([['User', 'User']])]]);

  // Same fixture shape as the other tests: downstream files importing `User`.
  ctx.importMap.set(UPSTREAM, new Set());
  const allPaths = [UPSTREAM];
  for (let n = 0; n < DOWNSTREAM_COUNT; n += 1) {
    const downstream = `downstream${n}.ts`;
    const namedBindings = new Map();
    namedBindings.set('User', { sourcePath: UPSTREAM, exportedName: 'User' });
    ctx.namedImportMap.set(downstream, namedBindings);
    ctx.importMap.set(downstream, new Set([UPSTREAM]));
    allPaths.push(downstream);
  }

  // Flag every language as registry-primary for this test only; the suite's
  // beforeEach restores the non-primary default afterwards.
  isRegistryPrimaryMock.mockReturnValue(true);

  const noopProgress = () => {};
  const processedCount = await runCrossFileBindingPropagation(
    graph,
    ctx,
    exportedTypeMap,
    new Set(allPaths),
    allPaths.length,
    '/repo',
    Date.now(),
    noopProgress,
  );

  // Nothing qualified as a candidate, so nothing was handed to processCalls.
  expect(processedCount).toBe(0);
  expect(processCallsMock).not.toHaveBeenCalled();
});
});
Loading