Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions gitnexus/src/cli/analyze.ts
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,7 @@ const ANALYZE_CLI_ENV_KEYS = [
'GITNEXUS_VERBOSE',
'GITNEXUS_PROFILE_DEFERRED',
'GITNEXUS_PROFILE_DEFERRED_SLOW_MS',
'GITNEXUS_DEBUG_HEAP',
'GITNEXUS_MAX_FILE_SIZE',
'GITNEXUS_WORKER_SUB_BATCH_TIMEOUT_MS',
'GITNEXUS_WAL_CHECKPOINT_THRESHOLD',
Expand Down
52 changes: 29 additions & 23 deletions gitnexus/src/core/ingestion/call-processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,34 @@ const MAX_TYPE_NAME_LENGTH = 256;
* Consumed by the cross-file re-resolution / enrichment pass. */
export type ExportedTypeMap = Map<string, Map<string, string>>;

/**
 * Fold a single graph node into the incrementally built `ExportedTypeMap`.
 *
 * The node contributes an entry only when it is flagged `isExported`, carries
 * both `filePath` and `name` properties, and the symbol table knows a
 * `returnType` or `declaredType` for it. Names and type names longer than
 * `MAX_TYPE_NAME_LENGTH` are ignored, and each file's map stops accepting
 * writes once it holds `MAX_EXPORTS_PER_FILE` entries.
 *
 * @param result - Map of file path to (export name to simple type name); mutated in place.
 * @param node - Graph node whose `properties` describe the candidate export.
 * @param symbolTable - Reader used to look up the definitions recorded for the node.
 */
export const accumulateExportedTypesFromParsedNode = (
  result: ExportedTypeMap,
  node: { id: string; properties?: Record<string, unknown> },
  symbolTable: SymbolTableReader,
): void => {
  const props = node.properties;
  // Only exported nodes that carry both a file path and a name are eligible.
  if (!props?.isExported || !props?.filePath || !props?.name) return;

  const filePath = props.filePath as string;
  const name = props.name as string;
  if (name.length > MAX_TYPE_NAME_LENGTH) return;

  // Several definitions may share a name within one file (e.g. same-name
  // methods in different classes): prefer the one whose nodeId matches this
  // node, otherwise fall back to the first definition found.
  const candidates = symbolTable.lookupExactAll(filePath, name);
  const match = candidates.find((candidate) => candidate.nodeId === node.id) ?? candidates[0];
  if (!match) return;

  // returnType takes precedence; declaredType is used when no returnType is recorded.
  const rawType = match.returnType ?? match.declaredType;
  if (!rawType || rawType.length > MAX_TYPE_NAME_LENGTH) return;

  // Reduce to the simple type name via the shared utility, keeping the raw
  // name when the utility yields nothing.
  const simpleType = extractReturnTypeName(rawType) ?? rawType;
  if (!simpleType) return;

  const existing = result.get(filePath);
  const fileExports = existing ?? new Map<string, string>();
  if (!existing) result.set(filePath, fileExports);

  // Per-file cap: once the limit is reached no further writes are made.
  if (fileExports.size >= MAX_EXPORTS_PER_FILE) return;
  fileExports.set(name, simpleType);
};

/** Build ExportedTypeMap from graph nodes — used for the worker path where the
* sequential TypeEnv is not available in the main thread. Collects
* returnType/declaredType from exported symbols with known types. */
Expand All @@ -48,29 +76,7 @@ export function buildExportedTypeMapFromGraph(
): ExportedTypeMap {
const result: ExportedTypeMap = new Map();
graph.forEachNode((node) => {
if (!node.properties?.isExported) return;
if (!node.properties?.filePath || !node.properties?.name) return;
const filePath = node.properties.filePath as string;
const name = node.properties.name as string;
if (!name || name.length > MAX_TYPE_NAME_LENGTH) return;
// For callable symbols, use returnType; for properties/variables, use declaredType.
// Use lookupExactAll + nodeId match to handle same-name methods in different classes.
const defs = symbolTable.lookupExactAll(filePath, name);
const def = defs.find((d) => d.nodeId === node.id) ?? defs[0];
if (!def) return;
const typeName = def.returnType ?? def.declaredType;
if (!typeName || typeName.length > MAX_TYPE_NAME_LENGTH) return;
// Extract simple type name (strip Promise<>, etc.) — reuse shared utility
const simpleType = extractReturnTypeName(typeName) ?? typeName;
if (!simpleType) return;
let fileExports = result.get(filePath);
if (!fileExports) {
fileExports = new Map();
result.set(filePath, fileExports);
}
if (fileExports.size < MAX_EXPORTS_PER_FILE) {
fileExports.set(name, simpleType);
}
accumulateExportedTypesFromParsedNode(result, node, symbolTable);
});
return result;
}
Expand Down
24 changes: 14 additions & 10 deletions gitnexus/src/core/ingestion/parsing-processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import {
} from './utils/ast-helpers.js';
import { detectFrameworkFromAST } from './framework-detection.js';
import { buildTypeEnv } from './type-env.js';
import { accumulateExportedTypesFromParsedNode, type ExportedTypeMap } from './call-processor.js';
import type { FieldInfo, FieldExtractorContext } from './field-types.js';
import type { MethodInfo } from './method-types.js';
import {
Expand Down Expand Up @@ -120,9 +121,8 @@ export const mergeChunkResults = (
graph: KnowledgeGraph,
symbolTable: SymbolTableWriter,
chunkResults: readonly ParseWorkerResult[],
exportedTypeMap?: ExportedTypeMap,
): WorkerExtractedData => {
const allCalls: ExtractedCall[] = [];
const allAssignments: ExtractedAssignment[] = [];
const allRoutes: ExtractedRoute[] = [];
const allFetchCalls: ExtractedFetchCall[] = [];
const allFetchWrapperDefs: FetchWrapperDef[] = [];
Expand All @@ -132,7 +132,6 @@ export const mergeChunkResults = (
const allRouterModuleAliases: ExtractedRouterModuleAlias[] = [];
const allToolDefs: ExtractedToolDef[] = [];
const allORMQueries: ExtractedORMQuery[] = [];
const allConstructorBindings: FileConstructorBindings[] = [];
const fileScopeBindingsByFile: FileScopeBindings[] = [];
const allParsedFiles: ParsedFile[] = [];

Expand Down Expand Up @@ -160,8 +159,11 @@ export const mergeChunkResults = (
qualifiedName: sym.qualifiedName,
});
}
for (const item of result.calls) allCalls.push(item);
for (const item of result.assignments) allAssignments.push(item);
if (exportedTypeMap) {
for (const node of result.nodes) {
accumulateExportedTypesFromParsedNode(exportedTypeMap, node, symbolTable);
}
}
for (const item of result.routes) allRoutes.push(item);
for (const item of result.fetchCalls) allFetchCalls.push(item);
for (const item of result.fetchWrapperDefs ?? []) allFetchWrapperDefs.push(item);
Expand All @@ -171,15 +173,14 @@ export const mergeChunkResults = (
for (const item of result.routerModuleAliases ?? []) allRouterModuleAliases.push(item);
for (const item of result.toolDefs) allToolDefs.push(item);
if (result.ormQueries) for (const item of result.ormQueries) allORMQueries.push(item);
for (const item of result.constructorBindings) allConstructorBindings.push(item);
if (result.fileScopeBindings)
for (const item of result.fileScopeBindings) fileScopeBindingsByFile.push(item);
if (result.parsedFiles) for (const item of result.parsedFiles) allParsedFiles.push(item);
}

return {
calls: allCalls,
assignments: allAssignments,
calls: [],
assignments: [],
routes: allRoutes,
fetchCalls: allFetchCalls,
fetchWrapperDefs: allFetchWrapperDefs,
Expand All @@ -189,7 +190,7 @@ export const mergeChunkResults = (
routerModuleAliases: allRouterModuleAliases,
toolDefs: allToolDefs,
ormQueries: allORMQueries,
constructorBindings: allConstructorBindings,
constructorBindings: [],
fileScopeBindings: fileScopeBindingsByFile,
parsedFiles: allParsedFiles,
};
Expand All @@ -210,6 +211,7 @@ const processParsingWithWorkers = async (
* `gitnexus/src/storage/parse-cache.ts`.
*/
outRawResults?: ParseWorkerResult[],
exportedTypeMap?: ExportedTypeMap,
): Promise<WorkerExtractedData> => {
// Filter to parseable files only
const parseableFiles: ParseWorkerInput[] = [];
Expand Down Expand Up @@ -254,7 +256,7 @@ const processParsingWithWorkers = async (
}

// Merge results from all workers into graph and symbol table.
const merged = mergeChunkResults(graph, symbolTable, chunkResults);
const merged = mergeChunkResults(graph, symbolTable, chunkResults, exportedTypeMap);

// Merge and log skipped languages from workers
const skippedLanguages = new Map<string, number>();
Expand Down Expand Up @@ -1017,6 +1019,7 @@ export const processParsing = async (
* artifact to cache there). See `gitnexus/src/storage/parse-cache.ts`.
*/
outRawResults?: ParseWorkerResult[],
exportedTypeMap?: ExportedTypeMap,
): Promise<WorkerExtractedData | null> => {
let lastProgress = 0;
const reportProgress: FileProgressCallback | undefined = onFileProgress
Expand Down Expand Up @@ -1062,6 +1065,7 @@ export const processParsing = async (
workerPool,
reportProgress,
outRawResults,
exportedTypeMap,
);
// Session-scoped quarantine (worker-pool resilience Layer 3): surface
// any files this pool has decided are unsafe for workers so the
Expand Down
35 changes: 30 additions & 5 deletions gitnexus/src/core/ingestion/pipeline-phases/parse-impl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,12 @@ import {
type BindingEntry,
} from '../binding-accumulator.js';
import { processParsing, mergeChunkResults } from '../parsing-processor.js';
import { fileContentHash, computeChunkHash } from '../../../storage/parse-cache.js';
import {
fileContentHash,
computeChunkHash,
loadParseCacheChunk,
persistParseCacheChunk,
} from '../../../storage/parse-cache.js';
import type { ParseWorkerResult } from '../workers/parse-worker.js';
import type { WorkerExtractedData } from '../parsing-processor.js';
import {
Expand Down Expand Up @@ -59,14 +64,15 @@ import fs from 'node:fs';
import path from 'node:path';
import { fileURLToPath, pathToFileURL } from 'node:url';

import { isDev } from '../utils/env.js';
import { isDev, parseTruthyEnv } from '../utils/env.js';
import { isVerboseIngestionEnabled } from '../utils/verbose.js';
import {
endTimer,
isDeferredResolutionProfileEnabled,
logDeferredProfile,
startTimer,
} from '../utils/deferred-resolution-profile.js';
import { logHeapProbe } from '../utils/heap-probe.js';
import { extractORMQueriesInline } from './orm-extraction.js';

import { logger } from '../../logger.js';
Expand Down Expand Up @@ -470,8 +476,18 @@ export async function runChunkedParseAndResolve(
// body, which re-read process.env on every iteration even though
// the env can't change mid-run.
const verboseThroughputLog = isDev || isVerboseIngestionEnabled();
const heapProbeEveryN =
parseTruthyEnv(process.env.GITNEXUS_DEBUG_HEAP) || isDeferredResolutionProfileEnabled()
? 25
: 0;

for (let chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) {
if (heapProbeEveryN > 0 && chunkIdx > 0 && chunkIdx % heapProbeEveryN === 0) {
logHeapProbe(
`parse-chunk-${chunkIdx}`,
`nodes=${graph.nodeCount} parsedFiles=${allParsedFiles.length}`,
);
}
const chunkPaths = chunks[chunkIdx];
// Start wall-clock for the per-chunk throughput log emitted at end
// of this iteration. The gate is computed once above; here we just
Expand Down Expand Up @@ -506,7 +522,8 @@ export async function runChunkedParseAndResolve(
}

let chunkWorkerData: WorkerExtractedData | null;
const cachedRaw = chunkHash && parseCache ? parseCache.entries.get(chunkHash) : undefined;
const cachedRaw =
chunkHash && parseCache ? await loadParseCacheChunk(parseCache, chunkHash) : undefined;

// Track every chunk hash we touched so the orchestrator can
// prune stale entries (chunks whose composition no longer
Expand All @@ -517,7 +534,7 @@ export async function runChunkedParseAndResolve(
// Cache hit: replay the cached worker output through the same
// merge logic the live worker path uses.
chunkCacheHits++;
chunkWorkerData = mergeChunkResults(graph, symbolTable, cachedRaw);
chunkWorkerData = mergeChunkResults(graph, symbolTable, cachedRaw, exportedTypeMap);
if (isDev) {
logger.info(
`📦 parse-cache HIT: chunk ${chunkIdx + 1}/${numChunks} (${chunkFiles.length} files, ${chunkHash?.slice(0, 8) ?? 'unknown'})`,
Expand Down Expand Up @@ -573,6 +590,7 @@ export async function runChunkedParseAndResolve(
// Capture raw results only when we have a cache to write to —
// otherwise we'd retain extra arrays for nothing.
parseCache && chunkHash && activeWorkerPool ? rawResults : undefined,
exportedTypeMap,
);
} catch (err) {
if (!(err instanceof WorkerPoolInitializationError)) throw err;
Expand Down Expand Up @@ -621,7 +639,7 @@ export async function runChunkedParseAndResolve(
);
}
} else {
parseCache.entries.set(chunkHash, rawResults);
await persistParseCacheChunk(parseCache, chunkHash, rawResults);
if (isDev) {
logger.info(
`📦 parse-cache MISS+store: chunk ${chunkIdx + 1}/${numChunks} (${chunkFiles.length} files, ${chunkHash.slice(0, 8)})`,
Expand Down Expand Up @@ -724,6 +742,11 @@ export async function runChunkedParseAndResolve(
);
}

logHeapProbe(
'post-parse-chunks',
`routes=${allExtractedRoutes.length} nodes=${graph.nodeCount} parsedFiles=${allParsedFiles.length}`,
);

// Deferred end-of-loop extraction (moved out of the per-chunk block):
// 1. route resolution on all chunks' routes
// Resolution sees the full repo graph instead of just current-and-earlier
Expand All @@ -740,8 +763,10 @@ export async function runChunkedParseAndResolve(
// Populate `exportedTypeMap` from the in-progress graph so the post-parse
// enrichment pass (enrichExportedTypeMap) sees cross-file export types.
if (exportedTypeMap.size === 0 && graph.nodeCount > 0) {
logHeapProbe('pre-buildExportedTypeMapFromGraph');
const graphExports = buildExportedTypeMapFromGraph(graph, model.symbols);
for (const [fp, exports] of graphExports) exportedTypeMap.set(fp, exports);
logHeapProbe('post-buildExportedTypeMapFromGraph');
}
if (allExtractedRoutes.length > 0) {
const tRoutes = startTimer(deferredProfile);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/**
* Languages that resolve via the scope-resolution pipeline (RFC #909 Ring 3).
*
* Kept free of `ScopeResolver` imports so worker threads can gate
* `ParsedFile` emission without pulling in resolver implementations.
* Keep in sync with `SCOPE_RESOLVERS` in `registry.ts`.
*/

import { SupportedLanguages } from 'gitnexus-shared';

/**
 * Read-only set of the languages handled by the scope-resolution pipeline.
 * Membership is tested with `Set.has`; must be kept in sync with
 * `SCOPE_RESOLVERS` in `registry.ts`.
 */
export const SCOPE_RESOLUTION_LANGUAGES: ReadonlySet<SupportedLanguages> = new Set([
SupportedLanguages.Python,
SupportedLanguages.CSharp,
SupportedLanguages.TypeScript,
SupportedLanguages.Go,
SupportedLanguages.Java,
SupportedLanguages.C,
SupportedLanguages.CPlusPlus,
SupportedLanguages.PHP,
SupportedLanguages.Rust,
SupportedLanguages.JavaScript,
SupportedLanguages.Kotlin,
SupportedLanguages.Ruby,
SupportedLanguages.Cobol,
SupportedLanguages.Swift,
SupportedLanguages.Dart,
SupportedLanguages.Vue,
]);

/**
 * Type guard: reports whether `lang` is a non-null language listed in
 * `SCOPE_RESOLUTION_LANGUAGES`.
 *
 * @param lang - Detected language, or `null` when none was detected.
 * @returns `true` only for a non-null language present in the set.
 */
export const isScopeResolutionLanguage = (
  lang: SupportedLanguages | null,
): lang is SupportedLanguages => {
  if (lang === null) return false;
  return SCOPE_RESOLUTION_LANGUAGES.has(lang);
};
21 changes: 21 additions & 0 deletions gitnexus/src/core/ingestion/utils/heap-probe.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
/**
* Synchronous heap probes for large-repo OOM investigation (#1983).
*
* Writes to stderr (not pino) so lines flush under CI=1 / gdb attach.
* Enabled with `GITNEXUS_DEBUG_HEAP=1` or `GITNEXUS_PROFILE_DEFERRED=1`.
*/

import { parseTruthyEnv } from './env.js';
import { isDeferredResolutionProfileEnabled } from './deferred-resolution-profile.js';

/**
 * Whether heap probing is switched on for this process.
 *
 * @returns `true` when `GITNEXUS_DEBUG_HEAP` parses as truthy, otherwise the
 *   result of `isDeferredResolutionProfileEnabled()`.
 */
export const isDebugHeapEnabled = (): boolean => {
  if (parseTruthyEnv(process.env.GITNEXUS_DEBUG_HEAP)) return true;
  return isDeferredResolutionProfileEnabled();
};

/**
 * Current `heapUsed` figure from `process.memoryUsage()`, converted from
 * bytes to mebibytes and rounded to the nearest integer.
 */
export const heapUsedMb = (): number => {
  const { heapUsed } = process.memoryUsage();
  const mebibytes = heapUsed / 1024 / 1024;
  return Math.round(mebibytes);
};

/**
 * Write a one-line heap snapshot to stderr when heap probing is enabled.
 *
 * Does nothing when `isDebugHeapEnabled()` is false.
 *
 * @param label - Identifier for the probe site, printed right after the `[gitnexus-heap]` tag.
 * @param detail - Optional extra text appended after the heap figure; omitted when empty.
 */
export const logHeapProbe = (label: string, detail?: string): void => {
  if (isDebugHeapEnabled()) {
    let suffix = '';
    if (detail) {
      suffix = ` ${detail}`;
    }
    process.stderr.write(`[gitnexus-heap] ${label} used_mb=${heapUsedMb()}${suffix}\n`);
  }
};
40 changes: 21 additions & 19 deletions gitnexus/src/core/ingestion/workers/parse-worker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ import { extractTemplateArguments, templateArgumentsIdTag } from '../utils/templ
import type { LanguageProvider } from '../language-provider.js';
import type { ParsedFile } from 'gitnexus-shared';
import { extractParsedFile, type ScopeCaptureSourceKind } from '../scope-extractor-bridge.js';
import { isScopeResolutionLanguage } from '../scope-resolution/pipeline/migrated-languages.js';
import { extractLaravelRoutes, type ExtractedRoute } from '../route-extractors/laravel.js';

import { logger } from '../../logger.js';
Expand Down Expand Up @@ -1142,25 +1143,26 @@ const processFileGroup = (
const provider = getProvider(language);

// RFC #909 Ring 2: produce a `ParsedFile` for the new scope-based
// resolution pipeline. No-op (returns undefined) for every language
// today — only fires once a provider implements `emitScopeCaptures`.
// Runs BEFORE legacy extraction and its result is independent: a
// failure here is caught inside `extractParsedFile` and does NOT
// affect the legacy DAG path that follows.
const parsedFile = extractParsedFile(
provider,
parseContent,
file.path,
(message) => {
if (parentPort) {
parentPort.postMessage({ type: 'warning', message });
} else {
logger.warn(message);
}
},
tree,
scopeSourceKind,
);
// resolution pipeline. Skipped for registry-primary languages — the
// scope-resolution phase re-extracts from source on the main thread,
// which avoids retaining ~2× semantic model in RAM on huge repos (#1983).
let parsedFile: import('gitnexus-shared').ParsedFile | undefined;
if (!isScopeResolutionLanguage(language)) {
parsedFile = extractParsedFile(
provider,
parseContent,
file.path,
(message) => {
if (parentPort) {
parentPort.postMessage({ type: 'warning', message });
} else {
logger.warn(message);
}
},
tree,
scopeSourceKind,
);
}
if (parsedFile !== undefined) result.parsedFiles.push(parsedFile);

// Build per-file type environment + constructor bindings in a single AST walk.
Expand Down
Loading
Loading