Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 79 additions & 1 deletion gitnexus/src/core/ingestion/import-processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import fs from 'fs/promises';
import path from 'path';
import { KnowledgeGraph } from '../graph/types.js';
import { ASTCache } from './ast-cache.js';
import { SymbolTable } from './symbol-table.js';
import Parser from 'tree-sitter';
import { loadParser, loadLanguage } from '../tree-sitter/parser-loader.js';
import { LANGUAGE_QUERIES } from './tree-sitter-queries.js';
Expand Down Expand Up @@ -728,6 +729,7 @@ export const processImports = async (
onProgress?: (current: number, total: number) => void,
repoRoot?: string,
allPaths?: string[],
symbolTable?: SymbolTable,
) => {
// Use allPaths (full repo) when available for cross-chunk resolution, else fall back to chunk files
const allFileList = allPaths ?? files.map(f => f.path);
Expand Down Expand Up @@ -773,6 +775,57 @@ export const processImports = async (
importMap.get(filePath)!.add(resolvedPath);
};

// Helper: connect the importing file to every named symbol it imports from the resolved file.
// Does nothing when no symbol names were extracted or when no symbol table was supplied.
const addSymbolImportEdges = (filePath: string, resolvedPath: string, symbolNames?: string[]) => {
  if (!symbolTable || !symbolNames) return;

  // All symbol-level edges originate from the importing file's node.
  const importerId = generateId('File', filePath);

  for (const symbolName of symbolNames) {
    const symbolNodeId = symbolTable.lookupExact(resolvedPath, symbolName);
    // Only names that the symbol table knows for the resolved file get an edge.
    if (symbolNodeId) {
      graph.addRelationship({
        id: generateId('IMPORTS', `${filePath}:${symbolName}->${resolvedPath}`),
        sourceId: importerId,
        targetId: symbolNodeId,
        type: 'IMPORTS',
        confidence: 1.0,
        reason: '',
      });
    }
  }
};

// Helper: extract imported symbol names from an import AST node (for sequential path).
//   Python: `from X import foo, bar as b`       -> ['foo', 'bar']
//   JS/TS:  `import { foo, bar as b } from 'X'` -> ['foo', 'bar']
// Aliased imports report the original name. Bare module imports (`import X`),
// wildcard imports, default/namespace imports and other languages yield [].
const extractSymbolNames = (importNode: any, language: string): string[] => {
  const names: string[] = [];

  if (language === SupportedLanguages.Python) {
    // A plain `import X` binds a module, not symbols defined inside it.
    if (importNode.type !== 'import_from_statement') return names;

    const namedChildren: any[] = importNode.namedChildren ?? [];
    // `module_name` is a field of import_from_statement, not a node type, so the
    // module has to be located through the field (it is also the first named child).
    const moduleNode = importNode.childForFieldName?.('module_name') ?? namedChildren[0];
    // Compare by source span as well as identity, in case the binding hands back
    // a different wrapper object for the same syntax node.
    const isModuleNode = (child: any): boolean =>
      moduleNode != null &&
      (child === moduleNode ||
        (typeof child.startIndex === 'number' &&
          child.startIndex === moduleNode.startIndex &&
          child.endIndex === moduleNode.endIndex));

    for (const child of namedChildren) {
      if (isModuleNode(child)) continue; // the `X` in `from X import ...` is not a symbol
      if (child.type === 'wildcard_import') continue;
      if (child.type === 'dotted_name' || child.type === 'identifier') {
        names.push(child.text);
      } else if (child.type === 'aliased_import') {
        // from X import foo as bar — use original name 'foo'
        const nameNode = child.childForFieldName?.('name') || child.namedChildren?.[0];
        if (nameNode) names.push(nameNode.text);
      }
    }
    return names;
  }

  if (language === SupportedLanguages.TypeScript || language === SupportedLanguages.JavaScript) {
    // import_statement > import_clause > named_imports > import_specifier*
    const importClause = importNode.namedChildren?.find((c: any) => c.type === 'import_clause');
    const namedImports = importClause?.namedChildren?.find((c: any) => c.type === 'named_imports');
    if (namedImports) {
      for (const spec of namedImports.namedChildren) {
        if (spec.type === 'import_specifier') {
          const nameNode = spec.childForFieldName?.('name');
          if (nameNode) names.push(nameNode.text);
        }
      }
    }
    return names;
  }

  return names;
};

for (let i = 0; i < files.length; i++) {
const file = files[i];
onProgress?.(i + 1, files.length);
Expand Down Expand Up @@ -844,6 +897,9 @@ export const processImports = async (
: sourceNode.text.replace(/['"<>]/g, '');
totalImportsFound++;

// Extract imported symbol names for symbol-level edges
const symbolNames = extractSymbolNames(captureMap['import'], language);

// ---- JVM languages (Java + Kotlin): handle wildcards and member imports ----
if (language === SupportedLanguages.Java || language === SupportedLanguages.Kotlin) {
const exts = language === SupportedLanguages.Java ? ['.java'] : KOTLIN_EXTENSIONS;
Expand Down Expand Up @@ -932,6 +988,7 @@ export const processImports = async (

if (resolvedPath) {
addImportEdge(file.path, resolvedPath);
addSymbolImportEdges(file.path, resolvedPath, symbolNames);
}
}
});
Expand All @@ -956,6 +1013,7 @@ export const processImportsFromExtracted = async (
onProgress?: (current: number, total: number) => void,
repoRoot?: string,
prebuiltCtx?: ImportResolutionContext,
symbolTable?: SymbolTable,
) => {
const ctx = prebuiltCtx ?? buildImportResolutionContext(files.map(f => f.path));
const { allFilePaths, allFileList, normalizedFileList, suffixIndex: index, resolveCache } = ctx;
Expand Down Expand Up @@ -991,6 +1049,25 @@ export const processImportsFromExtracted = async (
importMap.get(filePath)!.add(resolvedPath);
};

// Helper: emit one symbol-level IMPORTS edge per named import that resolves in the target file.
// A missing symbol table or a missing name list turns this into a no-op.
const addSymbolImportEdges = (filePath: string, resolvedPath: string, symbolNames?: string[]) => {
  if (!symbolTable || !symbolNames) return;

  // The importing file is the source of every edge created below.
  const fileNodeId = generateId('File', filePath);

  for (const importedName of symbolNames) {
    const definitionId = symbolTable.lookupExact(resolvedPath, importedName);
    // Names with no exact match in the resolved file are skipped silently.
    if (definitionId) {
      graph.addRelationship({
        id: generateId('IMPORTS', `${filePath}:${importedName}->${resolvedPath}`),
        sourceId: fileNodeId,
        targetId: definitionId,
        type: 'IMPORTS',
        confidence: 1.0,
        reason: '',
      });
    }
  }
};

// Group by file for progress reporting (users see file count, not import count)
const importsByFile = new Map<string, ExtractedImport[]>();
for (const imp of extractedImports) {
Expand Down Expand Up @@ -1027,7 +1104,7 @@ export const processImportsFromExtracted = async (
await yieldToEventLoop();
}

for (const { rawImportPath, language } of fileImports) {
for (const { rawImportPath, language, symbolNames } of fileImports) {
totalImportsFound++;

// Check resolve cache first
Expand Down Expand Up @@ -1120,6 +1197,7 @@ export const processImportsFromExtracted = async (

if (resolvedPath) {
addImportEdge(filePath, resolvedPath);
addSymbolImportEdges(filePath, resolvedPath, symbolNames);
}
}
}
Expand Down
2 changes: 2 additions & 0 deletions gitnexus/src/core/ingestion/parsing-processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ const DEFINITION_CAPTURE_KEYS = [
'definition.annotation',
'definition.constructor',
'definition.template',
'definition.instance',
] as const;

const getDefinitionNodeFromCaptures = (captureMap: Record<string, any>): any | null => {
Expand Down Expand Up @@ -365,6 +366,7 @@ const processParsingSequential = async (
else if (captureMap['definition.annotation']) nodeLabel = 'Annotation';
else if (captureMap['definition.constructor']) nodeLabel = 'Constructor';
else if (captureMap['definition.template']) nodeLabel = 'Template';
else if (captureMap['definition.instance']) nodeLabel = 'CodeElement';

const definitionNodeForRange = getDefinitionNodeFromCaptures(captureMap);
const startLine = definitionNodeForRange ? definitionNodeForRange.startPosition.row : (nameNode ? nameNode.startPosition.row : 0);
Expand Down
4 changes: 2 additions & 2 deletions gitnexus/src/core/ingestion/pipeline.ts
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ export const runPipelineFromRepo = async (

if (chunkWorkerData) {
// Imports
await processImportsFromExtracted(graph, allPathObjects, chunkWorkerData.imports, importMap, undefined, repoPath, importCtx);
await processImportsFromExtracted(graph, allPathObjects, chunkWorkerData.imports, importMap, undefined, repoPath, importCtx, symbolTable);
// Calls — resolve immediately, then free the array
if (chunkWorkerData.calls.length > 0) {
await processCallsFromExtracted(graph, chunkWorkerData.calls, symbolTable, importMap);
Expand All @@ -227,7 +227,7 @@ export const runPipelineFromRepo = async (
await processRoutesFromExtracted(graph, chunkWorkerData.routes, symbolTable, importMap);
}
} else {
await processImports(graph, chunkFiles, astCache, importMap, undefined, repoPath, allPaths);
await processImports(graph, chunkFiles, astCache, importMap, undefined, repoPath, allPaths, symbolTable);
sequentialChunkPaths.push(chunkPaths);
}

Expand Down
7 changes: 7 additions & 0 deletions gitnexus/src/core/ingestion/tree-sitter-queries.ts
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,13 @@ export const PYTHON_QUERIES = `
function: (attribute
attribute: (identifier) @call.name)) @call

; Module-level singleton instances: service = ServiceClass()
(module
(expression_statement
(assignment
left: (identifier) @name
right: (call))) @definition.instance)

; Heritage queries - Python class inheritance
(class_definition
name: (identifier) @heritage.class
Expand Down
49 changes: 49 additions & 0 deletions gitnexus/src/core/ingestion/workers/parse-worker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ export interface ExtractedImport {
filePath: string;
rawImportPath: string;
language: string;
symbolNames?: string[];
}

export interface ExtractedCall {
Expand Down Expand Up @@ -280,6 +281,47 @@ const FUNCTION_NODE_TYPES = new Set([
'init_declaration', 'deinit_declaration',
]);

/**
 * Extract the specific symbol names bound by an import AST node.
 *   Python: `from X import foo, bar as b`       → ['foo', 'bar']
 *   JS/TS:  `import { foo, bar as b } from 'X'` → ['foo', 'bar']
 * Aliased imports report the original name rather than the local alias.
 * Returns an empty array for bare module imports (`import X`), wildcard imports,
 * default/namespace imports and unsupported languages.
 */
const extractImportedSymbolNames = (importNode: any, language: string): string[] => {
  const names: string[] = [];

  if (language === SupportedLanguages.Python) {
    // A plain `import X` binds a module, not symbols defined inside it.
    if (importNode.type !== 'import_from_statement') return names;

    const namedChildren: any[] = importNode.namedChildren ?? [];
    // `module_name` is a field of import_from_statement, not a node type, so the
    // module has to be located through the field (it is also the first named child).
    const moduleNode = importNode.childForFieldName?.('module_name') ?? namedChildren[0];
    // Compare by source span as well as identity, in case the binding hands back
    // a different wrapper object for the same syntax node.
    const isModuleNode = (child: any): boolean =>
      moduleNode != null &&
      (child === moduleNode ||
        (typeof child.startIndex === 'number' &&
          child.startIndex === moduleNode.startIndex &&
          child.endIndex === moduleNode.endIndex));

    for (const child of namedChildren) {
      if (isModuleNode(child)) continue; // the `X` in `from X import ...` is not a symbol
      if (child.type === 'wildcard_import') continue;
      if (child.type === 'dotted_name' || child.type === 'identifier') {
        names.push(child.text);
      } else if (child.type === 'aliased_import') {
        // from X import foo as bar — use original name 'foo'
        const nameNode = child.childForFieldName?.('name') || child.namedChildren?.[0];
        if (nameNode) names.push(nameNode.text);
      }
    }
    return names;
  }

  if (language === SupportedLanguages.TypeScript || language === SupportedLanguages.JavaScript) {
    // import_statement > import_clause > named_imports > import_specifier*
    const importClause = importNode.namedChildren?.find((c: any) => c.type === 'import_clause');
    const namedImports = importClause?.namedChildren?.find((c: any) => c.type === 'named_imports');
    if (namedImports) {
      for (const spec of namedImports.namedChildren) {
        if (spec.type === 'import_specifier') {
          const nameNode = spec.childForFieldName?.('name');
          if (nameNode) names.push(nameNode.text);
        }
      }
    }
    return names;
  }

  return names;
};

/** Walk up AST to find enclosing function, return its generateId or null for top-level */
const findEnclosingFunctionId = (node: any, filePath: string): string | null => {
let current = node.parent;
Expand Down Expand Up @@ -480,6 +522,7 @@ const getLabelFromCaptures = (captureMap: Record<string, any>): string | null =>
if (captureMap['definition.annotation']) return 'Annotation';
if (captureMap['definition.constructor']) return 'Constructor';
if (captureMap['definition.template']) return 'Template';
if (captureMap['definition.instance']) return 'CodeElement';
return 'CodeElement';
};

Expand All @@ -506,6 +549,7 @@ const DEFINITION_CAPTURE_KEYS = [
'definition.annotation',
'definition.constructor',
'definition.template',
'definition.instance',
] as const;

const getDefinitionNodeFromCaptures = (captureMap: Record<string, any>): any | null => {
Expand Down Expand Up @@ -1135,10 +1179,15 @@ const processFileGroup = (
const rawImportPath = language === SupportedLanguages.Kotlin
? appendKotlinWildcard(captureMap['import.source'].text.replace(/['"<>]/g, ''), captureMap['import'])
: captureMap['import.source'].text.replace(/['"<>]/g, '');

// Extract imported symbol names from the AST node
const symbolNames = extractImportedSymbolNames(captureMap['import'], language);

result.imports.push({
filePath: file.path,
rawImportPath,
language: language,
symbolNames: symbolNames.length > 0 ? symbolNames : undefined,
});
continue;
}
Expand Down
Loading