From 62f42359d8b41306f59d26e54c7769fd084a1aeb Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 16 Apr 2026 08:36:00 +0000 Subject: [PATCH 1/7] Initial plan From 1d7772570352a3e8666c8ce0b4dd90ddd00dc4ad Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 16 Apr 2026 08:48:24 +0000 Subject: [PATCH 2/7] feat(ingestion): add call-types, call-extractors factory, per-language configs, and wire into providers Agent-Logs-Url: https://github.com/abhigyanpatwari/GitNexus/sessions/893afa77-5b34-4e6b-a1dc-03034261fb36 Co-authored-by: magyargergo <11230420+magyargergo@users.noreply.github.com> --- .../call-extractors/configs/c-cpp.ts | 12 +++ .../call-extractors/configs/csharp.ts | 9 ++ .../ingestion/call-extractors/configs/dart.ts | 8 ++ .../ingestion/call-extractors/configs/go.ts | 8 ++ .../ingestion/call-extractors/configs/jvm.ts | 59 +++++++++++++ .../ingestion/call-extractors/configs/php.ts | 8 ++ .../call-extractors/configs/python.ts | 8 ++ .../ingestion/call-extractors/configs/ruby.ts | 8 ++ .../ingestion/call-extractors/configs/rust.ts | 8 ++ .../call-extractors/configs/swift.ts | 8 ++ .../configs/typescript-javascript.ts | 12 +++ .../core/ingestion/call-extractors/generic.ts | 84 +++++++++++++++++++ gitnexus/src/core/ingestion/call-types.ts | 80 ++++++++++++++++++ .../src/core/ingestion/language-provider.ts | 6 ++ .../src/core/ingestion/languages/c-cpp.ts | 4 + .../src/core/ingestion/languages/csharp.ts | 3 + gitnexus/src/core/ingestion/languages/dart.ts | 3 + gitnexus/src/core/ingestion/languages/go.ts | 3 + gitnexus/src/core/ingestion/languages/java.ts | 3 + .../src/core/ingestion/languages/kotlin.ts | 3 + gitnexus/src/core/ingestion/languages/php.ts | 3 + .../src/core/ingestion/languages/python.ts | 3 + gitnexus/src/core/ingestion/languages/ruby.ts | 3 + gitnexus/src/core/ingestion/languages/rust.ts | 3 + .../src/core/ingestion/languages/swift.ts | 3 + .../core/ingestion/languages/typescript.ts | 7 ++ gitnexus/src/core/ingestion/languages/vue.ts | 3 + 27 files changed, 362 insertions(+) create mode 100644 gitnexus/src/core/ingestion/call-extractors/configs/c-cpp.ts create mode 100644 gitnexus/src/core/ingestion/call-extractors/configs/csharp.ts create mode 100644 gitnexus/src/core/ingestion/call-extractors/configs/dart.ts create mode 100644 gitnexus/src/core/ingestion/call-extractors/configs/go.ts create mode 100644 gitnexus/src/core/ingestion/call-extractors/configs/jvm.ts create mode 100644 gitnexus/src/core/ingestion/call-extractors/configs/php.ts create mode 100644 gitnexus/src/core/ingestion/call-extractors/configs/python.ts create mode 100644 gitnexus/src/core/ingestion/call-extractors/configs/ruby.ts create mode 100644 gitnexus/src/core/ingestion/call-extractors/configs/rust.ts create mode 100644 gitnexus/src/core/ingestion/call-extractors/configs/swift.ts create mode 100644 gitnexus/src/core/ingestion/call-extractors/configs/typescript-javascript.ts create mode 100644 gitnexus/src/core/ingestion/call-extractors/generic.ts create mode 100644 gitnexus/src/core/ingestion/call-types.ts diff --git a/gitnexus/src/core/ingestion/call-extractors/configs/c-cpp.ts b/gitnexus/src/core/ingestion/call-extractors/configs/c-cpp.ts new file mode 100644 index 0000000000..02a6ed60f2 --- /dev/null +++ b/gitnexus/src/core/ingestion/call-extractors/configs/c-cpp.ts @@ -0,0 +1,12 @@ +// gitnexus/src/core/ingestion/call-extractors/configs/c-cpp.ts + +import { SupportedLanguages } from 'gitnexus-shared'; +import type { CallExtractionConfig } from '../../call-types.js'; + +export const cCallConfig: CallExtractionConfig = { + language: SupportedLanguages.C, +}; + +export const cppCallConfig: CallExtractionConfig = { + language: SupportedLanguages.CPlusPlus, +}; diff --git a/gitnexus/src/core/ingestion/call-extractors/configs/csharp.ts b/gitnexus/src/core/ingestion/call-extractors/configs/csharp.ts new file mode 100644 index 0000000000..e2c0415c21 --- /dev/null +++ b/gitnexus/src/core/ingestion/call-extractors/configs/csharp.ts @@ -0,0 +1,9 @@ +// gitnexus/src/core/ingestion/call-extractors/configs/csharp.ts + +import { SupportedLanguages } from 'gitnexus-shared'; +import type { CallExtractionConfig } from '../../call-types.js'; + +export const csharpCallConfig: CallExtractionConfig = { + language: SupportedLanguages.CSharp, + typeAsReceiverHeuristic: true, +}; diff --git a/gitnexus/src/core/ingestion/call-extractors/configs/dart.ts b/gitnexus/src/core/ingestion/call-extractors/configs/dart.ts new file mode 100644 index 0000000000..9d3c08def3 --- /dev/null +++ b/gitnexus/src/core/ingestion/call-extractors/configs/dart.ts @@ -0,0 +1,8 @@ +// gitnexus/src/core/ingestion/call-extractors/configs/dart.ts + +import { SupportedLanguages } from 'gitnexus-shared'; +import type { CallExtractionConfig } from '../../call-types.js'; + +export const dartCallConfig: CallExtractionConfig = { + language: SupportedLanguages.Dart, +}; diff --git a/gitnexus/src/core/ingestion/call-extractors/configs/go.ts b/gitnexus/src/core/ingestion/call-extractors/configs/go.ts new file mode 100644 index 0000000000..870fc88e02 --- /dev/null +++ b/gitnexus/src/core/ingestion/call-extractors/configs/go.ts @@ -0,0 +1,8 @@ +// gitnexus/src/core/ingestion/call-extractors/configs/go.ts + +import { SupportedLanguages } from 'gitnexus-shared'; +import type { CallExtractionConfig } from '../../call-types.js'; + +export const goCallConfig: CallExtractionConfig = { + language: SupportedLanguages.Go, +}; diff --git a/gitnexus/src/core/ingestion/call-extractors/configs/jvm.ts b/gitnexus/src/core/ingestion/call-extractors/configs/jvm.ts new file mode 100644 index 0000000000..51de04228d --- /dev/null +++ b/gitnexus/src/core/ingestion/call-extractors/configs/jvm.ts @@ -0,0 +1,59 @@ +// gitnexus/src/core/ingestion/call-extractors/configs/jvm.ts + +import { SupportedLanguages } from 'gitnexus-shared'; +import type { CallExtractionConfig, ExtractedCallSite } from '../../call-types.js'; +import type { SyntaxNode } from '../../utils/ast-helpers.js'; + +// --------------------------------------------------------------------------- +// Java method_reference (::) parsing — absorbs call-sites/java.ts +// --------------------------------------------------------------------------- + +/** + * Parse Java `method_reference` nodes (`expr::method`, `Type::new`, + * `this::m`, `super::m`). + */ +function parseJavaMethodReference(callNode: SyntaxNode): ExtractedCallSite | null { + if (callNode.type !== 'method_reference') return null; + + const recv = callNode.namedChild(0); + if (!recv) return null; + + // Type::new → constructor call + for (const c of callNode.children) { + if (c.type === 'new') { + if (recv.type !== 'identifier') return null; + return { calledName: recv.text, callForm: 'constructor' }; + } + } + + // expr::method → member call with receiver + const rhs = callNode.child(callNode.childCount - 1); + if (!rhs || rhs.type !== 'identifier') return null; + const methodName = rhs.text; + + if (recv.type === 'identifier') { + return { calledName: methodName, callForm: 'member', receiverName: recv.text }; + } + if (recv.type === 'this') { + return { calledName: methodName, callForm: 'member', receiverName: 'this' }; + } + if (recv.type === 'super') { + return { calledName: methodName, callForm: 'member', receiverName: 'super' }; + } + return null; +} + +// --------------------------------------------------------------------------- +// Configs +// --------------------------------------------------------------------------- + +export const javaCallConfig: CallExtractionConfig = { + language: SupportedLanguages.Java, + extractLanguageCallSite: parseJavaMethodReference, + typeAsReceiverHeuristic: true, +}; + +export const kotlinCallConfig: CallExtractionConfig = { + language: SupportedLanguages.Kotlin, + typeAsReceiverHeuristic: true, +}; diff --git a/gitnexus/src/core/ingestion/call-extractors/configs/php.ts b/gitnexus/src/core/ingestion/call-extractors/configs/php.ts new file mode 100644 index 0000000000..25ed0b9abc --- /dev/null +++ b/gitnexus/src/core/ingestion/call-extractors/configs/php.ts @@ -0,0 +1,8 @@ +// gitnexus/src/core/ingestion/call-extractors/configs/php.ts + +import { SupportedLanguages } from 'gitnexus-shared'; +import type { CallExtractionConfig } from '../../call-types.js'; + +export const phpCallConfig: CallExtractionConfig = { + language: SupportedLanguages.PHP, +}; diff --git a/gitnexus/src/core/ingestion/call-extractors/configs/python.ts b/gitnexus/src/core/ingestion/call-extractors/configs/python.ts new file mode 100644 index 0000000000..35ab873058 --- /dev/null +++ b/gitnexus/src/core/ingestion/call-extractors/configs/python.ts @@ -0,0 +1,8 @@ +// gitnexus/src/core/ingestion/call-extractors/configs/python.ts + +import { SupportedLanguages } from 'gitnexus-shared'; +import type { CallExtractionConfig } from '../../call-types.js'; + +export const pythonCallConfig: CallExtractionConfig = { + language: SupportedLanguages.Python, +}; diff --git a/gitnexus/src/core/ingestion/call-extractors/configs/ruby.ts b/gitnexus/src/core/ingestion/call-extractors/configs/ruby.ts new file mode 100644 index 0000000000..d829c5c894 --- /dev/null +++ b/gitnexus/src/core/ingestion/call-extractors/configs/ruby.ts @@ -0,0 +1,8 @@ +// gitnexus/src/core/ingestion/call-extractors/configs/ruby.ts + +import { SupportedLanguages } from 'gitnexus-shared'; +import type { CallExtractionConfig } from '../../call-types.js'; + +export const rubyCallConfig: CallExtractionConfig = { + language: SupportedLanguages.Ruby, +}; diff --git a/gitnexus/src/core/ingestion/call-extractors/configs/rust.ts b/gitnexus/src/core/ingestion/call-extractors/configs/rust.ts new file mode 100644 index 0000000000..03c48f7817 --- /dev/null +++ b/gitnexus/src/core/ingestion/call-extractors/configs/rust.ts @@ -0,0 +1,8 @@ +// gitnexus/src/core/ingestion/call-extractors/configs/rust.ts + +import { SupportedLanguages } from 'gitnexus-shared'; +import type { CallExtractionConfig } from '../../call-types.js'; + +export const rustCallConfig: CallExtractionConfig = { + language: SupportedLanguages.Rust, +}; diff --git a/gitnexus/src/core/ingestion/call-extractors/configs/swift.ts b/gitnexus/src/core/ingestion/call-extractors/configs/swift.ts new file mode 100644 index 0000000000..28f2c180cb --- /dev/null +++ b/gitnexus/src/core/ingestion/call-extractors/configs/swift.ts @@ -0,0 +1,8 @@ +// gitnexus/src/core/ingestion/call-extractors/configs/swift.ts + +import { SupportedLanguages } from 'gitnexus-shared'; +import type { CallExtractionConfig } from '../../call-types.js'; + +export const swiftCallConfig: CallExtractionConfig = { + language: SupportedLanguages.Swift, +}; diff --git a/gitnexus/src/core/ingestion/call-extractors/configs/typescript-javascript.ts b/gitnexus/src/core/ingestion/call-extractors/configs/typescript-javascript.ts new file mode 100644 index 0000000000..20a63cda93 --- /dev/null +++ b/gitnexus/src/core/ingestion/call-extractors/configs/typescript-javascript.ts @@ -0,0 +1,12 @@ +// gitnexus/src/core/ingestion/call-extractors/configs/typescript-javascript.ts + +import { SupportedLanguages } from 'gitnexus-shared'; +import type { CallExtractionConfig } from '../../call-types.js'; + +export const typescriptCallConfig: CallExtractionConfig = { + language: SupportedLanguages.TypeScript, +}; + +export const javascriptCallConfig: CallExtractionConfig = { + language: SupportedLanguages.JavaScript, +}; diff --git a/gitnexus/src/core/ingestion/call-extractors/generic.ts b/gitnexus/src/core/ingestion/call-extractors/generic.ts new file mode 100644 index 0000000000..1151cf2853 --- /dev/null +++ b/gitnexus/src/core/ingestion/call-extractors/generic.ts @@ -0,0 +1,84 @@ +// gitnexus/src/core/ingestion/call-extractors/generic.ts + +/** + * Generic table-driven call extractor factory. + * + * Mirrors method-extractors/generic.ts and field-extractors/generic.ts — + * define a config per language and generate extractors from configs. + * + * The factory converts a declarative {@link CallExtractionConfig} into a + * runtime {@link CallExtractor} whose `extract()` method: + * 1. Tries `config.extractLanguageCallSite(callNode)` for non-standard shapes. + * 2. Falls through to the generic path using shared utilities from + * `utils/call-analysis.ts` (`inferCallForm`, `extractReceiverName`, etc.). + */ + +import type { SyntaxNode } from '../utils/ast-helpers.js'; +import { + inferCallForm, + extractReceiverName, + extractReceiverNode, + extractMixedChain, + countCallArguments, +} from '../utils/call-analysis.js'; +import type { CallExtractor, CallExtractionConfig, ExtractedCallSite } from '../call-types.js'; + +/** + * Create a CallExtractor from a declarative config. + */ +export function createCallExtractor(config: CallExtractionConfig): CallExtractor { + return { + language: config.language, + + extract( + callNode: SyntaxNode, + callNameNode: SyntaxNode | undefined, + ): ExtractedCallSite | null { + // ── Path 1: Language-specific call site ────────────────────────── + // Non-standard call shapes (e.g. Java `::` method references) are + // handled entirely by the config hook. When it returns a result, + // the generic path is skipped — no argCount, no mixed chain. + if (config.extractLanguageCallSite) { + const seed = config.extractLanguageCallSite(callNode); + if (seed) { + return { + ...seed, + ...(config.typeAsReceiverHeuristic ? { typeAsReceiverHeuristic: true } : {}), + }; + } + } + + // ── Path 2: Generic extraction via @call.name ──────────────────── + if (!callNameNode) return null; + + const calledName = callNameNode.text; + const callForm = inferCallForm(callNode, callNameNode); + let receiverName = + callForm === 'member' ? extractReceiverName(callNameNode) : undefined; + let receiverMixedChain: ExtractedCallSite['receiverMixedChain']; + + // When the receiver is a complex expression (call chain, field chain, + // or mixed), extractReceiverName returns undefined. Walk the receiver + // node to build a unified mixed chain for deferred resolution. + if (callForm === 'member' && receiverName === undefined) { + const receiverNode = extractReceiverNode(callNameNode); + if (receiverNode) { + const extracted = extractMixedChain(receiverNode); + if (extracted && extracted.chain.length > 0) { + receiverMixedChain = extracted.chain; + receiverName = extracted.baseReceiverName; + } + } + } + + return { + calledName, + ...(callForm !== undefined ? { callForm } : {}), + ...(receiverName !== undefined ? { receiverName } : {}), + argCount: countCallArguments(callNode), + ...(receiverMixedChain !== undefined ? { receiverMixedChain } : {}), + ...(config.typeAsReceiverHeuristic ? { typeAsReceiverHeuristic: true } : {}), + }; + }, + }; +} diff --git a/gitnexus/src/core/ingestion/call-types.ts b/gitnexus/src/core/ingestion/call-types.ts new file mode 100644 index 0000000000..da175720de --- /dev/null +++ b/gitnexus/src/core/ingestion/call-types.ts @@ -0,0 +1,80 @@ +// gitnexus/src/core/ingestion/call-types.ts + +/** + * Types for the language-agnostic call extraction pipeline. + * + * Mirrors method-types.ts / field-types.ts: defines the domain interfaces + * consumed by createCallExtractor() and the per-language configs. + */ + +import type { SupportedLanguages } from 'gitnexus-shared'; +import type { SyntaxNode } from './utils/ast-helpers.js'; +import type { MixedChainStep } from './utils/call-analysis.js'; + +// --------------------------------------------------------------------------- +// Extracted result +// --------------------------------------------------------------------------- + +/** + * Per-node call extraction result. The parse worker enriches this with + * file-level context (filePath, sourceId, TypeEnv lookups, arg types) to + * produce the final `ExtractedCall` that enters the resolution pipeline. + */ +export interface ExtractedCallSite { + calledName: string; + callForm?: 'free' | 'member' | 'constructor'; + receiverName?: string; + argCount?: number; + /** Unified mixed chain for complex receivers (field + call chains). */ + receiverMixedChain?: MixedChainStep[]; + /** When true, the type-as-receiver heuristic applies: if receiverName + * starts with an uppercase letter and has no TypeEnv binding, treat it + * as a type name (e.g. Java `User::getName`). */ + typeAsReceiverHeuristic?: boolean; +} + +// --------------------------------------------------------------------------- +// Extractor interface (produced by createCallExtractor) +// --------------------------------------------------------------------------- + +export interface CallExtractor { + readonly language: SupportedLanguages; + /** + * Extract a call site from captured AST nodes. + * + * @param callNode The @call capture (call_expression, method_invocation, …) + * @param callNameNode The @call.name capture (identifier inside the call). + * May be undefined when the call shape has no name capture + * (e.g. Java method_reference via `::`). + * @returns Extracted call site, or null when no call can be derived. + */ + extract(callNode: SyntaxNode, callNameNode: SyntaxNode | undefined): ExtractedCallSite | null; +} + +// --------------------------------------------------------------------------- +// Config interface (one per language / language group) +// --------------------------------------------------------------------------- + +export interface CallExtractionConfig { + language: SupportedLanguages; + + /** + * Language-specific call site extraction. Called **before** the generic + * path. If it returns non-null, the generic `inferCallForm` / + * `extractReceiverName` path is skipped entirely. + * + * Use this for call shapes that don't follow the standard `@call` / + * `@call.name` pattern (e.g. Java `method_reference` via `::`). + */ + extractLanguageCallSite?: (callNode: SyntaxNode) => ExtractedCallSite | null; + + /** + * Whether the type-as-receiver heuristic applies for this language. + * When true and the receiver name starts with an uppercase letter, + * the receiver is treated as a type name when no TypeEnv binding exists. + * + * Applies to JVM and C# languages where `Type.method()` and `Type::method` + * are common patterns. + */ + typeAsReceiverHeuristic?: boolean; +} diff --git a/gitnexus/src/core/ingestion/language-provider.ts b/gitnexus/src/core/ingestion/language-provider.ts index 736c3b6660..239c1d4c56 100644 --- a/gitnexus/src/core/ingestion/language-provider.ts +++ b/gitnexus/src/core/ingestion/language-provider.ts @@ -12,6 +12,7 @@ import type { SupportedLanguages, MroStrategy } from 'gitnexus-shared'; import type { LanguageTypeConfig } from './type-extractors/types.js'; import type { CallRouter } from './call-routing.js'; +import type { CallExtractor } from './call-types.js'; import type { ClassExtractor } from './class-types.js'; import type { ExportChecker } from './export-detection.js'; import type { FieldExtractor } from './field-extractor.js'; @@ -155,6 +156,11 @@ interface LanguageProviderConfig { readonly mroStrategy?: MroStrategy; // ── Language-specific extraction hooks ──────────────────────────── + /** Call extractor for extracting call site information (calledName, callForm, + * receiverName, argCount, mixed chains) from @call / @call.name captures. + * Produced by createCallExtractor() with a per-language CallExtractionConfig. + * Default: undefined (falls back to inline extraction). */ + readonly callExtractor?: CallExtractor; /** Field extractor for extracting field/property definitions from class/struct * declarations. Produces FieldInfo[] with name, type, visibility, static, * readonly metadata. Default: undefined (no field extraction). */ diff --git a/gitnexus/src/core/ingestion/languages/c-cpp.ts b/gitnexus/src/core/ingestion/languages/c-cpp.ts index 5b887634fa..4ce8105fae 100644 --- a/gitnexus/src/core/ingestion/languages/c-cpp.ts +++ b/gitnexus/src/core/ingestion/languages/c-cpp.ts @@ -37,6 +37,8 @@ import { } from '../field-extractors/configs/c-cpp.js'; import { createMethodExtractor } from '../method-extractors/generic.js'; import { cMethodConfig, cppMethodConfig } from '../method-extractors/configs/c-cpp.js'; +import { createCallExtractor } from '../call-extractors/generic.js'; +import { cCallConfig, cppCallConfig } from '../call-extractors/configs/c-cpp.js'; const C_BUILT_INS: ReadonlySet = new Set([ 'printf', @@ -322,6 +324,7 @@ export const cProvider = defineLanguage({ exportChecker: cCppExportChecker, importResolver: resolveCImport, importSemantics: 'wildcard-transitive', + callExtractor: createCallExtractor(cCallConfig), fieldExtractor: createFieldExtractor(cFieldConfig), methodExtractor: createMethodExtractor({ ...cMethodConfig, @@ -341,6 +344,7 @@ export const cppProvider = defineLanguage({ importResolver: resolveCppImport, importSemantics: 'wildcard-transitive', mroStrategy: 'leftmost-base', + callExtractor: createCallExtractor(cppCallConfig), fieldExtractor: createFieldExtractor(cppFieldConfig), methodExtractor: createMethodExtractor({ ...cppMethodConfig, diff --git a/gitnexus/src/core/ingestion/languages/csharp.ts b/gitnexus/src/core/ingestion/languages/csharp.ts index 6ec3a6a8b3..4a0bee727a 100644 --- a/gitnexus/src/core/ingestion/languages/csharp.ts +++ b/gitnexus/src/core/ingestion/languages/csharp.ts @@ -14,6 +14,8 @@ import { csharpExportChecker } from '../export-detection.js'; import { resolveCSharpImport } from '../import-resolvers/csharp.js'; import { extractCSharpNamedBindings } from '../named-bindings/csharp.js'; import { CSHARP_QUERIES } from '../tree-sitter-queries.js'; +import { createCallExtractor } from '../call-extractors/generic.js'; +import { csharpCallConfig } from '../call-extractors/configs/csharp.js'; import { createFieldExtractor } from '../field-extractors/generic.js'; import { csharpConfig as csharpFieldConfig } from '../field-extractors/configs/csharp.js'; import { createMethodExtractor } from '../method-extractors/generic.js'; @@ -124,6 +126,7 @@ export const csharpProvider = defineLanguage({ namedBindingExtractor: extractCSharpNamedBindings, interfaceNamePattern: /^I[A-Z]/, mroStrategy: 'implements-split', + callExtractor: createCallExtractor(csharpCallConfig), fieldExtractor: createFieldExtractor(csharpFieldConfig), methodExtractor: createMethodExtractor(csharpMethodConfig), classExtractor: createClassExtractor({ diff --git a/gitnexus/src/core/ingestion/languages/dart.ts b/gitnexus/src/core/ingestion/languages/dart.ts index 7dc6769c6d..34a8467232 100644 --- a/gitnexus/src/core/ingestion/languages/dart.ts +++ b/gitnexus/src/core/ingestion/languages/dart.ts @@ -24,6 +24,8 @@ import { createFieldExtractor } from '../field-extractors/generic.js'; import { dartConfig as dartFieldConfig } from '../field-extractors/configs/dart.js'; import { createMethodExtractor } from '../method-extractors/generic.js'; import { dartMethodConfig } from '../method-extractors/configs/dart.js'; +import { createCallExtractor } from '../call-extractors/generic.js'; +import { dartCallConfig } from '../call-extractors/configs/dart.js'; /** * Resolve the enclosing function from a `function_body` node by looking at its @@ -91,6 +93,7 @@ export const dartProvider = defineLanguage({ exportChecker: dartExportChecker, importResolver: resolveDartImport, importSemantics: 'wildcard-leaf', + callExtractor: createCallExtractor(dartCallConfig), fieldExtractor: createFieldExtractor(dartFieldConfig), methodExtractor: createMethodExtractor(dartMethodConfig), classExtractor: createClassExtractor({ diff --git a/gitnexus/src/core/ingestion/languages/go.ts b/gitnexus/src/core/ingestion/languages/go.ts index 2a2b35f503..27c5304736 100644 --- a/gitnexus/src/core/ingestion/languages/go.ts +++ b/gitnexus/src/core/ingestion/languages/go.ts @@ -20,6 +20,8 @@ import { createFieldExtractor } from '../field-extractors/generic.js'; import { goConfig as goFieldConfig } from '../field-extractors/configs/go.js'; import { createMethodExtractor } from '../method-extractors/generic.js'; import { goMethodConfig } from '../method-extractors/configs/go.js'; +import { createCallExtractor } from '../call-extractors/generic.js'; +import { goCallConfig } from '../call-extractors/configs/go.js'; export const goProvider = defineLanguage({ id: SupportedLanguages.Go, @@ -29,6 +31,7 @@ export const goProvider = defineLanguage({ exportChecker: goExportChecker, importResolver: resolveGoImport, importSemantics: 'wildcard-leaf', + callExtractor: createCallExtractor(goCallConfig), fieldExtractor: createFieldExtractor(goFieldConfig), methodExtractor: createMethodExtractor(goMethodConfig), classExtractor: createClassExtractor({ diff --git a/gitnexus/src/core/ingestion/languages/java.ts b/gitnexus/src/core/ingestion/languages/java.ts index b9fab77f1e..a2e1dba03e 100644 --- a/gitnexus/src/core/ingestion/languages/java.ts +++ b/gitnexus/src/core/ingestion/languages/java.ts @@ -15,6 +15,8 @@ import { javaExportChecker } from '../export-detection.js'; import { resolveJavaImport } from '../import-resolvers/jvm.js'; import { extractJavaNamedBindings } from '../named-bindings/java.js'; import { JAVA_QUERIES } from '../tree-sitter-queries.js'; +import { createCallExtractor } from '../call-extractors/generic.js'; +import { javaCallConfig } from '../call-extractors/configs/jvm.js'; import { createFieldExtractor } from '../field-extractors/generic.js'; import { javaConfig } from '../field-extractors/configs/jvm.js'; import { createMethodExtractor } from '../method-extractors/generic.js'; @@ -30,6 +32,7 @@ export const javaProvider = defineLanguage({ namedBindingExtractor: extractJavaNamedBindings, interfaceNamePattern: /^I[A-Z]/, mroStrategy: 'implements-split', + callExtractor: createCallExtractor(javaCallConfig), fieldExtractor: createFieldExtractor(javaConfig), methodExtractor: createMethodExtractor(javaMethodConfig), classExtractor: createClassExtractor({ diff --git a/gitnexus/src/core/ingestion/languages/kotlin.ts b/gitnexus/src/core/ingestion/languages/kotlin.ts index 94c47e7ab7..12bde5d1d1 100644 --- a/gitnexus/src/core/ingestion/languages/kotlin.ts +++ b/gitnexus/src/core/ingestion/languages/kotlin.ts @@ -17,6 +17,8 @@ import { extractKotlinNamedBindings } from '../named-bindings/kotlin.js'; import { appendKotlinWildcard } from '../import-resolvers/jvm.js'; import { KOTLIN_QUERIES } from '../tree-sitter-queries.js'; import type { SyntaxNode } from '../utils/ast-helpers.js'; +import { createCallExtractor } from '../call-extractors/generic.js'; +import { kotlinCallConfig } from '../call-extractors/configs/jvm.js'; import { createFieldExtractor } from '../field-extractors/generic.js'; import { kotlinConfig } from '../field-extractors/configs/jvm.js'; import { createMethodExtractor } from '../method-extractors/generic.js'; @@ -105,6 +107,7 @@ export const kotlinProvider = defineLanguage({ namedBindingExtractor: extractKotlinNamedBindings, importPathPreprocessor: appendKotlinWildcard, mroStrategy: 'implements-split', + callExtractor: createCallExtractor(kotlinCallConfig), fieldExtractor: createFieldExtractor(kotlinConfig), methodExtractor: createMethodExtractor(kotlinMethodConfig), classExtractor: createClassExtractor({ diff --git a/gitnexus/src/core/ingestion/languages/php.ts b/gitnexus/src/core/ingestion/languages/php.ts index 642c6bd83d..0544022181 100644 --- a/gitnexus/src/core/ingestion/languages/php.ts +++ b/gitnexus/src/core/ingestion/languages/php.ts @@ -20,6 +20,8 @@ import { createFieldExtractor } from '../field-extractors/generic.js'; import { phpConfig as phpFieldConfig } from '../field-extractors/configs/php.js'; import { createMethodExtractor } from '../method-extractors/generic.js'; import { phpMethodConfig } from '../method-extractors/configs/php.js'; +import { createCallExtractor } from '../call-extractors/generic.js'; +import { phpCallConfig } from '../call-extractors/configs/php.js'; const BUILT_INS: ReadonlySet = new Set([ 'echo', @@ -237,6 +239,7 @@ export const phpProvider = defineLanguage({ exportChecker: phpExportChecker, importResolver: resolvePhpImport, namedBindingExtractor: extractPhpNamedBindings, + callExtractor: createCallExtractor(phpCallConfig), fieldExtractor: createFieldExtractor(phpFieldConfig), methodExtractor: createMethodExtractor(phpMethodConfig), classExtractor: createClassExtractor({ diff --git a/gitnexus/src/core/ingestion/languages/python.ts b/gitnexus/src/core/ingestion/languages/python.ts index 8c776d4a06..6b103c0ce8 100644 --- a/gitnexus/src/core/ingestion/languages/python.ts +++ b/gitnexus/src/core/ingestion/languages/python.ts @@ -22,6 +22,8 @@ import { createFieldExtractor } from '../field-extractors/generic.js'; import { pythonConfig as pythonFieldConfig } from '../field-extractors/configs/python.js'; import { createMethodExtractor } from '../method-extractors/generic.js'; import { pythonMethodConfig } from '../method-extractors/configs/python.js'; +import { createCallExtractor } from '../call-extractors/generic.js'; +import { pythonCallConfig } from '../call-extractors/configs/python.js'; const BUILT_INS: ReadonlySet = new Set([ 'print', @@ -63,6 +65,7 @@ export const pythonProvider = defineLanguage({ namedBindingExtractor: extractPythonNamedBindings, importSemantics: 'namespace', mroStrategy: 'c3', + callExtractor: createCallExtractor(pythonCallConfig), fieldExtractor: createFieldExtractor(pythonFieldConfig), methodExtractor: createMethodExtractor(pythonMethodConfig), classExtractor: createClassExtractor({ diff --git a/gitnexus/src/core/ingestion/languages/ruby.ts b/gitnexus/src/core/ingestion/languages/ruby.ts index a15b5439ca..6d4dbb44e6 100644 --- a/gitnexus/src/core/ingestion/languages/ruby.ts +++ b/gitnexus/src/core/ingestion/languages/ruby.ts @@ -21,6 +21,8 @@ import { createFieldExtractor } from '../field-extractors/generic.js'; import { rubyConfig as rubyFieldConfig } from '../field-extractors/configs/ruby.js'; import { createMethodExtractor } from '../method-extractors/generic.js'; import { rubyMethodConfig } from '../method-extractors/configs/ruby.js'; +import { createCallExtractor } from '../call-extractors/generic.js'; +import { rubyCallConfig } from '../call-extractors/configs/ruby.js'; /** Ruby method/singleton_method: extract name from 'name' field, label as Method. */ const rubyExtractFunctionName = ( @@ -108,6 +110,7 @@ export const rubyProvider = defineLanguage({ importResolver: resolveRubyImport, callRouter: routeRubyCall, importSemantics: 'wildcard-leaf', + callExtractor: createCallExtractor(rubyCallConfig), resolveEnclosingOwner(node) { // Ruby singleton_class (class << self) should resolve to the enclosing // class or module for owner/container resolution (HAS_METHOD edges, class IDs). diff --git a/gitnexus/src/core/ingestion/languages/rust.ts b/gitnexus/src/core/ingestion/languages/rust.ts index 5e664ef8f4..e7a5b5f050 100644 --- a/gitnexus/src/core/ingestion/languages/rust.ts +++ b/gitnexus/src/core/ingestion/languages/rust.ts @@ -24,6 +24,8 @@ import { createFieldExtractor } from '../field-extractors/generic.js'; import { rustConfig as rustFieldConfig } from '../field-extractors/configs/rust.js'; import { createMethodExtractor } from '../method-extractors/generic.js'; import { rustMethodConfig } from '../method-extractors/configs/rust.js'; +import { createCallExtractor } from '../call-extractors/generic.js'; +import { rustCallConfig } from '../call-extractors/configs/rust.js'; /** Rust impl_item: find the function_item child and extract its name as a Method. */ const rustExtractFunctionName = ( @@ -120,6 +122,7 @@ export const rustProvider = defineLanguage({ importResolver: resolveRustImport, namedBindingExtractor: extractRustNamedBindings, mroStrategy: 'qualified-syntax', + callExtractor: createCallExtractor(rustCallConfig), fieldExtractor: createFieldExtractor(rustFieldConfig), methodExtractor: createMethodExtractor({ ...rustMethodConfig, diff --git a/gitnexus/src/core/ingestion/languages/swift.ts b/gitnexus/src/core/ingestion/languages/swift.ts index 314c27c1dc..031d331c4e 100644 --- a/gitnexus/src/core/ingestion/languages/swift.ts +++ b/gitnexus/src/core/ingestion/languages/swift.ts @@ -24,6 +24,8 @@ import { createFieldExtractor } from '../field-extractors/generic.js'; import { swiftConfig as swiftFieldConfig } from '../field-extractors/configs/swift.js'; import { createMethodExtractor } from '../method-extractors/generic.js'; import { swiftMethodConfig } from '../method-extractors/configs/swift.js'; +import { createCallExtractor } from '../call-extractors/generic.js'; +import { swiftCallConfig } from '../call-extractors/configs/swift.js'; /** * Group Swift files by SPM target for implicit module visibility. @@ -240,6 +242,7 @@ export const swiftProvider = defineLanguage({ importResolver: resolveSwiftImport, importSemantics: 'wildcard-leaf', heritageDefaultEdge: 'IMPLEMENTS', + callExtractor: createCallExtractor(swiftCallConfig), fieldExtractor: createFieldExtractor(swiftFieldConfig), methodExtractor: createMethodExtractor({ ...swiftMethodConfig, diff --git a/gitnexus/src/core/ingestion/languages/typescript.ts b/gitnexus/src/core/ingestion/languages/typescript.ts index b680c5aa10..b1aa385cfd 100644 --- a/gitnexus/src/core/ingestion/languages/typescript.ts +++ b/gitnexus/src/core/ingestion/languages/typescript.ts @@ -26,6 +26,11 @@ import { typescriptMethodConfig, javascriptMethodConfig, } from '../method-extractors/configs/typescript-javascript.js'; +import { createCallExtractor } from '../call-extractors/generic.js'; +import { + typescriptCallConfig, + javascriptCallConfig, +} from '../call-extractors/configs/typescript-javascript.js'; /** * TypeScript/JavaScript: arrow_function and function_expression get their name @@ -173,6 +178,7 @@ export const typescriptProvider = defineLanguage({ exportChecker: tsExportChecker, importResolver: resolveTypescriptImport, namedBindingExtractor: extractTsNamedBindings, + callExtractor: createCallExtractor(typescriptCallConfig), fieldExtractor: typescriptFieldExtractor, methodExtractor: createMethodExtractor({ ...typescriptMethodConfig, @@ -190,6 +196,7 @@ export const javascriptProvider = defineLanguage({ exportChecker: tsExportChecker, importResolver: resolveJavascriptImport, namedBindingExtractor: extractTsNamedBindings, + callExtractor: createCallExtractor(javascriptCallConfig), fieldExtractor: createFieldExtractor(javascriptConfig), methodExtractor: createMethodExtractor({ ...javascriptMethodConfig, diff --git a/gitnexus/src/core/ingestion/languages/vue.ts b/gitnexus/src/core/ingestion/languages/vue.ts index 20ccdfa18d..ba5e5016b7 100644 --- a/gitnexus/src/core/ingestion/languages/vue.ts +++ b/gitnexus/src/core/ingestion/languages/vue.ts @@ -21,6 +21,8 @@ import { extractTsNamedBindings } from '../named-bindings/typescript.js'; import { TYPESCRIPT_QUERIES } from '../tree-sitter-queries.js'; import { typescriptFieldExtractor } from '../field-extractors/typescript.js'; import { BUILT_INS as TS_BUILT_INS } from './typescript.js'; +import { createCallExtractor } from '../call-extractors/generic.js'; +import { typescriptCallConfig } from '../call-extractors/configs/typescript-javascript.js'; const VUE_SPECIFIC_BUILT_INS = [ 'ref', @@ -80,6 +82,7 @@ export const vueProvider = defineLanguage({ exportChecker: tsExportChecker, importResolver: resolveVueImport, namedBindingExtractor: extractTsNamedBindings, + callExtractor: createCallExtractor(typescriptCallConfig), fieldExtractor: typescriptFieldExtractor, classExtractor: vueClassExtractor, builtInNames: VUE_BUILT_INS, From 9ad0c88581dde0c3b649104ae255ed4c895c17b5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 16 Apr 2026 08:54:43 +0000 Subject: [PATCH 3/7] feat(ingestion): replace inline call extraction in parse-worker and call-processor, delete call-sites/ Agent-Logs-Url: https://github.com/abhigyanpatwari/GitNexus/sessions/893afa77-5b34-4e6b-a1dc-03034261fb36 Co-authored-by: magyargergo <11230420+magyargergo@users.noreply.github.com> --- gitnexus/src/core/ingestion/call-processor.ts | 132 ++++--- .../call-sites/extract-language-call-site.ts | 33 -- .../src/core/ingestion/call-sites/java.ts | 41 -- .../core/ingestion/workers/parse-worker.ts | 370 +++++++++--------- 4 files changed, 253 insertions(+), 323 deletions(-) delete mode 100644 gitnexus/src/core/ingestion/call-sites/extract-language-call-site.ts delete mode 100644 gitnexus/src/core/ingestion/call-sites/java.ts diff --git a/gitnexus/src/core/ingestion/call-processor.ts b/gitnexus/src/core/ingestion/call-processor.ts index ce0364a4d6..253974d629 100644 --- a/gitnexus/src/core/ingestion/call-processor.ts +++ b/gitnexus/src/core/ingestion/call-processor.ts @@ -48,7 +48,6 @@ import { extractTemplateComponents } from './vue-sfc-extractor.js'; import { extractReturnTypeName, stripNullable } from './type-extractors/shared.js'; import type { LiteralTypeInferrer } from './type-extractors/types.js'; import type { SyntaxNode } from './utils/ast-helpers.js'; -import { extractParsedCallSite } from './call-sites/extract-language-call-site.js'; import { lookupMethodByOwnerWithMRO } from './model/resolve.js'; /** Per-file resolved type bindings for exported symbols. @@ -910,74 +909,85 @@ export const processCalls = async ( if (!captureMap['call']) return; const callNode = captureMap['call']; - const languageSeed = extractParsedCallSite(language, callNode); - if (languageSeed) { - if (provider.isBuiltInName(languageSeed.calledName)) return; - - const sourceId = - findEnclosingFunction(callNode, file.path, ctx, provider) || - generateId('File', file.path); - const receiverName = - languageSeed.callForm === 'member' ? languageSeed.receiverName : undefined; - let receiverTypeName = - receiverName && typeEnv ? typeEnv.lookup(receiverName, callNode) : undefined; + const callExtractor = provider.callExtractor; + + // ── Language-specific call site (e.g. Java :: method references) ── + if (callExtractor) { + const langCallSite = callExtractor.extract(callNode, undefined); + if (langCallSite) { + if (provider.isBuiltInName(langCallSite.calledName)) return; + + const sourceId = + findEnclosingFunction(callNode, file.path, ctx, provider) || + generateId('File', file.path); + const receiverName = + langCallSite.callForm === 'member' ? langCallSite.receiverName : undefined; + let receiverTypeName = + receiverName && typeEnv ? typeEnv.lookup(receiverName, callNode) : undefined; - if ( - receiverName !== undefined && - receiverTypeName === undefined && - languageSeed.callForm === 'member' && - (language === 'java' || language === 'csharp' || language === 'kotlin') - ) { - const c0 = receiverName.charCodeAt(0); - if (c0 >= 65 && c0 <= 90) receiverTypeName = receiverName; - } - - const resolved = resolveCallTarget( - { - calledName: languageSeed.calledName, - callForm: languageSeed.callForm, - ...(receiverTypeName !== undefined ? { receiverTypeName } : {}), - ...(receiverName !== undefined ? { receiverName } : {}), - }, - file.path, - ctx, - undefined, - widenCache, - undefined, - heritageMap, - ); - - if (!resolved) return; - graph.addRelationship({ - id: generateId('CALLS', `${sourceId}:${languageSeed.calledName}->${resolved.nodeId}`), - sourceId, - targetId: resolved.nodeId, - type: 'CALLS', - confidence: resolved.confidence, - reason: resolved.reason, - }); + if ( + langCallSite.typeAsReceiverHeuristic && + receiverName !== undefined && + receiverTypeName === undefined && + langCallSite.callForm === 'member' + ) { + const c0 = receiverName.charCodeAt(0); + if (c0 >= 65 && c0 <= 90) receiverTypeName = receiverName; + } - if (heritageMap && languageSeed.callForm === 'member' && receiverTypeName) { - const implTargets = findInterfaceDispatchTargets( - languageSeed.calledName, - receiverTypeName, + const resolved = resolveCallTarget( + { + calledName: langCallSite.calledName, + callForm: langCallSite.callForm, + ...(receiverTypeName !== undefined ? { receiverTypeName } : {}), + ...(receiverName !== undefined ? { receiverName } : {}), + }, file.path, ctx, + undefined, + widenCache, + undefined, heritageMap, - resolved.nodeId, ); - for (const impl of implTargets) { - graph.addRelationship({ - id: generateId('CALLS', `${sourceId}:${languageSeed.calledName}->${impl.nodeId}`), - sourceId, - targetId: impl.nodeId, - type: 'CALLS', - confidence: impl.confidence, - reason: impl.reason, - }); + + if (!resolved) return; + graph.addRelationship({ + id: generateId( + 'CALLS', + `${sourceId}:${langCallSite.calledName}->${resolved.nodeId}`, + ), + sourceId, + targetId: resolved.nodeId, + type: 'CALLS', + confidence: resolved.confidence, + reason: resolved.reason, + }); + + if (heritageMap && langCallSite.callForm === 'member' && receiverTypeName) { + const implTargets = findInterfaceDispatchTargets( + langCallSite.calledName, + receiverTypeName, + file.path, + ctx, + heritageMap, + resolved.nodeId, + ); + for (const impl of implTargets) { + graph.addRelationship({ + id: generateId( + 'CALLS', + `${sourceId}:${langCallSite.calledName}->${impl.nodeId}`, + ), + sourceId, + targetId: impl.nodeId, + type: 'CALLS', + confidence: impl.confidence, + reason: impl.reason, + }); + } } + return; } - return; } const nameNode = captureMap['call.name']; diff --git a/gitnexus/src/core/ingestion/call-sites/extract-language-call-site.ts b/gitnexus/src/core/ingestion/call-sites/extract-language-call-site.ts deleted file mode 100644 index feed2cd70c..0000000000 --- a/gitnexus/src/core/ingestion/call-sites/extract-language-call-site.ts +++ /dev/null @@ -1,33 +0,0 @@ -/** Non-generic @call shapes → { calledName, callForm, receiverName? } (used from call-processor / parse-worker). */ - -import { SupportedLanguages } from '../../../config/supported-languages.js'; -import type { SyntaxNode } from '../utils/ast-helpers.js'; -import { parseJavaMethodReference } from './java.js'; - -export type ParsedCallSite = { - calledName: string; - callForm: 'free' | 'member' | 'constructor'; - receiverName?: string; -}; - -/** Non-null → seed replaces @call.name; null → use @call.name + inferCallForm / extractReceiverName. */ -export function extractParsedCallSite( - language: SupportedLanguages, - callNode: SyntaxNode, -): ParsedCallSite | null { - switch (language) { - case SupportedLanguages.Java: - if (callNode.type === 'method_reference') { - const parsed = parseJavaMethodReference(callNode); - if (!parsed) return null; - return { - calledName: parsed.calledName, - callForm: parsed.callForm, - ...(parsed.receiverName !== undefined ? { receiverName: parsed.receiverName } : {}), - }; - } - return null; - default: - return null; - } -} diff --git a/gitnexus/src/core/ingestion/call-sites/java.ts b/gitnexus/src/core/ingestion/call-sites/java.ts deleted file mode 100644 index e22c71cca3..0000000000 --- a/gitnexus/src/core/ingestion/call-sites/java.ts +++ /dev/null @@ -1,41 +0,0 @@ -/** Java `method_reference` (`::`) nodes (tree-sitter-java). `super::` still lacks TypeEnv receiver typing. */ - -import type { SyntaxNode } from '../utils/ast-helpers.js'; - -export type ParsedJavaMethodReference = { - calledName: string; - callForm: 'member' | 'constructor'; - receiverName?: string; -}; - -/** Parse `expr::method`, `Type::new`, `this::m`, `super::m`. */ -export const parseJavaMethodReference = ( - callNode: SyntaxNode, -): ParsedJavaMethodReference | null => { - if (callNode.type !== 'method_reference') return null; - - const recv = callNode.namedChild(0); - if (!recv) return null; - - for (const c of callNode.children) { - if (c.type === 'new') { - if (recv.type !== 'identifier') return null; - return { calledName: recv.text, callForm: 'constructor' }; - } - } - - const rhs = callNode.child(callNode.childCount - 1); - if (!rhs || rhs.type !== 'identifier') return null; - const methodName = rhs.text; - - if (recv.type === 'identifier') { - return { calledName: methodName, callForm: 'member', receiverName: recv.text }; - } - if (recv.type === 'this') { - return { calledName: methodName, callForm: 'member', receiverName: 'this' }; - } - if (recv.type === 'super') { - return { calledName: methodName, callForm: 'member', receiverName: 'super' }; - } - return null; -}; diff --git a/gitnexus/src/core/ingestion/workers/parse-worker.ts b/gitnexus/src/core/ingestion/workers/parse-worker.ts index bcafa38d2f..f36aef8e01 100644 --- a/gitnexus/src/core/ingestion/workers/parse-worker.ts +++ b/gitnexus/src/core/ingestion/workers/parse-worker.ts @@ -54,15 +54,9 @@ import { type SyntaxNode, } from '../utils/ast-helpers.js'; import { - countCallArguments, - inferCallForm, - extractReceiverName, - extractReceiverNode, - extractMixedChain, extractCallArgTypes, type MixedChainStep, } from '../utils/call-analysis.js'; -import { extractParsedCallSite } from '../call-sites/extract-language-call-site.js'; import { buildTypeEnv } from '../type-env.js'; import type { ConstructorBinding } from '../type-env.js'; import { detectFrameworkFromAST } from '../framework-detection.js'; @@ -1656,109 +1650,137 @@ const processFileGroup = ( // Extract call sites if (captureMap['call']) { - const callNode0 = captureMap['call']; - const languageSeed = extractParsedCallSite(language, callNode0); - if (languageSeed) { - if (!provider.isBuiltInName(languageSeed.calledName)) { - const sourceId = - findEnclosingFunctionId(callNode0, file.path, provider) || - generateId('File', file.path); - const receiverName = - languageSeed.callForm === 'member' ? languageSeed.receiverName : undefined; - let receiverTypeName = receiverName - ? typeEnv.lookup(receiverName, callNode0) - : undefined; - // Type-as-receiver (e.g. Java `User::getName`): no TypeEnv binding for the class name - if ( - receiverName !== undefined && - receiverTypeName === undefined && - languageSeed.callForm === 'member' && - (language === SupportedLanguages.Java || - language === SupportedLanguages.CSharp || - language === SupportedLanguages.Kotlin) - ) { - const c0 = receiverName.charCodeAt(0); - if (c0 >= 65 && c0 <= 90) receiverTypeName = receiverName; - } - result.calls.push({ - filePath: file.path, - calledName: languageSeed.calledName, - sourceId, - callForm: languageSeed.callForm, - ...(receiverName !== undefined ? { receiverName } : {}), - ...(receiverTypeName !== undefined ? { receiverTypeName } : {}), - }); - } - continue; - } - + const callNode = captureMap['call']; const callNameNode = captureMap['call.name']; - if (callNameNode) { - const calledName = callNameNode.text; - - // Dispatch: route language-specific calls (heritage, properties, imports) - const routed = callRouter?.(calledName, captureMap['call']); - if (routed) { - if (routed.kind === 'skip') continue; - - if (routed.kind === 'import') { - result.imports.push({ + const callExtractor = provider.callExtractor; + + if (callExtractor) { + // ── Path 1: Language-specific call site (bypasses routing) ──── + // Try language-specific extraction (e.g. Java `::` method references) + // without callNameNode. If successful, skip routing and the generic + // path entirely — mirrors the old extractParsedCallSite() behavior. + const langCallSite = callExtractor.extract(callNode, undefined); + if (langCallSite) { + if (!provider.isBuiltInName(langCallSite.calledName)) { + const sourceId = + findEnclosingFunctionId(callNode, file.path, provider) || + generateId('File', file.path); + const receiverName = + langCallSite.callForm === 'member' ? langCallSite.receiverName : undefined; + let receiverTypeName = receiverName + ? typeEnv.lookup(receiverName, callNode) + : undefined; + // Type-as-receiver heuristic (e.g. Java `User::getName`) + if ( + langCallSite.typeAsReceiverHeuristic && + receiverName !== undefined && + receiverTypeName === undefined && + langCallSite.callForm === 'member' + ) { + const c0 = receiverName.charCodeAt(0); + if (c0 >= 65 && c0 <= 90) receiverTypeName = receiverName; + } + result.calls.push({ filePath: file.path, - rawImportPath: routed.importPath, - language, + calledName: langCallSite.calledName, + sourceId, + callForm: langCallSite.callForm, + ...(receiverName !== undefined ? { receiverName } : {}), + ...(receiverTypeName !== undefined ? { receiverTypeName } : {}), }); - continue; } + continue; + } - if (routed.kind === 'heritage') { - for (const item of routed.items) { - result.heritage.push({ + // ── Path 2: Generic extraction via @call.name ──────────────── + if (callNameNode) { + const calledName = callNameNode.text; + + // Dispatch: route language-specific calls (heritage, properties, imports) + const routed = callRouter?.(calledName, captureMap['call']); + if (routed) { + if (routed.kind === 'skip') continue; + + if (routed.kind === 'import') { + result.imports.push({ filePath: file.path, - className: item.enclosingClass, - parentName: item.mixinName, - kind: item.heritageKind, + rawImportPath: routed.importPath, + language, }); + continue; } - continue; - } - if (routed.kind === 'properties') { - const propEnclosingInfo = cachedFindEnclosingClassInfo( - captureMap['call'], - file.path, - provider.resolveEnclosingOwner, - ); - const propEnclosingClassId = propEnclosingInfo?.classId ?? null; - // Enrich routed properties with FieldExtractor metadata - let routedFieldMap: Map | undefined; - if (provider.fieldExtractor && typeEnv) { - const classNode = findEnclosingClassNode(captureMap['call']); - if (classNode) { - routedFieldMap = getFieldInfo(classNode, provider, { - typeEnv, - symbolTable: NOOP_SYMBOL_TABLE, + if (routed.kind === 'heritage') { + for (const item of routed.items) { + result.heritage.push({ filePath: file.path, - language, + className: item.enclosingClass, + parentName: item.mixinName, + kind: item.heritageKind, }); } + continue; } - for (const item of routed.items) { - const routedFieldInfo = routedFieldMap?.get(item.propName); - const propQualifiedName = propEnclosingInfo - ? `${propEnclosingInfo.className}.${item.propName}` - : item.propName; - const nodeId = generateId('Property', `${file.path}:${propQualifiedName}`); - result.nodes.push({ - id: nodeId, - label: 'Property', - properties: { - name: item.propName, + + if (routed.kind === 'properties') { + const propEnclosingInfo = cachedFindEnclosingClassInfo( + captureMap['call'], + file.path, + provider.resolveEnclosingOwner, + ); + const propEnclosingClassId = propEnclosingInfo?.classId ?? null; + // Enrich routed properties with FieldExtractor metadata + let routedFieldMap: Map | undefined; + if (provider.fieldExtractor && typeEnv) { + const classNode = findEnclosingClassNode(captureMap['call']); + if (classNode) { + routedFieldMap = getFieldInfo(classNode, provider, { + typeEnv, + symbolTable: NOOP_SYMBOL_TABLE, + filePath: file.path, + language, + }); + } + } + for (const item of routed.items) { + const routedFieldInfo = routedFieldMap?.get(item.propName); + const propQualifiedName = propEnclosingInfo + ? `${propEnclosingInfo.className}.${item.propName}` + : item.propName; + const nodeId = generateId('Property', `${file.path}:${propQualifiedName}`); + result.nodes.push({ + id: nodeId, + label: 'Property', + properties: { + name: item.propName, + filePath: file.path, + startLine: item.startLine, + endLine: item.endLine, + language, + isExported: true, + description: item.accessorType, + ...(item.declaredType + ? { declaredType: item.declaredType } + : routedFieldInfo?.type + ? { declaredType: routedFieldInfo.type } + : {}), + ...(routedFieldInfo?.visibility !== undefined + ? { visibility: routedFieldInfo.visibility } + : {}), + ...(routedFieldInfo?.isStatic !== undefined + ? { isStatic: routedFieldInfo.isStatic } + : {}), + ...(routedFieldInfo?.isReadonly !== undefined + ? { isReadonly: routedFieldInfo.isReadonly } + : {}), + }, + }); + result.symbols.push({ filePath: file.path, - startLine: item.startLine, - endLine: item.endLine, - language, - isExported: true, - description: item.accessorType, + name: item.propName, + nodeId, + type: 'Property', + ...(propEnclosingClassId ? { ownerId: propEnclosingClassId } : {}), ...(item.declaredType ? { declaredType: item.declaredType } : routedFieldInfo?.type @@ -1773,111 +1795,83 @@ const processFileGroup = ( ...(routedFieldInfo?.isReadonly !== undefined ? { isReadonly: routedFieldInfo.isReadonly } : {}), - }, - }); - result.symbols.push({ - filePath: file.path, - name: item.propName, - nodeId, - type: 'Property', - ...(propEnclosingClassId ? { ownerId: propEnclosingClassId } : {}), - ...(item.declaredType - ? { declaredType: item.declaredType } - : routedFieldInfo?.type - ? { declaredType: routedFieldInfo.type } - : {}), - ...(routedFieldInfo?.visibility !== undefined - ? { visibility: routedFieldInfo.visibility } - : {}), - ...(routedFieldInfo?.isStatic !== undefined - ? { isStatic: routedFieldInfo.isStatic } - : {}), - ...(routedFieldInfo?.isReadonly !== undefined - ? { isReadonly: routedFieldInfo.isReadonly } - : {}), - }); - const fileId = generateId('File', file.path); - const relId = generateId('DEFINES', `${fileId}->${nodeId}`); - result.relationships.push({ - id: relId, - sourceId: fileId, - targetId: nodeId, - type: 'DEFINES', - confidence: 1.0, - reason: '', - }); - if (propEnclosingClassId) { + }); + const fileId = generateId('File', file.path); + const relId = generateId('DEFINES', `${fileId}->${nodeId}`); result.relationships.push({ - id: generateId('HAS_PROPERTY', `${propEnclosingClassId}->${nodeId}`), - sourceId: propEnclosingClassId, + id: relId, + sourceId: fileId, targetId: nodeId, - type: 'HAS_PROPERTY', + type: 'DEFINES', confidence: 1.0, reason: '', }); + if (propEnclosingClassId) { + result.relationships.push({ + id: generateId('HAS_PROPERTY', `${propEnclosingClassId}->${nodeId}`), + sourceId: propEnclosingClassId, + targetId: nodeId, + type: 'HAS_PROPERTY', + confidence: 1.0, + reason: '', + }); + } } + continue; } - continue; - } - // kind === 'call' — fall through to normal call processing below - } + // kind === 'call' — fall through to normal call processing below + } - if (!provider.isBuiltInName(calledName)) { - const callNode = captureMap['call']; - const sourceId = - findEnclosingFunctionId(callNode, file.path, provider) || - generateId('File', file.path); - const callForm = inferCallForm(callNode, callNameNode); - let receiverName = - callForm === 'member' ? extractReceiverName(callNameNode) : undefined; - let receiverTypeName = receiverName - ? typeEnv.lookup(receiverName, callNode) - : undefined; - let receiverMixedChain: MixedChainStep[] | undefined; - - // When the receiver is a complex expression (call chain, field chain, or mixed), - // extractReceiverName returns undefined. Walk the receiver node to build a unified - // mixed chain for deferred resolution in processCallsFromExtracted. - if (callForm === 'member' && receiverName === undefined && !receiverTypeName) { - const receiverNode = extractReceiverNode(callNameNode); - if (receiverNode) { - const extracted = extractMixedChain(receiverNode); - if (extracted && extracted.chain.length > 0) { - receiverMixedChain = extracted.chain; - receiverName = extracted.baseReceiverName; - // Try the type environment immediately for the base receiver - // (covers explicitly-typed locals and annotated parameters). - if (receiverName) { - receiverTypeName = typeEnv.lookup(receiverName, callNode); - } + if (!provider.isBuiltInName(calledName)) { + const callSite = callExtractor.extract(callNode, callNameNode); + if (callSite) { + const sourceId = + findEnclosingFunctionId(callNode, file.path, provider) || + generateId('File', file.path); + let receiverTypeName = callSite.receiverName + ? typeEnv.lookup(callSite.receiverName, callNode) + : undefined; + + // Type-as-receiver heuristic + if ( + callSite.typeAsReceiverHeuristic && + callSite.receiverName !== undefined && + receiverTypeName === undefined && + callSite.callForm === 'member' + ) { + const c0 = callSite.receiverName.charCodeAt(0); + if (c0 >= 65 && c0 <= 90) receiverTypeName = callSite.receiverName; } + + const inferLiteralType = provider.typeConfig?.inferLiteralType; + // Skip when no arg list / zero args: nothing to infer for overload typing + const argTypes = + inferLiteralType && + callSite.argCount !== undefined && + callSite.argCount > 0 + ? extractCallArgTypes(callNode, inferLiteralType, (varName, cn) => + typeEnv.lookup(varName, cn), + ) + : undefined; + + result.calls.push({ + filePath: file.path, + calledName: callSite.calledName, + sourceId, + ...(callSite.argCount !== undefined ? { argCount: callSite.argCount } : {}), + ...(callSite.callForm !== undefined ? { callForm: callSite.callForm } : {}), + ...(callSite.receiverName !== undefined + ? { receiverName: callSite.receiverName } + : {}), + ...(receiverTypeName !== undefined ? { receiverTypeName } : {}), + ...(callSite.receiverMixedChain !== undefined + ? { receiverMixedChain: callSite.receiverMixedChain } + : {}), + ...(argTypes !== undefined ? { argTypes } : {}), + }); } } - - const inferLiteralType = provider.typeConfig?.inferLiteralType; - const argCountForOverloadHints = countCallArguments(callNode); - // Skip when no arg list / zero args: nothing to infer for overload typing; saves AST walks + payload size. - const argTypes = - inferLiteralType && - argCountForOverloadHints !== undefined && - argCountForOverloadHints > 0 - ? extractCallArgTypes(callNode, inferLiteralType, (varName, cn) => - typeEnv.lookup(varName, cn), - ) - : undefined; - - result.calls.push({ - filePath: file.path, - calledName, - sourceId, - argCount: countCallArguments(callNode), - ...(callForm !== undefined ? { callForm } : {}), - ...(receiverName !== undefined ? { receiverName } : {}), - ...(receiverTypeName !== undefined ? { receiverTypeName } : {}), - ...(receiverMixedChain !== undefined ? { receiverMixedChain } : {}), - ...(argTypes !== undefined ? { argTypes } : {}), - }); } } continue; From 71171e2982e7d718d07f216bd6ec482490962dde Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 16 Apr 2026 08:56:46 +0000 Subject: [PATCH 4/7] test(ingestion): add unit tests for call extraction configs and factory Agent-Logs-Url: https://github.com/abhigyanpatwari/GitNexus/sessions/893afa77-5b34-4e6b-a1dc-03034261fb36 Co-authored-by: magyargergo <11230420+magyargergo@users.noreply.github.com> --- gitnexus/test/unit/call-extraction.test.ts | 525 +++++++++++++++++++++ 1 file changed, 525 insertions(+) create mode 100644 gitnexus/test/unit/call-extraction.test.ts diff --git a/gitnexus/test/unit/call-extraction.test.ts b/gitnexus/test/unit/call-extraction.test.ts new file mode 100644 index 0000000000..dd7e40c56c --- /dev/null +++ b/gitnexus/test/unit/call-extraction.test.ts @@ -0,0 +1,525 @@ +import { describe, it, expect } from 'vitest'; +import { createCallExtractor } from '../../src/core/ingestion/call-extractors/generic.js'; +import { + javaCallConfig, + kotlinCallConfig, +} from '../../src/core/ingestion/call-extractors/configs/jvm.js'; +import { csharpCallConfig } from '../../src/core/ingestion/call-extractors/configs/csharp.js'; +import { + typescriptCallConfig, + javascriptCallConfig, +} from '../../src/core/ingestion/call-extractors/configs/typescript-javascript.js'; +import { + cCallConfig, + cppCallConfig, +} from '../../src/core/ingestion/call-extractors/configs/c-cpp.js'; +import { pythonCallConfig } from '../../src/core/ingestion/call-extractors/configs/python.js'; +import { rubyCallConfig } from '../../src/core/ingestion/call-extractors/configs/ruby.js'; +import { rustCallConfig } from '../../src/core/ingestion/call-extractors/configs/rust.js'; +import { dartCallConfig } from '../../src/core/ingestion/call-extractors/configs/dart.js'; +import { phpCallConfig } from '../../src/core/ingestion/call-extractors/configs/php.js'; +import { swiftCallConfig } from '../../src/core/ingestion/call-extractors/configs/swift.js'; +import { goCallConfig } from '../../src/core/ingestion/call-extractors/configs/go.js'; +import type { CallExtractionConfig } from '../../src/core/ingestion/call-types.js'; +import type { SyntaxNode } from '../../src/core/ingestion/utils/ast-helpers.js'; +import { SupportedLanguages } from '../../src/config/supported-languages.js'; +import { getProvider } from '../../src/core/ingestion/languages/index.js'; +import Parser from 'tree-sitter'; +import TypeScript from 'tree-sitter-typescript'; +import Python from 'tree-sitter-python'; +import Java from 'tree-sitter-java'; +import CSharp from 'tree-sitter-c-sharp'; +import Go from 'tree-sitter-go'; +import Rust from 'tree-sitter-rust'; +import CPP from 'tree-sitter-cpp'; +import PHP from 'tree-sitter-php'; +import Ruby from 'tree-sitter-ruby'; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** + * Parse code with a tree-sitter language and run the language's query to find + * @call / @call.name captures. + */ +function extractCallCaptures( + parser: Parser, + code: string, + language: SupportedLanguages, +): Array<{ callNode: SyntaxNode; nameNode: SyntaxNode | undefined; calledName: string | undefined }> { + const provider = getProvider(language); + const queryStr = provider.treeSitterQueries; + if (!queryStr) throw new Error(`No query for ${language}`); + + const tree = parser.parse(code); + const lang = parser.getLanguage(); + const query = new Parser.Query(lang, queryStr); + const matches = query.matches(tree.rootNode); + + const results: Array<{ + callNode: SyntaxNode; + nameNode: SyntaxNode | undefined; + calledName: string | undefined; + }> = []; + + for (const match of matches) { + const captureMap: Record = {}; + for (const c of match.captures) { + captureMap[c.name] = c.node; + } + if (captureMap['call']) { + results.push({ + callNode: captureMap['call'], + nameNode: captureMap['call.name'], + calledName: captureMap['call.name']?.text, + }); + } + } + + return results; +} + +// --------------------------------------------------------------------------- +// Factory construction tests +// --------------------------------------------------------------------------- + +describe('createCallExtractor', () => { + it('constructs all currently registered language configs', () => { + const configs: CallExtractionConfig[] = [ + javaCallConfig, + kotlinCallConfig, + csharpCallConfig, + typescriptCallConfig, + javascriptCallConfig, + cCallConfig, + cppCallConfig, + pythonCallConfig, + rubyCallConfig, + rustCallConfig, + dartCallConfig, + phpCallConfig, + swiftCallConfig, + goCallConfig, + ]; + for (const cfg of configs) { + expect( + () => createCallExtractor(cfg), + `config for ${cfg.language} must construct cleanly`, + ).not.toThrow(); + } + }); + + it('preserves language on the extractor', () => { + const extractor = createCallExtractor(javaCallConfig); + expect(extractor.language).toBe(SupportedLanguages.Java); + }); + + it('returns null when no callNameNode and no language seed', () => { + const extractor = createCallExtractor(typescriptCallConfig); + // A minimal stub SyntaxNode — extract should return null since + // there's no callNameNode and no language-specific hook + const stub = { type: 'call_expression' } as unknown as SyntaxNode; + expect(extractor.extract(stub, undefined)).toBeNull(); + }); +}); + +// --------------------------------------------------------------------------- +// LanguageProvider.callExtractor wiring +// --------------------------------------------------------------------------- + +describe('callExtractor on LanguageProvider', () => { + it('all tree-sitter providers have callExtractor defined', () => { + const languages: SupportedLanguages[] = [ + SupportedLanguages.TypeScript, + SupportedLanguages.JavaScript, + SupportedLanguages.Python, + SupportedLanguages.Java, + SupportedLanguages.Kotlin, + SupportedLanguages.Go, + SupportedLanguages.Rust, + SupportedLanguages.CSharp, + SupportedLanguages.C, + SupportedLanguages.CPlusPlus, + SupportedLanguages.PHP, + SupportedLanguages.Ruby, + SupportedLanguages.Swift, + SupportedLanguages.Dart, + SupportedLanguages.Vue, + ]; + for (const lang of languages) { + const provider = getProvider(lang); + expect(provider.callExtractor, `${lang} should have a callExtractor`).toBeDefined(); + } + }); +}); + +// --------------------------------------------------------------------------- +// Generic extraction via @call.name +// --------------------------------------------------------------------------- + +describe('generic call extraction', () => { + const parser = new Parser(); + + describe('TypeScript', () => { + const extractor = createCallExtractor(typescriptCallConfig); + + it('extracts free function call', () => { + parser.setLanguage(TypeScript.typescript); + const captures = extractCallCaptures(parser, 'doStuff()', SupportedLanguages.TypeScript); + const match = captures.find((c) => c.calledName === 'doStuff'); + expect(match).toBeDefined(); + const result = extractor.extract(match!.callNode, match!.nameNode!); + expect(result).not.toBeNull(); + expect(result!.calledName).toBe('doStuff'); + expect(result!.callForm).toBe('free'); + expect(result!.receiverName).toBeUndefined(); + }); + + it('extracts member call with receiver', () => { + parser.setLanguage(TypeScript.typescript); + const captures = extractCallCaptures(parser, 'user.save()', SupportedLanguages.TypeScript); + const match = captures.find((c) => c.calledName === 'save'); + expect(match).toBeDefined(); + const result = extractor.extract(match!.callNode, match!.nameNode!); + expect(result).not.toBeNull(); + expect(result!.calledName).toBe('save'); + expect(result!.callForm).toBe('member'); + expect(result!.receiverName).toBe('user'); + }); + + it('extracts constructor call', () => { + parser.setLanguage(TypeScript.typescript); + const captures = extractCallCaptures(parser, 'new User()', SupportedLanguages.TypeScript); + const match = captures.find((c) => c.calledName === 'User'); + expect(match).toBeDefined(); + const result = extractor.extract(match!.callNode, match!.nameNode!); + expect(result).not.toBeNull(); + expect(result!.calledName).toBe('User'); + expect(result!.callForm).toBe('constructor'); + }); + + it('extracts argCount', () => { + parser.setLanguage(TypeScript.typescript); + const captures = extractCallCaptures(parser, 'foo(a, b, c)', SupportedLanguages.TypeScript); + const match = captures.find((c) => c.calledName === 'foo'); + expect(match).toBeDefined(); + const result = extractor.extract(match!.callNode, match!.nameNode!); + expect(result).not.toBeNull(); + expect(result!.argCount).toBe(3); + }); + + it('does not set typeAsReceiverHeuristic', () => { + parser.setLanguage(TypeScript.typescript); + const captures = extractCallCaptures( + parser, + 'User.find()', + SupportedLanguages.TypeScript, + ); + const match = captures.find((c) => c.calledName === 'find'); + expect(match).toBeDefined(); + const result = extractor.extract(match!.callNode, match!.nameNode!); + expect(result!.typeAsReceiverHeuristic).toBeFalsy(); + }); + }); + + describe('Python', () => { + const extractor = createCallExtractor(pythonCallConfig); + + it('extracts free function call', () => { + parser.setLanguage(Python); + const captures = extractCallCaptures(parser, 'do_stuff()', SupportedLanguages.Python); + const match = captures.find((c) => c.calledName === 'do_stuff'); + expect(match).toBeDefined(); + const result = extractor.extract(match!.callNode, match!.nameNode!); + expect(result).not.toBeNull(); + expect(result!.calledName).toBe('do_stuff'); + expect(result!.callForm).toBe('free'); + }); + + it('extracts member call', () => { + parser.setLanguage(Python); + const captures = extractCallCaptures( + parser, + 'user.save()', + SupportedLanguages.Python, + ); + const match = captures.find((c) => c.calledName === 'save'); + expect(match).toBeDefined(); + const result = extractor.extract(match!.callNode, match!.nameNode!); + expect(result).not.toBeNull(); + expect(result!.callForm).toBe('member'); + expect(result!.receiverName).toBe('user'); + }); + }); + + describe('Java', () => { + const extractor = createCallExtractor(javaCallConfig); + + it('extracts free function call', () => { + parser.setLanguage(Java); + const captures = extractCallCaptures( + parser, + 'class A { void m() { doStuff(); } }', + SupportedLanguages.Java, + ); + const match = captures.find((c) => c.calledName === 'doStuff'); + expect(match).toBeDefined(); + const result = extractor.extract(match!.callNode, match!.nameNode!); + expect(result).not.toBeNull(); + expect(result!.calledName).toBe('doStuff'); + expect(result!.callForm).toBe('free'); + }); + + it('extracts member call with receiver', () => { + parser.setLanguage(Java); + const captures = extractCallCaptures( + parser, + 'class A { void m() { user.save(); } }', + SupportedLanguages.Java, + ); + const match = captures.find((c) => c.calledName === 'save'); + expect(match).toBeDefined(); + const result = extractor.extract(match!.callNode, match!.nameNode!); + expect(result).not.toBeNull(); + expect(result!.callForm).toBe('member'); + expect(result!.receiverName).toBe('user'); + }); + + it('sets typeAsReceiverHeuristic', () => { + parser.setLanguage(Java); + const captures = extractCallCaptures( + parser, + 'class A { void m() { User.find(); } }', + SupportedLanguages.Java, + ); + const match = captures.find((c) => c.calledName === 'find'); + expect(match).toBeDefined(); + const result = extractor.extract(match!.callNode, match!.nameNode!); + expect(result).not.toBeNull(); + expect(result!.typeAsReceiverHeuristic).toBe(true); + }); + }); + + describe('C#', () => { + const extractor = createCallExtractor(csharpCallConfig); + + it('sets typeAsReceiverHeuristic', () => { + parser.setLanguage(CSharp); + const captures = extractCallCaptures( + parser, + 'class A { void M() { Console.WriteLine(); } }', + SupportedLanguages.CSharp, + ); + const match = captures.find((c) => c.calledName === 'WriteLine'); + expect(match).toBeDefined(); + const result = extractor.extract(match!.callNode, match!.nameNode!); + expect(result).not.toBeNull(); + expect(result!.typeAsReceiverHeuristic).toBe(true); + }); + }); + + describe('Go', () => { + const extractor = createCallExtractor(goCallConfig); + + it('extracts free function call', () => { + parser.setLanguage(Go); + const captures = extractCallCaptures( + parser, + 'package main\nfunc main() { doStuff() }', + SupportedLanguages.Go, + ); + const match = captures.find((c) => c.calledName === 'doStuff'); + expect(match).toBeDefined(); + const result = extractor.extract(match!.callNode, match!.nameNode!); + expect(result).not.toBeNull(); + expect(result!.calledName).toBe('doStuff'); + expect(result!.callForm).toBe('free'); + }); + }); + + describe('Rust', () => { + const extractor = createCallExtractor(rustCallConfig); + + it('extracts free function call', () => { + parser.setLanguage(Rust); + const captures = extractCallCaptures( + parser, + 'fn main() { do_stuff(); }', + SupportedLanguages.Rust, + ); + const match = captures.find((c) => c.calledName === 'do_stuff'); + expect(match).toBeDefined(); + const result = extractor.extract(match!.callNode, match!.nameNode!); + expect(result).not.toBeNull(); + expect(result!.calledName).toBe('do_stuff'); + }); + }); + + describe('C++', () => { + const extractor = createCallExtractor(cppCallConfig); + + it('extracts free function call', () => { + parser.setLanguage(CPP); + const captures = extractCallCaptures( + parser, + 'void f() { doStuff(); }', + SupportedLanguages.CPlusPlus, + ); + const match = captures.find((c) => c.calledName === 'doStuff'); + expect(match).toBeDefined(); + const result = extractor.extract(match!.callNode, match!.nameNode!); + expect(result).not.toBeNull(); + expect(result!.calledName).toBe('doStuff'); + expect(result!.callForm).toBe('free'); + }); + }); + + describe('PHP', () => { + const extractor = createCallExtractor(phpCallConfig); + + it('extracts free function call', () => { + parser.setLanguage(PHP.php); + const captures = extractCallCaptures( + parser, + '', + SupportedLanguages.PHP, + ); + const match = captures.find((c) => c.calledName === 'doStuff'); + expect(match).toBeDefined(); + const result = extractor.extract(match!.callNode, match!.nameNode!); + expect(result).not.toBeNull(); + expect(result!.calledName).toBe('doStuff'); + }); + }); + + describe('Ruby', () => { + const extractor = createCallExtractor(rubyCallConfig); + + it('extracts member call', () => { + parser.setLanguage(Ruby); + const captures = extractCallCaptures( + parser, + 'user.save()', + SupportedLanguages.Ruby, + ); + const match = captures.find((c) => c.calledName === 'save'); + expect(match).toBeDefined(); + const result = extractor.extract(match!.callNode, match!.nameNode!); + expect(result).not.toBeNull(); + expect(result!.callForm).toBe('member'); + expect(result!.receiverName).toBe('user'); + }); + }); +}); + +// --------------------------------------------------------------------------- +// Language-specific call site extraction (Java :: method references) +// --------------------------------------------------------------------------- + +describe('Java method_reference extraction', () => { + const parser = new Parser(); + parser.setLanguage(Java); + const extractor = createCallExtractor(javaCallConfig); + + it('extracts Type::new as constructor', () => { + const captures = extractCallCaptures( + parser, + 'class A { void m() { stream.map(User::new); } }', + SupportedLanguages.Java, + ); + // The method_reference should be captured as @call + const match = captures.find( + (c) => c.callNode.type === 'method_reference', + ); + if (match) { + const result = extractor.extract(match.callNode, undefined); + expect(result).not.toBeNull(); + expect(result!.calledName).toBe('User'); + expect(result!.callForm).toBe('constructor'); + } + }); + + it('extracts Type::method as member call', () => { + const captures = extractCallCaptures( + parser, + 'class A { void m() { stream.map(User::getName); } }', + SupportedLanguages.Java, + ); + const match = captures.find( + (c) => c.callNode.type === 'method_reference', + ); + if (match) { + const result = extractor.extract(match.callNode, undefined); + expect(result).not.toBeNull(); + expect(result!.calledName).toBe('getName'); + expect(result!.callForm).toBe('member'); + expect(result!.receiverName).toBe('User'); + expect(result!.typeAsReceiverHeuristic).toBe(true); + } + }); + + it('extracts this::method as member call', () => { + const captures = extractCallCaptures( + parser, + 'class A { void m() { stream.map(this::process); } }', + SupportedLanguages.Java, + ); + const match = captures.find( + (c) => c.callNode.type === 'method_reference', + ); + if (match) { + const result = extractor.extract(match.callNode, undefined); + expect(result).not.toBeNull(); + expect(result!.calledName).toBe('process'); + expect(result!.callForm).toBe('member'); + expect(result!.receiverName).toBe('this'); + } + }); + + it('extractLanguageCallSite returns null for non-method_reference nodes', () => { + const captures = extractCallCaptures( + parser, + 'class A { void m() { doStuff(); } }', + SupportedLanguages.Java, + ); + const match = captures.find((c) => c.calledName === 'doStuff'); + expect(match).toBeDefined(); + // Language seed should be null for regular calls + const langSeed = extractor.extract(match!.callNode, undefined); + expect(langSeed).toBeNull(); + // But full extraction with callNameNode should work + const full = extractor.extract(match!.callNode, match!.nameNode!); + expect(full).not.toBeNull(); + expect(full!.calledName).toBe('doStuff'); + }); +}); + +// --------------------------------------------------------------------------- +// typeAsReceiverHeuristic config flag +// --------------------------------------------------------------------------- + +describe('typeAsReceiverHeuristic config', () => { + it('JVM configs set typeAsReceiverHeuristic', () => { + expect(javaCallConfig.typeAsReceiverHeuristic).toBe(true); + expect(kotlinCallConfig.typeAsReceiverHeuristic).toBe(true); + }); + + it('C# config sets typeAsReceiverHeuristic', () => { + expect(csharpCallConfig.typeAsReceiverHeuristic).toBe(true); + }); + + it('other configs do not set typeAsReceiverHeuristic', () => { + expect(typescriptCallConfig.typeAsReceiverHeuristic).toBeFalsy(); + expect(javascriptCallConfig.typeAsReceiverHeuristic).toBeFalsy(); + expect(pythonCallConfig.typeAsReceiverHeuristic).toBeFalsy(); + expect(rubyCallConfig.typeAsReceiverHeuristic).toBeFalsy(); + expect(goCallConfig.typeAsReceiverHeuristic).toBeFalsy(); + expect(rustCallConfig.typeAsReceiverHeuristic).toBeFalsy(); + expect(cCallConfig.typeAsReceiverHeuristic).toBeFalsy(); + expect(cppCallConfig.typeAsReceiverHeuristic).toBeFalsy(); + expect(phpCallConfig.typeAsReceiverHeuristic).toBeFalsy(); + expect(dartCallConfig.typeAsReceiverHeuristic).toBeFalsy(); + expect(swiftCallConfig.typeAsReceiverHeuristic).toBeFalsy(); + }); +}); From a3db736b08c212d914319e6c249c8331d3063bb5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 16 Apr 2026 09:29:42 +0000 Subject: [PATCH 5/7] style: fix prettier formatting in call-extractor files Agent-Logs-Url: https://github.com/abhigyanpatwari/GitNexus/sessions/d4b06f56-03b6-4fa4-801f-7ddcc6e81f13 --- .../core/ingestion/call-extractors/generic.ts | 8 +--- gitnexus/src/core/ingestion/call-processor.ts | 10 +---- .../core/ingestion/workers/parse-worker.ts | 9 +--- gitnexus/test/unit/call-extraction.test.ts | 42 ++++++------------- 4 files changed, 18 insertions(+), 51 deletions(-) diff --git a/gitnexus/src/core/ingestion/call-extractors/generic.ts b/gitnexus/src/core/ingestion/call-extractors/generic.ts index 1151cf2853..6a44398288 100644 --- a/gitnexus/src/core/ingestion/call-extractors/generic.ts +++ b/gitnexus/src/core/ingestion/call-extractors/generic.ts @@ -30,10 +30,7 @@ export function createCallExtractor(config: CallExtractionConfig): CallExtractor return { language: config.language, - extract( - callNode: SyntaxNode, - callNameNode: SyntaxNode | undefined, - ): ExtractedCallSite | null { + extract(callNode: SyntaxNode, callNameNode: SyntaxNode | undefined): ExtractedCallSite | null { // ── Path 1: Language-specific call site ────────────────────────── // Non-standard call shapes (e.g. Java `::` method references) are // handled entirely by the config hook. When it returns a result, @@ -53,8 +50,7 @@ export function createCallExtractor(config: CallExtractionConfig): CallExtractor const calledName = callNameNode.text; const callForm = inferCallForm(callNode, callNameNode); - let receiverName = - callForm === 'member' ? extractReceiverName(callNameNode) : undefined; + let receiverName = callForm === 'member' ? extractReceiverName(callNameNode) : undefined; let receiverMixedChain: ExtractedCallSite['receiverMixedChain']; // When the receiver is a complex expression (call chain, field chain, diff --git a/gitnexus/src/core/ingestion/call-processor.ts b/gitnexus/src/core/ingestion/call-processor.ts index 253974d629..bb7186697c 100644 --- a/gitnexus/src/core/ingestion/call-processor.ts +++ b/gitnexus/src/core/ingestion/call-processor.ts @@ -952,10 +952,7 @@ export const processCalls = async ( if (!resolved) return; graph.addRelationship({ - id: generateId( - 'CALLS', - `${sourceId}:${langCallSite.calledName}->${resolved.nodeId}`, - ), + id: generateId('CALLS', `${sourceId}:${langCallSite.calledName}->${resolved.nodeId}`), sourceId, targetId: resolved.nodeId, type: 'CALLS', @@ -974,10 +971,7 @@ export const processCalls = async ( ); for (const impl of implTargets) { graph.addRelationship({ - id: generateId( - 'CALLS', - `${sourceId}:${langCallSite.calledName}->${impl.nodeId}`, - ), + id: generateId('CALLS', `${sourceId}:${langCallSite.calledName}->${impl.nodeId}`), sourceId, targetId: impl.nodeId, type: 'CALLS', diff --git a/gitnexus/src/core/ingestion/workers/parse-worker.ts b/gitnexus/src/core/ingestion/workers/parse-worker.ts index f36aef8e01..fef916e81f 100644 --- a/gitnexus/src/core/ingestion/workers/parse-worker.ts +++ b/gitnexus/src/core/ingestion/workers/parse-worker.ts @@ -53,10 +53,7 @@ import { CLASS_CONTAINER_TYPES, type SyntaxNode, } from '../utils/ast-helpers.js'; -import { - extractCallArgTypes, - type MixedChainStep, -} from '../utils/call-analysis.js'; +import { extractCallArgTypes, type MixedChainStep } from '../utils/call-analysis.js'; import { buildTypeEnv } from '../type-env.js'; import type { ConstructorBinding } from '../type-env.js'; import { detectFrameworkFromAST } from '../framework-detection.js'; @@ -1847,9 +1844,7 @@ const processFileGroup = ( const inferLiteralType = provider.typeConfig?.inferLiteralType; // Skip when no arg list / zero args: nothing to infer for overload typing const argTypes = - inferLiteralType && - callSite.argCount !== undefined && - callSite.argCount > 0 + inferLiteralType && callSite.argCount !== undefined && callSite.argCount > 0 ? extractCallArgTypes(callNode, inferLiteralType, (varName, cn) => typeEnv.lookup(varName, cn), ) diff --git a/gitnexus/test/unit/call-extraction.test.ts b/gitnexus/test/unit/call-extraction.test.ts index dd7e40c56c..645fbd5782 100644 --- a/gitnexus/test/unit/call-extraction.test.ts +++ b/gitnexus/test/unit/call-extraction.test.ts @@ -47,7 +47,11 @@ function extractCallCaptures( parser: Parser, code: string, language: SupportedLanguages, -): Array<{ callNode: SyntaxNode; nameNode: SyntaxNode | undefined; calledName: string | undefined }> { +): Array<{ + callNode: SyntaxNode; + nameNode: SyntaxNode | undefined; + calledName: string | undefined; +}> { const provider = getProvider(language); const queryStr = provider.treeSitterQueries; if (!queryStr) throw new Error(`No query for ${language}`); @@ -211,11 +215,7 @@ describe('generic call extraction', () => { it('does not set typeAsReceiverHeuristic', () => { parser.setLanguage(TypeScript.typescript); - const captures = extractCallCaptures( - parser, - 'User.find()', - SupportedLanguages.TypeScript, - ); + const captures = extractCallCaptures(parser, 'User.find()', SupportedLanguages.TypeScript); const match = captures.find((c) => c.calledName === 'find'); expect(match).toBeDefined(); const result = extractor.extract(match!.callNode, match!.nameNode!); @@ -239,11 +239,7 @@ describe('generic call extraction', () => { it('extracts member call', () => { parser.setLanguage(Python); - const captures = extractCallCaptures( - parser, - 'user.save()', - SupportedLanguages.Python, - ); + const captures = extractCallCaptures(parser, 'user.save()', SupportedLanguages.Python); const match = captures.find((c) => c.calledName === 'save'); expect(match).toBeDefined(); const result = extractor.extract(match!.callNode, match!.nameNode!); @@ -380,11 +376,7 @@ describe('generic call extraction', () => { it('extracts free function call', () => { parser.setLanguage(PHP.php); - const captures = extractCallCaptures( - parser, - '', - SupportedLanguages.PHP, - ); + const captures = extractCallCaptures(parser, '', SupportedLanguages.PHP); const match = captures.find((c) => c.calledName === 'doStuff'); expect(match).toBeDefined(); const result = extractor.extract(match!.callNode, match!.nameNode!); @@ -398,11 +390,7 @@ describe('generic call extraction', () => { it('extracts member call', () => { parser.setLanguage(Ruby); - const captures = extractCallCaptures( - parser, - 'user.save()', - SupportedLanguages.Ruby, - ); + const captures = extractCallCaptures(parser, 'user.save()', SupportedLanguages.Ruby); const match = captures.find((c) => c.calledName === 'save'); expect(match).toBeDefined(); const result = extractor.extract(match!.callNode, match!.nameNode!); @@ -429,9 +417,7 @@ describe('Java method_reference extraction', () => { SupportedLanguages.Java, ); // The method_reference should be captured as @call - const match = captures.find( - (c) => c.callNode.type === 'method_reference', - ); + const match = captures.find((c) => c.callNode.type === 'method_reference'); if (match) { const result = extractor.extract(match.callNode, undefined); expect(result).not.toBeNull(); @@ -446,9 +432,7 @@ describe('Java method_reference extraction', () => { 'class A { void m() { stream.map(User::getName); } }', SupportedLanguages.Java, ); - const match = captures.find( - (c) => c.callNode.type === 'method_reference', - ); + const match = captures.find((c) => c.callNode.type === 'method_reference'); if (match) { const result = extractor.extract(match.callNode, undefined); expect(result).not.toBeNull(); @@ -465,9 +449,7 @@ describe('Java method_reference extraction', () => { 'class A { void m() { stream.map(this::process); } }', SupportedLanguages.Java, ); - const match = captures.find( - (c) => c.callNode.type === 'method_reference', - ); + const match = captures.find((c) => c.callNode.type === 'method_reference'); if (match) { const result = extractor.extract(match.callNode, undefined); expect(result).not.toBeNull(); From baa61e10ce9b4499c07cfb50544ca86dcf7fa74a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 16 Apr 2026 10:27:00 +0000 Subject: [PATCH 6/7] =?UTF-8?q?fix:=20address=20review=20comments=20?= =?UTF-8?q?=E2=80=94=20doc=20comment,=20idempotency=20note,=20C#=20behavio?= =?UTF-8?q?ral=20test?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Agent-Logs-Url: https://github.com/abhigyanpatwari/GitNexus/sessions/e53e650b-fae6-4551-ab25-cda28e4d647f --- .../core/ingestion/call-extractors/generic.ts | 6 +++++ .../src/core/ingestion/language-provider.ts | 3 ++- gitnexus/test/unit/call-extraction.test.ts | 22 ++++++++++++++++++- 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/gitnexus/src/core/ingestion/call-extractors/generic.ts b/gitnexus/src/core/ingestion/call-extractors/generic.ts index 6a44398288..82e38457b5 100644 --- a/gitnexus/src/core/ingestion/call-extractors/generic.ts +++ b/gitnexus/src/core/ingestion/call-extractors/generic.ts @@ -35,6 +35,12 @@ export function createCallExtractor(config: CallExtractionConfig): CallExtractor // Non-standard call shapes (e.g. Java `::` method references) are // handled entirely by the config hook. When it returns a result, // the generic path is skipped — no argCount, no mixed chain. + // + // Note: `extractLanguageCallSite` is called on every `extract()` + // invocation — both `extract(callNode, undefined)` (parse-worker + // Path 1) and `extract(callNode, callNameNode)` (Path 2). + // Language hooks must therefore be idempotent and cheap (e.g. a + // single node-type check). if (config.extractLanguageCallSite) { const seed = config.extractLanguageCallSite(callNode); if (seed) { diff --git a/gitnexus/src/core/ingestion/language-provider.ts b/gitnexus/src/core/ingestion/language-provider.ts index 239c1d4c56..ef29c477c1 100644 --- a/gitnexus/src/core/ingestion/language-provider.ts +++ b/gitnexus/src/core/ingestion/language-provider.ts @@ -159,7 +159,8 @@ interface LanguageProviderConfig { /** Call extractor for extracting call site information (calledName, callForm, * receiverName, argCount, mixed chains) from @call / @call.name captures. * Produced by createCallExtractor() with a per-language CallExtractionConfig. - * Default: undefined (falls back to inline extraction). */ + * Default: undefined — if unset, no calls are extracted for this language. + * All tree-sitter providers MUST supply this. */ readonly callExtractor?: CallExtractor; /** Field extractor for extracting field/property definitions from class/struct * declarations. Produces FieldInfo[] with name, type, visibility, static, diff --git a/gitnexus/test/unit/call-extraction.test.ts b/gitnexus/test/unit/call-extraction.test.ts index 645fbd5782..b9c23e3631 100644 --- a/gitnexus/test/unit/call-extraction.test.ts +++ b/gitnexus/test/unit/call-extraction.test.ts @@ -300,7 +300,7 @@ describe('generic call extraction', () => { describe('C#', () => { const extractor = createCallExtractor(csharpCallConfig); - it('sets typeAsReceiverHeuristic', () => { + it('extracts member call with receiver and typeAsReceiverHeuristic', () => { parser.setLanguage(CSharp); const captures = extractCallCaptures( parser, @@ -311,7 +311,27 @@ describe('generic call extraction', () => { expect(match).toBeDefined(); const result = extractor.extract(match!.callNode, match!.nameNode!); expect(result).not.toBeNull(); + expect(result!.calledName).toBe('WriteLine'); + expect(result!.callForm).toBe('member'); + expect(result!.receiverName).toBe('Console'); + expect(result!.typeAsReceiverHeuristic).toBe(true); + }); + + it('does not set typeAsReceiverHeuristic flag on lowercase receivers', () => { + parser.setLanguage(CSharp); + const captures = extractCallCaptures( + parser, + 'class A { void M() { logger.Info(); } }', + SupportedLanguages.CSharp, + ); + const match = captures.find((c) => c.calledName === 'Info'); + expect(match).toBeDefined(); + const result = extractor.extract(match!.callNode, match!.nameNode!); + expect(result).not.toBeNull(); + // typeAsReceiverHeuristic is set on the config/extractor level (true for C#), + // but the uppercase check happens in parse-worker, not the extractor itself expect(result!.typeAsReceiverHeuristic).toBe(true); + expect(result!.receiverName).toBe('logger'); }); }); From 6a7145005584cf607910a6e8d3e4e0920bab4b66 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 16 Apr 2026 10:29:25 +0000 Subject: [PATCH 7/7] fix: rename misleading test title, remove stale code reference in comment Agent-Logs-Url: https://github.com/abhigyanpatwari/GitNexus/sessions/e53e650b-fae6-4551-ab25-cda28e4d647f --- gitnexus/src/core/ingestion/workers/parse-worker.ts | 2 +- gitnexus/test/unit/call-extraction.test.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gitnexus/src/core/ingestion/workers/parse-worker.ts b/gitnexus/src/core/ingestion/workers/parse-worker.ts index fef916e81f..dc5f6a62dc 100644 --- a/gitnexus/src/core/ingestion/workers/parse-worker.ts +++ b/gitnexus/src/core/ingestion/workers/parse-worker.ts @@ -1655,7 +1655,7 @@ const processFileGroup = ( // ── Path 1: Language-specific call site (bypasses routing) ──── // Try language-specific extraction (e.g. Java `::` method references) // without callNameNode. If successful, skip routing and the generic - // path entirely — mirrors the old extractParsedCallSite() behavior. + // path entirely. const langCallSite = callExtractor.extract(callNode, undefined); if (langCallSite) { if (!provider.isBuiltInName(langCallSite.calledName)) { diff --git a/gitnexus/test/unit/call-extraction.test.ts b/gitnexus/test/unit/call-extraction.test.ts index b9c23e3631..85921f1f87 100644 --- a/gitnexus/test/unit/call-extraction.test.ts +++ b/gitnexus/test/unit/call-extraction.test.ts @@ -317,7 +317,7 @@ describe('generic call extraction', () => { expect(result!.typeAsReceiverHeuristic).toBe(true); }); - it('does not set typeAsReceiverHeuristic flag on lowercase receivers', () => { + it('sets typeAsReceiverHeuristic flag even for lowercase receivers', () => { parser.setLanguage(CSharp); const captures = extractCallCaptures( parser,