Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
f1f1cea
Initial plan
Copilot May 10, 2026
99f5cf6
feat: implement Java scope-based resolution (RFC #909 Ring 3)
Copilot May 10, 2026
b62db4a
chore(autofix): apply prettier + eslint fixes via /autofix command
github-actions[bot] May 10, 2026
9766536
Merge remote-tracking branch 'origin/main' into copilot/migrate-java-…
Copilot May 11, 2026
9d17f82
Merge branch 'main' into copilot/migrate-java-scope-resolution
magyargergo May 11, 2026
b76a68f
Merge branch 'main' into copilot/migrate-java-scope-resolution
magyargergo May 11, 2026
a197e42
Merge branch 'main' into copilot/migrate-java-scope-resolution
magyargergo May 11, 2026
40b6fa8
fix: address review findings 1-4 — varargs arity, static import resol…
Copilot May 11, 2026
598d444
docs: document registry-primary parity status and CI visibility gap i…
Copilot May 11, 2026
81c8442
Merge branch 'main' into copilot/migrate-java-scope-resolution
magyargergo May 11, 2026
928ec54
fix: add generic type erasure fallback in stripGeneric + update scope…
Copilot May 11, 2026
a72cf6e
fix: improve stripGeneric fallback regex — use valid Java identifier …
Copilot May 11, 2026
2db403f
Merge branch 'main' into copilot/migrate-java-scope-resolution
magyargergo May 11, 2026
e69f4ef
fix: address adversarial review findings 1-6 — flaky test, wildcard i…
Copilot May 11, 2026
7d3ffb4
docs: add inline comment explaining stripQualifier/stripGeneric call …
Copilot May 11, 2026
85c8982
test: add varargs 0-arg fixture and strengthen wildcard import assert…
Copilot May 11, 2026
cc2f37e
chore(autofix): apply prettier + eslint fixes via /autofix command
github-actions[bot] May 12, 2026
ad801e3
Merge branch 'main' into copilot/migrate-java-scope-resolution
magyargergo May 12, 2026
6524fd9
Merge branch 'main' into copilot/migrate-java-scope-resolution
magyargergo May 12, 2026
211b065
Merge branch 'main' into copilot/migrate-java-scope-resolution
magyargergo May 12, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions gitnexus/src/core/ingestion/languages/java.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,17 @@ import { javaMethodConfig } from '../method-extractors/configs/jvm.js';
import { createVariableExtractor } from '../variable-extractors/generic.js';
import { javaVariableConfig } from '../variable-extractors/configs/jvm.js';
import { createHeritageExtractor } from '../heritage-extractors/generic.js';
import {
emitJavaScopeCaptures,
interpretJavaImport,
interpretJavaTypeBinding,
javaBindingScopeFor,
javaImportOwningScope,
javaMergeBindings,
javaReceiverBinding,
javaArityCompatibility,
resolveJavaImportTarget,
} from './java/index.js';

export const javaProvider = defineLanguage({
id: SupportedLanguages.Java,
Expand Down Expand Up @@ -65,4 +76,15 @@ export const javaProvider = defineLanguage({
variableExtractor: createVariableExtractor(javaVariableConfig),
classExtractor: createClassExtractor(javaClassConfig),
heritageExtractor: createHeritageExtractor(SupportedLanguages.Java),

// ── RFC #909 Ring 3: scope-based resolution hooks ──
emitScopeCaptures: emitJavaScopeCaptures,
interpretImport: interpretJavaImport,
interpretTypeBinding: interpretJavaTypeBinding,
bindingScopeFor: javaBindingScopeFor,
importOwningScope: javaImportOwningScope,
mergeBindings: (_scope, bindings) => javaMergeBindings(bindings),
receiverBinding: javaReceiverBinding,
arityCompatibility: javaArityCompatibility,
resolveImportTarget: resolveJavaImportTarget,
});
49 changes: 49 additions & 0 deletions gitnexus/src/core/ingestion/languages/java/arity-metadata.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/**
* Extract Java arity metadata from a method-like tree-sitter node —
* `method_declaration` or `constructor_declaration`.
*
* Reuses `javaMethodConfig.extractParameters` so scope-extracted defs
* carry the same arity semantics as the legacy parse-worker path:
* - varargs (`...`) collapses `parameterCount` to `undefined`
* - `parameterTypes` collects declared type names; a literal
* `'varargs'` marker is appended for variadic methods so
* `javaArityCompatibility` can detect them.
*/

import type { SyntaxNode } from '../../utils/ast-helpers.js';
import { javaMethodConfig } from '../../method-extractors/configs/jvm.js';

/** Arity facts computed by `computeJavaArityMetadata` for one method-like node. */
export interface JavaArityMetadata {
/** Number of declared parameters; `undefined` when any parameter is variadic. */
readonly parameterCount: number | undefined;
/** Count of non-variadic parameters when variadic, otherwise the total parameter count. */
readonly requiredParameterCount: number | undefined;
/** Non-null declared type names, plus a trailing `'varargs'` marker for variadic methods; `undefined` when the list would be empty. */
readonly parameterTypes: readonly string[] | undefined;
}

/**
 * Derive arity metadata for a method-like node from the parameters reported
 * by `javaMethodConfig.extractParameters`.
 *
 * @param fnNode - The method-like node handed to the parameter extractor.
 * @returns Parameter counts and declared type names for `fnNode`.
 */
export function computeJavaArityMetadata(fnNode: SyntaxNode): JavaArityMetadata {
  const declared = javaMethodConfig.extractParameters?.(fnNode) ?? [];

  // Single pass: gather the non-null type names and count variadic params.
  const typeNames: string[] = [];
  let variadicSeen = 0;
  declared.forEach((param) => {
    if (param.isVariadic) variadicSeen += 1;
    if (param.type !== null) typeNames.push(param.type);
  });

  const isVariadic = variadicSeen > 0;
  // Literal marker that lets `javaArityCompatibility` recognise varargs.
  if (isVariadic) typeNames.push('varargs');

  const declaredCount = declared.length;
  // A variadic method has no upper bound on its argument count, so the
  // maximum is left undefined. The parameters ahead of the `...` one must
  // still be supplied, so their count becomes the required minimum — this is
  // what allows `f(int x, String... args)` called with 0 args to be rejected.
  const upperBound = isVariadic ? undefined : declaredCount;
  const lowerBound = isVariadic ? declaredCount - variadicSeen : declaredCount;

  return {
    parameterCount: upperBound,
    requiredParameterCount: lowerBound,
    parameterTypes: typeNames.length === 0 ? undefined : typeNames,
  };
}
31 changes: 31 additions & 0 deletions gitnexus/src/core/ingestion/languages/java/arity.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
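/**
* Java arity check, accommodating varargs (`...`).
*
* Verdicts:
* - `'compatible'` — argCount is at least `requiredParameterCount` and at
*   most `parameterCount`; varargs lift the upper bound only.
* - `'incompatible'` — argCount is below `requiredParameterCount`, or above
*   `parameterCount` with no varargs.
* - `'unknown'` — both counts are missing, or the callsite arity is not a
*   finite, non-negative number.
*/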

import type { Callsite, SymbolDefinition } from 'gitnexus-shared';

/**
 * Decide whether a callsite's argument count can fit a definition.
 *
 * @param def - Definition whose `parameterCount`, `requiredParameterCount`
 *   and `parameterTypes` are consulted.
 * @param callsite - Call whose `arity` is checked.
 * @returns `'unknown'` when both counts are missing or the arity is not a
 *   finite non-negative number; `'incompatible'` when the arity falls below
 *   the required count, or above the maximum without varargs; otherwise
 *   `'compatible'`.
 */
export function javaArityCompatibility(
  def: SymbolDefinition,
  callsite: Callsite,
): 'compatible' | 'unknown' | 'incompatible' {
  const { parameterCount: upper, requiredParameterCount: lower, parameterTypes } = def;
  if (upper === undefined && lower === undefined) return 'unknown';

  const supplied = callsite.arity;
  if (!Number.isFinite(supplied) || supplied < 0) return 'unknown';

  // Variadic definitions are flagged either by the literal 'varargs' marker
  // or by a type name that still contains `...`.
  const variadic =
    parameterTypes === undefined
      ? false
      : parameterTypes.some((typeName) => typeName === 'varargs' || typeName.includes('...'));

  const belowMinimum = lower !== undefined && supplied < lower;
  const aboveMaximum = upper !== undefined && supplied > upper && !variadic;

  return belowMinimum || aboveMaximum ? 'incompatible' : 'compatible';
}
30 changes: 30 additions & 0 deletions gitnexus/src/core/ingestion/languages/java/cache-stats.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/**
* Dev-mode counters for the cross-phase scope-captures parse cache
* (Java mirror of `languages/csharp/cache-stats.ts`).
*
* Gated by `PROF_SCOPE_RESOLUTION=1`. Production builds fold every
* increment into dead code via the module-level `PROF` constant, so
* the hot path in `captures.ts` stays branch-free.
*/

/** True only when the process was started with `PROF_SCOPE_RESOLUTION=1`. */
const PROF = process.env.PROF_SCOPE_RESOLUTION === '1';

/** Running totals; only ever incremented while `PROF` is true. */
const counters = { hits: 0, misses: 0 };

/** Count one parse-cache hit (no-op unless profiling is enabled). */
export function recordCacheHit(): void {
  if (!PROF) return;
  counters.hits += 1;
}

/** Count one parse-cache miss (no-op unless profiling is enabled). */
export function recordCacheMiss(): void {
  if (!PROF) return;
  counters.misses += 1;
}

/** Return a fresh snapshot of the current hit/miss totals. */
export function getJavaCaptureCacheStats(): { hits: number; misses: number } {
  const { hits, misses } = counters;
  return { hits, misses };
}

/** Zero both counters. */
export function resetJavaCaptureCacheStats(): void {
  counters.hits = 0;
  counters.misses = 0;
}
235 changes: 235 additions & 0 deletions gitnexus/src/core/ingestion/languages/java/captures.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
/**
* `emitScopeCaptures` for Java.
*
* Drives the Java scope query against tree-sitter-java and groups raw
* matches into `CaptureMatch[]` for the central extractor. Layers:
*
* 1. **Decomposed import declarations** — each `import_declaration`
* is re-emitted with `@import.kind/source/name` markers.
* 2. **Receiver binding synthesis** — `this`/`super` type-bindings
* on instance methods.
* 3. **Arity metadata** on method/constructor declarations.
* 4. **Reference arity** on call sites.
*
* Pure given the input source text. No I/O, no globals consulted.
*/

import type { Capture, CaptureMatch } from 'gitnexus-shared';
import { findNodeAtRange, nodeToCapture, syntheticCapture } from '../../utils/ast-helpers.js';
import { splitImportDeclaration } from './import-decomposer.js';
import { computeJavaArityMetadata } from './arity-metadata.js';
import { synthesizeJavaReceiverBinding } from './receiver-binding.js';
import { getJavaParser, getJavaScopeQuery } from './query.js';
import { recordCacheHit, recordCacheMiss } from './cache-stats.js';
import { getTreeSitterBufferSize } from '../../constants.js';
import { parseSourceSafe } from '../../../tree-sitter/safe-parse.js';

/** Declaration anchors that carry function-like arity metadata.
* `emitJavaScopeCaptures` looks for the first of these tags present in a
* match and, if one is found, attaches synthesized arity captures to it. */
const FUNCTION_DECL_TAGS = ['@declaration.method', '@declaration.constructor'] as const;

/** tree-sitter-java node types that the method extractor accepts.
* `findFunctionNode` tries them in this order and returns the first node
* found at the requested range. */
const FUNCTION_NODE_TYPES = ['method_declaration', 'constructor_declaration'] as const;

/**
 * Decide whether a `field_access` node should produce a read.member
 * reference.
 *
 * The read is suppressed when the node sits in the `object` slot of a
 * `method_invocation` parent or in the `left` slot of an
 * `assignment_expression` parent. Every other position, including a node
 * with no parent, is emitted.
 */
function shouldEmitReadMember(memberNode: SyntaxNode): boolean {
  const owner = memberNode.parent;
  if (owner === null) return true;

  // Pick the parent field whose occupant is already covered elsewhere.
  let coveredField: string;
  if (owner.type === 'method_invocation') {
    // Receiver of a call: the method call already handles this relationship.
    coveredField = 'object';
  } else if (owner.type === 'assignment_expression') {
    // Write target of an assignment.
    coveredField = 'left';
  } else {
    return true;
  }

  const occupant = owner.childForFieldName(coveredField);
  return occupant?.id !== memberNode.id;
}

/**
 * Node types treated as comments when counting call arguments.
 *
 * Current tree-sitter-java releases name comment nodes `line_comment` and
 * `block_comment`; `comment` is kept so grammar versions that use a single
 * comment node type are still filtered. Comments are named nodes, so without
 * this filter `f(a, /* note *\/ b)` would be counted as three arguments.
 */
const JAVA_COMMENT_NODE_TYPES: ReadonlySet<string> = new Set([
  'comment',
  'line_comment',
  'block_comment',
]);

/**
 * Run the Java scope query and group its raw matches into `CaptureMatch`
 * records, enriching them along the way.
 *
 * Per match, in order:
 *  1. `@import.statement` matches are replaced by their decomposed form when
 *     `splitImportDeclaration` can produce one, otherwise emitted as-is.
 *  2. Matches carrying both `@reference.call.free` and `@reference.receiver`
 *     are dropped (the member-call match covers them).
 *  3. `@reference.read.member` matches are dropped when the field access is
 *     already covered (see `shouldEmitReadMember`) or its node is not found.
 *  4. `@scope.function` matches are emitted, followed by any synthesized
 *     receiver-binding matches for that function node.
 *  5. Method/constructor declarations get parameter-count,
 *     required-parameter-count and parameter-types captures.
 *  6. Call sites without an `@reference.arity` capture get one, plus
 *     `@reference.parameter-types` inferred from literal arguments.
 *
 * @param sourceText - Java source; parsed only when `cachedTree` is absent.
 * @param _filePath - Unused by this implementation.
 * @param cachedTree - Previously parsed tree for `sourceText`. When given,
 *   parsing is skipped and a cache hit is recorded instead of a miss.
 * @returns The grouped (and synthesized) capture matches in emission order.
 */
export function emitJavaScopeCaptures(
  sourceText: string,
  _filePath: string,
  cachedTree?: unknown,
): readonly CaptureMatch[] {
  let tree = cachedTree as ReturnType<ReturnType<typeof getJavaParser>['parse']> | undefined;
  if (tree === undefined) {
    tree = parseSourceSafe(getJavaParser(), sourceText, undefined, {
      bufferSize: getTreeSitterBufferSize(sourceText),
    });
    recordCacheMiss();
  } else {
    recordCacheHit();
  }

  const rawMatches = getJavaScopeQuery().matches(tree.rootNode);
  const out: CaptureMatch[] = [];

  for (const m of rawMatches) {
    // Re-key the raw captures by their `@`-prefixed tag.
    const grouped: Record<string, Capture> = {};
    for (const c of m.captures) {
      const tag = '@' + c.name;
      grouped[tag] = nodeToCapture(tag, c.node);
    }
    if (Object.keys(grouped).length === 0) continue;

    // Decompose each `import_declaration`.
    if (grouped['@import.statement'] !== undefined) {
      const stmtCapture = grouped['@import.statement'];
      const stmtNode = findNodeAtRange(tree.rootNode, stmtCapture.range, 'import_declaration');
      if (stmtNode !== null) {
        const decomposed = splitImportDeclaration(stmtNode);
        if (decomposed !== null) {
          out.push(decomposed);
          continue;
        }
      }
      // Fall back to the raw match when decomposition is not possible.
      out.push(grouped);
      continue;
    }

    // Skip free-call matches that are actually member calls. The query
    // matches ALL method_invocations as @reference.call.free (without
    // negation) because tree-sitter-java's query engine drops !object
    // patterns when a positive object: pattern exists for the same node
    // type. Filter here: if the match has @reference.call.free but also
    // has @reference.receiver, it's a member call — skip the free match
    // (the separate @reference.call.member match covers it).
    if (
      grouped['@reference.call.free'] !== undefined &&
      grouped['@reference.receiver'] !== undefined
    ) {
      continue;
    }

    // Filter read.member when it's a child of method_invocation or assignment.
    if (grouped['@reference.read.member'] !== undefined) {
      const anchor = grouped['@reference.read.member'];
      const memberNode = findNodeAtRange(tree.rootNode, anchor.range, 'field_access');
      if (memberNode === null || !shouldEmitReadMember(memberNode)) {
        continue;
      }
    }

    // Synthesize `this` / `super` receiver type-bindings on every
    // instance method-like. The scope match itself is emitted first so the
    // synthesized bindings follow it in the output.
    if (grouped['@scope.function'] !== undefined) {
      out.push(grouped);
      const anchor = grouped['@scope.function']!;
      const fnNode = findFunctionNode(tree.rootNode, anchor.range);
      if (fnNode !== null) {
        for (const synth of synthesizeJavaReceiverBinding(fnNode)) {
          out.push(synth);
        }
      }
      continue;
    }

    // Synthesize arity metadata on function-like declarations.
    const declTag = FUNCTION_DECL_TAGS.find((t) => grouped[t] !== undefined);
    if (declTag !== undefined) {
      const anchor = grouped[declTag]!;
      const fnNode = findFunctionNode(tree.rootNode, anchor.range);
      if (fnNode !== null) {
        const arity = computeJavaArityMetadata(fnNode);
        if (arity.parameterCount !== undefined) {
          grouped['@declaration.parameter-count'] = syntheticCapture(
            '@declaration.parameter-count',
            fnNode,
            String(arity.parameterCount),
          );
        }
        if (arity.requiredParameterCount !== undefined) {
          grouped['@declaration.required-parameter-count'] = syntheticCapture(
            '@declaration.required-parameter-count',
            fnNode,
            String(arity.requiredParameterCount),
          );
        }
        if (arity.parameterTypes !== undefined) {
          grouped['@declaration.parameter-types'] = syntheticCapture(
            '@declaration.parameter-types',
            fnNode,
            JSON.stringify(arity.parameterTypes),
          );
        }
      }
    }

    // Synthesize `@reference.arity` on every callsite.
    const callTag = (
      ['@reference.call.free', '@reference.call.member', '@reference.call.constructor'] as const
    ).find((t) => grouped[t] !== undefined);
    if (callTag !== undefined && grouped['@reference.arity'] === undefined) {
      const anchor = grouped[callTag]!;
      const callNode =
        findNodeAtRange(tree.rootNode, anchor.range, 'method_invocation') ??
        findNodeAtRange(tree.rootNode, anchor.range, 'object_creation_expression');
      if (callNode !== null) {
        const argList = callNode.childForFieldName('arguments');
        // Comments inside the argument list are named nodes; exclude every
        // comment node type so they are not counted as arguments.
        const args =
          argList === null
            ? []
            : argList.namedChildren.filter(
                (c) => c !== null && !JAVA_COMMENT_NODE_TYPES.has(c.type),
              );
        grouped['@reference.arity'] = syntheticCapture(
          '@reference.arity',
          callNode,
          String(args.length),
        );

        const argTypes = args.map((arg) => inferArgType(arg!));
        grouped['@reference.parameter-types'] = syntheticCapture(
          '@reference.parameter-types',
          callNode,
          JSON.stringify(argTypes),
        );
      }
    }

    out.push(grouped);
  }

  return out;
}

/** Node type of the tree returned by the Java parser's `parse`, derived from
* `getJavaParser` (the type of that tree's `rootNode`). */
type SyntaxNode = ReturnType<ReturnType<typeof getJavaParser>['parse']>['rootNode'];

/**
 * Infer a Java argument's static type from literal patterns.
 *
 * Integer literals are `long` when suffixed with `l`/`L` and `int` otherwise
 * (JLS §3.10.1). Floating-point literals are `float` when suffixed with
 * `f`/`F` and `double` otherwise (JLS §3.10.2). For `new T(...)` the text of
 * the `type` field is returned.
 *
 * @param argNode - One argument node of a call.
 * @returns The inferred type name, `'null'` for the null literal, or `''`
 *   when nothing can be inferred from the node's shape.
 */
function inferArgType(argNode: SyntaxNode): string {
  switch (argNode.type) {
    case 'decimal_integer_literal':
    case 'hex_integer_literal':
    case 'octal_integer_literal':
    case 'binary_integer_literal': {
      // `l` is not a valid digit in any integer radix, so a trailing l/L is
      // always the long type suffix.
      const text = argNode.text;
      return text.endsWith('L') || text.endsWith('l') ? 'long' : 'int';
    }
    case 'decimal_floating_point_literal':
    case 'hex_floating_point_literal': {
      // Both forms end in decimal digits (hex floats require a `p` exponent),
      // so a trailing f/F is always the float type suffix.
      const text = argNode.text;
      return text.endsWith('F') || text.endsWith('f') ? 'float' : 'double';
    }
    case 'string_literal':
      return 'String';
    case 'character_literal':
      return 'char';
    case 'true':
    case 'false':
      return 'boolean';
    case 'null_literal':
      return 'null';
    case 'object_creation_expression': {
      const typeNode = argNode.childForFieldName('type');
      return typeNode?.text ?? '';
    }
    default:
      return '';
  }
}

/**
 * Locate the function-like node covering `range`.
 *
 * Each entry of `FUNCTION_NODE_TYPES` is tried in order via
 * `findNodeAtRange`; the first lookup that does not return `null` wins.
 *
 * @returns The matching node, or `null` when no node type matches.
 */
function findFunctionNode(rootNode: SyntaxNode, range: Capture['range']): SyntaxNode | null {
  let located: SyntaxNode | null = null;
  // `some` stops at the first node type that yields a node.
  FUNCTION_NODE_TYPES.some((kind) => {
    const candidate = findNodeAtRange(rootNode, range, kind);
    if (candidate === null) return false;
    located = candidate as SyntaxNode;
    return true;
  });
  return located;
}
Loading
Loading