Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions gitnexus/bench/scope-capture/baselines.json
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,10 @@
"_rebaselined": "#1970 review + tri-review follow-ups: constructor-call retag, cascade calls, built-in suppression, enum scope, #1926 F24/F25, named-ctor dedup (crash fix), container-name binding suppression; heritage file-affinity resolution. Fixtures: member-call-contexts, constructor-body, named-constructor-body, heritage-name-collision, construct-cascade."
},
"java": {
"fingerprint": "d5cf68e9faf92fffd928c1ee6e584c72cc65918d1f1b5078abb3bfe09ac699bf",
"fingerprint": "9b29cafe32873b4902bda311bd089ffc04efe08f13557b966d29544be514080a",
"scaling_budget": 1.5,
"_rebaselined": "#1956 synth-widening: + java-iface-extends fixture; synthesizeJavaInheritanceReferences now ALSO walks interface_declaration extends_interfaces (interface IA extends IB, IC<T>), matching the #1940 legacy leg. (Earlier U2+review: java-qualified-base fixture covers 2- AND 3-segment qualified bases guarding the legacy end-anchor; synth tail-resolves scoped bases.) Linear (~1.03). (Earliest: java added to bench, exposed+fixed the O(n^2) findNodeAtRange root-walk; 3.09 -> ~0.99.) | #942: scope-resolution-only cleanup reworded fixture comments; capture byte-positions shift, capture LOGIC unchanged."
"_rebaselined": "#1956 synth-widening: + java-iface-extends fixture; synthesizeJavaInheritanceReferences now ALSO walks interface_declaration extends_interfaces (interface IA extends IB, IC<T>), matching the #1940 legacy leg. (Earlier U2+review: java-qualified-base fixture covers 2- AND 3-segment qualified bases guarding the legacy end-anchor; synth tail-resolves scoped bases.) Linear (~1.03). (Earliest: java added to bench, exposed+fixed the O(n^2) findNodeAtRange root-walk; 3.09 -> ~0.99.) | #942: scope-resolution-only cleanup reworded fixture comments; capture byte-positions shift, capture LOGIC unchanged.",
"_note": "#1928 / #2045: F35 adds qualified + qualified-generic constructor query captures (`new pkg.Foo()`, `new a.b.Foo()`, `new pkg.Box<T>()`); F38 synthesizes `@reference.call.constructor` on `super(...)`/`this(...)` explicit_constructor_invocation nodes; F41 generic-aware stripQualifier in interpret (type-binding normalization). + java-qualified-constructor and java-explicit-constructor fixtures. Pure capture-additive + fixture-corpus drift; scaling stays linear (~1.06)."
},
"typescript": {
"fingerprint": "3f44a4a6892698df2d145c8ff2812c3b318807648983c88aca28fbd694f172f9",
Expand Down
99 changes: 98 additions & 1 deletion gitnexus/src/core/ingestion/languages/java/captures.ts
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,104 @@ export function emitJavaScopeCaptures(
out.push(grouped);
}

return [...resolveVarTypeBindings(out), ...synthesizeJavaInheritanceReferences(tree.rootNode)];
return [
...resolveVarTypeBindings(out),
...synthesizeJavaInheritanceReferences(tree.rootNode),
...synthesizeJavaExplicitConstructorReferences(tree.rootNode),
];
}

/**
* Synthesize `@reference.call.constructor` captures for explicit constructor
* invocations — `super(...)` and `this(...)` (F38 #1928). tree-sitter-java
* models these as `explicit_constructor_invocation` nodes, which the scope
* query does not match, so the chained-constructor CALLS edges (subclass ctor →
* superclass ctor; ctor → sibling overload) were silently dropped.
*
* The grammar gives no constructor *name* at the call site (the child is a bare
* `(super)` / `(this)` token), so the target name is resolved structurally:
* - `this(...)` → the enclosing type's own simple name (constructor symbols
* are keyed by the declaring class name).
* - `super(...)` → the enclosing class's superclass simple-name tail (reusing
* `javaBaseLookupNameNode` so qualified/generic supers reduce
* to the bare class name, matching the EXTENDS synth). An
* implicit `Object` super (no `superclass` field) has no
* in-graph symbol, so it is skipped rather than emitting a
* dangling reference.
* Arity is attached so overloaded constructors disambiguate downstream, mirroring
* the call-site arity synthesized for `new X(...)`.
*/
function synthesizeJavaExplicitConstructorReferences(root: SyntaxNode): CaptureMatch[] {
  const captures: CaptureMatch[] = [];
  // Iterative tree walk driven by an explicit LIFO work-list (no recursion).
  const pending: SyntaxNode[] = [root];
  for (let current = pending.pop(); current !== undefined; current = pending.pop()) {
    if (current.type === 'explicit_constructor_invocation') {
      emitJavaExplicitConstructorRef(captures, current);
    }
    // Children are pushed in source order; because the work-list is LIFO the
    // last named child is the next node visited.
    const childCount = current.namedChildCount;
    for (let index = 0; index < childCount; index += 1) {
      const child = current.namedChild(index);
      if (child !== null) pending.push(child);
    }
  }
  return captures;
}

// Syntax-node types that `findEnclosingTypeDeclaration` accepts as the
// enclosing type declaration when walking up the parent chain.
const TYPE_DECL_NODE_TYPES = new Set([
'class_declaration',
'enum_declaration',
'record_declaration',
]);

/**
 * Append one synthesized constructor-call capture for a single
 * `explicit_constructor_invocation` node, or nothing when no target name can
 * be determined.
 *
 * @param out  Capture list that receives the synthesized match (mutated).
 * @param node The `explicit_constructor_invocation` node to translate.
 */
function emitJavaExplicitConstructorRef(out: CaptureMatch[], node: SyntaxNode): void {
  const keyword = node.childForFieldName('constructor');
  if (keyword === null) return;

  const owner = findEnclosingTypeDeclaration(node);
  if (owner === null) return;

  // Anything other than a `this` / `super` keyword has no target to name.
  const isThisCall = keyword.type === 'this';
  if (!isThisCall && keyword.type !== 'super') return;

  let nameNode: SyntaxNode | null = null;
  if (isThisCall) {
    // `this(...)` targets the enclosing type's own name.
    nameNode = owner.childForFieldName('name');
  } else {
    // Only class_declaration carries a `superclass` field; enum/record cannot
    // declare an explicit superclass, so `super(...)` there has no resolvable
    // target symbol.
    const superclass = owner.childForFieldName('superclass');
    if (superclass === null) return;
    // Take the first base for which the lookup helper yields a name node.
    for (const base of superclass.namedChildren) {
      if (base === null) continue;
      const candidate = javaBaseLookupNameNode(base);
      if (candidate === null) continue;
      nameNode = candidate;
      break;
    }
  }
  if (nameNode === null) return;

  // Arity = named children of the argument list, not counting comment nodes.
  let argCount = 0;
  const argList = node.childForFieldName('arguments');
  if (argList !== null) {
    for (const arg of argList.namedChildren) {
      if (arg === null) continue;
      if (arg.type === 'block_comment' || arg.type === 'line_comment') continue;
      argCount += 1;
    }
  }

  const match: CaptureMatch = {
    '@reference.call.constructor': nodeToCapture('@reference.call.constructor', node),
    '@reference.name': nodeToCapture('@reference.name', nameNode),
    '@reference.arity': syntheticCapture('@reference.arity', node, String(argCount)),
  };
  out.push(match);
}

/**
 * Walk up from `node` (exclusive) and return the nearest ancestor whose type
 * is listed in `TYPE_DECL_NODE_TYPES`, or `null` when the root is reached
 * without finding one.
 */
function findEnclosingTypeDeclaration(node: SyntaxNode): SyntaxNode | null {
  for (let ancestor = node.parent; ancestor !== null; ancestor = ancestor.parent) {
    if (TYPE_DECL_NODE_TYPES.has(ancestor.type)) return ancestor;
  }
  return null;
}

/**
Expand Down
18 changes: 12 additions & 6 deletions gitnexus/src/core/ingestion/languages/java/interpret.ts
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,14 @@ export function interpretJavaTypeBinding(captures: CaptureMatch): ParsedTypeBind
const typeCap = captures['@type-binding.type'];
if (nameCap === undefined || typeCap === undefined) return null;

// Strip qualifier first so that `com.example.BaseModel<T>` becomes
// `BaseModel<T>` before stripGeneric — the JVM-erasure fallback pattern
// requires an unqualified identifier at the start of the string.
const rawType = stripGeneric(stripQualifier(typeCap.text.trim()));
// Strip generics BEFORE the qualifier (F41 #1928). Stripping the qualifier
// first uses `lastIndexOf('.')`, which for a qualified *type argument*
// (`Map<String, com.example.User>`) cuts inside the generic and yields a
// corrupted `User>`. Unwrapping generics first reduces the string to a single
// (possibly qualified) class name, then the qualifier strip leaves the bare
// simple name. `stripGeneric`'s erasure fallback is qualifier-tolerant so a
// qualified generic base (`com.example.BaseModel<T>`) still reduces correctly.
const rawType = stripQualifier(stripGeneric(typeCap.text.trim()));

// Skip `var` — tree-sitter-java parses `var` as type_identifier with
// text "var". When used without a constructor initializer, there's no
Expand Down Expand Up @@ -127,8 +131,10 @@ function stripGeneric(text: string): string {
// `BaseModel<T>` → `BaseModel`, `Builder<Self>` → `Builder`.
// This mirrors JVM type erasure — the raw class name is the resolvable symbol.
// The pattern matches up to the first `<` to handle nested generics safely
// (e.g. `BaseModel<List<String>>` → `BaseModel`).
const fallback = text.match(/^([A-Za-z_$][A-Za-z0-9_$]*)<.+>$/s);
// (e.g. `BaseModel<List<String>>` → `BaseModel`). The base is allowed to be
// qualified (`com.example.BaseModel<T>` → `com.example.BaseModel`) since the
// caller strips the qualifier afterwards (F41 #1928).
const fallback = text.match(/^((?:[A-Za-z_$][A-Za-z0-9_$]*\.)*[A-Za-z_$][A-Za-z0-9_$]*)<.+>$/s);
if (fallback !== null) return fallback[1].trim();

return text;
Expand Down
17 changes: 16 additions & 1 deletion gitnexus/src/core/ingestion/languages/java/query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -214,8 +214,23 @@ const JAVA_SCOPE_QUERY = `
type: (generic_type
(type_identifier) @reference.name)) @reference.call.constructor

;; References — qualified constructor calls: new pkg.Foo(), new a.b.Foo() (F35 #1928)
;; tree-sitter-java parses \`pkg.Foo\` as a scoped_type_identifier whose final
;; child is the simple type. Bind that tail as @reference.name (trailing \`.\`
;; anchor = last child) so resolution targets \`Foo\`, not the raw \`pkg.Foo\` text.
;; Mirrors the TS/JS new-expression qualified-constructor capture.
(object_creation_expression
type: (scoped_type_identifier) @reference.call.constructor.qualified) @reference.call.constructor
type: (scoped_type_identifier
(type_identifier) @reference.name .) @reference.call.constructor.qualified) @reference.call.constructor

;; References — qualified + generic constructor calls: new pkg.Box<T>() (F35 #1928)
;; The base is a generic_type whose first child is a scoped_type_identifier, so
;; neither the simple-generic nor the plain-scoped arm above matches it. Bind the
;; scoped tail as @reference.name.
(object_creation_expression
type: (generic_type
(scoped_type_identifier
(type_identifier) @reference.name .) @reference.call.constructor.qualified)) @reference.call.constructor

;; References — method references: User::getName, obj::method
(method_reference
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,16 @@ function isCallerAnchorLabel(label: NodeLabel): boolean {
);
}

/**
 * True for the callable labels whose same-name overloads live on separate
 * graph nodes keyed by parameter types / shape: `Function`, `Method` and
 * `Constructor` (the latter added for #1928 F38). Any other label, or
 * `undefined`, yields false.
 *
 * Must stay in sync with `isOverloadableCallable` in `node-lookup.ts` so that
 * key registration and key lookup agree.
 */
function isOverloadableCallable(label: NodeLabel | undefined): boolean {
  switch (label) {
    case 'Function':
    case 'Method':
    case 'Constructor':
      return true;
    default:
      return false;
  }
}

/**
* Look up a `SymbolDefinition` in the graph node lookup.
*
Expand Down Expand Up @@ -107,7 +117,7 @@ export function resolveDefGraphId(
if (cHit !== undefined) return cHit;
}
if (
(def.type === 'Function' || def.type === 'Method') &&
isOverloadableCallable(def.type) &&
def.parameterTypes !== undefined &&
def.parameterTypeClasses !== undefined
) {
Expand All @@ -120,9 +130,12 @@ export function resolveDefGraphId(
}
// Overload disambiguation: when the def carries parameter types,
// try the parameter-typed key first so same-name same-arity
// overloads route to their distinct graph nodes.
// overloads route to their distinct graph nodes. Constructors are
// included so a `this(int)`/`super(int)` chain or `new Foo(int)`
// resolves to the matching ctor overload instead of first-wins
// collapsing onto another `Foo` ctor (a self-loop) — #1928 F38.
if (
(def.type === 'Function' || def.type === 'Method') &&
isOverloadableCallable(def.type) &&
def.parameterTypes !== undefined &&
def.parameterTypes.length > 0
) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,7 @@ export function buildGraphNodeLookup(graph: KnowledgeGraph): GraphNodeLookup {
// a parameter-types-suffixed key so resolveDefGraphId can find
// the right overload by matching its def's parameterTypes.
const pTypes = (props as { parameterTypes?: readonly string[] }).parameterTypes;
if (
pTypes !== undefined &&
pTypes.length > 0 &&
(node.label === 'Function' || node.label === 'Method')
) {
if (pTypes !== undefined && pTypes.length > 0 && isOverloadableCallable(node.label)) {
const pKey = qualifiedKey(
props.filePath,
node.label,
Expand All @@ -115,7 +111,7 @@ export function buildGraphNodeLookup(graph: KnowledgeGraph): GraphNodeLookup {
const pClasses = (props as { parameterTypeClasses?: readonly ParameterTypeClass[] })
.parameterTypeClasses;
const shapeTag = parameterShapeIdTag(pTypes, pClasses);
if (shapeTag !== '' && (node.label === 'Function' || node.label === 'Method')) {
if (shapeTag !== '' && isOverloadableCallable(node.label)) {
const shapeKey = qualifiedKey(props.filePath, node.label, `${keyQualified}${shapeTag}`);
if (!lookup.has(shapeKey)) lookup.set(shapeKey, node.id);
}
Expand Down Expand Up @@ -162,6 +158,18 @@ export function buildGraphNodeLookup(graph: KnowledgeGraph): GraphNodeLookup {
return lookup;
}

/**
 * True for the callable labels that get an additional parameter-types / shape
 * lookup key, so same-name overloads resolve to distinct graph nodes:
 * `Function`, `Method` and `Constructor`.
 *
 * Constructors are included because a class declaring `Foo()` and `Foo(int)`
 * mints distinct `#0`/`#1` Constructor nodes, and a `this(...)`/`super(...)`
 * edge (or any `new Foo(args)`) has to land on the matching one. Without the
 * overload key both nodes collapse onto the first-wins qualified/simple key
 * and a `this()` chain degenerates into a self-loop (#1928 F38).
 */
function isOverloadableCallable(label: NodeLabel): boolean {
  if (label === 'Function') return true;
  if (label === 'Method') return true;
  return label === 'Constructor';
}

export function isLinkableLabel(label: NodeLabel): boolean {
return (
label === 'Function' ||
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
package models;

public class Base {
public Base(int x) {}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package models;

public class Child extends Base {
public Child() {
super(1);
}

public Child(int x) {
this();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
public class App {
public void make() {
pkg.Foo f = new pkg.Foo();
new pkg.Box<String>();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
package pkg;

public class Box<T> {
public Box() {}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package pkg;

public class Foo {
public Foo() {}

public void run() {}
}
76 changes: 76 additions & 0 deletions gitnexus/test/integration/resolvers/java-1928.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/**
* Java parsing-layer coverage gaps (#1928) — end-to-end resolution.
*
* - F35: qualified / qualified-generic constructor calls (`new pkg.Foo()`,
* `new pkg.Box<String>()`) resolve to the target constructor instead of
* dropping the edge on a corrupted `pkg.Foo` reference name.
* - F38: `super(...)` / `this(...)` explicit constructor invocations emit CALLS
* edges to the superclass / sibling constructor.
*/
import { describe, it, expect, beforeAll } from 'vitest';
import path from 'path';
import { FIXTURES, getRelationships, runPipelineFromRepo, type PipelineResult } from './helpers.js';

describe('Java qualified constructor resolution (F35 #1928)', () => {
  let result: PipelineResult;

  beforeAll(async () => {
    const fixtureDir = path.join(FIXTURES, 'java-qualified-constructor');
    result = await runPipelineFromRepo(fixtureDir, () => {});
  }, 60000);

  // First CALLS edge whose source is `make` and whose target has the given name.
  const findCallFromMake = (targetName: string) =>
    getRelationships(result, 'CALLS').find(
      (edge) => edge.target === targetName && edge.source === 'make',
    );

  it('resolves `new pkg.Foo()` to the Foo constructor', () => {
    const edge = findCallFromMake('Foo');
    expect(edge).toBeDefined();
    expect(edge!.targetLabel).toBe('Constructor');
    expect(edge!.targetFilePath).toBe('pkg/Foo.java');
  });

  it('resolves `new pkg.Box<String>()` to the Box constructor', () => {
    const edge = findCallFromMake('Box');
    expect(edge).toBeDefined();
    expect(edge!.targetLabel).toBe('Constructor');
    expect(edge!.targetFilePath).toBe('pkg/Box.java');
  });

  it('does not emit a CALLS edge to a corrupted `pkg.Foo` / `pkg.Box` name', () => {
    // A qualified name showing up as an edge target means the raw scoped text
    // leaked through instead of the simple-name tail.
    const hasCorruptedTarget = getRelationships(result, 'CALLS').some(
      (edge) => edge.target === 'pkg.Foo' || edge.target === 'pkg.Box',
    );
    expect(hasCorruptedTarget).toBe(false);
  });
});

describe('Java explicit constructor invocation resolution (F38 #1928)', () => {
let result: PipelineResult;

beforeAll(async () => {
result = await runPipelineFromRepo(path.join(FIXTURES, 'java-explicit-constructor'), () => {});
}, 60000);

it('resolves `super(1)` in Child() to the Base constructor', () => {
const calls = getRelationships(result, 'CALLS');
const superCall = calls.find((c) => c.target === 'Base' && c.targetLabel === 'Constructor');
expect(superCall).toBeDefined();
expect(superCall!.source).toBe('Child');
expect(superCall!.targetFilePath).toBe('models/Base.java');
// Source is the arity-0 `Child()`, where `super(1)` lives.
expect(superCall!.rel.sourceId).toContain('Child.Child#0');
expect(superCall!.rel.targetId).toContain('Base.Base#1');
});

it('resolves `this()` in Child(int) to a DISTINCT Child constructor (no self-loop)', () => {
const calls = getRelationships(result, 'CALLS');
const thisCall = calls.find(
(c) => c.target === 'Child' && c.targetLabel === 'Constructor' && c.source === 'Child',

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[P2 · reproduced] F38 this() produces a self-loop, and this assertion masks it.

Running the pipeline on this fixture, Child(int x){ this(); } emits CALLS Child()@L3 → Child()@L3 (selfLoop=true), not the intended Child(int)@L7 → Child()@L3 — both endpoints collapse onto the first Child constructor and the real ctor→ctor edge is lost (super(1)→Base resolves correctly).

This find(...) matches by name only (source==='Child' && target==='Child'), which a self-loop satisfies, so the suite stays green on a broken edge.

Root cause is pre-existing and outside this diff: scope-resolution/graph-bridge/node-lookup.ts:101-135 registers overload-disambiguation keys only for Function/Method, never Constructor, so same-name constructors first-wins-collapse to one graph node.

Fix: at minimum assert the edge connects distinct constructors (e.g. by startLine/arity or node id) so this test goes red and exposes the miss; the full fix extends the node-lookup/ids overload keys to include Constructor.

);
expect(thisCall).toBeDefined();
expect(thisCall!.targetFilePath).toBe('models/Child.java');
// The edge must connect DISTINCT constructors: the caller `Child(int)` (#1)
// chains to `Child()` (#0). A self-loop (`#0 → #0`) — the bug this PR's
// review caught (#1928 F38: ctor overload keys missing in node-lookup) —
// satisfies the name-only match above but must NOT pass here.
expect(thisCall!.rel.sourceId).not.toBe(thisCall!.rel.targetId);
expect(thisCall!.rel.sourceId).toContain('Child.Child#1');
expect(thisCall!.rel.targetId).toContain('Child.Child#0');
});
});
Loading
Loading