diff --git a/gitnexus/src/core/ingestion/class-extractors/configs/c-cpp.ts b/gitnexus/src/core/ingestion/class-extractors/configs/c-cpp.ts index d39888aa14..41a4483066 100644 --- a/gitnexus/src/core/ingestion/class-extractors/configs/c-cpp.ts +++ b/gitnexus/src/core/ingestion/class-extractors/configs/c-cpp.ts @@ -45,10 +45,33 @@ export const cClassConfig: ClassExtractionConfig = { export const cppClassConfig: ClassExtractionConfig = { language: SupportedLanguages.CPlusPlus, typeDeclarationNodes: ['class_specifier', 'struct_specifier', 'enum_specifier'], - ancestorScopeNodeTypes: ['namespace_definition', 'class_specifier', 'struct_specifier'], + // #1995: `union_specifier` is included so a type nested in a NAMED union + // (`union U1 { struct Inner {...} }`) qualifies as `U1.Inner`. Anonymous unions + // have no `name` child → extractScopeSegmentsFromNode returns [] → they correctly + // contribute nothing (members inject into the enclosing scope). C uses the + // separate cClassConfig (no qualifiedNodeId), so it is intentionally untouched. + ancestorScopeNodeTypes: [ + 'namespace_definition', + 'class_specifier', + 'struct_specifier', + 'union_specifier', + ], // #1978: key nested-type nodes by their fully-qualified path (Outer.Inner) so // same-tail nested types in one TU stay distinct instead of silently merging. qualifiedNodeId: true, + // #1995: anonymous namespaces have no `name` child, so the generic scope walker + // drops them (empty segment) and two `namespace { struct Inner {} }` blocks in one + // TU collapse onto a single `Inner` node. Give each anonymous namespace_definition + // a deterministic per-block discriminator (its start byte — stable across the + // sequential and worker full-file parses) so the nested types stay distinct. + // Returning `undefined` for every other scope — named namespaces (incl. `inline + // namespace`), classes, structs, named unions — falls through to the default + // name-based extraction, leaving them unchanged. 
Anonymous UNIONS are not matched + // here (members inject into the enclosing scope), so they keep yielding []. + extractScopeSegments: (node) => + node.type === 'namespace_definition' && !node.childForFieldName?.('name') + ? [`@anon${node.startIndex}`] + : undefined, extractName: (node) => { const nameNode = node.childForFieldName?.('name'); if (!nameNode) return undefined; diff --git a/gitnexus/src/core/ingestion/utils/ast-helpers.ts b/gitnexus/src/core/ingestion/utils/ast-helpers.ts index 52d28169f7..e3da24a7a7 100644 --- a/gitnexus/src/core/ingestion/utils/ast-helpers.ts +++ b/gitnexus/src/core/ingestion/utils/ast-helpers.ts @@ -503,18 +503,44 @@ export const findEnclosingClassInfo = ( // different mods own through DISTINCT nodes. The Impl-node // materialization (parsing-processor / parse-worker) mirrors this, so // the owner id == the Impl node id byte-for-byte (#1982). - const firstType = children.find( - (c: SyntaxNode) => c.type === 'type_identifier' || c.type === 'scoped_type_identifier', + // - GENERIC (`impl<T> Inner<T>`, generic_type): the @definition.impl + // node is materialized only when the generic base is a bare + // `type_identifier` (tree-sitter-queries.ts), qualified the same way — + // so drill into the base and mirror that gate, keeping the owner id == + // the node id byte-for-byte (#1992). A generic over a SCOPED base + // (`impl<T> a::Inner<T>`) materializes NO node, so it must produce NO + // owner (the method orphans — scoped-generic deferred, #1992). + const implTarget = children.find( + (c: SyntaxNode) => + c.type === 'type_identifier' || + c.type === 'scoped_type_identifier' || + c.type === 'generic_type', ); - if (firstType) { - const ownerKey = - firstType.type === 'type_identifier' - ? qualifyRustImplTargetByModScope(current, firstType.text) - : firstType.text; - return { - classId: generateId('Impl', `${filePath}:${ownerKey}`), - className: firstType.text, - }; + if (implTarget) { + const baseType = + implTarget.type === 'generic_type' + ? 
(implTarget.childForFieldName?.('type') ?? null) + : implTarget; + if (baseType?.type === 'type_identifier') { + // Bare target (`impl Inner` or `impl<T> Inner<T>`): qualify by mod scope. + return { + classId: generateId( + 'Impl', + `${filePath}:${qualifyRustImplTargetByModScope(current, baseType.text)}`, + ), + className: baseType.text, + }; + } + if (baseType?.type === 'scoped_type_identifier' && implTarget.type !== 'generic_type') { + // Top-level scoped `impl a::Inner`: key by full raw text (#1975). + return { + classId: generateId('Impl', `${filePath}:${baseType.text}`), + className: baseType.text, + }; + } + // generic-over-scoped (`impl<T> a::Inner<T>`) and any other base: fall + // through with no owner — no @definition.impl node exists, so attributing + // a method to a synthesized id would orphan it against a phantom owner. } } diff --git a/gitnexus/test/fixtures/lang-resolution/cpp-anon-ns-tail-collision/main.cpp b/gitnexus/test/fixtures/lang-resolution/cpp-anon-ns-tail-collision/main.cpp new file mode 100644 index 0000000000..47250e0b42 --- /dev/null +++ b/gitnexus/test/fixtures/lang-resolution/cpp-anon-ns-tail-collision/main.cpp @@ -0,0 +1,17 @@ +// Same-tail structs in sibling ANONYMOUS namespaces (#1995). +// +// An anonymous `namespace { }` is a namespace_definition with no `name` child, so +// extractScopeSegmentsFromNode returns [] and both `Inner` structs qualified to the +// bare `Inner` and merged onto one node — from_anon_a / from_anon_b cross-wired. A +// deterministic per-block discriminator (derived from the namespace node's start +// byte) keeps the two blocks' types distinct. 
+namespace { +struct Inner { + void from_anon_a() {} +}; +} +namespace { +struct Inner { + void from_anon_b() {} +}; +} diff --git a/gitnexus/test/fixtures/lang-resolution/cpp-union-nested-tail-collision/main.cpp b/gitnexus/test/fixtures/lang-resolution/cpp-union-nested-tail-collision/main.cpp new file mode 100644 index 0000000000..063da384f8 --- /dev/null +++ b/gitnexus/test/fixtures/lang-resolution/cpp-union-nested-tail-collision/main.cpp @@ -0,0 +1,17 @@ +// Same-tail structs nested in sibling NAMED unions (#1995). +// +// `union_specifier` was omitted from cppClassConfig.ancestorScopeNodeTypes, so a +// struct nested in `union U1` and one nested in `union U2` both qualified to the +// bare `Inner` and merged onto ONE Struct:...:Inner node — from_u1 / from_u2 +// cross-wired (dangling:0 but wrong). With the union scope qualified they must +// materialize distinct `U1.Inner` / `U2.Inner` nodes. +union U1 { + struct Inner { + void from_u1() {} + }; +}; +union U2 { + struct Inner { + void from_u2() {} + }; +}; diff --git a/gitnexus/test/fixtures/lang-resolution/rust-nested-tail-collision-generic/lib.rs b/gitnexus/test/fixtures/lang-resolution/rust-nested-tail-collision-generic/lib.rs new file mode 100644 index 0000000000..2a1afe8230 --- /dev/null +++ b/gitnexus/test/fixtures/lang-resolution/rust-nested-tail-collision-generic/lib.rs @@ -0,0 +1,30 @@ +// #1992: GENERIC inherent-impl ownership. Two same-tail `Inner` types under +// sibling mods, each with a generic inherent impl `impl Inner`. Their +// methods must own through DISTINCT mod-qualified Impl nodes (`a.Inner` / +// `b.Inner`), not orphan to File. +pub mod a { + pub struct Inner { v: T } + impl Inner { + pub fn fa(&self) {} + } +} + +pub mod b { + pub struct Inner { v: T } + impl Inner { + pub fn fb(&self) {} + } +} + +// Scoped-generic inherent impl: `impl crate::c::Scoped` is a `generic_type` +// wrapping a `scoped_type_identifier`. 
tree-sitter-queries materializes NO +// @definition.impl node for this shape, so `fd` must stay orphaned (scoped-generic +// deferred, #1992) — the owner walk must NOT mint a phantom `c.Scoped` owner. +pub mod c { + pub struct Scoped { v: T } +} +pub mod d { + impl crate::c::Scoped { + pub fn fd(&self) {} + } +} diff --git a/gitnexus/test/fixtures/rust-captures-golden/expected-captures.json b/gitnexus/test/fixtures/rust-captures-golden/expected-captures.json index e9a735553b..e74e2db387 100644 --- a/gitnexus/test/fixtures/rust-captures-golden/expected-captures.json +++ b/gitnexus/test/fixtures/rust-captures-golden/expected-captures.json @@ -319,6 +319,10 @@ "captureGroups": 18, "digest": "3326eb4f82b1559b6afec497dc52cab734e6f3209501a4bd982bf5eab9ec6dba" }, + "rust-nested-tail-collision-generic/lib.rs": { + "captureGroups": 29, + "digest": "1bfdaaf207a83924fc25d2eec0a47e5807754bbd0de499b81adea48342c6e687" + }, "rust-nested-tail-collision/lib.rs": { "captureGroups": 17, "digest": "2fc1fe1eb4e8727a89ab283ae34a0ae8df0c421551a7bd5e6e7ffb9d4aa54189" diff --git a/gitnexus/test/integration/resolvers/cpp.test.ts b/gitnexus/test/integration/resolvers/cpp.test.ts index 5b9dc58cad..0b3ac2a29d 100644 --- a/gitnexus/test/integration/resolvers/cpp.test.ts +++ b/gitnexus/test/integration/resolvers/cpp.test.ts @@ -3915,6 +3915,150 @@ describe('C++ inline nested same-tail collision — worker path parity (issue #1 }); }); +// --------------------------------------------------------------------------- +// Named-union nested same-tail collision — distinct qualified nodes (issue #1995) +// +// `union U1 { struct Inner {...} }` + `union U2 { struct Inner {...} }` must +// materialize TWO distinct Struct nodes (qn U1.Inner / U2.Inner). `union_specifier` +// was missing from cppClassConfig.ancestorScopeNodeTypes, so both Inner structs +// qualified to the bare `Inner` and merged (dangling:0 but wrong). 
Mirrors the +// #1978 inline-collision template; positive owner-identity, not just dangle-free. +// --------------------------------------------------------------------------- + +describe('C++ named-union nested same-tail collision — distinct qualified nodes (issue #1995)', () => { + let result: PipelineResult; + + beforeAll(async () => { + result = await runPipelineFromRepo( + path.join(FIXTURES, 'cpp-union-nested-tail-collision'), + () => {}, + ); + }, 60000); + + it('materializes U1.Inner and U2.Inner as two distinct Struct nodes [#1995-union]', () => { + const qns = getNodesByLabelFull(result, 'Struct') + .map((n) => n.properties.qualifiedName) + .filter((q) => q === 'U1.Inner' || q === 'U2.Inner') + .sort(); + expect(qns).toEqual(['U1.Inner', 'U2.Inner']); + }); + + it('owns from_u1 / from_u2 through their OWN distinct node (positive identity) [#1995-union]', () => { + expect(findDanglingEdges(result, ['HAS_METHOD'])).toEqual([]); + const hm = getRelationships(result, 'HAS_METHOD'); + const ownerQn = (target: string) => { + const e = hm.find((x) => x.target === target); + expect(e, `HAS_METHOD -> ${target}`).toBeDefined(); + return result.graph.getNode(e!.rel.sourceId)?.properties.qualifiedName; + }; + expect(ownerQn('from_u1')).toBe('U1.Inner'); + expect(ownerQn('from_u2')).toBe('U2.Inner'); + }); +}); + +// Worker-path parity for the named-union collision (parse-worker.ts must qualify +// the union scope byte-identically to the sequential parser). 
+describe('C++ named-union nested same-tail collision — worker path parity (issue #1995)', () => { + let result: PipelineResult; + + beforeAll(async () => { + result = await runPipelineFromRepo( + path.join(FIXTURES, 'cpp-union-nested-tail-collision'), + () => {}, + { workerThresholdsForTest: { minFiles: 1, minBytes: 1 }, workerPoolSize: 2 }, + ); + }, 120000); + + it('genuinely used the worker pool [#1995-union]', () => { + expect(result.usedWorkerPool).toBe(true); + }); + + it('materializes U1.Inner / U2.Inner and owns each method on the worker path [#1995-union]', () => { + const qns = getNodesByLabelFull(result, 'Struct') + .map((n) => n.properties.qualifiedName) + .filter((q) => q === 'U1.Inner' || q === 'U2.Inner') + .sort(); + expect(qns).toEqual(['U1.Inner', 'U2.Inner']); + expect(findDanglingEdges(result, ['HAS_METHOD'])).toEqual([]); + const hm = getRelationships(result, 'HAS_METHOD'); + const ownerQn = (target: string) => + result.graph.getNode(hm.find((x) => x.target === target)!.rel.sourceId)?.properties + .qualifiedName; + expect(ownerQn('from_u1')).toBe('U1.Inner'); + expect(ownerQn('from_u2')).toBe('U2.Inner'); + }); +}); + +// --------------------------------------------------------------------------- +// Anonymous-namespace nested same-tail collision — distinct nodes (issue #1995) +// +// Two `namespace { struct Inner {...} }` blocks must materialize TWO distinct +// Struct nodes. An anonymous namespace_definition has no `name` child, so both +// Inner structs qualified to the bare `Inner` and merged. A C++ extractScopeSegments +// override gives each anon block a deterministic start-byte discriminator. The +// discriminator value is not portable, so assert on node DISTINCTNESS (count==2 / +// distinct owner ids), never a literal qualifiedName. 
+// --------------------------------------------------------------------------- + +describe('C++ anonymous-namespace nested same-tail collision — distinct nodes (issue #1995)', () => { + let result: PipelineResult; + + beforeAll(async () => { + result = await runPipelineFromRepo(path.join(FIXTURES, 'cpp-anon-ns-tail-collision'), () => {}); + }, 60000); + + it('materializes two distinct Struct Inner nodes (one per anon namespace) [#1995-anon]', () => { + const innerQns = getNodesByLabelFull(result, 'Struct') + .map((n) => n.properties.qualifiedName) + .filter((q): q is string => typeof q === 'string' && q.endsWith('Inner')); + // Start-byte discriminator → assert DISTINCTNESS, not a literal value. Pre-fix + // both Inner structs merge onto one bare `Inner` node (set size 1). + expect(new Set(innerQns).size).toBe(2); + }); + + it('owns from_anon_a / from_anon_b through DISTINCT nodes (no merge) [#1995-anon]', () => { + expect(findDanglingEdges(result, ['HAS_METHOD'])).toEqual([]); + const hm = getRelationships(result, 'HAS_METHOD'); + const a = hm.find((x) => x.target === 'from_anon_a'); + const b = hm.find((x) => x.target === 'from_anon_b'); + expect(a, 'HAS_METHOD -> from_anon_a').toBeDefined(); + expect(b, 'HAS_METHOD -> from_anon_b').toBeDefined(); + expect(a!.rel.sourceId).not.toBe(b!.rel.sourceId); + }); +}); + +// Worker-path parity for the anonymous-namespace collision: the start-byte +// discriminator must be deterministic across the worker's full-file parse. 
+describe('C++ anonymous-namespace nested same-tail collision — worker path parity (issue #1995)', () => { + let result: PipelineResult; + + beforeAll(async () => { + result = await runPipelineFromRepo( + path.join(FIXTURES, 'cpp-anon-ns-tail-collision'), + () => {}, + { workerThresholdsForTest: { minFiles: 1, minBytes: 1 }, workerPoolSize: 2 }, + ); + }, 120000); + + it('genuinely used the worker pool [#1995-anon]', () => { + expect(result.usedWorkerPool).toBe(true); + }); + + it('materializes two distinct anon Inner nodes and owns each method on the worker path [#1995-anon]', () => { + const innerQns = getNodesByLabelFull(result, 'Struct') + .map((n) => n.properties.qualifiedName) + .filter((q): q is string => typeof q === 'string' && q.endsWith('Inner')); + expect(new Set(innerQns).size).toBe(2); + expect(findDanglingEdges(result, ['HAS_METHOD'])).toEqual([]); + const hm = getRelationships(result, 'HAS_METHOD'); + const a = hm.find((x) => x.target === 'from_anon_a'); + const b = hm.find((x) => x.target === 'from_anon_b'); + expect(a, 'HAS_METHOD -> from_anon_a').toBeDefined(); + expect(b, 'HAS_METHOD -> from_anon_b').toBeDefined(); + expect(a!.rel.sourceId).not.toBe(b!.rel.sourceId); + }); +}); + // --------------------------------------------------------------------------- // Inline nested same-tail HERITAGE — qualified base resolution (issue #1982) // diff --git a/gitnexus/test/integration/resolvers/rust.test.ts b/gitnexus/test/integration/resolvers/rust.test.ts index 8a99119a09..9348aed85f 100644 --- a/gitnexus/test/integration/resolvers/rust.test.ts +++ b/gitnexus/test/integration/resolvers/rust.test.ts @@ -2102,6 +2102,87 @@ describe('Rust inline mod-nested same-tail collision — distinct nodes (issue # }); }); +// --------------------------------------------------------------------------- +// #1992: GENERIC inherent-impl ownership — `impl Inner` methods own through +// the mod-qualified Impl node, not orphaned to File. 
+// +// PR #1981 / `bc4a560d` qualified the UNSCOPED bare `impl Inner` target. A GENERIC +// inherent-impl target (`impl<T> Inner<T>`) is a `generic_type` node, which the +// inherent-impl owner walk (ast-helpers `findEnclosingClassInfo`) did not match — +// so the walk returned null and the method got `File -> DEFINES` with NO HAS_METHOD +// (orphaned; invisible to findDanglingEdges). The Impl NODE was already correctly +// mod-qualified (the @name capture drills into the inner type_identifier, +// tree-sitter-queries.ts), so the fix is owner-walk-only and the owner id == the +// node id (`a.Inner` / `b.Inner`) by construction. Holds on both resolver legs +// (structure-phase). +// --------------------------------------------------------------------------- + +describe('Rust generic inherent-impl same-tail ownership — distinct nodes (issue #1992)', () => { + let result: PipelineResult; + + beforeAll(async () => { + result = await runPipelineFromRepo( + path.join(FIXTURES, 'rust-nested-tail-collision-generic'), + () => {}, + ); + }, 60000); + + it('owns fa / fb through distinct mod-qualified Impl nodes (generic impl, no orphan)', () => { + const hm = getRelationships(result, 'HAS_METHOD'); + const a = hm.find((e) => e.target === 'fa'); + const b = hm.find((e) => e.target === 'fb'); + // Pre-fix the generic-impl owner walk returns null, so fa/fb orphan to File + // (File -> DEFINES, no HAS_METHOD) — toBeDefined() fails on the pre-fix base. + expect(a, 'HAS_METHOD -> fa').toBeDefined(); + expect(b, 'HAS_METHOD -> fb').toBeDefined(); + // Owner id is the mod-qualified Impl node, byte-identical to the node id. 
+ expect(a!.rel.sourceId).not.toBe(b!.rel.sourceId); + expect(a!.rel.sourceId).toContain('a.Inner'); + expect(b!.rel.sourceId).toContain('b.Inner'); + expect(findDanglingEdges(result, ['HAS_METHOD'])).toEqual([]); + }); + + // R6: scoped-generic `impl crate::c::Scoped` materializes no Impl node, so + // `fd` must NOT own through a phantom `c.Scoped` node — it stays orphaned + // (deferred). Guards against the owner walk minting an owner id for an + // unmaterialized node. + it('does not mint a phantom owner for a scoped-generic impl (fd orphaned, deferred)', () => { + const hm = getRelationships(result, 'HAS_METHOD'); + expect(hm.find((e) => e.target === 'fd')).toBeUndefined(); + }); +}); + +// Same fixture forced through the WORKER pool (parse-worker.ts). The inherent-impl +// owner walk is shared structure-phase logic, so generic-impl ownership must hold +// on BOTH the sequential and worker paths. +describe('Rust generic inherent-impl ownership — worker path parity (issue #1992)', () => { + let result: PipelineResult; + + beforeAll(async () => { + result = await runPipelineFromRepo( + path.join(FIXTURES, 'rust-nested-tail-collision-generic'), + () => {}, + { workerThresholdsForTest: { minFiles: 1, minBytes: 1 }, workerPoolSize: 2 }, + ); + }, 120000); + + it('genuinely used the worker pool', () => { + expect(result.usedWorkerPool).toBe(true); + }); + + it('owns fa / fb through distinct mod-qualified Impl nodes on the worker path', () => { + const hm = getRelationships(result, 'HAS_METHOD'); + const a = hm.find((e) => e.target === 'fa'); + const b = hm.find((e) => e.target === 'fb'); + expect(a, 'HAS_METHOD -> fa').toBeDefined(); + expect(b, 'HAS_METHOD -> fb').toBeDefined(); + expect(a!.rel.sourceId).not.toBe(b!.rel.sourceId); + expect(a!.rel.sourceId).toContain('a.Inner'); + expect(b!.rel.sourceId).toContain('b.Inner'); + expect(findDanglingEdges(result, ['HAS_METHOD'])).toEqual([]); + }); +}); + // 
--------------------------------------------------------------------------- // F71 — union declarations resolve as Struct nodes (issue #1934) //