Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 24 additions & 1 deletion gitnexus/src/core/ingestion/class-extractors/configs/c-cpp.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,33 @@ export const cClassConfig: ClassExtractionConfig = {
export const cppClassConfig: ClassExtractionConfig = {
language: SupportedLanguages.CPlusPlus,
typeDeclarationNodes: ['class_specifier', 'struct_specifier', 'enum_specifier'],
ancestorScopeNodeTypes: ['namespace_definition', 'class_specifier', 'struct_specifier'],
// #1995: `union_specifier` is included so a type nested in a NAMED union
// (`union U1 { struct Inner {...} }`) qualifies as `U1.Inner`. Anonymous unions
// have no `name` child → extractScopeSegmentsFromNode returns [] → they correctly
// contribute nothing (members inject into the enclosing scope). C uses the
// separate cClassConfig (no qualifiedNodeId), so it is intentionally untouched.
ancestorScopeNodeTypes: [
'namespace_definition',
'class_specifier',
'struct_specifier',
'union_specifier',
],
// #1978: key nested-type nodes by their fully-qualified path (Outer.Inner) so
// same-tail nested types in one TU stay distinct instead of silently merging.
qualifiedNodeId: true,
// #1995: anonymous namespaces have no `name` child, so the generic scope walker
// drops them (empty segment) and two `namespace { struct Inner {} }` blocks in one
// TU collapse onto a single `Inner` node. Give each anonymous namespace_definition
// a deterministic per-block discriminator (its start byte — stable across the
// sequential and worker full-file parses) so the nested types stay distinct.
// Returning `undefined` for every other scope — named namespaces (incl. `inline
// namespace`), classes, structs, named unions — falls through to the default
// name-based extraction, leaving them unchanged. Anonymous UNIONS are not matched
// here (members inject into the enclosing scope), so they keep yielding [].
extractScopeSegments: (node) =>
node.type === 'namespace_definition' && !node.childForFieldName?.('name')
? [`@anon${node.startIndex}`]
: undefined,
extractName: (node) => {
const nameNode = node.childForFieldName?.('name');
if (!nameNode) return undefined;
Expand Down
48 changes: 37 additions & 11 deletions gitnexus/src/core/ingestion/utils/ast-helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -503,18 +503,44 @@ export const findEnclosingClassInfo = (
// different mods own through DISTINCT nodes. The Impl-node
// materialization (parsing-processor / parse-worker) mirrors this, so
// the owner id == the Impl node id byte-for-byte (#1982).
const firstType = children.find(
(c: SyntaxNode) => c.type === 'type_identifier' || c.type === 'scoped_type_identifier',
// - GENERIC (`impl<T> Inner<T>`, generic_type): the @definition.impl
// node is materialized only when the generic base is a bare
// `type_identifier` (tree-sitter-queries.ts), qualified the same way —
// so drill into the base and mirror that gate, keeping the owner id ==
// the node id byte-for-byte (#1992). A generic over a SCOPED base
// (`impl<T> a::Inner<T>`) materializes NO node, so it must produce NO
// owner (the method orphans — scoped-generic deferred, #1992).
const implTarget = children.find(
(c: SyntaxNode) =>
c.type === 'type_identifier' ||
c.type === 'scoped_type_identifier' ||
c.type === 'generic_type',
);
if (firstType) {
const ownerKey =
firstType.type === 'type_identifier'
? qualifyRustImplTargetByModScope(current, firstType.text)
: firstType.text;
return {
classId: generateId('Impl', `${filePath}:${ownerKey}`),
className: firstType.text,
};
if (implTarget) {
const baseType =
implTarget.type === 'generic_type'
? (implTarget.childForFieldName?.('type') ?? null)
: implTarget;
if (baseType?.type === 'type_identifier') {
// Bare target (`impl Inner` or `impl<T> Inner<T>`): qualify by mod scope.
return {
classId: generateId(
'Impl',
`${filePath}:${qualifyRustImplTargetByModScope(current, baseType.text)}`,
),
className: baseType.text,
};
}
if (baseType?.type === 'scoped_type_identifier' && implTarget.type !== 'generic_type') {
// Top-level scoped `impl a::Inner`: key by full raw text (#1975).
return {
classId: generateId('Impl', `${filePath}:${baseType.text}`),
className: baseType.text,
};
}
// generic-over-scoped (`impl<T> a::Inner<T>`) and any other base: fall
// through with no owner — no @definition.impl node exists, so attributing
// a method to a synthesized id would orphan it against a phantom owner.
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// Same-tail structs in sibling ANONYMOUS namespaces (#1995).
//
// An anonymous `namespace { }` is a namespace_definition with no `name` child, so
// extractScopeSegmentsFromNode returns [] and both `Inner` structs qualified to the
// bare `Inner` and merged onto one node — from_anon_a / from_anon_b cross-wired. A
// deterministic per-block discriminator (derived from the namespace node's start
// byte) keeps the two blocks' types distinct.
// First anonymous namespace block: declares `Inner` with the single member
// function from_anon_a, which identifies this block's struct.
namespace {
struct Inner {
void from_anon_a() {}
};
}
// Second anonymous namespace block: declares another struct with the same
// name `Inner`; only its member function from_anon_b tells it apart.
// NOTE(review): in standard C++ every unnamed namespace in one translation unit
// reopens the same namespace, so a compiler would likely reject this second
// `Inner` as a redefinition — this file looks like a parser-only fixture; confirm
// it is never compiled.
namespace {
struct Inner {
void from_anon_b() {}
};
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// Same-tail structs nested in sibling NAMED unions (#1995).
//
// `union_specifier` was omitted from cppClassConfig.ancestorScopeNodeTypes, so a
// struct nested in `union U1` and one nested in `union U2` both qualified to the
// bare `Inner` and merged onto ONE Struct:...:Inner node — from_u1 / from_u2
// cross-wired (dangling:0 but wrong). With the union scope qualified they must
// materialize distinct `U1.Inner` / `U2.Inner` nodes.
// Named union U1 enclosing a nested struct `Inner` whose only member function
// is from_u1.
union U1 {
struct Inner {
void from_u1() {}
};
};
// Named union U2 enclosing a nested struct that is also called `Inner`; its
// only member function is from_u2.
union U2 {
struct Inner {
void from_u2() {}
};
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// #1992: GENERIC inherent-impl ownership. Two same-tail `Inner<T>` types under
// sibling mods, each with a generic inherent impl `impl<T> Inner<T>`. Their
// methods must own through DISTINCT mod-qualified Impl nodes (`a.Inner` /
// `b.Inner`), not orphan to File.
pub mod a {
pub struct Inner<T> { v: T }
impl<T> Inner<T> {
pub fn fa(&self) {}
}
}

pub mod b {
pub struct Inner<T> { v: T }
impl<T> Inner<T> {
pub fn fb(&self) {}
}
}

// Scoped-generic inherent impl: `impl<T> crate::c::Scoped<T>` is a `generic_type`
// wrapping a `scoped_type_identifier`. tree-sitter-queries materializes NO
// @definition.impl node for this shape, so `fd` must stay orphaned (scoped-generic
// deferred, #1992) — the owner walk must NOT mint a phantom `c.Scoped` owner.
pub mod c {
pub struct Scoped<T> { v: T }
}
pub mod d {
impl<T> crate::c::Scoped<T> {
pub fn fd(&self) {}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,10 @@
"captureGroups": 18,
"digest": "3326eb4f82b1559b6afec497dc52cab734e6f3209501a4bd982bf5eab9ec6dba"
},
"rust-nested-tail-collision-generic/lib.rs": {
"captureGroups": 29,
"digest": "1bfdaaf207a83924fc25d2eec0a47e5807754bbd0de499b81adea48342c6e687"
},
"rust-nested-tail-collision/lib.rs": {
"captureGroups": 17,
"digest": "2fc1fe1eb4e8727a89ab283ae34a0ae8df0c421551a7bd5e6e7ffb9d4aa54189"
Expand Down
144 changes: 144 additions & 0 deletions gitnexus/test/integration/resolvers/cpp.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3915,6 +3915,150 @@ describe('C++ inline nested same-tail collision — worker path parity (issue #1
});
});

// ---------------------------------------------------------------------------
// Named-union nested same-tail collision — distinct qualified nodes (issue #1995)
//
// `union U1 { struct Inner {...} }` + `union U2 { struct Inner {...} }` must
// materialize TWO distinct Struct nodes (qn U1.Inner / U2.Inner). `union_specifier`
// was missing from cppClassConfig.ancestorScopeNodeTypes, so both Inner structs
// qualified to the bare `Inner` and merged (dangling:0 but wrong). Mirrors the
// #1978 inline-collision template; positive owner-identity, not just dangle-free.
// ---------------------------------------------------------------------------

describe('C++ named-union nested same-tail collision — distinct qualified nodes (issue #1995)', () => {
  let result: PipelineResult;

  // Run the pipeline once over the named-union fixture; every case below reads
  // from this shared result.
  beforeAll(async () => {
    const fixtureDir = path.join(FIXTURES, 'cpp-union-nested-tail-collision');
    result = await runPipelineFromRepo(fixtureDir, () => {});
  }, 60000);

  it('materializes U1.Inner and U2.Inner as two distinct Struct nodes [#1995-union]', () => {
    const structNames = getNodesByLabelFull(result, 'Struct').map(
      (node) => node.properties.qualifiedName,
    );
    // Keep only the two union-nested structs, sorted so the comparison is
    // independent of graph iteration order.
    const unionInners = structNames
      .filter((name) => name === 'U1.Inner' || name === 'U2.Inner')
      .sort();
    expect(unionInners).toEqual(['U1.Inner', 'U2.Inner']);
  });

  it('owns from_u1 / from_u2 through their OWN distinct node (positive identity) [#1995-union]', () => {
    expect(findDanglingEdges(result, ['HAS_METHOD'])).toEqual([]);
    const methodEdges = getRelationships(result, 'HAS_METHOD');

    // Qualified name of the node at the source end of the HAS_METHOD edge that
    // points at `target`; asserts the edge exists before dereferencing it.
    const ownerQn = (target: string) => {
      const edge = methodEdges.find((candidate) => candidate.target === target);
      expect(edge, `HAS_METHOD -> ${target}`).toBeDefined();
      const owner = result.graph.getNode(edge!.rel.sourceId);
      return owner?.properties.qualifiedName;
    };

    expect(ownerQn('from_u1')).toBe('U1.Inner');
    expect(ownerQn('from_u2')).toBe('U2.Inner');
  });
});

// Worker-path parity for the named-union collision (parse-worker.ts must qualify
// the union scope byte-identically to the sequential parser).
describe('C++ named-union nested same-tail collision — worker path parity (issue #1995)', () => {
  let result: PipelineResult;

  // Same fixture as the sequential suite, but with worker thresholds lowered to
  // 1 file / 1 byte and a pool of 2 so the worker path is exercised.
  beforeAll(async () => {
    result = await runPipelineFromRepo(
      path.join(FIXTURES, 'cpp-union-nested-tail-collision'),
      () => {},
      { workerThresholdsForTest: { minFiles: 1, minBytes: 1 }, workerPoolSize: 2 },
    );
  }, 120000);

  it('genuinely used the worker pool [#1995-union]', () => {
    expect(result.usedWorkerPool).toBe(true);
  });

  it('materializes U1.Inner / U2.Inner and owns each method on the worker path [#1995-union]', () => {
    const qns = getNodesByLabelFull(result, 'Struct')
      .map((n) => n.properties.qualifiedName)
      .filter((q) => q === 'U1.Inner' || q === 'U2.Inner')
      .sort();
    expect(qns).toEqual(['U1.Inner', 'U2.Inner']);
    expect(findDanglingEdges(result, ['HAS_METHOD'])).toEqual([]);
    const hm = getRelationships(result, 'HAS_METHOD');

    // Qualified name of the node owning `target` via HAS_METHOD. The edge is
    // asserted to exist first (same guard as the sequential suite) so a missing
    // edge fails with a labelled assertion instead of a TypeError on `undefined`.
    const ownerQn = (target: string) => {
      const e = hm.find((x) => x.target === target);
      expect(e, `HAS_METHOD -> ${target}`).toBeDefined();
      return result.graph.getNode(e!.rel.sourceId)?.properties.qualifiedName;
    };
    expect(ownerQn('from_u1')).toBe('U1.Inner');
    expect(ownerQn('from_u2')).toBe('U2.Inner');
  });
});

// ---------------------------------------------------------------------------
// Anonymous-namespace nested same-tail collision — distinct nodes (issue #1995)
//
// Two `namespace { struct Inner {...} }` blocks must materialize TWO distinct
// Struct nodes. An anonymous namespace_definition has no `name` child, so both
// Inner structs qualified to the bare `Inner` and merged. A C++ extractScopeSegments
// override gives each anon block a deterministic start-byte discriminator. The
// discriminator value is not portable, so assert on node DISTINCTNESS (count==2 /
// distinct owner ids), never a literal qualifiedName.
// ---------------------------------------------------------------------------

describe('C++ anonymous-namespace nested same-tail collision — distinct nodes (issue #1995)', () => {
  let result: PipelineResult;

  // Run the pipeline once over the anonymous-namespace fixture.
  beforeAll(async () => {
    const fixtureDir = path.join(FIXTURES, 'cpp-anon-ns-tail-collision');
    result = await runPipelineFromRepo(fixtureDir, () => {});
  }, 60000);

  it('materializes two distinct Struct Inner nodes (one per anon namespace) [#1995-anon]', () => {
    const structNames = getNodesByLabelFull(result, 'Struct').map(
      (node) => node.properties.qualifiedName,
    );
    const innerNames = structNames.filter(
      (name): name is string => typeof name === 'string' && name.endsWith('Inner'),
    );
    // The per-block discriminator is derived from a byte offset, so only the
    // number of distinct qualified names is checked, never a literal value.
    const distinctInnerNames = new Set(innerNames);
    expect(distinctInnerNames.size).toBe(2);
  });

  it('owns from_anon_a / from_anon_b through DISTINCT nodes (no merge) [#1995-anon]', () => {
    expect(findDanglingEdges(result, ['HAS_METHOD'])).toEqual([]);
    const methodEdges = getRelationships(result, 'HAS_METHOD');
    // Look both edges up first, then assert on them in a/b order.
    const [edgeA, edgeB] = ['from_anon_a', 'from_anon_b'].map((method) =>
      methodEdges.find((edge) => edge.target === method),
    );
    expect(edgeA, 'HAS_METHOD -> from_anon_a').toBeDefined();
    expect(edgeB, 'HAS_METHOD -> from_anon_b').toBeDefined();
    // Distinct source ids mean the two methods are owned by different nodes.
    expect(edgeA!.rel.sourceId).not.toBe(edgeB!.rel.sourceId);
  });
});

// Worker-path parity for the anonymous-namespace collision: the start-byte
// discriminator must be deterministic across the worker's full-file parse.
describe('C++ anonymous-namespace nested same-tail collision — worker path parity (issue #1995)', () => {
  let result: PipelineResult;

  // Worker thresholds are lowered to 1 file / 1 byte with a pool of 2 so the
  // fixture goes through the worker path.
  beforeAll(async () => {
    const fixtureDir = path.join(FIXTURES, 'cpp-anon-ns-tail-collision');
    const workerOptions = {
      workerThresholdsForTest: { minFiles: 1, minBytes: 1 },
      workerPoolSize: 2,
    };
    result = await runPipelineFromRepo(fixtureDir, () => {}, workerOptions);
  }, 120000);

  it('genuinely used the worker pool [#1995-anon]', () => {
    expect(result.usedWorkerPool).toBe(true);
  });

  it('materializes two distinct anon Inner nodes and owns each method on the worker path [#1995-anon]', () => {
    const structNames = getNodesByLabelFull(result, 'Struct').map(
      (node) => node.properties.qualifiedName,
    );
    const innerNames = structNames.filter(
      (name): name is string => typeof name === 'string' && name.endsWith('Inner'),
    );
    // Distinctness only: the discriminator value itself is not asserted.
    expect(new Set(innerNames).size).toBe(2);

    expect(findDanglingEdges(result, ['HAS_METHOD'])).toEqual([]);
    const methodEdges = getRelationships(result, 'HAS_METHOD');
    // Look both edges up first, then assert on them in a/b order.
    const [edgeA, edgeB] = ['from_anon_a', 'from_anon_b'].map((method) =>
      methodEdges.find((edge) => edge.target === method),
    );
    expect(edgeA, 'HAS_METHOD -> from_anon_a').toBeDefined();
    expect(edgeB, 'HAS_METHOD -> from_anon_b').toBeDefined();
    expect(edgeA!.rel.sourceId).not.toBe(edgeB!.rel.sourceId);
  });
});

// ---------------------------------------------------------------------------
// Inline nested same-tail HERITAGE — qualified base resolution (issue #1982)
//
Expand Down
Loading
Loading