diff --git a/gitnexus/src/core/ingestion/class-extractors/generic.ts b/gitnexus/src/core/ingestion/class-extractors/generic.ts index 6cfc7a3c08..39e060e008 100644 --- a/gitnexus/src/core/ingestion/class-extractors/generic.ts +++ b/gitnexus/src/core/ingestion/class-extractors/generic.ts @@ -164,6 +164,14 @@ export function createClassExtractor(config: ClassExtractionConfig): ClassExtrac return extract(node, { name: simpleName })?.qualifiedName ?? null; }, + // #1991: qualify a non-typeDeclaration scope node (e.g. a Ruby `module` → Trait) + // by the same ancestor-scope walk the node-id path uses, so two same-tail nested + // mixin modules stay distinct. extract()/extractQualifiedName cannot be reused — + // they bail on non-typeDeclarations (a module is not in typeDeclarationNodes). + qualifyScopeName(node: SyntaxNode, simpleName: string): string { + return buildQualifiedName(node, simpleName); + }, + shouldSkipClassCapture(context): boolean { return config.shouldSkipClassCapture?.(context) ?? false; }, diff --git a/gitnexus/src/core/ingestion/class-types.ts b/gitnexus/src/core/ingestion/class-types.ts index 2e3d1f6886..f71fa55316 100644 --- a/gitnexus/src/core/ingestion/class-types.ts +++ b/gitnexus/src/core/ingestion/class-types.ts @@ -44,6 +44,14 @@ export interface ClassExtractor { }, ): ExtractedClassSymbol | null; extractQualifiedName(node: SyntaxNode, simpleName: string): string | null; + /** + * #1991: qualify a scope-defining node that maps to a class-like registry label + * (e.g. a Ruby `module` → Trait) but is NOT a typeDeclaration, so it cannot go + * through extract()/extractQualifiedName (which bail on non-typeDeclarations). + * Walks the same ancestor scopes as the node-id path. Optional — only providers + * that materialize such nodes implement it. + */ + qualifyScopeName?(node: SyntaxNode, simpleName: string): string; shouldSkipClassCapture?( context: ClassCaptureContext & { nodeLabel: ClassLikeNodeLabel }, ): boolean; diff --git a/gitnexus/src/core/ingestion/languages/ruby/scope-resolver.ts b/gitnexus/src/core/ingestion/languages/ruby/scope-resolver.ts index afefb0c9d3..6a3152eed7 100644 --- a/gitnexus/src/core/ingestion/languages/ruby/scope-resolver.ts +++ b/gitnexus/src/core/ingestion/languages/ruby/scope-resolver.ts @@ -13,21 +13,42 @@ import { generateId } from '../../../../lib/utils.js'; const HERITAGE_PREFIX = '__heritage__:'; const PROPERTY_PREFIX = '__property__:'; +/** + * #1991: resolve a BARE mixin reference (`include Loggable`) to a nested module by + * the INCLUDING class's lexical scope — Ruby looks up a constant in the innermost + * enclosing scope first. For owner `App.S`, try `App.Loggable`, then walk outward. + * Returns undefined if no enclosing-scope-qualified module exists. + */ +function qualifyMixinByOwnerScope( + mixinName: string, + ownerName: string, + graphIdByName: ReadonlyMap, +): string | undefined { + let prefix = ownerName; + let dot = prefix.lastIndexOf('.'); + while (dot !== -1) { + prefix = prefix.slice(0, dot); + const g = graphIdByName.get(`${prefix}.${mixinName}`); + if (g !== undefined) return g; + dot = prefix.lastIndexOf('.'); + } + return undefined; +} + function emitRubyMixinEdges( graph: KnowledgeGraph, parsedFiles: readonly ParsedFile[], nodeLookup: GraphNodeLookup, ): void { const graphIdByName = new Map(); - // Secondary tail -> graphId map (first-wins). The `__heritage__` marker carries - // the mixin TARGET as the bare written name (`arg.text`, e.g. `Loggable`), not - // its full qualifiedName, so a nested mixin module included by its short name - // (`include Loggable` where it is `App::Loggable`) misses the full-qn map and - // its IMPLEMENTS edge is silently dropped (#1982 follow-up). The tail fallback - // recovers it. OWNER (`className`) lookups stay full-qn only, preserving - // same-tail owner disambiguation; only the under-qualified mixin reference - // falls back, and a genuine same-tail mixin tie there resolves first-wins. - const graphIdByTail = new Map(); + // Secondary tail -> graphId map. The `__heritage__` marker carries the mixin + // TARGET as the bare written name (`arg.text`, e.g. `Loggable`), not its full + // qualifiedName, so a nested mixin module included by its short name misses the + // full-qn map. We first resolve it lexically by the including class's enclosing + // scope (`qualifyMixinByOwnerScope`); this tail map is the last resort. A genuine + // same-tail collision is mapped to `null` so we REFUSE to guess (#1991) rather + // than the old first-wins, which cross-wired App::Loggable / Web::Loggable. + const graphIdByTail = new Map(); for (const parsed of parsedFiles) { for (const def of parsed.localDefs) { if (!isClassLike(def.type)) continue; @@ -43,7 +64,12 @@ function emitRubyMixinEdges( graphIdByName.set(fullName, graphId); const dot = fullName.lastIndexOf('.'); const tail = dot === -1 ? fullName : fullName.slice(dot + 1); - if (tail.length > 0 && !graphIdByTail.has(tail)) graphIdByTail.set(tail, graphId); + if (tail.length > 0) { + const existingTail = graphIdByTail.get(tail); + if (existingTail === undefined) graphIdByTail.set(tail, graphId); + else if (existingTail !== null && existingTail !== graphId) + graphIdByTail.set(tail, null); // same-tail collision — refuse to guess + } } } } @@ -64,9 +90,15 @@ function emitRubyMixinEdges( if (parts.length < 3) continue; const [kind, mixinName, className] = parts; const classGraphId = graphIdByName.get(className!); - // Owner stays full-qn; the mixin target may be written by short name and - // miss the full-qn map, so fall back to the simple-tail map (#1982). - const mixinGraphId = graphIdByName.get(mixinName!) ?? graphIdByTail.get(mixinName!); + // Owner stays full-qn. The mixin target may be written by short name and miss + // the full-qn map; resolve it lexically by the including class's enclosing + // scope (`App::S` + `Loggable` -> `App::Loggable`), then fall back to the tail + // map ONLY when unambiguous — never first-wins on a collision (#1982/#1991). + const mixinGraphId = + graphIdByName.get(mixinName!) ?? + qualifyMixinByOwnerScope(mixinName!, className!, graphIdByName) ?? + graphIdByTail.get(mixinName!) ?? + undefined; if (classGraphId === undefined || mixinGraphId === undefined) continue; const edgeKey = `${classGraphId}->${mixinGraphId}:${kind}`; if (emitted.has(edgeKey)) continue; diff --git a/gitnexus/src/core/ingestion/parsing-processor.ts b/gitnexus/src/core/ingestion/parsing-processor.ts index b2b7f10184..7dfa4b1083 100644 --- a/gitnexus/src/core/ingestion/parsing-processor.ts +++ b/gitnexus/src/core/ingestion/parsing-processor.ts @@ -617,7 +617,13 @@ const processParsingSequential = async ( const getQualifiedOwnerName = provider.classExtractor?.qualifiedNodeId === true ? (node: SyntaxNode, simpleName: string): string | null => - provider.classExtractor!.extractQualifiedName(node, simpleName) + // #1991: a Ruby `module` owner is not a typeDeclaration, so + // extractQualifiedName returns null; fall back to the scope walk so a + // method inside a nested module owns through the SAME qualified Trait + // id its node uses (App.Loggable), not a dangling bare id. + provider.classExtractor!.extractQualifiedName(node, simpleName) ?? + provider.classExtractor!.qualifyScopeName?.(node, simpleName) ?? + null : undefined; const enclosingClassInfo = needsOwner ? cachedFindEnclosingClassInfo( @@ -644,7 +650,16 @@ const processParsingSequential = async ( extractedClassSymbol?.qualifiedName ?? (classNodeForSymbol && provider.classExtractor?.isTypeDeclaration(classNodeForSymbol) ? (provider.classExtractor.extractQualifiedName(classNodeForSymbol, nodeName) ?? nodeName) - : undefined); + : // #1991: a Ruby `module` maps to Trait (class-like registry) but is not a + // typeDeclaration, so extractQualifiedName bails. Qualify it via the scope + // walk so two same-tail nested mixin modules get distinct ids. Gated on + // qualifiedNodeId, so languages without the flag are unaffected. + nodeLabel === 'Trait' && + provider.classExtractor?.qualifiedNodeId === true && + classNodeForSymbol + ? (provider.classExtractor.qualifyScopeName?.(classNodeForSymbol, nodeName) ?? + undefined) + : undefined); // Qualify method/property IDs with enclosing class name to avoid collisions // e.g. "Method:animal.dart:Animal.speak" vs "Method:animal.dart:Dog.speak". @@ -667,7 +682,10 @@ const processParsingSequential = async ( const qualifiedName = rustImplQualifiedName !== undefined ? rustImplQualifiedName - : isClassLikeLabel && + : // #1991: include Trait so a Ruby mixin module's qualified scope id keys + // the node, mirroring the class-like path (qualifiedTypeName is computed + // for Trait above). + (isClassLikeLabel || nodeLabel === 'Trait') && provider.classExtractor?.qualifiedNodeId === true && qualifiedTypeName !== undefined ? qualifiedTypeName diff --git a/gitnexus/src/core/ingestion/workers/parse-worker.ts b/gitnexus/src/core/ingestion/workers/parse-worker.ts index e53ec3ab0d..8a01b92560 100644 --- a/gitnexus/src/core/ingestion/workers/parse-worker.ts +++ b/gitnexus/src/core/ingestion/workers/parse-worker.ts @@ -1831,7 +1831,13 @@ const processFileGroup = ( const getQualifiedOwnerName = provider.classExtractor?.qualifiedNodeId === true ? (node: SyntaxNode, simpleName: string): string | null => - provider.classExtractor!.extractQualifiedName(node, simpleName) + // #1991: LOCKSTEP — a Ruby `module` owner is not a typeDeclaration, so + // extractQualifiedName returns null; fall back to the scope walk so a + // method inside a nested module owns through the SAME qualified Trait + // id its node uses on the worker path too. + provider.classExtractor!.extractQualifiedName(node, simpleName) ?? + provider.classExtractor!.qualifyScopeName?.(node, simpleName) ?? + null : undefined; const enclosingClassInfo = needsOwner ? cachedFindEnclosingClassInfo( @@ -1856,7 +1862,15 @@ const processFileGroup = ( extractedClassSymbol?.qualifiedName ?? (classNodeForSymbol && provider.classExtractor?.isTypeDeclaration(classNodeForSymbol) ? (provider.classExtractor.extractQualifiedName(classNodeForSymbol, nodeName) ?? nodeName) - : undefined); + : // #1991: LOCKSTEP with parsing-processor.ts — qualify a Ruby `module` + // (Trait) via the scope walk so same-tail nested mixin modules get + // distinct ids on the worker path too. Gated on qualifiedNodeId. + nodeLabel === 'Trait' && + provider.classExtractor?.qualifiedNodeId === true && + classNodeForSymbol + ? (provider.classExtractor.qualifyScopeName?.(classNodeForSymbol, nodeName) ?? + undefined) + : undefined); // Qualify method/property IDs with enclosing class name to avoid collisions. // Class-like nodes use their own fully-qualified path as the id key when the @@ -1874,7 +1888,9 @@ const processFileGroup = ( const qualifiedName = rustImplQualifiedName !== undefined ? rustImplQualifiedName - : isClassLikeLabel && + : // #1991: LOCKSTEP — include Trait so a Ruby mixin module's qualified + // scope id keys the worker-path node, matching the sequential path. + (isClassLikeLabel || nodeLabel === 'Trait') && provider.classExtractor?.qualifiedNodeId === true && qualifiedTypeName !== undefined ? qualifiedTypeName diff --git a/gitnexus/test/fixtures/lang-resolution/ruby-nested-mixin-tail-collision/app.rb b/gitnexus/test/fixtures/lang-resolution/ruby-nested-mixin-tail-collision/app.rb new file mode 100644 index 0000000000..8e709facad --- /dev/null +++ b/gitnexus/test/fixtures/lang-resolution/ruby-nested-mixin-tail-collision/app.rb @@ -0,0 +1,25 @@ +# Two same-tail NESTED mixin modules (App::Loggable + Web::Loggable), each included +# by a sibling class in the same enclosing module (#1991). The structure phase never +# qualified `module` (Trait) node ids, so both collapsed onto one Trait:app.rb:Loggable +# node and the bare-name mixin reference cross-wired IMPLEMENTS (first-wins tail). +# Single-file on purpose: the bare node id embeds file.path, so a cross-file split +# would not collide. S must IMPLEMENTS App::Loggable only; T → Web::Loggable only. +module App + module Loggable + def log; end + end + + class S + include Loggable + end +end + +module Web + module Loggable + def warn; end + end + + class T + include Loggable + end +end diff --git a/gitnexus/test/fixtures/ruby-captures-golden/expected-captures.json b/gitnexus/test/fixtures/ruby-captures-golden/expected-captures.json index 72c44d05ad..0046dcb34c 100644 --- a/gitnexus/test/fixtures/ruby-captures-golden/expected-captures.json +++ b/gitnexus/test/fixtures/ruby-captures-golden/expected-captures.json @@ -211,6 +211,10 @@ "captureGroups": 11, "digest": "dfa494facc56b5e07a12befc77cd1e3788f0494f1373960cd9fe88715750e590" }, + "ruby-nested-mixin-tail-collision/app.rb": { + "captureGroups": 21, + "digest": "b42c38446b3e5307cd79eec888d5faf9d9683d79bacdd9738f64871d2a1e8bbc" + }, "ruby-nested-tail-collision/nested.rb": { "captureGroups": 31, "digest": "c48ebe5516a0faf50effbad0a19fe29be70c371b50ba9d6fa6ae3f6f708b3a4e" diff --git a/gitnexus/test/integration/resolvers/helpers.ts b/gitnexus/test/integration/resolvers/helpers.ts index 09aa997eb0..b0df66e8e7 100644 --- a/gitnexus/test/integration/resolvers/helpers.ts +++ b/gitnexus/test/integration/resolvers/helpers.ts @@ -337,6 +337,12 @@ const LEGACY_RESOLVER_PARITY_EXPECTED_FAILURES: Readonly App.Loggable and T -> Web.Loggable (no cross-wire, R2)', + 'genuinely used the worker pool for the same-tail mixin-module fixture', + 'routes S -> App.Loggable and T -> Web.Loggable on the worker path (no cross-wire)', // #1982 follow-up: a nested mixin included by short name must not drop its // IMPLEMENTS edge. The fix (graphIdByTail fallback in emitRubyMixinEdges) is // registry-primary only; the legacy DAG does not use that bridge. diff --git a/gitnexus/test/integration/resolvers/ruby.test.ts b/gitnexus/test/integration/resolvers/ruby.test.ts index 763dfd3560..44bdcafd5a 100644 --- a/gitnexus/test/integration/resolvers/ruby.test.ts +++ b/gitnexus/test/integration/resolvers/ruby.test.ts @@ -1659,6 +1659,82 @@ describe('Ruby inline module-nested same-tail collision — worker path parity ( ); }); +// --------------------------------------------------------------------------- +// Same-tail NESTED mixin MODULE collision — distinct Trait nodes (issue #1991) +// +// `module App; module Loggable; class S; include Loggable; end; end` + +// `module Web; module Loggable; class T; include Loggable; end; end`. The +// structure phase never qualified `module` (Trait) node ids, so both Loggable +// modules collapsed onto one Trait:app.rb:Loggable node and the bare-name mixin +// reference cross-wired IMPLEMENTS (first-wins tail). Asserts two distinct Trait +// nodes and each class IMPLEMENTS its OWN module (positive target identity), not +// just dangle-free. The IMPLEMENTS routing is registry-primary. +// --------------------------------------------------------------------------- + +describe('Ruby same-tail nested mixin-module collision — distinct Trait nodes (issue #1991)', () => { + let result: PipelineResult; + + beforeAll(async () => { + result = await runPipelineFromRepo( + path.join(FIXTURES, 'ruby-nested-mixin-tail-collision'), + () => {}, + ); + }, 60000); + + it('materializes App.Loggable and Web.Loggable as two distinct Trait nodes', () => { + const qns = getNodesByLabelFull(result, 'Trait') + .map((n) => n.properties.qualifiedName) + .filter((q) => q === 'App.Loggable' || q === 'Web.Loggable') + .sort(); + expect(qns).toEqual(['App.Loggable', 'Web.Loggable']); + }); + + pit('routes S -> App.Loggable and T -> Web.Loggable (no cross-wire, R2)', () => { + expect(findDanglingEdges(result, ['IMPLEMENTS', 'HAS_METHOD'])).toEqual([]); + const impl = getRelationships(result, 'IMPLEMENTS'); + const targetQnOf = (className: string) => { + const e = impl.find((x) => x.source === className && x.target === 'Loggable'); + expect(e, `IMPLEMENTS from ${className}`).toBeDefined(); + return result.graph.getNode(e!.rel.targetId)?.properties.qualifiedName; + }; + expect(targetQnOf('S')).toBe('App.Loggable'); + expect(targetQnOf('T')).toBe('Web.Loggable'); + expect(impl.filter((x) => x.source === 'S')).toHaveLength(1); + expect(impl.filter((x) => x.source === 'T')).toHaveLength(1); + }); +}); + +// Same fixture through the WORKER pool — the __heritage__ marker owner + the +// qualified module node id must survive worker serialization (#1991 R2/R15). +describe('Ruby same-tail nested mixin-module collision — worker path parity (issue #1991)', () => { + let result: PipelineResult; + + beforeAll(async () => { + result = await runPipelineFromRepo( + path.join(FIXTURES, 'ruby-nested-mixin-tail-collision'), + () => {}, + { workerThresholdsForTest: { minFiles: 1, minBytes: 1 }, workerPoolSize: 2 }, + ); + }, 120000); + + pit('genuinely used the worker pool for the same-tail mixin-module fixture', () => { + expect(result.usedWorkerPool).toBe(true); + }); + + pit('routes S -> App.Loggable and T -> Web.Loggable on the worker path (no cross-wire)', () => { + const impl = getRelationships(result, 'IMPLEMENTS'); + const targetQnOf = (className: string) => { + const e = impl.find((x) => x.source === className && x.target === 'Loggable'); + expect(e, `IMPLEMENTS from ${className}`).toBeDefined(); + return result.graph.getNode(e!.rel.targetId)?.properties.qualifiedName; + }; + expect(targetQnOf('S')).toBe('App.Loggable'); + expect(targetQnOf('T')).toBe('Web.Loggable'); + expect(impl.filter((x) => x.source === 'S')).toHaveLength(1); + expect(impl.filter((x) => x.source === 'T')).toHaveLength(1); + }); +}); + // --------------------------------------------------------------------------- // Nested mixin included by SHORT name — IMPLEMENTS edge must not drop (#1982). //