diff --git a/gitnexus-shared/src/scope-resolution/symbol-definition.ts b/gitnexus-shared/src/scope-resolution/symbol-definition.ts index d1b30abcce..06814db3c2 100644 --- a/gitnexus-shared/src/scope-resolution/symbol-definition.ts +++ b/gitnexus-shared/src/scope-resolution/symbol-definition.ts @@ -59,4 +59,13 @@ export interface SymbolDefinition { isExplicit?: boolean; /** Links Method/Constructor/Property to owning Class/Struct/Trait nodeId */ ownerId?: string; + /** #1982/#1993: bridge-held enclosing-namespace path (e.g. `NS1`, `Outer.Inner`) + * tagged during the C++ resolution phase. Lets the graph bridge retry a + * namespace-prefixed node-lookup key and lets the qualified-base resolver + * break same-tail cross-namespace inheritance ties. A deliberate sidecar, + * separate from `qualifiedName`: it does NOT participate in graph node + * identity (node keys derive from filePath/type/qualifiedName) and leaves the + * qualifiedName-keyed resolution index untouched. Absent for the common case + * (non-namespace-nested defs and all non-C++ languages). */ + namespacePrefix?: string; } diff --git a/gitnexus/src/core/ingestion/class-extractors/configs/c-cpp.ts b/gitnexus/src/core/ingestion/class-extractors/configs/c-cpp.ts index d39888aa14..41a4483066 100644 --- a/gitnexus/src/core/ingestion/class-extractors/configs/c-cpp.ts +++ b/gitnexus/src/core/ingestion/class-extractors/configs/c-cpp.ts @@ -45,10 +45,33 @@ export const cClassConfig: ClassExtractionConfig = { export const cppClassConfig: ClassExtractionConfig = { language: SupportedLanguages.CPlusPlus, typeDeclarationNodes: ['class_specifier', 'struct_specifier', 'enum_specifier'], - ancestorScopeNodeTypes: ['namespace_definition', 'class_specifier', 'struct_specifier'], + // #1995: `union_specifier` is included so a type nested in a NAMED union + // (`union U1 { struct Inner {...} }`) qualifies as `U1.Inner`. 
Anonymous unions + // have no `name` child → extractScopeSegmentsFromNode returns [] → they correctly + // contribute nothing (members inject into the enclosing scope). C uses the + // separate cClassConfig (no qualifiedNodeId), so it is intentionally untouched. + ancestorScopeNodeTypes: [ + 'namespace_definition', + 'class_specifier', + 'struct_specifier', + 'union_specifier', + ], // #1978: key nested-type nodes by their fully-qualified path (Outer.Inner) so // same-tail nested types in one TU stay distinct instead of silently merging. qualifiedNodeId: true, + // #1995: anonymous namespaces have no `name` child, so the generic scope walker + // drops them (empty segment) and two `namespace { struct Inner {} }` blocks in one + // TU collapse onto a single `Inner` node. Give each anonymous namespace_definition + // a deterministic per-block discriminator (its start byte — stable across the + // sequential and worker full-file parses) so the nested types stay distinct. + // Returning `undefined` for every other scope — named namespaces (incl. `inline + // namespace`), classes, structs, named unions — falls through to the default + // name-based extraction, leaving them unchanged. Anonymous UNIONS are not matched + // here (members inject into the enclosing scope), so they keep yielding []. + extractScopeSegments: (node) => + node.type === 'namespace_definition' && !node.childForFieldName?.('name') + ? 
[`@anon${node.startIndex}`] + : undefined, extractName: (node) => { const nameNode = node.childForFieldName?.('name'); if (!nameNode) return undefined; diff --git a/gitnexus/src/core/ingestion/class-extractors/generic.ts b/gitnexus/src/core/ingestion/class-extractors/generic.ts index 6cfc7a3c08..39e060e008 100644 --- a/gitnexus/src/core/ingestion/class-extractors/generic.ts +++ b/gitnexus/src/core/ingestion/class-extractors/generic.ts @@ -164,6 +164,14 @@ export function createClassExtractor(config: ClassExtractionConfig): ClassExtrac return extract(node, { name: simpleName })?.qualifiedName ?? null; }, + // #1991: qualify a non-typeDeclaration scope node (e.g. a Ruby `module` → Trait) + // by the same ancestor-scope walk the node-id path uses, so two same-tail nested + // mixin modules stay distinct. extract()/extractQualifiedName cannot be reused — + // they bail on non-typeDeclarations (a module is not in typeDeclarationNodes). + qualifyScopeName(node: SyntaxNode, simpleName: string): string { + return buildQualifiedName(node, simpleName); + }, + shouldSkipClassCapture(context): boolean { return config.shouldSkipClassCapture?.(context) ?? false; }, diff --git a/gitnexus/src/core/ingestion/class-types.ts b/gitnexus/src/core/ingestion/class-types.ts index 2e3d1f6886..f71fa55316 100644 --- a/gitnexus/src/core/ingestion/class-types.ts +++ b/gitnexus/src/core/ingestion/class-types.ts @@ -44,6 +44,14 @@ export interface ClassExtractor { }, ): ExtractedClassSymbol | null; extractQualifiedName(node: SyntaxNode, simpleName: string): string | null; + /** + * #1991: qualify a scope-defining node that maps to a class-like registry label + * (e.g. a Ruby `module` → Trait) but is NOT a typeDeclaration, so it cannot go + * through extract()/extractQualifiedName (which bail on non-typeDeclarations). + * Walks the same ancestor scopes as the node-id path. Optional — only providers + * that materialize such nodes implement it. 
+ */ + qualifyScopeName?(node: SyntaxNode, simpleName: string): string; shouldSkipClassCapture?( context: ClassCaptureContext & { nodeLabel: ClassLikeNodeLabel }, ): boolean; diff --git a/gitnexus/src/core/ingestion/languages/dart/captures.ts b/gitnexus/src/core/ingestion/languages/dart/captures.ts index e10b1cc375..21d14c7b43 100644 --- a/gitnexus/src/core/ingestion/languages/dart/captures.ts +++ b/gitnexus/src/core/ingestion/languages/dart/captures.ts @@ -42,7 +42,7 @@ import { getDartParser, getDartScopeQuery } from './query.js'; import { recordCacheHit, recordCacheMiss } from './cache-stats.js'; import { getTreeSitterBufferSize } from '../../constants.js'; import { parseSourceSafe } from '../../../tree-sitter/safe-parse.js'; -import { DART_HERITAGE_PREFIX } from './interpret.js'; +import { encodeMarker } from '../../utils/heritage-marker.js'; import { DART_BUILT_INS } from './built-ins.js'; const FUNCTION_DECL_TAGS = [ @@ -491,7 +491,7 @@ function emitHeritageMarkers( for (let i = 0; i < container.namedChildCount; i++) { const c = container.namedChild(i); if (c === null || c.type !== 'type_identifier') continue; - const payload = `${DART_HERITAGE_PREFIX}${kind}:${c.text}:${className}`; + const payload = encodeMarker('heritage', [kind, c.text, className]); out.push({ '@import.heritage': syntheticCapture('@import.heritage', c, payload) }); } } diff --git a/gitnexus/src/core/ingestion/languages/dart/interpret.ts b/gitnexus/src/core/ingestion/languages/dart/interpret.ts index 529be365e2..7fe8fb02d2 100644 --- a/gitnexus/src/core/ingestion/languages/dart/interpret.ts +++ b/gitnexus/src/core/ingestion/languages/dart/interpret.ts @@ -15,10 +15,13 @@ */ import type { CaptureMatch, ParsedImport, ParsedTypeBinding, TypeRef } from 'gitnexus-shared'; +import { HERITAGE_MARKER_PREFIX } from '../../utils/heritage-marker.js'; /** Marker prefix carried on a side-effect `ParsedImport.targetRaw` for - * `implements`/`with` heritage, consumed by `emitDartHeritageEdges`. 
*/ -export const DART_HERITAGE_PREFIX = '__heritage__:'; + * `implements`/`with` heritage, consumed by `emitDartHeritageEdges`. Aliased to + * the shared codec prefix (#1994) so the Dart wire prefix has a single source of + * truth and cannot desync from `encodeMarker`/`decodeMarker`. */ +export const DART_HERITAGE_PREFIX = HERITAGE_MARKER_PREFIX; function stripQuotes(s: string): string { return s.replace(/^['"]|['"]$/g, ''); diff --git a/gitnexus/src/core/ingestion/languages/dart/scope-resolver.ts b/gitnexus/src/core/ingestion/languages/dart/scope-resolver.ts index c705738100..22e1171d12 100644 --- a/gitnexus/src/core/ingestion/languages/dart/scope-resolver.ts +++ b/gitnexus/src/core/ingestion/languages/dart/scope-resolver.ts @@ -36,12 +36,8 @@ import type { KnowledgeGraph } from '../../../graph/types.js'; import type { ScopeResolver } from '../../scope-resolution/contract/scope-resolver.js'; import { generateId } from '../../../../lib/utils.js'; import { dartProvider } from '../dart.js'; -import { - dartArityCompatibility, - dartMergeBindings, - resolveDartImportTarget, - DART_HERITAGE_PREFIX, -} from './index.js'; +import { dartArityCompatibility, dartMergeBindings, resolveDartImportTarget } from './index.js'; +import { decodeMarker } from '../../utils/heritage-marker.js'; import { expandDartWildcardNames } from './expand-wildcards.js'; interface ClassDefRef { @@ -109,8 +105,10 @@ function emitDartHeritageEdges( for (const parsed of parsedFiles) { for (const imp of parsed.parsedImports) { const raw = imp.targetRaw; - if (typeof raw !== 'string' || !raw.startsWith(DART_HERITAGE_PREFIX)) continue; - const parts = raw.slice(DART_HERITAGE_PREFIX.length).split(':'); + if (typeof raw !== 'string') continue; + const decoded = decodeMarker(raw); + if (decoded?.kind !== 'heritage') continue; + const parts = decoded.fields; if (parts.length < 3) continue; const [kind, baseName, childName] = parts; const childId = pickClassByName(childName!, parsed.filePath, defsByName); 
diff --git a/gitnexus/src/core/ingestion/languages/ruby/captures.ts b/gitnexus/src/core/ingestion/languages/ruby/captures.ts index 7f5d98f91e..c072b84d86 100644 --- a/gitnexus/src/core/ingestion/languages/ruby/captures.ts +++ b/gitnexus/src/core/ingestion/languages/ruby/captures.ts @@ -13,6 +13,7 @@ import { synthesizeRubyReceiverBinding, findEnclosingClassOrModule } from './rec import { getTreeSitterBufferSize } from '../../constants.js'; import { parseSourceSafe } from '../../../tree-sitter/safe-parse.js'; import { splitQualifiedName } from '../../utils/qualified-name.js'; +import { encodeMarker } from '../../utils/heritage-marker.js'; const FUNCTION_NODE_TYPES = ['method', 'singleton_method'] as const; const HERITAGE_CALL_NAMES: ReadonlySet = new Set(['include', 'extend', 'prepend']); @@ -193,7 +194,7 @@ export function emitRubyScopeCaptures( '@import.source': syntheticCapture( '@import.source', callNode, - `__heritage__:${callName}:${mixinName}:${ownerName}`, + encodeMarker('heritage', [callName, mixinName, ownerName]), ), '@import.name': syntheticCapture('@import.name', callNode, mixinName), }); @@ -227,7 +228,7 @@ export function emitRubyScopeCaptures( '@import.source': syntheticCapture( '@import.source', callNode, - `__property__:${callName}:${propName}:${ownerName}`, + encodeMarker('property', [callName, propName, ownerName]), ), '@import.name': syntheticCapture('@import.name', callNode, propName), }); diff --git a/gitnexus/src/core/ingestion/languages/ruby/import-target.ts b/gitnexus/src/core/ingestion/languages/ruby/import-target.ts index a8dcd323c6..a31f75febc 100644 --- a/gitnexus/src/core/ingestion/languages/ruby/import-target.ts +++ b/gitnexus/src/core/ingestion/languages/ruby/import-target.ts @@ -9,6 +9,7 @@ import { resolveRubyImportInternal } from '../../import-resolvers/ruby.js'; import { buildSuffixIndex } from '../../import-resolvers/utils.js'; +import { isHeritageMarker } from '../../utils/heritage-marker.js'; export interface 
RubyResolveContext { readonly fromFile: string; @@ -37,7 +38,7 @@ export function resolveRubyImportTarget( _resolutionConfig?: unknown, ): string | readonly string[] | null { if (!targetRaw) return null; - if (targetRaw.startsWith('__heritage__:') || targetRaw.startsWith('__property__:')) return null; + if (isHeritageMarker(targetRaw)) return null; const fromNormalized = fromFile.replace(/\\/g, '/'); const fromDir = fromNormalized.includes('/') diff --git a/gitnexus/src/core/ingestion/languages/ruby/interpret.ts b/gitnexus/src/core/ingestion/languages/ruby/interpret.ts index 2cee5a1d2f..e329e19f45 100644 --- a/gitnexus/src/core/ingestion/languages/ruby/interpret.ts +++ b/gitnexus/src/core/ingestion/languages/ruby/interpret.ts @@ -1,4 +1,5 @@ import type { CaptureMatch, ParsedImport, ParsedTypeBinding, TypeRef } from 'gitnexus-shared'; +import { isHeritageMarker } from '../../utils/heritage-marker.js'; // ─── interpretImport ────────────────────────────────────────────────────── @@ -21,7 +22,7 @@ export function interpretRubyImport(captures: CaptureMatch): ParsedImport | null // Heritage-encoded imports (__heritage__:include:Serializable:User) // are stored as namespace imports so emitHeritageEdges can read them. - if (source.startsWith('__heritage__:') || source.startsWith('__property__:')) { + if (isHeritageMarker(source)) { const name = captures['@import.name']?.text ?? 
source; return { kind: 'namespace', localName: name, importedName: name, targetRaw: source }; } diff --git a/gitnexus/src/core/ingestion/languages/ruby/scope-resolver.ts b/gitnexus/src/core/ingestion/languages/ruby/scope-resolver.ts index afefb0c9d3..07325e084c 100644 --- a/gitnexus/src/core/ingestion/languages/ruby/scope-resolver.ts +++ b/gitnexus/src/core/ingestion/languages/ruby/scope-resolver.ts @@ -9,9 +9,29 @@ import { resolveDefGraphId } from '../../scope-resolution/graph-bridge/ids.js'; import type { GraphNodeLookup } from '../../scope-resolution/graph-bridge/node-lookup.js'; import type { KnowledgeGraph } from '../../../graph/types.js'; import { generateId } from '../../../../lib/utils.js'; +import { decodeMarker } from '../../utils/heritage-marker.js'; -const HERITAGE_PREFIX = '__heritage__:'; -const PROPERTY_PREFIX = '__property__:'; +/** + * #1991: resolve a BARE mixin reference (`include Loggable`) to a nested module by + * the INCLUDING class's lexical scope — Ruby looks up a constant in the innermost + * enclosing scope first. For owner `App.S`, try `App.Loggable`, then walk outward. + * Returns undefined if no enclosing-scope-qualified module exists. + */ +function qualifyMixinByOwnerScope( + mixinName: string, + ownerName: string, + graphIdByName: ReadonlyMap, +): string | undefined { + let prefix = ownerName; + let dot = prefix.lastIndexOf('.'); + while (dot !== -1) { + prefix = prefix.slice(0, dot); + const g = graphIdByName.get(`${prefix}.${mixinName}`); + if (g !== undefined) return g; + dot = prefix.lastIndexOf('.'); + } + return undefined; +} function emitRubyMixinEdges( graph: KnowledgeGraph, @@ -19,15 +39,14 @@ function emitRubyMixinEdges( nodeLookup: GraphNodeLookup, ): void { const graphIdByName = new Map(); - // Secondary tail -> graphId map (first-wins). The `__heritage__` marker carries - // the mixin TARGET as the bare written name (`arg.text`, e.g. 
`Loggable`), not - // its full qualifiedName, so a nested mixin module included by its short name - // (`include Loggable` where it is `App::Loggable`) misses the full-qn map and - // its IMPLEMENTS edge is silently dropped (#1982 follow-up). The tail fallback - // recovers it. OWNER (`className`) lookups stay full-qn only, preserving - // same-tail owner disambiguation; only the under-qualified mixin reference - // falls back, and a genuine same-tail mixin tie there resolves first-wins. - const graphIdByTail = new Map(); + // Secondary tail -> graphId map. The `__heritage__` marker carries the mixin + // TARGET as the bare written name (`arg.text`, e.g. `Loggable`), not its full + // qualifiedName, so a nested mixin module included by its short name misses the + // full-qn map. We first resolve it lexically by the including class's enclosing + // scope (`qualifyMixinByOwnerScope`); this tail map is the last resort. A genuine + // same-tail collision is mapped to `null` so we REFUSE to guess (#1991) rather + // than the old first-wins, which cross-wired App::Loggable / Web::Loggable. + const graphIdByTail = new Map(); for (const parsed of parsedFiles) { for (const def of parsed.localDefs) { if (!isClassLike(def.type)) continue; @@ -43,7 +62,12 @@ function emitRubyMixinEdges( graphIdByName.set(fullName, graphId); const dot = fullName.lastIndexOf('.'); const tail = dot === -1 ? 
fullName : fullName.slice(dot + 1); - if (tail.length > 0 && !graphIdByTail.has(tail)) graphIdByTail.set(tail, graphId); + if (tail.length > 0) { + const existingTail = graphIdByTail.get(tail); + if (existingTail === undefined) graphIdByTail.set(tail, graphId); + else if (existingTail !== null && existingTail !== graphId) + graphIdByTail.set(tail, null); // same-tail collision — refuse to guess + } } } } @@ -59,14 +83,21 @@ function emitRubyMixinEdges( for (const parsed of parsedFiles) { for (const imp of parsed.parsedImports) { - if (!imp.targetRaw.startsWith(HERITAGE_PREFIX)) continue; - const parts = imp.targetRaw.slice(HERITAGE_PREFIX.length).split(':'); + const decoded = decodeMarker(imp.targetRaw); + if (decoded?.kind !== 'heritage') continue; + const parts = decoded.fields; if (parts.length < 3) continue; const [kind, mixinName, className] = parts; const classGraphId = graphIdByName.get(className!); - // Owner stays full-qn; the mixin target may be written by short name and - // miss the full-qn map, so fall back to the simple-tail map (#1982). - const mixinGraphId = graphIdByName.get(mixinName!) ?? graphIdByTail.get(mixinName!); + // Owner stays full-qn. The mixin target may be written by short name and miss + // the full-qn map; resolve it lexically by the including class's enclosing + // scope (`App::S` + `Loggable` -> `App::Loggable`), then fall back to the tail + // map ONLY when unambiguous — never first-wins on a collision (#1982/#1991). + const mixinGraphId = + graphIdByName.get(mixinName!) ?? + qualifyMixinByOwnerScope(mixinName!, className!, graphIdByName) ?? + graphIdByTail.get(mixinName!) ?? 
+ undefined; if (classGraphId === undefined || mixinGraphId === undefined) continue; const edgeKey = `${classGraphId}->${mixinGraphId}:${kind}`; if (emitted.has(edgeKey)) continue; @@ -95,8 +126,9 @@ function emitRubyMixinEdges( for (const parsed of parsedFiles) { for (const imp of parsed.parsedImports) { - if (!imp.targetRaw.startsWith(PROPERTY_PREFIX)) continue; - const parts = imp.targetRaw.slice(PROPERTY_PREFIX.length).split(':'); + const decoded = decodeMarker(imp.targetRaw); + if (decoded?.kind !== 'property') continue; + const parts = decoded.fields; if (parts.length < 3) continue; const [_attrKind, propName, className] = parts; const classGraphId = graphIdByName.get(className!); diff --git a/gitnexus/src/core/ingestion/parsing-processor.ts b/gitnexus/src/core/ingestion/parsing-processor.ts index b2b7f10184..799fdd7e0f 100644 --- a/gitnexus/src/core/ingestion/parsing-processor.ts +++ b/gitnexus/src/core/ingestion/parsing-processor.ts @@ -18,6 +18,7 @@ import { findObjectLiteralBindingInfo, getLabelFromCaptures, isSuppressedConcreteTypedefDuplicate, + isQualifiableScopeLabel, qualifyRustImplTargetByModScope, CLASS_CONTAINER_TYPES, type SyntaxNode, @@ -617,7 +618,13 @@ const processParsingSequential = async ( const getQualifiedOwnerName = provider.classExtractor?.qualifiedNodeId === true ? (node: SyntaxNode, simpleName: string): string | null => - provider.classExtractor!.extractQualifiedName(node, simpleName) + // #1991: a Ruby `module` owner is not a typeDeclaration, so + // extractQualifiedName returns null; fall back to the scope walk so a + // method inside a nested module owns through the SAME qualified Trait + // id its node uses (App.Loggable), not a dangling bare id. + provider.classExtractor!.extractQualifiedName(node, simpleName) ?? + provider.classExtractor!.qualifyScopeName?.(node, simpleName) ?? + null : undefined; const enclosingClassInfo = needsOwner ? 
cachedFindEnclosingClassInfo( @@ -644,7 +651,16 @@ const processParsingSequential = async ( extractedClassSymbol?.qualifiedName ?? (classNodeForSymbol && provider.classExtractor?.isTypeDeclaration(classNodeForSymbol) ? (provider.classExtractor.extractQualifiedName(classNodeForSymbol, nodeName) ?? nodeName) - : undefined); + : // #1991: a Ruby `module` maps to Trait (class-like registry) but is not a + // typeDeclaration, so extractQualifiedName bails. Qualify it via the scope + // walk so two same-tail nested mixin modules get distinct ids. Gated on + // qualifiedNodeId, so languages without the flag are unaffected. + isQualifiableScopeLabel(nodeLabel) && + provider.classExtractor?.qualifiedNodeId === true && + classNodeForSymbol + ? (provider.classExtractor.qualifyScopeName?.(classNodeForSymbol, nodeName) ?? + undefined) + : undefined); // Qualify method/property IDs with enclosing class name to avoid collisions // e.g. "Method:animal.dart:Animal.speak" vs "Method:animal.dart:Dog.speak". @@ -667,7 +683,10 @@ const processParsingSequential = async ( const qualifiedName = rustImplQualifiedName !== undefined ? rustImplQualifiedName - : isClassLikeLabel && + : // #1991: include Trait so a Ruby mixin module's qualified scope id keys + // the node, mirroring the class-like path (qualifiedTypeName is computed + // for Trait above). + (isClassLikeLabel || isQualifiableScopeLabel(nodeLabel)) && provider.classExtractor?.qualifiedNodeId === true && qualifiedTypeName !== undefined ? 
qualifiedTypeName diff --git a/gitnexus/src/core/ingestion/scope-resolution/graph-bridge/ids.ts b/gitnexus/src/core/ingestion/scope-resolution/graph-bridge/ids.ts index 25927a579b..92f3f2d8b7 100644 --- a/gitnexus/src/core/ingestion/scope-resolution/graph-bridge/ids.ts +++ b/gitnexus/src/core/ingestion/scope-resolution/graph-bridge/ids.ts @@ -80,6 +80,8 @@ export function resolveDefGraphId( parameterTypeClasses?: readonly ParameterTypeClass[]; templateArguments?: readonly string[]; templateConstraints?: unknown; + /** #1982 bridge-held namespace path; see `SymbolDefinition.namespacePrefix`. */ + namespacePrefix?: string; }, nodeLookup: GraphNodeLookup, ): string | undefined { @@ -149,7 +151,7 @@ export function resolveDefGraphId( // namespace-prefixed key (tagged by `tagNamespacePrefixes`) BEFORE the // simple-name fallback, so same-tail nested bases don't collapse across // sibling namespace members via `simpleKey`. - const nsPrefix = (def as { namespacePrefix?: string }).namespacePrefix; + const nsPrefix = def.namespacePrefix; if (nsPrefix !== undefined && nsPrefix.length > 0) { const nsHit = nodeLookup.get(qualifiedKey(filePath, def.type, `${nsPrefix}.${qn}`)); if (nsHit !== undefined) return nsHit; diff --git a/gitnexus/src/core/ingestion/scope-resolution/scope/walkers.ts b/gitnexus/src/core/ingestion/scope-resolution/scope/walkers.ts index b594d4acfb..23cc0aa8ee 100644 --- a/gitnexus/src/core/ingestion/scope-resolution/scope/walkers.ts +++ b/gitnexus/src/core/ingestion/scope-resolution/scope/walkers.ts @@ -385,7 +385,27 @@ function resolveQualifiedInheritanceBase( } } if (count === 1) return unique; - if (count > 1) return undefined; // genuine tie at this key → refuse, don't guess + if (count > 1) { + // #1993: same-tail bases collide at this namespace-omitted key (`NS1::A::Inner` + // and `NS2::A::Inner` both key `A.Inner`). 
Break the tie with the bridge's + // `namespacePrefix` sidecar — prefer the candidate in the SAME enclosing + // namespace as the deriving class. Bridge-held: `def.qualifiedName` and the + // index keys are untouched; still refuse when the sidecar can't pick a unique. + const childPrefix = enclosingClassDef?.namespacePrefix; + if (childPrefix !== undefined && childPrefix.length > 0) { + let nsUnique: SymbolDefinition | undefined; + let nsCount = 0; + for (const id of ids) { + const def = scopes.defs.get(id); + if (def !== undefined && isClassLike(def.type) && def.namespacePrefix === childPrefix) { + nsUnique = def; + nsCount++; + } + } + if (nsCount === 1) return nsUnique; + } + return undefined; // genuine tie → refuse, don't guess + } } return undefined; } @@ -853,7 +873,32 @@ export function tagNamespacePrefixes(parsed: ParsedFile): void { const q = def.qualifiedName; if (q === undefined || q.length === 0) continue; if (q === prefix || q.startsWith(`${prefix}.`)) continue; // already namespaced - (def as { namespacePrefix?: string }).namespacePrefix = prefix; + def.namespacePrefix = prefix; + } + } + + // #1993: also tag defs declared DIRECTLY in a Namespace scope with that + // namespace's OWN full path. The loop above only reaches class-nested defs + // (`A::Inner`); a deriving class like `NS1::DA` lives in the namespace scope and + // is skipped, so it would carry no prefix and a same-tail cross-namespace base + // tie (`NS1::A::Inner` vs `NS2::A::Inner`) could not be broken by the deriving + // side. Composed identically to the class-nested path (enclosing tails + own + // tail) so the two agree; still sidecar-only (`qualifiedName` untouched). 
+ for (const scope of parsed.scopes) { + if (scope.kind !== 'Namespace') continue; + const ownNsDef = scope.ownedDefs.find((d) => d.type === 'Namespace'); + const ownQ = ownNsDef?.qualifiedName; + if (ownQ === undefined || ownQ.length === 0) continue; + const ownTail = ownQ.slice(ownQ.lastIndexOf('.') + 1); + const parentPrefix = namespacePrefixOf(scope); + const fullPrefix = parentPrefix.length > 0 ? `${parentPrefix}.${ownTail}` : ownTail; + for (const def of scope.ownedDefs) { + if (def.type === 'Namespace') continue; + const q = def.qualifiedName; + if (q === undefined || q.length === 0) continue; + if (q === fullPrefix || q.startsWith(`${fullPrefix}.`)) continue; // already namespaced + if (def.namespacePrefix !== undefined) continue; + def.namespacePrefix = fullPrefix; } } } diff --git a/gitnexus/src/core/ingestion/utils/ast-helpers.ts b/gitnexus/src/core/ingestion/utils/ast-helpers.ts index 52d28169f7..0eeb246728 100644 --- a/gitnexus/src/core/ingestion/utils/ast-helpers.ts +++ b/gitnexus/src/core/ingestion/utils/ast-helpers.ts @@ -43,6 +43,18 @@ export const qualifyRustImplTargetByModScope = ( return [...modSegments, ...splitQualifiedName(rawTargetText)].filter(Boolean).join('.'); }; +/** + * #1991: scope-label predicate that single-sources the `nodeLabel === 'Trait'` + * checks in parsing-processor.ts / parse-worker.ts. A Ruby `module` maps to the + * `Trait` registry label but is NOT a typeDeclaration, so `extractQualifiedName` + * bails on it; these node labels are instead qualified via the scope walk + * (`qualifyScopeName`) so same-tail nested modules get distinct ids. Keeping the + * literal in one place stops the four hand-maintained copies (two each in the + * sequential and worker definition paths) from drifting apart. Pure predicate — + * value-identical to the inlined `nodeLabel === 'Trait'`. 
+ */ +export const isQualifiableScopeLabel = (nodeLabel: string): boolean => nodeLabel === 'Trait'; + /** * Ordered list of definition capture keys for tree-sitter query matches. * Used to extract the definition node from a capture map. @@ -503,18 +515,50 @@ export const findEnclosingClassInfo = ( // different mods own through DISTINCT nodes. The Impl-node // materialization (parsing-processor / parse-worker) mirrors this, so // the owner id == the Impl node id byte-for-byte (#1982). - const firstType = children.find( - (c: SyntaxNode) => c.type === 'type_identifier' || c.type === 'scoped_type_identifier', + // - GENERIC (`impl Inner`, generic_type): the @definition.impl + // node is materialized only when the generic base is a bare + // `type_identifier` (tree-sitter-queries.ts), qualified the same way — + // so drill into the base and mirror that gate, keeping the owner id == + // the node id byte-for-byte (#1992). A generic over a SCOPED base + // (`impl a::Inner`) materializes NO node, so it must produce NO + // owner (the method orphans — scoped-generic deferred, #1992). + const implTarget = children.find( + (c: SyntaxNode) => + c.type === 'type_identifier' || + c.type === 'scoped_type_identifier' || + c.type === 'generic_type', ); - if (firstType) { - const ownerKey = - firstType.type === 'type_identifier' - ? qualifyRustImplTargetByModScope(current, firstType.text) - : firstType.text; - return { - classId: generateId('Impl', `${filePath}:${ownerKey}`), - className: firstType.text, - }; + if (implTarget) { + const baseType = + implTarget.type === 'generic_type' + ? (implTarget.childForFieldName?.('type') ?? null) + : implTarget; + if (baseType?.type === 'type_identifier') { + // Bare target (`impl Inner` or `impl Inner`): qualify by mod scope. + // #1992 follow-up: qualify `className` too (not just `classId`). 
The + // method node id is keyed `${className}.${name}`, so a bare tail collapses + // two same-tail bare impls that ALSO share a method name (`a::Inner::m` + + // `b::Inner::m` both → `Inner.m`) onto one Method node (graph addNode is + // first-write-wins). Qualifying className → `a.Inner.m` / `b.Inner.m` keeps + // them distinct. Symmetric: the call-resolution fallback rebuilds the same + // `${className}.${name}` from the same enclosing-impl walk, so def and call + // ids still agree. Owner edge anchors on `classId` (already qualified). + const qualified = qualifyRustImplTargetByModScope(current, baseType.text); + return { + classId: generateId('Impl', `${filePath}:${qualified}`), + className: qualified, + }; + } + if (baseType?.type === 'scoped_type_identifier' && implTarget.type !== 'generic_type') { + // Top-level scoped `impl a::Inner`: key by full raw text (#1975). + return { + classId: generateId('Impl', `${filePath}:${baseType.text}`), + className: baseType.text, + }; + } + // generic-over-scoped (`impl a::Inner`) and any other base: fall + // through with no owner — no @definition.impl node exists, so attributing + // a method to a synthesized id would orphan it against a phantom owner. } } diff --git a/gitnexus/src/core/ingestion/utils/heritage-marker.ts b/gitnexus/src/core/ingestion/utils/heritage-marker.ts new file mode 100644 index 0000000000..322244a39a --- /dev/null +++ b/gitnexus/src/core/ingestion/utils/heritage-marker.ts @@ -0,0 +1,58 @@ +/** + * #1994: shared codec for the synthetic `__heritage__:` / `__property__:` import + * markers used by the Ruby and Dart scope resolvers to carry side-effect facts + * (mixin includes, attr_accessor properties) through the import channel. Both + * languages share the exact `':'`-delimited wire format, so a single encode/decode + * pair removes the per-site hand-rolled string handling that produced the #1981 + * edge-drop. 
Language-NEUTRAL — keyed only on the literal prefixes; no provider + * branching belongs here. + */ +export type MarkerKind = 'heritage' | 'property'; + +const PREFIX_BY_KIND: Record = { + heritage: '__heritage__:', + property: '__property__:', +}; + +export const HERITAGE_MARKER_PREFIX = PREFIX_BY_KIND.heritage; +export const PROPERTY_MARKER_PREFIX = PREFIX_BY_KIND.property; + +/** + * Build a marker string `::...`. The `':'` delimiter IS the + * wire format, so a field that itself contains `':'` is structurally invalid and + * THROWS — callers must pre-normalize colon-bearing values (e.g. a qualified mixin + * arg `Outer::Mixin` → `Outer.Mixin`). This makes the #1981 silent edge-drop a + * loud failure instead. + */ +export function encodeMarker(kind: MarkerKind, fields: readonly string[]): string { + for (const field of fields) { + if (field.includes(':')) { + throw new Error( + `encodeMarker: field "${field}" contains the ':' delimiter; normalize it before encoding`, + ); + } + } + return PREFIX_BY_KIND[kind] + fields.join(':'); +} + +/** + * Parse a marker string back into its kind + positional fields, or `null` if `raw` + * is not a marker. Mirrors the historical `slice(PREFIX.length).split(':')`. + */ +export function decodeMarker(raw: string): { kind: MarkerKind; fields: string[] } | null { + if (raw.startsWith(PREFIX_BY_KIND.heritage)) { + return { kind: 'heritage', fields: raw.slice(PREFIX_BY_KIND.heritage.length).split(':') }; + } + if (raw.startsWith(PREFIX_BY_KIND.property)) { + return { kind: 'property', fields: raw.slice(PREFIX_BY_KIND.property.length).split(':') }; + } + return null; +} + +/** + * True if `raw` is a synthetic heritage/property marker — exactly the prior + * `startsWith('__heritage__:') || startsWith('__property__:')` pair. 
+ */ +export function isHeritageMarker(raw: string): boolean { + return raw.startsWith(PREFIX_BY_KIND.heritage) || raw.startsWith(PREFIX_BY_KIND.property); +} diff --git a/gitnexus/src/core/ingestion/workers/parse-worker.ts b/gitnexus/src/core/ingestion/workers/parse-worker.ts index 2bb2ea0439..ac41979889 100644 --- a/gitnexus/src/core/ingestion/workers/parse-worker.ts +++ b/gitnexus/src/core/ingestion/workers/parse-worker.ts @@ -67,6 +67,7 @@ import { genericFuncName, inferFunctionLabel, isSuppressedConcreteTypedefDuplicate, + isQualifiableScopeLabel, qualifyRustImplTargetByModScope, CLASS_CONTAINER_TYPES, type SyntaxNode, @@ -1828,7 +1829,13 @@ const processFileGroup = ( const getQualifiedOwnerName = provider.classExtractor?.qualifiedNodeId === true ? (node: SyntaxNode, simpleName: string): string | null => - provider.classExtractor!.extractQualifiedName(node, simpleName) + // #1991: LOCKSTEP — a Ruby `module` owner is not a typeDeclaration, so + // extractQualifiedName returns null; fall back to the scope walk so a + // method inside a nested module owns through the SAME qualified Trait + // id its node uses on the worker path too. + provider.classExtractor!.extractQualifiedName(node, simpleName) ?? + provider.classExtractor!.qualifyScopeName?.(node, simpleName) ?? + null : undefined; const enclosingClassInfo = needsOwner ? cachedFindEnclosingClassInfo( @@ -1853,7 +1860,15 @@ const processFileGroup = ( extractedClassSymbol?.qualifiedName ?? (classNodeForSymbol && provider.classExtractor?.isTypeDeclaration(classNodeForSymbol) ? (provider.classExtractor.extractQualifiedName(classNodeForSymbol, nodeName) ?? nodeName) - : undefined); + : // #1991: LOCKSTEP with parsing-processor.ts — qualify a Ruby `module` + // (Trait) via the scope walk so same-tail nested mixin modules get + // distinct ids on the worker path too. Gated on qualifiedNodeId. + isQualifiableScopeLabel(nodeLabel) && + provider.classExtractor?.qualifiedNodeId === true && + classNodeForSymbol + ? 
(provider.classExtractor.qualifyScopeName?.(classNodeForSymbol, nodeName) ?? + undefined) + : undefined); // Qualify method/property IDs with enclosing class name to avoid collisions. // Class-like nodes use their own fully-qualified path as the id key when the @@ -1871,7 +1886,9 @@ const processFileGroup = ( const qualifiedName = rustImplQualifiedName !== undefined ? rustImplQualifiedName - : isClassLikeLabel && + : // #1991: LOCKSTEP — include Trait so a Ruby mixin module's qualified + // scope id keys the worker-path node, matching the sequential path. + (isClassLikeLabel || isQualifiableScopeLabel(nodeLabel)) && provider.classExtractor?.qualifiedNodeId === true && qualifiedTypeName !== undefined ? qualifiedTypeName diff --git a/gitnexus/test/fixtures/lang-resolution/cpp-anon-ns-tail-collision/main.cpp b/gitnexus/test/fixtures/lang-resolution/cpp-anon-ns-tail-collision/main.cpp new file mode 100644 index 0000000000..47250e0b42 --- /dev/null +++ b/gitnexus/test/fixtures/lang-resolution/cpp-anon-ns-tail-collision/main.cpp @@ -0,0 +1,17 @@ +// Same-tail structs in sibling ANONYMOUS namespaces (#1995). +// +// An anonymous `namespace { }` is a namespace_definition with no `name` child, so +// extractScopeSegmentsFromNode returns [] and both `Inner` structs qualified to the +// bare `Inner` and merged onto one node — from_anon_a / from_anon_b cross-wired. A +// deterministic per-block discriminator (derived from the namespace node's start +// byte) keeps the two blocks' types distinct. 
+namespace { +struct Inner { + void from_anon_a() {} +}; +} +namespace { +struct Inner { + void from_anon_b() {} +}; +} diff --git a/gitnexus/test/fixtures/lang-resolution/cpp-cross-namespace-same-tail/main.cpp b/gitnexus/test/fixtures/lang-resolution/cpp-cross-namespace-same-tail/main.cpp new file mode 100644 index 0000000000..f16b783bec --- /dev/null +++ b/gitnexus/test/fixtures/lang-resolution/cpp-cross-namespace-same-tail/main.cpp @@ -0,0 +1,23 @@ +// Cross-namespace same-tail nested heritage (#1993). +// +// NS1::A::Inner and NS2::A::Inner are distinct nested types whose scope-model +// def.qualifiedName both drops the enclosing namespace and reads `A.Inner`. They +// collide in the qualifiedNames resolution index, so resolveQualifiedInheritanceBase +// hit refuse-on-tie and the scope-walk fallback first-won to NS1's Inner — DB +// CROSS-WIRED its EXTENDS to NS1::A::Inner (DA resolved correctly only by that +// first-wins luck). The cross-wire still lands on a real node, so findDanglingEdges +// stays blind to it. The `namespacePrefix` sidecar breaks the tie (bridge-held): +// DA's enclosing namespace NS1 selects NS1::A::Inner. +namespace NS1 { +struct A { + struct Inner {}; +}; +struct DA : A::Inner {}; +} // namespace NS1 + +namespace NS2 { +struct A { + struct Inner {}; +}; +struct DB : A::Inner {}; +} // namespace NS2 diff --git a/gitnexus/test/fixtures/lang-resolution/cpp-union-nested-tail-collision/main.cpp b/gitnexus/test/fixtures/lang-resolution/cpp-union-nested-tail-collision/main.cpp new file mode 100644 index 0000000000..063da384f8 --- /dev/null +++ b/gitnexus/test/fixtures/lang-resolution/cpp-union-nested-tail-collision/main.cpp @@ -0,0 +1,17 @@ +// Same-tail structs nested in sibling NAMED unions (#1995). 
+// +// `union_specifier` was omitted from cppClassConfig.ancestorScopeNodeTypes, so a +// struct nested in `union U1` and one nested in `union U2` both qualified to the +// bare `Inner` and merged onto ONE Struct:...:Inner node — from_u1 / from_u2 +// cross-wired (dangling:0 but wrong). With the union scope qualified they must +// materialize distinct `U1.Inner` / `U2.Inner` nodes. +union U1 { + struct Inner { + void from_u1() {} + }; +}; +union U2 { + struct Inner { + void from_u2() {} + }; +}; diff --git a/gitnexus/test/fixtures/lang-resolution/ruby-nested-mixin-tail-collision/app.rb b/gitnexus/test/fixtures/lang-resolution/ruby-nested-mixin-tail-collision/app.rb new file mode 100644 index 0000000000..8e709facad --- /dev/null +++ b/gitnexus/test/fixtures/lang-resolution/ruby-nested-mixin-tail-collision/app.rb @@ -0,0 +1,25 @@ +# Two same-tail NESTED mixin modules (App::Loggable + Web::Loggable), each included +# by a sibling class in the same enclosing module (#1991). The structure phase never +# qualified `module` (Trait) node ids, so both collapsed onto one Trait:app.rb:Loggable +# node and the bare-name mixin reference cross-wired IMPLEMENTS (first-wins tail). +# Single-file on purpose: the bare node id embeds file.path, so a cross-file split +# would not collide. S must IMPLEMENTS App::Loggable only; T → Web::Loggable only. +module App + module Loggable + def log; end + end + + class S + include Loggable + end +end + +module Web + module Loggable + def warn; end + end + + class T + include Loggable + end +end diff --git a/gitnexus/test/fixtures/lang-resolution/rust-generic-impl-same-method-name/lib.rs b/gitnexus/test/fixtures/lang-resolution/rust-generic-impl-same-method-name/lib.rs new file mode 100644 index 0000000000..ea52dda5b6 --- /dev/null +++ b/gitnexus/test/fixtures/lang-resolution/rust-generic-impl-same-method-name/lib.rs @@ -0,0 +1,24 @@ +// #1992 follow-up (F3): two same-tail generic inherent impls that ALSO share a +// method name. 
Pre-fix the Method node id keys `${className}.${name}` with the +// BARE tail (`Inner.m`), so `a::Inner::m` and `b::Inner::m` collapse onto ONE +// Method node (graph addNode is first-write-wins). Both HAS_METHOD edges then +// point at the survivor, silently losing the second method. Qualifying +// `className` (`a.Inner` / `b.Inner`) keys them as `a.Inner.m` / `b.Inner.m`, so +// BOTH Method nodes survive and each owns through its own mod-qualified Impl node. +pub mod a { + pub struct Inner<T> { + v: T, + } + impl<T> Inner<T> { + pub fn m(&self) {} + } +} + +pub mod b { + pub struct Inner<T> { + v: T, + } + impl<T> Inner<T> { + pub fn m(&self) {} + } +} diff --git a/gitnexus/test/fixtures/lang-resolution/rust-nested-tail-collision-generic/lib.rs b/gitnexus/test/fixtures/lang-resolution/rust-nested-tail-collision-generic/lib.rs new file mode 100644 index 0000000000..2a1afe8230 --- /dev/null +++ b/gitnexus/test/fixtures/lang-resolution/rust-nested-tail-collision-generic/lib.rs @@ -0,0 +1,30 @@ +// #1992: GENERIC inherent-impl ownership. Two same-tail `Inner` types under +// sibling mods, each with a generic inherent impl `impl<T> Inner<T>`. Their +// methods must own through DISTINCT mod-qualified Impl nodes (`a.Inner` / +// `b.Inner`), not orphan to File. +pub mod a { + pub struct Inner<T> { v: T } + impl<T> Inner<T> { + pub fn fa(&self) {} + } +} + +pub mod b { + pub struct Inner<T> { v: T } + impl<T> Inner<T> { + pub fn fb(&self) {} + } +} + +// Scoped-generic inherent impl: `impl<T> crate::c::Scoped<T>` is a `generic_type` +// wrapping a `scoped_type_identifier`. tree-sitter-queries materializes NO +// @definition.impl node for this shape, so `fd` must stay orphaned (scoped-generic +// deferred, #1992) — the owner walk must NOT mint a phantom `c.Scoped` owner.
+pub mod c { + pub struct Scoped<T> { v: T } +} +pub mod d { + impl<T> crate::c::Scoped<T> { + pub fn fd(&self) {} + } +} diff --git a/gitnexus/test/fixtures/ruby-captures-golden/expected-captures.json b/gitnexus/test/fixtures/ruby-captures-golden/expected-captures.json index 72c44d05ad..0046dcb34c 100644 --- a/gitnexus/test/fixtures/ruby-captures-golden/expected-captures.json +++ b/gitnexus/test/fixtures/ruby-captures-golden/expected-captures.json @@ -211,6 +211,10 @@ "captureGroups": 11, "digest": "dfa494facc56b5e07a12befc77cd1e3788f0494f1373960cd9fe88715750e590" }, + "ruby-nested-mixin-tail-collision/app.rb": { + "captureGroups": 21, + "digest": "b42c38446b3e5307cd79eec888d5faf9d9683d79bacdd9738f64871d2a1e8bbc" + }, "ruby-nested-tail-collision/nested.rb": { "captureGroups": 31, "digest": "c48ebe5516a0faf50effbad0a19fe29be70c371b50ba9d6fa6ae3f6f708b3a4e" diff --git a/gitnexus/test/fixtures/rust-captures-golden/expected-captures.json b/gitnexus/test/fixtures/rust-captures-golden/expected-captures.json index e9a735553b..e74e2db387 100644 --- a/gitnexus/test/fixtures/rust-captures-golden/expected-captures.json +++ b/gitnexus/test/fixtures/rust-captures-golden/expected-captures.json @@ -319,6 +319,10 @@ "captureGroups": 18, "digest": "3326eb4f82b1559b6afec497dc52cab734e6f3209501a4bd982bf5eab9ec6dba" }, + "rust-nested-tail-collision-generic/lib.rs": { + "captureGroups": 29, + "digest": "1bfdaaf207a83924fc25d2eec0a47e5807754bbd0de499b81adea48342c6e687" + }, "rust-nested-tail-collision/lib.rs": { "captureGroups": 17, "digest": "2fc1fe1eb4e8727a89ab283ae34a0ae8df0c421551a7bd5e6e7ffb9d4aa54189" diff --git a/gitnexus/test/integration/resolvers/cpp.test.ts b/gitnexus/test/integration/resolvers/cpp.test.ts index 5b9dc58cad..a571778487 100644 --- a/gitnexus/test/integration/resolvers/cpp.test.ts +++ b/gitnexus/test/integration/resolvers/cpp.test.ts @@ -3915,6 +3915,150 @@ describe('C++ inline nested same-tail collision — worker path parity (issue #1 }); }); +// 
--------------------------------------------------------------------------- +// Named-union nested same-tail collision — distinct qualified nodes (issue #1995) +// +// `union U1 { struct Inner {...} }` + `union U2 { struct Inner {...} }` must +// materialize TWO distinct Struct nodes (qn U1.Inner / U2.Inner). `union_specifier` +// was missing from cppClassConfig.ancestorScopeNodeTypes, so both Inner structs +// qualified to the bare `Inner` and merged (dangling:0 but wrong). Mirrors the +// #1978 inline-collision template; positive owner-identity, not just dangle-free. +// --------------------------------------------------------------------------- + +describe('C++ named-union nested same-tail collision — distinct qualified nodes (issue #1995)', () => { + let result: PipelineResult; + + beforeAll(async () => { + result = await runPipelineFromRepo( + path.join(FIXTURES, 'cpp-union-nested-tail-collision'), + () => {}, + ); + }, 60000); + + it('materializes U1.Inner and U2.Inner as two distinct Struct nodes [#1995-union]', () => { + const qns = getNodesByLabelFull(result, 'Struct') + .map((n) => n.properties.qualifiedName) + .filter((q) => q === 'U1.Inner' || q === 'U2.Inner') + .sort(); + expect(qns).toEqual(['U1.Inner', 'U2.Inner']); + }); + + it('owns from_u1 / from_u2 through their OWN distinct node (positive identity) [#1995-union]', () => { + expect(findDanglingEdges(result, ['HAS_METHOD'])).toEqual([]); + const hm = getRelationships(result, 'HAS_METHOD'); + const ownerQn = (target: string) => { + const e = hm.find((x) => x.target === target); + expect(e, `HAS_METHOD -> ${target}`).toBeDefined(); + return result.graph.getNode(e!.rel.sourceId)?.properties.qualifiedName; + }; + expect(ownerQn('from_u1')).toBe('U1.Inner'); + expect(ownerQn('from_u2')).toBe('U2.Inner'); + }); +}); + +// Worker-path parity for the named-union collision (parse-worker.ts must qualify +// the union scope byte-identically to the sequential parser). 
+describe('C++ named-union nested same-tail collision — worker path parity (issue #1995)', () => { + let result: PipelineResult; + + beforeAll(async () => { + result = await runPipelineFromRepo( + path.join(FIXTURES, 'cpp-union-nested-tail-collision'), + () => {}, + { workerThresholdsForTest: { minFiles: 1, minBytes: 1 }, workerPoolSize: 2 }, + ); + }, 120000); + + it('genuinely used the worker pool [#1995-union]', () => { + expect(result.usedWorkerPool).toBe(true); + }); + + it('materializes U1.Inner / U2.Inner and owns each method on the worker path [#1995-union]', () => { + const qns = getNodesByLabelFull(result, 'Struct') + .map((n) => n.properties.qualifiedName) + .filter((q) => q === 'U1.Inner' || q === 'U2.Inner') + .sort(); + expect(qns).toEqual(['U1.Inner', 'U2.Inner']); + expect(findDanglingEdges(result, ['HAS_METHOD'])).toEqual([]); + const hm = getRelationships(result, 'HAS_METHOD'); + const ownerQn = (target: string) => + result.graph.getNode(hm.find((x) => x.target === target)!.rel.sourceId)?.properties + .qualifiedName; + expect(ownerQn('from_u1')).toBe('U1.Inner'); + expect(ownerQn('from_u2')).toBe('U2.Inner'); + }); +}); + +// --------------------------------------------------------------------------- +// Anonymous-namespace nested same-tail collision — distinct nodes (issue #1995) +// +// Two `namespace { struct Inner {...} }` blocks must materialize TWO distinct +// Struct nodes. An anonymous namespace_definition has no `name` child, so both +// Inner structs qualified to the bare `Inner` and merged. A C++ extractScopeSegments +// override gives each anon block a deterministic start-byte discriminator. The +// discriminator value is not portable, so assert on node DISTINCTNESS (count==2 / +// distinct owner ids), never a literal qualifiedName. 
+// --------------------------------------------------------------------------- + +describe('C++ anonymous-namespace nested same-tail collision — distinct nodes (issue #1995)', () => { + let result: PipelineResult; + + beforeAll(async () => { + result = await runPipelineFromRepo(path.join(FIXTURES, 'cpp-anon-ns-tail-collision'), () => {}); + }, 60000); + + it('materializes two distinct Struct Inner nodes (one per anon namespace) [#1995-anon]', () => { + const innerQns = getNodesByLabelFull(result, 'Struct') + .map((n) => n.properties.qualifiedName) + .filter((q): q is string => typeof q === 'string' && q.endsWith('Inner')); + // Start-byte discriminator → assert DISTINCTNESS, not a literal value. Pre-fix + // both Inner structs merge onto one bare `Inner` node (set size 1). + expect(new Set(innerQns).size).toBe(2); + }); + + it('owns from_anon_a / from_anon_b through DISTINCT nodes (no merge) [#1995-anon]', () => { + expect(findDanglingEdges(result, ['HAS_METHOD'])).toEqual([]); + const hm = getRelationships(result, 'HAS_METHOD'); + const a = hm.find((x) => x.target === 'from_anon_a'); + const b = hm.find((x) => x.target === 'from_anon_b'); + expect(a, 'HAS_METHOD -> from_anon_a').toBeDefined(); + expect(b, 'HAS_METHOD -> from_anon_b').toBeDefined(); + expect(a!.rel.sourceId).not.toBe(b!.rel.sourceId); + }); +}); + +// Worker-path parity for the anonymous-namespace collision: the start-byte +// discriminator must be deterministic across the worker's full-file parse. 
+describe('C++ anonymous-namespace nested same-tail collision — worker path parity (issue #1995)', () => { + let result: PipelineResult; + + beforeAll(async () => { + result = await runPipelineFromRepo( + path.join(FIXTURES, 'cpp-anon-ns-tail-collision'), + () => {}, + { workerThresholdsForTest: { minFiles: 1, minBytes: 1 }, workerPoolSize: 2 }, + ); + }, 120000); + + it('genuinely used the worker pool [#1995-anon]', () => { + expect(result.usedWorkerPool).toBe(true); + }); + + it('materializes two distinct anon Inner nodes and owns each method on the worker path [#1995-anon]', () => { + const innerQns = getNodesByLabelFull(result, 'Struct') + .map((n) => n.properties.qualifiedName) + .filter((q): q is string => typeof q === 'string' && q.endsWith('Inner')); + expect(new Set(innerQns).size).toBe(2); + expect(findDanglingEdges(result, ['HAS_METHOD'])).toEqual([]); + const hm = getRelationships(result, 'HAS_METHOD'); + const a = hm.find((x) => x.target === 'from_anon_a'); + const b = hm.find((x) => x.target === 'from_anon_b'); + expect(a, 'HAS_METHOD -> from_anon_a').toBeDefined(); + expect(b, 'HAS_METHOD -> from_anon_b').toBeDefined(); + expect(a!.rel.sourceId).not.toBe(b!.rel.sourceId); + }); +}); + // --------------------------------------------------------------------------- // Inline nested same-tail HERITAGE — qualified base resolution (issue #1982) // @@ -4044,6 +4188,73 @@ describe('C++ namespaced same-tail nested heritage — worker path parity (issue // `A.Inner` key is tried → the global type. Registry-primary only. 
// --------------------------------------------------------------------------- +// --------------------------------------------------------------------------- +// Cross-namespace same-tail nested heritage — bridge-held tie-break (issue #1993) +// +// NS1::A::Inner and NS2::A::Inner both key the namespace-omitted `A.Inner` in the +// qualifiedNames index, so resolveQualifiedInheritanceBase refused-on-tie and the +// scope-walk fallback first-wins to NS1's Inner — DB CROSS-WIRES its EXTENDS to +// NS1::A::Inner (DA resolves correctly only by that first-wins luck). The cross-wire +// still resolves to a real node, so findDanglingEdges can't catch it, and the #1982 +// bridge can't reach it either (it rescues the structure-phase node lookup, not the +// resolution-index tie). The `namespacePrefix` sidecar breaks the tie: DA's enclosing +// namespace NS1 selects NS1::A::Inner. Bridge-held — def.qualifiedName and the index +// keys are unchanged. Registry-primary only (the qualified-base resolver is the bridge). 
+// --------------------------------------------------------------------------- + +describe('C++ cross-namespace same-tail nested heritage — bridge-held tie-break (issue #1993)', () => { + let result: PipelineResult; + + beforeAll(async () => { + result = await runPipelineFromRepo( + path.join(FIXTURES, 'cpp-cross-namespace-same-tail'), + () => {}, + ); + }, 60000); + + it('routes NS1.DA EXTENDS NS1.A.Inner and NS2.DB EXTENDS NS2.A.Inner (no cross-ns tie)', () => { + const extendsEdges = getRelationships(result, 'EXTENDS'); + const baseQnOf = (derivedQn: string) => { + const e = extendsEdges.find( + (x) => result.graph.getNode(x.rel.sourceId)?.properties.qualifiedName === derivedQn, + ); + expect(e, `EXTENDS from ${derivedQn}`).toBeDefined(); + return result.graph.getNode(e!.rel.targetId)?.properties.qualifiedName; + }; + expect(baseQnOf('NS1.DA')).toBe('NS1.A.Inner'); + expect(baseQnOf('NS2.DB')).toBe('NS2.A.Inner'); + }); +}); + +describe('C++ cross-namespace same-tail nested heritage — worker path parity (issue #1993)', () => { + let result: PipelineResult; + + beforeAll(async () => { + result = await runPipelineFromRepo( + path.join(FIXTURES, 'cpp-cross-namespace-same-tail'), + () => {}, + { workerThresholdsForTest: { minFiles: 1, minBytes: 1 }, workerPoolSize: 2 }, + ); + }, 120000); + + it('genuinely used the worker pool for the cross-namespace fixture', () => { + expect(result.usedWorkerPool).toBe(true); + }); + + it('routes NS1.DA / NS2.DB to their own namespaced base on the worker path (no cross-ns tie)', () => { + const extendsEdges = getRelationships(result, 'EXTENDS'); + const baseQnOf = (derivedQn: string) => { + const e = extendsEdges.find( + (x) => result.graph.getNode(x.rel.sourceId)?.properties.qualifiedName === derivedQn, + ); + expect(e, `EXTENDS from ${derivedQn} (worker)`).toBeDefined(); + return result.graph.getNode(e!.rel.targetId)?.properties.qualifiedName; + }; + expect(baseQnOf('NS1.DA')).toBe('NS1.A.Inner'); + 
expect(baseQnOf('NS2.DB')).toBe('NS2.A.Inner'); + }); +}); + describe('C++ root-anchored base ignores enclosing-relative type (issue #1982)', () => { let result: PipelineResult; diff --git a/gitnexus/test/integration/resolvers/helpers.ts b/gitnexus/test/integration/resolvers/helpers.ts index ae07af5012..8492d2c48a 100644 --- a/gitnexus/test/integration/resolvers/helpers.ts +++ b/gitnexus/test/integration/resolvers/helpers.ts @@ -288,6 +288,12 @@ const LEGACY_RESOLVER_PARITY_EXPECTED_FAILURES: Readonly App.Loggable and T -> Web.Loggable (no cross-wire, R2)', + 'genuinely used the worker pool for the same-tail mixin-module fixture', + 'routes S -> App.Loggable and T -> Web.Loggable on the worker path (no cross-wire)', // #1982 follow-up: a nested mixin included by short name must not drop its // IMPLEMENTS edge. The fix (graphIdByTail fallback in emitRubyMixinEdges) is // registry-primary only; the legacy DAG does not use that bridge. @@ -548,6 +554,13 @@ const LEGACY_RESOLVER_PARITY_EXPECTED_FAILURES: Readonly { + let result: PipelineResult; + + beforeAll(async () => { + result = await runPipelineFromRepo( + path.join(FIXTURES, 'ruby-nested-mixin-tail-collision'), + () => {}, + ); + }, 60000); + + it('materializes App.Loggable and Web.Loggable as two distinct Trait nodes', () => { + const qns = getNodesByLabelFull(result, 'Trait') + .map((n) => n.properties.qualifiedName) + .filter((q) => q === 'App.Loggable' || q === 'Web.Loggable') + .sort(); + expect(qns).toEqual(['App.Loggable', 'Web.Loggable']); + }); + + pit('routes S -> App.Loggable and T -> Web.Loggable (no cross-wire, R2)', () => { + expect(findDanglingEdges(result, ['IMPLEMENTS', 'HAS_METHOD'])).toEqual([]); + const impl = getRelationships(result, 'IMPLEMENTS'); + const targetQnOf = (className: string) => { + const e = impl.find((x) => x.source === className && x.target === 'Loggable'); + expect(e, `IMPLEMENTS from ${className}`).toBeDefined(); + return 
result.graph.getNode(e!.rel.targetId)?.properties.qualifiedName; + }; + expect(targetQnOf('S')).toBe('App.Loggable'); + expect(targetQnOf('T')).toBe('Web.Loggable'); + expect(impl.filter((x) => x.source === 'S')).toHaveLength(1); + expect(impl.filter((x) => x.source === 'T')).toHaveLength(1); + }); +}); + +// Same fixture through the WORKER pool — the __heritage__ marker owner + the +// qualified module node id must survive worker serialization (#1991 R2/R15). +describe('Ruby same-tail nested mixin-module collision — worker path parity (issue #1991)', () => { + let result: PipelineResult; + + beforeAll(async () => { + result = await runPipelineFromRepo( + path.join(FIXTURES, 'ruby-nested-mixin-tail-collision'), + () => {}, + { workerThresholdsForTest: { minFiles: 1, minBytes: 1 }, workerPoolSize: 2 }, + ); + }, 120000); + + pit('genuinely used the worker pool for the same-tail mixin-module fixture', () => { + expect(result.usedWorkerPool).toBe(true); + }); + + pit('routes S -> App.Loggable and T -> Web.Loggable on the worker path (no cross-wire)', () => { + const impl = getRelationships(result, 'IMPLEMENTS'); + const targetQnOf = (className: string) => { + const e = impl.find((x) => x.source === className && x.target === 'Loggable'); + expect(e, `IMPLEMENTS from ${className}`).toBeDefined(); + return result.graph.getNode(e!.rel.targetId)?.properties.qualifiedName; + }; + expect(targetQnOf('S')).toBe('App.Loggable'); + expect(targetQnOf('T')).toBe('Web.Loggable'); + expect(impl.filter((x) => x.source === 'S')).toHaveLength(1); + expect(impl.filter((x) => x.source === 'T')).toHaveLength(1); + }); +}); + // --------------------------------------------------------------------------- // Nested mixin included by SHORT name — IMPLEMENTS edge must not drop (#1982). 
// diff --git a/gitnexus/test/integration/resolvers/rust.test.ts b/gitnexus/test/integration/resolvers/rust.test.ts index 8a99119a09..750940a906 100644 --- a/gitnexus/test/integration/resolvers/rust.test.ts +++ b/gitnexus/test/integration/resolvers/rust.test.ts @@ -2102,6 +2102,164 @@ describe('Rust inline mod-nested same-tail collision — distinct nodes (issue # }); }); +// --------------------------------------------------------------------------- +// #1992: GENERIC inherent-impl ownership — `impl Inner` methods own through +// the mod-qualified Impl node, not orphaned to File. +// +// PR #1981 / `bc4a560d` qualified the UNSCOPED bare `impl Inner` target. A GENERIC +// inherent-impl target (`impl Inner`) is a `generic_type` node, which the +// inherent-impl owner walk (ast-helpers `findEnclosingClassInfo`) did not match — +// so the walk returned null and the method got `File -> DEFINES` with NO HAS_METHOD +// (orphaned; invisible to findDanglingEdges). The Impl NODE was already correctly +// mod-qualified (the @name capture drills into the inner type_identifier, +// tree-sitter-queries.ts), so the fix is owner-walk-only and the owner id == the +// node id (`a.Inner` / `b.Inner`) by construction. Holds on both resolver legs +// (structure-phase). 
+// --------------------------------------------------------------------------- + +describe('Rust generic inherent-impl same-tail ownership — distinct nodes (issue #1992)', () => { + let result: PipelineResult; + + beforeAll(async () => { + result = await runPipelineFromRepo( + path.join(FIXTURES, 'rust-nested-tail-collision-generic'), + () => {}, + ); + }, 60000); + + it('owns fa / fb through distinct mod-qualified Impl nodes (generic impl, no orphan)', () => { + const hm = getRelationships(result, 'HAS_METHOD'); + const a = hm.find((e) => e.target === 'fa'); + const b = hm.find((e) => e.target === 'fb'); + // Pre-fix the generic-impl owner walk returns null, so fa/fb orphan to File + // (File -> DEFINES, no HAS_METHOD) — toBeDefined() fails on the pre-fix base. + expect(a, 'HAS_METHOD -> fa').toBeDefined(); + expect(b, 'HAS_METHOD -> fb').toBeDefined(); + // Owner id is the mod-qualified Impl node, byte-identical to the node id. + expect(a!.rel.sourceId).not.toBe(b!.rel.sourceId); + expect(a!.rel.sourceId).toContain('a.Inner'); + expect(b!.rel.sourceId).toContain('b.Inner'); + expect(findDanglingEdges(result, ['HAS_METHOD'])).toEqual([]); + }); + + // R6: scoped-generic `impl crate::c::Scoped` materializes no Impl node, so + // `fd` must NOT own through a phantom `c.Scoped` node — it stays orphaned + // (deferred). Guards against the owner walk minting an owner id for an + // unmaterialized node. + it('does not mint a phantom owner for a scoped-generic impl (fd orphaned, deferred)', () => { + const hm = getRelationships(result, 'HAS_METHOD'); + expect(hm.find((e) => e.target === 'fd')).toBeUndefined(); + }); +}); + +// Same fixture forced through the WORKER pool (parse-worker.ts). The inherent-impl +// owner walk is shared structure-phase logic, so generic-impl ownership must hold +// on BOTH the sequential and worker paths. 
+describe('Rust generic inherent-impl ownership — worker path parity (issue #1992)', () => { + let result: PipelineResult; + + beforeAll(async () => { + result = await runPipelineFromRepo( + path.join(FIXTURES, 'rust-nested-tail-collision-generic'), + () => {}, + { workerThresholdsForTest: { minFiles: 1, minBytes: 1 }, workerPoolSize: 2 }, + ); + }, 120000); + + it('genuinely used the worker pool', () => { + expect(result.usedWorkerPool).toBe(true); + }); + + it('owns fa / fb through distinct mod-qualified Impl nodes on the worker path', () => { + const hm = getRelationships(result, 'HAS_METHOD'); + const a = hm.find((e) => e.target === 'fa'); + const b = hm.find((e) => e.target === 'fb'); + expect(a, 'HAS_METHOD -> fa').toBeDefined(); + expect(b, 'HAS_METHOD -> fb').toBeDefined(); + expect(a!.rel.sourceId).not.toBe(b!.rel.sourceId); + expect(a!.rel.sourceId).toContain('a.Inner'); + expect(b!.rel.sourceId).toContain('b.Inner'); + expect(findDanglingEdges(result, ['HAS_METHOD'])).toEqual([]); + }); +}); + +// --------------------------------------------------------------------------- +// F3 (#1992 follow-up) — same-tail generic impls that ALSO share a method name +// must materialize DISTINCT method (Function) nodes. +// +// `${className}.${methodName}` keys the method node id (Rust `fn`s carry the +// `Function` label). Before this fix the bare inherent-impl arm set `className` to +// the bare tail (`Inner`), so two same-tail generic impls under sibling mods that +// each define `fn m` both keyed `Function:…:Inner.m#0` and collapsed onto ONE node +// (graph addNode is first-write-wins) — the second `m` was silently dropped and +// both HAS_METHOD edges targeted the survivor. The owner `classId` was already +// mod-qualified, so HAS_METHOD *sources* stayed distinct, which masked the +// collision (sourceId-only assertions passed). Qualifying `className` +// (`a.Inner` / `b.Inner`) keys `a.Inner.m` / `b.Inner.m`, so both nodes survive +// with distinct ids. 
Structure-phase, so it holds on both resolver legs and the +// worker path. +// --------------------------------------------------------------------------- + +describe('Rust same-tail generic impls with shared method name — distinct nodes (issue #1992)', () => { + let result: PipelineResult; + + beforeAll(async () => { + result = await runPipelineFromRepo( + path.join(FIXTURES, 'rust-generic-impl-same-method-name'), + () => {}, + ); + }, 60000); + + it('materializes two distinct `m` method nodes (no first-write-wins collapse)', () => { + // Pre-fix: only one `m` Function node survives (the second is dropped on the + // colliding id) — length is 1, so toBe(2) fails on the pre-fix base. + const methods = getNodesByLabel(result, 'Function').filter((n) => n === 'm'); + expect(methods.length).toBe(2); + }); + + it('owns each `m` through its own mod-qualified Impl node (distinct source AND target)', () => { + const hm = getRelationships(result, 'HAS_METHOD').filter((e) => e.target === 'm'); + expect(hm.length).toBe(2); + // Owner edges were always distinct (classId is mod-qualified)… + expect(hm[0].rel.sourceId).not.toBe(hm[1].rel.sourceId); + const sources = [hm[0].rel.sourceId, hm[1].rel.sourceId].sort(); + expect(sources[0]).toContain('a.Inner'); + expect(sources[1]).toContain('b.Inner'); + // …but the TARGET node collapsed pre-fix — this is the F3 assertion. + expect(hm[0].rel.targetId).not.toBe(hm[1].rel.targetId); + expect(findDanglingEdges(result, ['HAS_METHOD'])).toEqual([]); + }); +}); + +// Same fixture forced through the WORKER pool — the impl owner walk + node-id +// keying is shared structure-phase logic, so the distinct-node guarantee must hold +// on the worker path too (parse-worker.ts mirrors parsing-processor.ts). 
+describe('Rust same-tail generic impls with shared method name — worker path parity (issue #1992)', () => { + let result: PipelineResult; + + beforeAll(async () => { + result = await runPipelineFromRepo( + path.join(FIXTURES, 'rust-generic-impl-same-method-name'), + () => {}, + { workerThresholdsForTest: { minFiles: 1, minBytes: 1 }, workerPoolSize: 2 }, + ); + }, 120000); + + it('genuinely used the worker pool', () => { + expect(result.usedWorkerPool).toBe(true); + }); + + it('materializes two distinct `m` method nodes on the worker path', () => { + const methods = getNodesByLabel(result, 'Function').filter((n) => n === 'm'); + expect(methods.length).toBe(2); + const hm = getRelationships(result, 'HAS_METHOD').filter((e) => e.target === 'm'); + expect(hm.length).toBe(2); + expect(hm[0].rel.sourceId).not.toBe(hm[1].rel.sourceId); + expect(hm[0].rel.targetId).not.toBe(hm[1].rel.targetId); + expect(findDanglingEdges(result, ['HAS_METHOD'])).toEqual([]); + }); +}); + // --------------------------------------------------------------------------- // F71 — union declarations resolve as Struct nodes (issue #1934) // diff --git a/gitnexus/test/unit/ingestion/heritage-marker.test.ts b/gitnexus/test/unit/ingestion/heritage-marker.test.ts new file mode 100644 index 0000000000..57d0a25d84 --- /dev/null +++ b/gitnexus/test/unit/ingestion/heritage-marker.test.ts @@ -0,0 +1,51 @@ +import { describe, it, expect } from 'vitest'; +import { + encodeMarker, + decodeMarker, + isHeritageMarker, + HERITAGE_MARKER_PREFIX, + PROPERTY_MARKER_PREFIX, +} from '../../../src/core/ingestion/utils/heritage-marker.js'; + +describe('heritage-marker codec (#1994)', () => { + it('encodes the exact Ruby/Dart wire format (byte-identical to the hand-rolled markers)', () => { + expect(encodeMarker('heritage', ['include', 'Loggable', 'App.S'])).toBe( + '__heritage__:include:Loggable:App.S', + ); + expect(encodeMarker('property', ['attr_accessor', 'radius', 'Shapes.Circle'])).toBe( + 
'__property__:attr_accessor:radius:Shapes.Circle', + ); + expect(HERITAGE_MARKER_PREFIX).toBe('__heritage__:'); + expect(PROPERTY_MARKER_PREFIX).toBe('__property__:'); + }); + + it('round-trips encode → decode for both kinds', () => { + const heritage = encodeMarker('heritage', ['with', 'Logger', 'Service']); + expect(decodeMarker(heritage)).toEqual({ + kind: 'heritage', + fields: ['with', 'Logger', 'Service'], + }); + const property = encodeMarker('property', ['attr_reader', 'name', 'User']); + expect(decodeMarker(property)).toEqual({ + kind: 'property', + fields: ['attr_reader', 'name', 'User'], + }); + }); + + it('throws on a colon-bearing field (the wire format reserves ":" as the delimiter)', () => { + expect(() => encodeMarker('heritage', ['include', 'Outer::Mixin', 'User'])).toThrow(/':'/); + }); + + it('decodeMarker returns null for non-markers', () => { + expect(decodeMarker('./relative/path')).toBeNull(); + expect(decodeMarker('package:foo/bar.dart')).toBeNull(); + expect(decodeMarker('')).toBeNull(); + }); + + it('isHeritageMarker matches exactly the prior startsWith pair', () => { + expect(isHeritageMarker('__heritage__:include:M:C')).toBe(true); + expect(isHeritageMarker('__property__:attr:p:C')).toBe(true); + expect(isHeritageMarker('Serializable')).toBe(false); + expect(isHeritageMarker('dart:core')).toBe(false); + }); +});