Skip to content
Merged
52 changes: 50 additions & 2 deletions gitnexus-shared/src/scope-resolution/finalize-algorithm.ts
Original file line number Diff line number Diff line change
/**
 * Finds the definition in `defs` whose simple name equals `name`.
 *
 * When several definitions share the name, the first callable / type-like
 * one (per `isCallableOrTypeLike`) wins; otherwise the first definition
 * with that name is returned.
 *
 * @param defs - Candidate definitions, scanned in order.
 * @param name - Simple name to look up.
 * @returns The preferred matching definition, or `undefined` when no
 *   definition has that simple name.
 */
function findExportByName(
  defs: readonly SymbolDefinition[],
  name: string,
): SymbolDefinition | undefined {
  // GENERIC RULE (applies to every language using this finalize
  // algorithm): when MULTIPLE `SymbolDefinition`s share the same simple
  // name in `localDefs`, prefer callable / type-like defs over plain
  // value defs (`Variable`, `Property`, …). The CALLER side of an
  // import almost always wants the callable, not a value shadow that
  // happens to share the name — and without a deterministic
  // preference, capture order silently decides which def the import
  // binds to.
  //
  // The single-def case is unchanged: when only one def has the name,
  // it's returned regardless of its type (the `fallback` path below).
  //
  // TypeScript is the first known language where this matters in
  // practice: `const fn = () => {}` emits BOTH a `Function` def (from
  // `@declaration.function` on the inner arrow) AND a `Variable` def
  // (from the generic `@declaration.variable` pattern matching the
  // wrapping `lexical_declaration`), and consumers of `import { fn }`
  // need to bind to the callable. Other migrated languages don't
  // currently produce dual emits of this shape, so the rule is a no-op
  // for them today; future languages get the same correctness
  // guarantee for free if they ever do.
  //
  // See `gitnexus/test/integration/resolvers/typescript-hof-callbacks.test.ts`
  // for the cross-file regression this rule prevents.
  let fallback: SymbolDefinition | undefined;
  for (const d of defs) {
    // Skip defs with a different name; do NOT return on the first name
    // match, or the preference below would never be consulted.
    if (deriveSimpleName(d) !== name) continue;
    if (isCallableOrTypeLike(d.type)) return d;
    // Remember only the FIRST same-named value def so the result stays
    // deterministic when no callable / type-like def exists.
    if (fallback === undefined) fallback = d;
  }
  return fallback;
}

/**
 * Definition `type` labels that count as callable or type-like. Matching
 * is exact and case-sensitive.
 */
const CALLABLE_OR_TYPE_LIKE: ReadonlySet<string> = new Set<string>([
  // Callables.
  'Function', 'Method', 'Constructor',
  // Type-like and container kinds.
  'Class', 'Interface', 'Enum', 'Struct', 'Record', 'Trait',
  'Namespace', 'Module',
  // Type aliases.
  'TypeAlias', 'Type', 'Typedef',
]);

/**
 * Reports whether a definition's `type` label is one of the callable /
 * type-like kinds listed in `CALLABLE_OR_TYPE_LIKE`.
 *
 * @param type - The definition's type label.
 * @returns `true` when the label is in the set, `false` otherwise.
 */
function isCallableOrTypeLike(type: string): boolean {
  const isListed = CALLABLE_OR_TYPE_LIKE.has(type);
  return isListed;
}

function countEdgesWithin(edgeIndex: Map<string, ImportEdgeDraft[]>, files: Set<string>): number {
Expand Down
58 changes: 47 additions & 11 deletions gitnexus/src/core/ingestion/languages/typescript.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,28 +57,64 @@ import {
} from './typescript/index.js';

/**
 * TypeScript/JavaScript: arrow_function and function_expression are
 * anonymous AST nodes — they take their name from the surrounding
 * declarative context.
 *
 * Recognised contexts:
 *   - `const foo = () => {}` (variable_declarator) → "foo"
 *   - `{ addItem: (item) => ... }` (pair / property_assignment) → "addItem"
 *     Covers Zustand stores, TanStack Query factories, React Context
 *     providers, and most other HOF-heavy idioms (issue #1166).
 *
 * Returns `null` for funcName when the arrow lives in a context that has
 * no static name — call arguments, computed keys, return-from-arrow
 * positions. The parent walk in findEnclosingFunctionId then continues
 * up to the next named ancestor (or to the file).
 *
 * @param node - The syntax node to name.
 * @returns `null` when `node` is not an arrow_function /
 *   function_expression or has no parent; otherwise the derived name
 *   (possibly `null`) together with the `'Function'` label.
 */
const tsExtractFunctionName = (
  node: SyntaxNode,
): { funcName: string | null; label: NodeLabel } | null => {
  if (node.type !== 'arrow_function' && node.type !== 'function_expression') return null;

  const parent = node.parent;
  if (!parent) return null;

  if (parent.type === 'variable_declarator') {
    let nameNode = parent.childForFieldName?.('name');
    if (!nameNode) {
      // No `name` field available: fall back to the first identifier child.
      for (let i = 0; i < parent.childCount; i++) {
        const c = parent.child(i);
        if (c?.type === 'identifier') {
          nameNode = c;
          break;
        }
      }
    }
    return { funcName: nameNode?.text ?? null, label: 'Function' };
  }

  // Object property pair: `{ addItem: (item) => ... }`.
  // tree-sitter-typescript uses `pair`; tree-sitter-javascript also exposes
  // `pair`. (Older grammars used `property_assignment`; we accept both.)
  if (parent.type === 'pair' || parent.type === 'property_assignment') {
    const keyNode = parent.childForFieldName?.('key');
    if (!keyNode) return { funcName: null, label: 'Function' };
    if (keyNode.type === 'property_identifier' || keyNode.type === 'identifier') {
      return { funcName: keyNode.text, label: 'Function' };
    }
    if (keyNode.type === 'string') {
      // `"add-item": () => ...` — the literal text inside the quotes.
      const fragment = keyNode.children?.find((c: SyntaxNode) => c.type === 'string_fragment');
      const text = fragment?.text ?? null;
      return { funcName: text, label: 'Function' };
    }
    // computed_property_name (`[ACTION_KEY]`) and other dynamic keys have
    // no static name — fall through anonymous.
    return { funcName: null, label: 'Function' };
  }

  // Any other parent (call arguments, return positions, …): anonymous.
  return { funcName: null, label: 'Function' };
};

export const BUILT_INS: ReadonlySet<string> = new Set([
Expand Down
22 changes: 22 additions & 0 deletions gitnexus/src/core/ingestion/languages/typescript/captures.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,11 @@ function pickFirstDefined(grouped: CaptureMatch, tags: readonly string[]): Captu
* as `@reference.write.member`).
* 4. The member_expression is the `function:` of an `await_expression`
* being called (handled by the member-call capture).
* 5. The member_expression is the `name:` of a `jsx_self_closing_element`
* or `jsx_opening_element` (it's a JSX component invocation, already
* captured as `@reference.call.member` by the TSX-only query suffix).
* Without this filter, `<Foo.Bar />` would emit a phantom ACCESSES
* edge to `Foo.Bar` IN ADDITION to the CALLS edge.
*
* Returns `true` when the capture should be kept as a read reference,
* `false` when it should be dropped.
Expand All @@ -99,6 +104,9 @@ function shouldEmitReadMember(memberNode: SyntaxNode): boolean {
case 'assignment_expression':
case 'augmented_assignment_expression':
return parent.childForFieldName('left')?.id !== memberNode.id;
case 'jsx_self_closing_element':
case 'jsx_opening_element':
return parent.childForFieldName('name')?.id !== memberNode.id;
default:
return true;
}
Expand Down Expand Up @@ -232,6 +240,20 @@ export function emitTsScopeCaptures(
// arity filter can narrow overloads. Count the `argument` named
// children of the backing `arguments` node. TypeScript constructor
// calls use `new_expression`; regular calls use `call_expression`.
//
// JSX call anchors (`jsx_self_closing_element` / `jsx_opening_element`
// captured by the TSX-only suffix in `query.ts`) intentionally do
// NOT carry arity metadata. The lookup below would resolve `callNode`
// to `null` for a JSX anchor (the anchor is neither a call_expression
// nor a new_expression), so the synthesis branch silently no-ops and
// the JSX call enters the registry with name-only resolution. This
// is acceptable for React: components are virtually never
// overloaded in the current GitNexus graph model, so name-only
// dispatch matches the single component definition. If a future
// codebase introduces overloaded React components AND needs JSX
// calls to disambiguate by props-arity, a JSX-aware arity
// synthesizer would need to count `jsx_attribute` children of the
// opening tag instead of `arguments`.
const callAnchor = pickFirstDefined(grouped, CALL_TAGS);
if (callAnchor !== undefined && grouped['@reference.arity'] === undefined) {
const callNode =
Expand Down
122 changes: 117 additions & 5 deletions gitnexus/src/core/ingestion/languages/typescript/query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -136,25 +136,83 @@ const TYPESCRIPT_SCOPE_QUERY = `
;; Arrow/function-expression assigned to a const/let/var — named by the
;; variable_declarator. Covers \`const fn = () => {}\` and its export
;; variant. Matches the legacy TYPESCRIPT_QUERIES pattern.
;;
;; The \`@declaration.function\` anchor sits on the INNER arrow_function /
;; function_expression node (NOT the wrapping lexical_declaration), so
;; \`anchor.range\` aligns with the corresponding \`@scope.function\` scope
;; range. \`pass2AttachDeclarations\` then resolves \`innermost\` to the
;; arrow's own scope (instead of the module scope) and the def is owned
;; by the arrow itself. Without this alignment, calls inside the arrow
;; body lose caller attribution: \`resolveCallerGraphId\` walks up past
;; the empty arrow scope into the module scope and grabs whichever
;; Function-like def appears first there — silently mis-attributing
;; every nested call (Zustand stores, TanStack hooks, Promise-all/map,
;; etc.). See \`typescript-hof-callbacks.test.ts\`.
(lexical_declaration
(variable_declarator
name: (identifier) @declaration.name
value: (arrow_function))) @declaration.function
value: (arrow_function) @declaration.function))

(lexical_declaration
(variable_declarator
name: (identifier) @declaration.name
value: (function_expression))) @declaration.function
value: (function_expression) @declaration.function))

(variable_declaration
(variable_declarator
name: (identifier) @declaration.name
value: (arrow_function))) @declaration.function
value: (arrow_function) @declaration.function))

(variable_declaration
(variable_declarator
name: (identifier) @declaration.name
value: (function_expression))) @declaration.function
value: (function_expression) @declaration.function))

;; Object-property arrows / function expressions named by their pair key:
;; \`{ addItem: (item) => ..., removeItem: (item) => ... }\`. The legacy
;; TYPESCRIPT_QUERIES emits the same shape; mirroring it here keeps
;; scope-resolution declarations in sync (issue #1166). Computed keys
;; (\`[K]: () => ...\`) intentionally fall through anonymous.
;;
;; Same anchor discipline as the \`lexical_declaration\` block above: the
;; \`@declaration.function\` capture must sit on the INNER \`arrow_function\`
;; / \`function_expression\` node — NOT the outer \`pair\`. The pair node
;; starts at the property-key token, BEFORE the arrow's
;; \`@scope.function\` range. \`pass2AttachDeclarations.atPosition(pair.startLine,
;; pair.startCol)\` therefore resolves to the PARENT scope (the enclosing
;; function-like, e.g. the \`(set) => ({...})\` callback in
;; \`persist((set) => ({...}))\`), not the inner arrow's own scope.
;;
;; With the anchor on \`pair\`, ALL pair-function defs from the same object
;; literal land in the same parent scope's \`ownedDefs\`. \`resolveCallerGraphId\`
;; walking up from a call inside any of those arrows then matches the
;; FIRST Function-like def via \`ownedDefs.find()\` — silently mis-attributing
;; every call to the first sibling. Multi-action Zustand stores
;; (\`{ addItem, removeItem, fetchData, … }\`) — the dominant 0%-capture
;; pattern in the bug report — would land all calls on \`addItem\`.
;;
;; With the anchor on the inner \`arrow_function\` / \`function_expression\`,
;; \`anchor.range\` matches the arrow's own \`@scope.function\` range; the
;; def lands in the arrow scope's own \`ownedDefs\` and \`pass2AttachDeclarations\`'s
;; auto-hoist (\`rangesEqual(anchor.range, innermost.range)\`) promotes
;; the BINDING to the parent scope (so importers and lookups still find
;; the name in the object's surrounding scope). Each pair-arrow becomes
;; an independent caller anchor in the walk.
(pair
key: (property_identifier) @declaration.name
value: (arrow_function) @declaration.function)

(pair
key: (property_identifier) @declaration.name
value: (function_expression) @declaration.function)

(pair
key: (string (string_fragment) @declaration.name)
value: (arrow_function) @declaration.function)

(pair
key: (string (string_fragment) @declaration.name)
value: (function_expression) @declaration.function)

;; Method definitions — regular + private (#field) methods.
(method_definition
Expand Down Expand Up @@ -723,6 +781,53 @@ const TYPESCRIPT_SCOPE_QUERY = `
property: (property_identifier) @reference.name) @reference.read.member
`;

/**
 * JSX-only query suffix. Appended to the base query when compiling
 * against the TSX grammar; NOT compiled against the plain TS grammar
 * (which has no `jsx_*` node types and would reject these patterns).
 *
 * Why JSX as a CALLS edge: `<Foo />` is syntactic sugar for `Foo(props)`
 * and the React component is invoked by the renderer, so for blast-radius
 * (`gitnexus_impact("Badge", direction: "upstream")`) and call-graph
 * (`gitnexus_context("Foo")`) purposes JSX usage IS a call. Routing
 * through `@reference.call.free` / `@reference.call.member` makes the
 * downstream caller-walk + edge-emission paths handle JSX uniformly with
 * ordinary call expressions — no new edge type, no schema changes.
 *
 * Identifier-only JSX is filtered to names starting with an uppercase
 * ASCII letter via `(#match? ... "^[A-Z]")` so `<div>`, `<span>`,
 * `<button>` and other native HTML elements (which by JSX convention
 * start lowercase) don't emit edges to nonexistent "div" / "span"
 * symbols. Member-form JSX (`<Foo.Bar />`) is always a component (HTML
 * element names can't contain dots), so no predicate filter is applied
 * there.
 *
 * Both `jsx_self_closing_element` (`<Foo />`) and `jsx_opening_element`
 * (`<Foo>...</Foo>`) emit; the closing tag is intentionally NOT captured —
 * each JSX element should emit exactly one CALLS edge per use site.
 *
 * Captures produced: `@reference.name` (component name or member
 * property), `@reference.receiver` (member-form object), and the anchor
 * capture `@reference.call.free` / `@reference.call.member` on the JSX
 * element node itself.
 */
const TSX_JSX_QUERY_SUFFIX = `
;; <Foo />
((jsx_self_closing_element
name: (identifier) @reference.name) @reference.call.free
(#match? @reference.name "^[A-Z]"))

;; <Foo> ... </Foo> (paired form — match the opening tag only)
((jsx_opening_element
name: (identifier) @reference.name) @reference.call.free
(#match? @reference.name "^[A-Z]"))

;; <Foo.Bar /> / <Container.Section.Title /> — namespaced JSX
(jsx_self_closing_element
name: (member_expression
object: (_) @reference.receiver
property: (property_identifier) @reference.name)) @reference.call.member

(jsx_opening_element
name: (member_expression
object: (_) @reference.receiver
property: (property_identifier) @reference.name)) @reference.call.member
`;

let _tsParser: Parser | null = null;
let _tsxParser: Parser | null = null;
let _tsQuery: Parser.Query | null = null;
Expand Down Expand Up @@ -753,11 +858,18 @@ export function getTsParser(filePath?: string): Parser {
* executed against a Tree produced by the `tsx` grammar — tree-sitter
* matches by node-type id, and the two grammars have separate id
* spaces.
*
* The TSX query is compiled with the JSX-as-call patterns appended.
* Those patterns reference `jsx_self_closing_element` /
* `jsx_opening_element` which exist only in the TSX grammar — embedding
* them in the plain TS query would throw `Query.InvalidNodeType` at
* compile time (and even if it didn't, the patterns would never fire on
* `.ts` source).
*/
export function getTsScopeQuery(filePath?: string): Parser.Query {
if (filePath !== undefined && isTsxFile(filePath)) {
if (_tsxQuery === null) {
_tsxQuery = new Parser.Query(TSX_GRAMMAR, TYPESCRIPT_SCOPE_QUERY);
_tsxQuery = new Parser.Query(TSX_GRAMMAR, TYPESCRIPT_SCOPE_QUERY + TSX_JSX_QUERY_SUFFIX);
}
return _tsxQuery;
}
Expand Down
Loading
Loading