diff --git a/gitnexus/src/core/ingestion/languages/typescript.ts b/gitnexus/src/core/ingestion/languages/typescript.ts index e2ec9d23f1..f1c7613422 100644 --- a/gitnexus/src/core/ingestion/languages/typescript.ts +++ b/gitnexus/src/core/ingestion/languages/typescript.ts @@ -66,11 +66,21 @@ import { * - `{ addItem: (item) => ... }` (pair / property_assignment) → "addItem" * Covers Zustand stores, TanStack Query factories, React Context * providers, and most other HOF-heavy idioms (issue #1166). + * - `const X = HOC((args) => { ... })` (arguments → call_expression → + * variable_declarator) → "X". Covers `React.forwardRef`, `memo`, + * `useCallback`, `useMemo`, `observer`, `debounce`, and other HOC + * factories that wrap their behaviour-defining arrow. Without this + * branch, every shadcn/Radix UI component (`const Button = + * React.forwardRef(...)`) registered as an anonymous arrow with + * calls inside falling back to File-level attribution. The same + * applied to all `useCallback` / `useMemo` callbacks bound to a + * const — the sole way to give them a named caller anchor. * * Returns `null` for funcName when the arrow lives in a context that has - * no static name — call arguments, computed keys, return-from-arrow - * positions. The parent walk in findEnclosingFunctionId then continues - * up to the next named ancestor (or to the file). + * no static name — bare call arguments (not bound to a const), computed + * keys, return-from-arrow positions. The parent walk in + * findEnclosingFunctionId then continues up to the next named ancestor + * (or to the file). */ const tsExtractFunctionName = ( node: SyntaxNode, @@ -114,6 +124,37 @@ const tsExtractFunctionName = ( return { funcName: null, label: 'Function' }; } + // HOC-wrapped variable declarations: `const Button = forwardRef((p, r) => { ... })`, + // `const handleClick = useCallback(() => doStuff(), [deps])`, + // `const Card = React.memo((props) => { ... })`. 
The arrow's `parent` is + // `arguments`, grandparent is `call_expression`, great-grandparent is + // `variable_declarator`. Walk the chain up and take the variable's name + // — the meaningful identifier the developer wrote on the LHS. Mirrors + // the four registry-primary patterns in `typescript/query.ts`. The + // wrapping callee (`forwardRef`, `memo`, `React.memo`, `useCallback`, + // user-defined HOCs) is intentionally NOT constrained: any function + // call whose result is bound to a const and whose first/positional + // argument is an arrow takes the const's name. Chained array-method + // calls (`const x = arr.find((y) => p(y))`) match too and produce a + // mostly-harmless `Function:x` (consumed as a value, never invoked), + // accepted as a small false-positive cost vs. the much larger gain of + // capturing the React UI-component idiom. + if (parent.type === 'arguments') { + const callExpr = parent.parent; + if (!callExpr || callExpr.type !== 'call_expression') { + return { funcName: null, label: 'Function' }; + } + const declarator = callExpr.parent; + if (!declarator || declarator.type !== 'variable_declarator') { + return { funcName: null, label: 'Function' }; + } + const nameNode = declarator.childForFieldName?.('name'); + if (nameNode?.type === 'identifier') { + return { funcName: nameNode.text, label: 'Function' }; + } + return { funcName: null, label: 'Function' }; + } + return { funcName: null, label: 'Function' }; }; diff --git a/gitnexus/src/core/ingestion/languages/typescript/query.ts b/gitnexus/src/core/ingestion/languages/typescript/query.ts index 07d02f507b..9e0d9b8094 100644 --- a/gitnexus/src/core/ingestion/languages/typescript/query.ts +++ b/gitnexus/src/core/ingestion/languages/typescript/query.ts @@ -214,6 +214,95 @@ const TYPESCRIPT_SCOPE_QUERY = ` key: (string (string_fragment) @declaration.name) value: (function_expression) @declaration.function) +;; HOC-wrapped variable declarations: \`const X = HOC((args) => { ... })\`. 
+;; +;; Covers the dominant React UI idiom (\`React.forwardRef\`, \`React.memo\`, +;; bare \`forwardRef\` / \`memo\` / \`observer\`), Hook callbacks +;; (\`useCallback\`, \`useMemo\`), and library-wrapper factories (\`debounce\`, +;; \`throttle\`, user-defined \`withErrorBoundary\` / \`createHook\`, etc.). +;; All produce the same AST shape: +;; +;; lexical_declaration +;; variable_declarator +;; name: identifier "X" ← we want this name +;; value: call_expression +;; function: identifier | member_expression ← any callee +;; arguments: arguments +;; arrow_function | function_expression ← the actual code +;; +;; The pre-fix \`tsExtractFunctionName\` only handled \`variable_declarator\` +;; and \`pair\` parents, so HOC-wrapped arrows fell through anonymous. The +;; registry-primary \`query.ts\` had no pattern for this shape either — +;; \`const Button = forwardRef((p, r) => { ... })\` registered as a +;; \`Variable\` with no \`Function\` def, and every call inside the arrow +;; body lost caller attribution: \`resolveCallerGraphId\` walked up past +;; the empty arrow scope to the module's File fallback. Sourcerer-fe alone +;; has ~296 such declarations (57 forwardRef + 21 memo + 161 useCallback +;; + 57 useMemo) — all invisible to \`gitnexus_context\` / +;; \`gitnexus_impact\` for outgoing edges before this fix. +;; +;; Anchor discipline: same as the \`lexical_declaration\` / \`pair\` blocks +;; above — on the INNER \`arrow_function\` / \`function_expression\`, NOT +;; the outer \`call_expression\`. The arrow's range matches its own +;; \`@scope.function\` range, so \`pass2AttachDeclarations.atPosition\` +;; resolves \`innermost\` to the arrow's own scope and +;; \`rangesEqual(anchor.range, innermost.range)\` triggers the auto-hoist +;; that promotes the binding to the parent scope (where \`const X\` +;; lives). 
+;; +;; Trade-off — chained array-method form: \`const x = arr.find((y) => p(y))\` +;; has the same syntactic shape and would also match, naming the +;; \`.find\` callback as \`x\`. The resulting \`Function:x\` is mostly +;; harmless: \`x\` is consumed as a value (\`if (x) { ... }\`), never +;; invoked as a function, so it gets zero incoming \`CALLS\` edges. The +;; one outgoing edge \`Function:x → p\` is a minor mis-attribution that +;; could in principle be fixed by adding a \`function: [(identifier) +;; (member_expression)]\` predicate that excludes property-identifiers +;; matching a known array-method blocklist (\`map\` / \`filter\` / \`find\` +;; / \`reduce\` / \`forEach\` / \`some\` / \`every\`). We don't do that here +;; because (a) the false-positive cost is negligible, (b) the blocklist +;; would need maintenance, and (c) any user-defined fluent-API method +;; with a callback argument would still false-positive — there's no +;; clean syntactic dividing line. +;; +;; Trade-off — multi-arrow arguments: \`const x = call(arrow1, arrow2)\` +;; would emit TWO matches with the same name \`x\`. The tree-sitter query +;; matches every arrow_function that is a direct child of \`arguments\`, +;; so each emits its own \`(name=x, function=...)\` pair. \`pass2AttachDeclarations\` +;; pushes each \`Function:x\` def into its own arrow's scope (that +;; arrow's \`ownedDefs\`) and hoists both bindings to the parent. +;; The downstream registry's qualified-name dedup then collapses them +;; via \`(filePath, type, qualifiedName)\` — second wins. Acceptable; +;; multi-arrow calls bound directly to a const are rare. (Note that +;; \`new Promise(executor)\` takes a single executor and, being a +;; \`new_expression\` rather than a \`call_expression\`, is not matched by +;; these patterns at all.) 
+(lexical_declaration + (variable_declarator + name: (identifier) @declaration.name + value: (call_expression + arguments: (arguments + (arrow_function) @declaration.function)))) + +(lexical_declaration + (variable_declarator + name: (identifier) @declaration.name + value: (call_expression + arguments: (arguments + (function_expression) @declaration.function)))) + +(variable_declaration + (variable_declarator + name: (identifier) @declaration.name + value: (call_expression + arguments: (arguments + (arrow_function) @declaration.function)))) + +(variable_declaration + (variable_declarator + name: (identifier) @declaration.name + value: (call_expression + arguments: (arguments + (function_expression) @declaration.function)))) + ;; Method definitions — regular + private (#field) methods. (method_definition name: (property_identifier) @declaration.name) @declaration.method diff --git a/gitnexus/src/core/ingestion/tree-sitter-queries.ts b/gitnexus/src/core/ingestion/tree-sitter-queries.ts index 61030ff663..8e165a837d 100644 --- a/gitnexus/src/core/ingestion/tree-sitter-queries.ts +++ b/gitnexus/src/core/ingestion/tree-sitter-queries.ts @@ -84,6 +84,63 @@ export const TYPESCRIPT_QUERIES = ` key: (string (string_fragment) @name) value: (function_expression)) @definition.function +; HOC-wrapped variable declarations: \`const X = HOC((args) => { ... })\`. +; Mirrors the registry-primary patterns in \`languages/typescript/query.ts\` +; so the legacy Call-Resolution DAG and the registry-primary pipeline +; produce the same set of \`Function\` nodes — required for the CI parity +; gate. Covers React.forwardRef / memo / useCallback / useMemo / observer +; / debounce / user-defined HOC factories. The \`var X = HOC(...)\` form is +; mirrored too (registry-primary has it) so that codebases mixing \`var\` and +; \`const\` see identical attribution on both pipelines. 
See +; \`tsExtractFunctionName\` for the resolution logic and the \`query.ts\` +; comment for the full anchor-discipline rationale and the chained- +; array-method trade-off. +(lexical_declaration + (variable_declarator + name: (identifier) @name + value: (call_expression + arguments: (arguments + (arrow_function))))) @definition.function + +(lexical_declaration + (variable_declarator + name: (identifier) @name + value: (call_expression + arguments: (arguments + (function_expression))))) @definition.function + +(export_statement + declaration: (lexical_declaration + (variable_declarator + name: (identifier) @name + value: (call_expression + arguments: (arguments + (arrow_function)))))) @definition.function + +(export_statement + declaration: (lexical_declaration + (variable_declarator + name: (identifier) @name + value: (call_expression + arguments: (arguments + (function_expression)))))) @definition.function + +; \`var X = HOC(...)\` parity with registry-primary. Legacy code (and any +; transpiler output that downlevels \`const\` to \`var\`) hits this shape. +(variable_declaration + (variable_declarator + name: (identifier) @name + value: (call_expression + arguments: (arguments + (arrow_function))))) @definition.function + +(variable_declaration + (variable_declarator + name: (identifier) @name + value: (call_expression + arguments: (arguments + (function_expression))))) @definition.function + ; Variable/constant declarations (non-function values). ; Overlap with @definition.function patterns is handled by parse-worker dedup. (lexical_declaration @@ -260,6 +317,57 @@ export const JAVASCRIPT_QUERIES = ` key: (string (string_fragment) @name) value: (function_expression)) @definition.function +; HOC-wrapped variable declarations: \`const X = HOC((args) => { ... })\`. +; See TYPESCRIPT_QUERIES section above for the full rationale (issue #1166 +; follow-up — covers forwardRef / memo / useCallback / useMemo / observer +; / debounce / user-defined HOC factories). 
Both \`const\` and \`var\` forms +; are mirrored so JS code that uses \`var\` (or transpiler output) gets the +; same attribution as the registry-primary path. +(lexical_declaration + (variable_declarator + name: (identifier) @name + value: (call_expression + arguments: (arguments + (arrow_function))))) @definition.function + +(lexical_declaration + (variable_declarator + name: (identifier) @name + value: (call_expression + arguments: (arguments + (function_expression))))) @definition.function + +(export_statement + declaration: (lexical_declaration + (variable_declarator + name: (identifier) @name + value: (call_expression + arguments: (arguments + (arrow_function)))))) @definition.function + +(export_statement + declaration: (lexical_declaration + (variable_declarator + name: (identifier) @name + value: (call_expression + arguments: (arguments + (function_expression)))))) @definition.function + +; \`var X = HOC(...)\` parity with registry-primary. +(variable_declaration + (variable_declarator + name: (identifier) @name + value: (call_expression + arguments: (arguments + (arrow_function))))) @definition.function + +(variable_declaration + (variable_declarator + name: (identifier) @name + value: (call_expression + arguments: (arguments + (function_expression))))) @definition.function + ; Variable/constant declarations (non-function values). ; Overlap with @definition.function patterns is handled by parse-worker dedup. 
(lexical_declaration diff --git a/gitnexus/test/fixtures/lang-resolution/typescript-hoc-wrapped/src/debounce.ts b/gitnexus/test/fixtures/lang-resolution/typescript-hoc-wrapped/src/debounce.ts new file mode 100644 index 0000000000..e4eda5bd91 --- /dev/null +++ b/gitnexus/test/fixtures/lang-resolution/typescript-hoc-wrapped/src/debounce.ts @@ -0,0 +1,11 @@ +// Library-wrapper / utility-HOC form: `debounce`, `throttle`, `once`, +// `memoize` — all share the same shape `const X = wrap(arrow)` and should +// produce a `Function:X` def named after the const. + +import { doStuff } from './helpers'; + +const debounce = unknown>(fn: F, _ms: number): F => fn; + +export const debouncedSearch = debounce((query: string) => { + doStuff(query.length); +}, 250); diff --git a/gitnexus/test/fixtures/lang-resolution/typescript-hoc-wrapped/src/forward-ref.tsx b/gitnexus/test/fixtures/lang-resolution/typescript-hoc-wrapped/src/forward-ref.tsx new file mode 100644 index 0000000000..9e7fa84154 --- /dev/null +++ b/gitnexus/test/fixtures/lang-resolution/typescript-hoc-wrapped/src/forward-ref.tsx @@ -0,0 +1,29 @@ +// shadcn/Radix UI canonical pattern: every primitive component is wrapped +// in `React.forwardRef` so callers can attach a ref. The arrow inside is +// where the actual rendering logic lives — every call inside its body +// (cn(), helper(), JSX renders) should attribute to `Button`, not File. +// +// Pre-fix: `Button` was a Variable; calls inside attributed to File. +// Post-fix: `Button` is a Function; calls attribute to `Button`. + +import { helper, cn } from './helpers'; + +// Stand-in for React.forwardRef — defined locally so the outer call_expression +// is in-fixture and we don't need to mock the React types. Same shape as +// the real React.forwardRef. 
+const React = { + forwardRef: (render: (props: P, ref: T | null) => unknown) => render, +}; + +interface ButtonProps { + className?: string; + variant?: 'default' | 'ghost'; +} + +export const Button = React.forwardRef( + ({ className, variant }, _ref) => { + const cls = cn('btn', variant ?? 'default', className ?? ''); + helper(cls); + return null; + }, +); diff --git a/gitnexus/test/fixtures/lang-resolution/typescript-hoc-wrapped/src/helpers.ts b/gitnexus/test/fixtures/lang-resolution/typescript-hoc-wrapped/src/helpers.ts new file mode 100644 index 0000000000..7e0ce56998 --- /dev/null +++ b/gitnexus/test/fixtures/lang-resolution/typescript-hoc-wrapped/src/helpers.ts @@ -0,0 +1,11 @@ +// Shared helpers used as call targets in HOC-wrapped fixture files. Each +// helper is a plain named arrow so we can assert exact `Caller → helper` +// edges without confounding cross-file resolution. + +export const helper = (label: string): string => label.toUpperCase(); + +export const doStuff = (n: number): number => n + 1; + +export const cn = (...classes: string[]): string => classes.filter(Boolean).join(' '); + +export const fmt = (value: number): string => `[${value}]`; diff --git a/gitnexus/test/fixtures/lang-resolution/typescript-hoc-wrapped/src/memo.tsx b/gitnexus/test/fixtures/lang-resolution/typescript-hoc-wrapped/src/memo.tsx new file mode 100644 index 0000000000..ecb50f46fe --- /dev/null +++ b/gitnexus/test/fixtures/lang-resolution/typescript-hoc-wrapped/src/memo.tsx @@ -0,0 +1,19 @@ +// Bare-identifier HOC form: `const Card = memo((props) => { ... })`. +// Common when the HOC is named-imported (`import { memo } from 'react'`) +// rather than accessed via a namespace (`React.memo`). Both should work. + +import { helper, cn } from './helpers'; + +const memo = (render: (props: P) => unknown) => render; + +interface CardProps { + title: string; + className?: string; +} + +export const Card = memo(({ title, className }) => { + const cls = cn('card', className ?? 
''); + helper(title); + helper(cls); + return null; +}); diff --git a/gitnexus/test/fixtures/lang-resolution/typescript-hoc-wrapped/src/negative-bare-call.ts b/gitnexus/test/fixtures/lang-resolution/typescript-hoc-wrapped/src/negative-bare-call.ts new file mode 100644 index 0000000000..18eee8360b --- /dev/null +++ b/gitnexus/test/fixtures/lang-resolution/typescript-hoc-wrapped/src/negative-bare-call.ts @@ -0,0 +1,30 @@ +// Negative-control: bare statement-level HOC calls (NOT bound to a +// `const`/`let`/`var`) must NOT produce phantom Function nodes. +// +// This exercises the `parent.type === 'arguments'` branch in +// `tsExtractFunctionName`: the walk-up `arguments → call_expression → +// (program | expression_statement)` short-circuits because the parent +// of `call_expression` is NOT `variable_declarator`. The arrow stays +// anonymous and calls inside fall back to the enclosing module scope. + +import { doStuff } from './helpers'; + +const useCallback = unknown>(fn: F, _deps: unknown[]): F => fn; +const memo = (render: (props: P) => unknown) => render; + +// Statement-level: result is discarded. +useCallback(() => { + doStuff(1); +}, []); + +memo<{ x: number }>(({ x }) => { + doStuff(x); +}); + +// Function-arg position (passed to another call): also unbound. +const wrap = (value: T): T => value; +wrap( + memo<{ y: number }>(({ y }) => { + doStuff(y); + }), +); diff --git a/gitnexus/test/fixtures/lang-resolution/typescript-hoc-wrapped/src/nested.tsx b/gitnexus/test/fixtures/lang-resolution/typescript-hoc-wrapped/src/nested.tsx new file mode 100644 index 0000000000..75720217b9 --- /dev/null +++ b/gitnexus/test/fixtures/lang-resolution/typescript-hoc-wrapped/src/nested.tsx @@ -0,0 +1,60 @@ +// Nested HOCs: `const X = memo(forwardRef((p, r) => { ... }))`. 
+// +// Tree-sitter shape: +// variable_declarator +// value: call_expression (memo(...)) +// arguments: arguments +// call_expression (forwardRef(arrow)) ← FIRST positional arg +// arguments: arguments +// arrow_function ← deepest behaviour-arrow +// +// Our outermost lexical_declaration → call_expression → arguments → +// arrow_function pattern requires the arrow to be a direct child of the +// outermost call's `arguments` — which it is NOT here (it's two levels +// deeper). So the OUTER pattern misses. But the same pattern, anchored +// on the INNER call (`forwardRef(arrow)`), wouldn't match either: the +// inner call_expression is a child of `arguments`, not of a +// `variable_declarator`. +// +// So the question is: which Function name does `arrow_function`'s +// `tsExtractFunctionName` resolve to in the legacy DAG? +// +// arrow.parent = arguments (forwardRef's args) +// arguments.parent = call_expression (forwardRef(...)) +// call_expr.parent = arguments (memo's args) ← NOT variable_declarator! +// +// The legacy walker requires `call_expression.parent === variable_declarator`, +// so it returns null → arrow stays anonymous. Same for the registry-primary +// query (the lexical_declaration shape doesn't match because the arrow +// isn't a direct grandchild of the outer call). +// +// CURRENT STATE (post-fix): the deepest arrow is anonymous, calls inside +// fall back to the next named ancestor. There IS no named ancestor here +// (the outer `forwardRef` and `memo` calls don't have variable_declarator +// `value:` containing them — they ARE that value). So calls walk up to +// File. The OUTER `Wrapped` const is named `Variable:Wrapped`, not a +// Function — because no Function pattern matches the OUTER shape either +// (the immediate arg of `memo(...)` is a `call_expression`, not an arrow). +// +// This file documents a known limitation: deep-nested HOCs (memo of +// forwardRef of arrow) lose attribution at depth ≥ 2. 
The test below +// asserts the ABSENCE of phantom edges (we don't make this worse) rather +// than positive resolution. A future fix would require recursing into +// arguments-containing-call_expression on the value side, which has its +// own trade-offs (which level wins the name?). + +import { helper } from './helpers'; + +const memo = (render: (props: P) => unknown) => render; +const forwardRef = (render: (props: P, ref: T | null) => unknown) => render; + +interface DeepProps { + label: string; +} + +export const Wrapped = memo( + forwardRef(({ label }, _ref) => { + helper(label); + return null; + }), +); diff --git a/gitnexus/test/fixtures/lang-resolution/typescript-hoc-wrapped/src/observer.tsx b/gitnexus/test/fixtures/lang-resolution/typescript-hoc-wrapped/src/observer.tsx new file mode 100644 index 0000000000..cfdfaf7296 --- /dev/null +++ b/gitnexus/test/fixtures/lang-resolution/typescript-hoc-wrapped/src/observer.tsx @@ -0,0 +1,16 @@ +// MobX `observer` HOC: `const Card = observer((props) => { ... })`. Same +// shape as `memo` but the wrapper is named `observer`. Used heavily in +// MobX-based React codebases. + +import { helper } from './helpers'; + +const observer = (render: (props: P) => unknown) => render; + +interface ItemProps { + label: string; +} + +export const Item = observer(({ label }) => { + helper(label); + return null; +}); diff --git a/gitnexus/test/fixtures/lang-resolution/typescript-hoc-wrapped/src/use-callback.ts b/gitnexus/test/fixtures/lang-resolution/typescript-hoc-wrapped/src/use-callback.ts new file mode 100644 index 0000000000..8f690581c2 --- /dev/null +++ b/gitnexus/test/fixtures/lang-resolution/typescript-hoc-wrapped/src/use-callback.ts @@ -0,0 +1,16 @@ +// React Hook callback bound to a const — `useCallback((...) => ..., [deps])`. +// Calls inside the callback body should attribute to `handleClick` / +// `handleSubmit`, the names the developer wrote on the LHS. 
+ +import { doStuff, fmt } from './helpers'; + +const useCallback = unknown>(fn: F, _deps: unknown[]): F => fn; + +export const handleClick = useCallback(() => { + const n = doStuff(1); + fmt(n); +}, []); + +export const handleSubmit = useCallback((value: number) => { + doStuff(value); +}, []); diff --git a/gitnexus/test/fixtures/lang-resolution/typescript-hoc-wrapped/src/use-memo.ts b/gitnexus/test/fixtures/lang-resolution/typescript-hoc-wrapped/src/use-memo.ts new file mode 100644 index 0000000000..ed55ac96da --- /dev/null +++ b/gitnexus/test/fixtures/lang-resolution/typescript-hoc-wrapped/src/use-memo.ts @@ -0,0 +1,12 @@ +// `useMemo((...) => value, [deps])` — same shape as useCallback but the +// arrow returns a value instead of a callable. The shape we care about is +// `const X = useMemo(() => { ... }, [...])` and the test is symmetric: +// calls inside should attribute to `computed`. + +import { doStuff } from './helpers'; + +const useMemo = (factory: () => T, _deps: unknown[]): T => factory(); + +export const computed = useMemo(() => { + return doStuff(42); +}, []); diff --git a/gitnexus/test/integration/resolvers/typescript-hoc-wrapped.test.ts b/gitnexus/test/integration/resolvers/typescript-hoc-wrapped.test.ts new file mode 100644 index 0000000000..e39971f804 --- /dev/null +++ b/gitnexus/test/integration/resolvers/typescript-hoc-wrapped.test.ts @@ -0,0 +1,306 @@ +/** + * TypeScript: CALLS edges from inside HOC-wrapped variable declarations. + * + * Follow-up to issue #1166 / PR #1175. After fixing HOF callbacks (Promise + * fan-out, queryFn pair-arrows, Zustand actions) and JSX-as-call, the + * residual 0%-capture pattern in real React UI codebases (Sourcerer-fe + * audit) was the HOC-wrapped declaration: + * + * const Button = React.forwardRef((props, ref) => { ... }) + * const Card = memo(({ ... }) => { ... }) + * const handleClick = useCallback(() => { ... }, []) + * const computed = useMemo(() => { ... }, []) + * const Item = observer((props) => { ... 
}) + * const debouncedSearch = debounce((query) => { ... }, 250) + * + * All share the AST shape `lexical_declaration > variable_declarator > + * call_expression > arguments > arrow_function`. Pre-fix, none matched + * any `@declaration.function` pattern (the registry-primary `query.ts` + * only knew about `variable_declarator > arrow_function` directly), and + * the legacy DAG's `tsExtractFunctionName` only walked `pair` / + * `variable_declarator` parents — `arguments` parents fell through with + * `funcName = null`. + * + * Result: every shadcn/Radix component, every memoised React component, + * every useCallback/useMemo hook callback registered as anonymous, and + * calls inside their bodies attributed to the file. Sourcerer-fe alone + * had ~296 such declarations affected (57 forwardRef + 21 memo + 161 + * useCallback + 57 useMemo). + * + * Fix: + * - 4 new tree-sitter patterns in `typescript/query.ts` (registry). + * - 6 mirrored patterns per grammar (`TYPESCRIPT_QUERIES` and + * `JAVASCRIPT_QUERIES`) in `tree-sitter-queries.ts` (legacy): the + * arrow / function-expression pair for the `lexical_declaration`, + * `export_statement`-wrapped, and `var` forms. + * - `tsExtractFunctionName` extended with a third branch that walks + * `arguments → call_expression → variable_declarator`. + * + * Each test fixture below isolates one wrapper shape with the call + * target defined in `helpers.ts` (cross-file resolution). + */ +import { describe, it, expect, beforeAll } from 'vitest'; +import path from 'path'; +import { + FIXTURES, + getRelationships, + edgeSet, + getNodesByLabel, + runPipelineFromRepo, + type PipelineResult, +} from './helpers.js'; + +describe('TypeScript HOC-wrapped variable declarations', () => { + let result: PipelineResult; + + beforeAll(async () => { + result = await runPipelineFromRepo(path.join(FIXTURES, 'typescript-hoc-wrapped'), () => {}); + }, 60000); + + // ───────────────────────────────────────────────────────────────── + // Positive: each HOC-wrapped const becomes a named Function whose + // body's calls attribute to it (not File). 
+ // ───────────────────────────────────────────────────────────────── + + it('React.forwardRef: Button → cn and Button → helper (member-expression callee)', () => { + // `const Button = React.forwardRef<...>(({ ... }, _ref) => { ... })`. + // The wrapping callee is a `member_expression` (`React.forwardRef`), + // exercising the "any callee" leniency in the new patterns. + const calls = getRelationships(result, 'CALLS').filter( + (c) => c.sourceFilePath === 'src/forward-ref.tsx' && c.source === 'Button', + ); + const targets = new Set(calls.map((c) => c.target)); + expect(targets, 'Button must call cn').toContain('cn'); + expect(targets, 'Button must call helper').toContain('helper'); + }); + + it('memo (bare identifier): Card → cn and Card → helper', () => { + // `const Card = memo<...>((...) => { ... })`. Bare-identifier callee + // — the named-import form (`import { memo } from 'react'`). + const calls = getRelationships(result, 'CALLS').filter( + (c) => c.sourceFilePath === 'src/memo.tsx' && c.source === 'Card', + ); + const targets = new Set(calls.map((c) => c.target)); + expect(targets, 'Card must call cn').toContain('cn'); + expect(targets, 'Card must call helper').toContain('helper'); + }); + + it('useCallback: handleClick → doStuff and handleClick → fmt', () => { + // `const handleClick = useCallback(() => { ... }, [])`. + const calls = getRelationships(result, 'CALLS').filter( + (c) => c.sourceFilePath === 'src/use-callback.ts' && c.source === 'handleClick', + ); + const targets = new Set(calls.map((c) => c.target)); + expect(targets).toContain('doStuff'); + expect(targets).toContain('fmt'); + }); + + it('useCallback: handleSubmit → doStuff (sibling const, separate caller)', () => { + // Two useCallback consts in the same file — each must be its own + // caller anchor (no leakage to the first sibling, the analogue of + // the multi-action-store regression in PR #1175). 
+ const calls = getRelationships(result, 'CALLS').filter( + (c) => c.sourceFilePath === 'src/use-callback.ts' && c.target === 'doStuff', + ); + const fromHandleSubmit = calls.filter((c) => c.source === 'handleSubmit'); + expect(fromHandleSubmit.length, 'handleSubmit must call doStuff').toBeGreaterThan(0); + }); + + it('useMemo: computed → doStuff (returns-a-value variant)', () => { + // `const computed = useMemo(() => { return doStuff(42); }, [])`. + // The arrow's body has a `return` statement — irrelevant to call + // attribution but worth exercising as a separate fixture. + const calls = getRelationships(result, 'CALLS').filter( + (c) => c.sourceFilePath === 'src/use-memo.ts' && c.source === 'computed', + ); + expect(edgeSet(calls)).toContain('computed → doStuff'); + }); + + it('observer (MobX): Item → helper', () => { + // Same shape as memo, different wrapper name. Exercises the "any + // callee" leniency for non-React HOCs. + const calls = getRelationships(result, 'CALLS').filter( + (c) => c.sourceFilePath === 'src/observer.tsx' && c.source === 'Item', + ); + expect(edgeSet(calls)).toContain('Item → helper'); + }); + + it('debounce: debouncedSearch → doStuff (utility-HOC form)', () => { + // `const debouncedSearch = debounce((query) => { doStuff(...); }, 250)`. + // Pattern is identical to React HOCs — the wrapper just happens to + // be a debouncer, so this confirms the fix is wrapper-agnostic. + const calls = getRelationships(result, 'CALLS').filter( + (c) => c.sourceFilePath === 'src/debounce.ts' && c.source === 'debouncedSearch', + ); + expect(edgeSet(calls)).toContain('debouncedSearch → doStuff'); + }); + + // ───────────────────────────────────────────────────────────────── + // Function-node assertions: each HOC-wrapped const must register as + // a Function (not just a Variable). Without this, gitnexus_context / + // gitnexus_impact see no symbol to walk from. 
+ // ───────────────────────────────────────────────────────────────── + + it('each HOC-wrapped const registers as a Function node', () => { + const functions = new Set(getNodesByLabel(result, 'Function')); + // Every single-level HOC-wrapped const in the fixtures must have a + // Function entry (the nested `Wrapped` const in `nested.tsx` is the + // documented exception and is not asserted here). + expect(functions, 'forwardRef-wrapped Button').toContain('Button'); + expect(functions, 'memo-wrapped Card').toContain('Card'); + expect(functions, 'useCallback-bound handleClick').toContain('handleClick'); + expect(functions, 'useCallback-bound handleSubmit').toContain('handleSubmit'); + expect(functions, 'useMemo-bound computed').toContain('computed'); + expect(functions, 'observer-wrapped Item').toContain('Item'); + expect(functions, 'debounce-wrapped debouncedSearch').toContain('debouncedSearch'); + }); + + // ───────────────────────────────────────────────────────────────── + // Negative: bare statement-level HOC calls (not bound to a const) + // must NOT produce phantom Function nodes named after some implicit + // anchor, and their inner calls must NOT attribute to a wrong source. + // ───────────────────────────────────────────────────────────────── + + it('bare statement-level HOC calls do not produce phantom Functions', () => { + // `negative-bare-call.ts` has three unbound HOC calls + // (useCallback / memo / wrap(memo(...))). None should become a + // named Function. The legitimate Function-eligible defs in the file + // are the local stand-ins `useCallback`, `memo`, and `wrap` (plain + // `const f = (...) => ...` arrows); none of their bodies contain a + // call, so none of them may show up as a CALLS source. + const fileFns = getRelationships(result, 'CALLS') + .filter((c) => c.sourceFilePath === 'src/negative-bare-call.ts') + .map((c) => c.source); + const sources = new Set(fileFns); + // Only the file itself should appear as a source: the stand-in + // helpers make no calls, and the unbound arrows have no name. + // Assert the phantom-prone names are absent. 
+ expect(sources, 'no phantom useCallback as caller').not.toContain('useCallback'); + expect(sources, 'no phantom memo as caller').not.toContain('memo'); + // doStuff calls inside the bare HOCs fall back to File-level + // attribution (the arrow has no caller anchor). + const fromFile = getRelationships(result, 'CALLS').filter( + (c) => + c.sourceFilePath === 'src/negative-bare-call.ts' && + c.sourceLabel === 'File' && + c.target === 'doStuff', + ); + expect(fromFile.length, 'unbound HOC inner calls source from File').toBeGreaterThan(0); + }); + + it('no phantom self-loops in HOC-wrapped fixtures', () => { + // The Zustand-style regression from PR #1175 (CallerAnchorLabel + // exclusion of Variable defs) must continue to hold here. The + // outer module-level call (e.g., `React.forwardRef(...)`, + // `memo(...)`) should source from File, not from the const it + // declares. If the new patterns inadvertently re-promoted Variable + // defs to caller anchors, we'd see edges like `Button → forwardRef` + // (sourceLabel=Function). Filter to call edges where the SOURCE is + // the const we just declared — and check that the const's target + // set never includes the wrapper itself. 
+ const buttonCalls = getRelationships(result, 'CALLS').filter( + (c) => c.source === 'Button' && c.target === 'forwardRef', + ); + expect(buttonCalls, 'Button must NOT call forwardRef (phantom self-loop)').toEqual([]); + const cardCalls = getRelationships(result, 'CALLS').filter( + (c) => c.source === 'Card' && c.target === 'memo', + ); + expect(cardCalls, 'Card must NOT call memo (phantom self-loop)').toEqual([]); + const handleClickCalls = getRelationships(result, 'CALLS').filter( + (c) => c.source === 'handleClick' && c.target === 'useCallback', + ); + expect(handleClickCalls, 'handleClick must NOT call useCallback (phantom self-loop)').toEqual( + [], + ); + }); + + // ───────────────────────────────────────────────────────────────── + // Cross-pollination: multiple HOC-wrapped consts in the same file + // must each be their own caller anchor (the multi-pair regression + // analogue, restated for HOC patterns). + // ───────────────────────────────────────────────────────────────── + + it('handleClick and handleSubmit do not cross-attribute (no first-sibling-wins)', () => { + // `use-callback.ts` has two useCallback-bound consts. Calls inside + // `handleSubmit` (`doStuff(value)`) must NOT appear as + // `handleClick → doStuff`. The fix in PR #1175 + // (`isCallerAnchorLabel` + per-arrow ownedDefs via inner anchor + // discipline) plus the new patterns here must compose: each + // useCallback callback gets its own arrow scope, each scope has + // its own def in `ownedDefs`, and `resolveCallerGraphId` walks + // up to the right one. 
+ const calls = getRelationships(result, 'CALLS').filter( + (c) => c.sourceFilePath === 'src/use-callback.ts' && c.target === 'doStuff', + ); + const fromHandleClick = calls.filter((c) => c.source === 'handleClick'); + const fromHandleSubmit = calls.filter((c) => c.source === 'handleSubmit'); + expect(fromHandleClick.length, 'handleClick must call doStuff').toBeGreaterThan(0); + expect(fromHandleSubmit.length, 'handleSubmit must call doStuff').toBeGreaterThan(0); + // Both consts call doStuff, but each must source from its OWN name. + // We assert there's no "spilled" edge that names the wrong const. + const stray = calls.filter((c) => c.source !== 'handleClick' && c.source !== 'handleSubmit'); + // Allow File-rooted edges as a fallback if the fix regresses; we + // explicitly disallow Function-rooted strays from siblings/ + // imported names (e.g., useCallback itself). + const functionStrays = stray.filter((c) => c.sourceLabel === 'Function'); + expect(functionStrays, 'no other Function sources for doStuff calls').toEqual([]); + }); + + // ───────────────────────────────────────────────────────────────── + // Documented limitation: deeply-nested HOCs (`memo(forwardRef(...))`). + // + // The fixture `nested.tsx` documents that the OUTER pattern requires + // the arrow to be a direct grandchild of the const's `call_expression` + // value — when the arrow is wrapped in another `call_expression` + // (`memo(forwardRef(arrow))`), the pattern misses and the deepest + // arrow stays anonymous. The const itself (`Wrapped`) is also NOT a + // Function: the immediate arg of the outer `memo(...)` call is a + // `call_expression` (`forwardRef(...)`), not an arrow / fn-expression, + // so no `@declaration.function` pattern matches the outer shape either. + // + // We assert ABSENCE here (rather than positive resolution) so that any + // future change to the patterns or to `tsExtractFunctionName` that + // accidentally starts matching nested HOCs surfaces immediately. 
A + // proper fix for nested HOCs would require deciding which level wins + // the name (outer wrapper? deepest behaviour-arrow?) and is out of + // scope for this PR. + // ───────────────────────────────────────────────────────────────── + + it('nested HOCs (memo(forwardRef(...))): Wrapped is NOT a Function (known limitation)', () => { + // The outer const `Wrapped` matches NO `@declaration.function` pattern + // because the outer call's first argument is itself a call_expression, + // not an arrow_function / function_expression. It should be picked up + // as a Variable by `@definition.const` (or skipped entirely) — but it + // must NOT appear as a Function node. + const functions = new Set(getNodesByLabel(result, 'Function')); + expect(functions, 'Wrapped (nested HOC) must NOT be a Function node').not.toContain('Wrapped'); + }); + + it('nested HOCs: helper() call inside the deepest arrow does NOT source from Function:Wrapped', () => { + // Calls inside the doubly-wrapped arrow have no named ancestor (deepest + // arrow is anonymous because `call_expression.parent` is `arguments`, + // not `variable_declarator`; the outer `memo` and `forwardRef` calls + // are themselves anonymous expressions). So calls in `nested.tsx` must + // either source from File or not be attributed to `Wrapped` at all. + // + // The negative assertion is what matters: a future change that wrongly + // attributes the deepest arrow to its outer const would silently corrupt + // impact analysis for any real code that nests HOCs (e.g., + // `memo(forwardRef(...))` UI primitives). 
+ const helperCalls = getRelationships(result, 'CALLS').filter( + (c) => c.sourceFilePath === 'src/nested.tsx' && c.target === 'helper', + ); + expect(helperCalls.length, 'helper call must still be captured').toBeGreaterThan(0); + + const fromWrapped = helperCalls.filter((c) => c.source === 'Wrapped'); + expect( + fromWrapped, + 'helper call must NOT be attributed to Function:Wrapped (deepest arrow stays anonymous)', + ).toEqual([]); + + // Defensive: there should be no Function-sourced edges from anywhere in + // `nested.tsx` (everything is anonymous or module-level). + const allNestedCalls = getRelationships(result, 'CALLS').filter( + (c) => c.sourceFilePath === 'src/nested.tsx', + ); + const functionSourced = allNestedCalls.filter((c) => c.sourceLabel === 'Function'); + expect( + functionSourced, + 'no Function-sourced CALLS from nested.tsx (all anchors should be File)', + ).toEqual([]); + }); +}); diff --git a/gitnexus/test/unit/call-attribution-issue-1166.test.ts b/gitnexus/test/unit/call-attribution-issue-1166.test.ts index 5dfbb06ad7..b7cc09de04 100644 --- a/gitnexus/test/unit/call-attribution-issue-1166.test.ts +++ b/gitnexus/test/unit/call-attribution-issue-1166.test.ts @@ -393,3 +393,195 @@ describe('issue #1166 — regression guards', () => { expect(findCall(sites, 'persist')?.attributedTo).toBeNull(); }); }); + +// ─── HOC-wrapped variable declarations (issue #1166 follow-up) ────────────── + +describe('issue #1166 follow-up — HOC-wrapped variable declarations', () => { + // The third `tsExtractFunctionName` branch: `arguments → call_expression → + // variable_declarator`. Covers React.forwardRef / memo / useCallback / + // useMemo / observer / debounce — every HOC factory whose result is bound + // to a const. Without this branch, the wrapped arrow had no name and calls + // inside attributed to the file. See `languages/typescript.ts` for the + // resolution logic and `tree-sitter-queries.ts` for the @definition.function + // capture mirror. 
+ + it('attributes call inside `const X = forwardRef((p, r) => fn())` to "X"', () => { + // Bare-identifier callee form. The arrow's parent is `arguments`; the + // walker climbs to `call_expression` then to `variable_declarator` and + // returns the const's name. + const sites = collectCallAttributions(` + const Button = forwardRef((props, ref) => { + return doSomething(props); + }); + `); + const call = findCall(sites, 'doSomething'); + expect(call, 'doSomething call should be captured').toBeDefined(); + expect(call!.attributedTo).toBe('Button'); + }); + + it('attributes call inside `const X = React.forwardRef((p, r) => fn())` to "X" (member-expression callee)', () => { + // Member-expression callee form (`React.forwardRef`). The `arguments`- + // parent walk doesn't constrain the function field, so this resolves + // identically to the bare-identifier form. + const sites = collectCallAttributions(` + const Card = React.forwardRef((props, ref) => { + return doStuff(props); + }); + `); + expect(findCall(sites, 'doStuff')?.attributedTo).toBe('Card'); + }); + + it('attributes call inside `const X = useCallback(() => fn(), [])` to "X"', () => { + // useCallback / useMemo are the most common HOC-wrapped form in real + // React codebases. The trailing `[deps]` array doesn't affect the walk + // — the arrow is still the first `arguments` child. 
+ const sites = collectCallAttributions(` + const handleClick = useCallback(() => { + sendEvent('click'); + }, []); + `); + expect(findCall(sites, 'sendEvent')?.attributedTo).toBe('handleClick'); + }); + + it('attributes call inside `const X = memo((props) => fn())` to "X"', () => { + const sites = collectCallAttributions(` + const Item = memo((props) => { + return render(props); + }); + `); + expect(findCall(sites, 'render')?.attributedTo).toBe('Item'); + }); + + it('does NOT name the HOC callback after its sibling (no first-sibling-wins regression)', () => { + // Two HOC-wrapped consts in the same module — each must take its own + // name, not bleed into the first declared. Mirrors the multi-action + // Zustand regression from PR #1175 review applied to HOC patterns. + const sites = collectCallAttributions(` + const handleClick = useCallback(() => { + doA(); + }, []); + + const handleSubmit = useCallback((value) => { + doB(value); + }, []); + `); + expect(findCall(sites, 'doA')?.attributedTo).toBe('handleClick'); + expect(findCall(sites, 'doB')?.attributedTo).toBe('handleSubmit'); + }); + + it('does NOT name a bare statement-level HOC call (unbound result)', () => { + // `useCallback(() => doStuff(), [])` at statement level (result thrown + // away). The walk climbs `arguments → call_expression → expression_statement`, + // which is NOT `variable_declarator`, so the branch returns null and the + // arrow stays anonymous. Calls inside attribute to no enclosing function. + const sites = collectCallAttributions(` + useCallback(() => { + doSomething(); + }, []); + `); + // The `useCallback` call itself is module-level → null. The `doSomething` + // call is inside an unnamed arrow → null. Both must NOT borrow a name. 
+ expect(findCall(sites, 'doSomething')?.attributedTo).toBeNull(); + expect(findCall(sites, 'useCallback')?.attributedTo).toBeNull(); + }); + + // ─── Definition-phase: HOC-wrapped consts must register as @definition.function ─── + + function definedFunctionNames(code: string): string[] { + const { parser, query } = makeParserAndQuery(); + const tree = parser.parse(code); + const out: string[] = []; + for (const match of query.matches(tree.rootNode)) { + let isFn = false; + let name: string | undefined; + for (const c of match.captures) { + if (c.name === 'definition.function') isFn = true; + if (c.name === 'name') name = c.node.text; + } + if (isFn && name) out.push(name); + } + return out; + } + + it('captures `const X = HOC((args) => ...)` as @definition.function in TYPESCRIPT_QUERIES', () => { + // The query mirror of the resolver fix. Without these patterns, + // `Function:X` would never enter the registry on the legacy DAG and + // any CALLS edge claiming `Function:X` as source would dangle. + const names = definedFunctionNames(` + const Button = forwardRef((p, r) => render(p)); + const Card = React.memo((p) => layout(p)); + const handleClick = useCallback(() => doStuff(), []); + const computed = useMemo(() => result(), []); + const debounced = debounce((q) => search(q), 250); + export const Exported = forwardRef((p, r) => render(p)); + `); + expect(names).toContain('Button'); + expect(names).toContain('Card'); + expect(names).toContain('handleClick'); + expect(names).toContain('computed'); + expect(names).toContain('debounced'); + expect(names).toContain('Exported'); + }); + + it('captures `const X = HOC(function (args) { ... })` (function-expression form)', () => { + // Pre-arrow legacy code uses `function () { ... }` instead of `() => ...`. + // The mirror pattern uses `(function_expression)` and must trigger. 
+ const names = definedFunctionNames(` + const Legacy = wrap(function (x) { return doStuff(x); }); + `); + expect(names).toContain('Legacy'); + }); + + // ─── Documented trade-offs: pin behaviour so future readers aren't surprised ─── + + it('accepted false-positive: `const x = arr.find((y) => p(y))` attributes p to "x"', () => { + // The HOC-wrapped pattern (arrow's parent is `arguments`, grandparent is + // `call_expression`, great-grandparent is `variable_declarator`) is broad + // enough to also match chained array-method callbacks like Array#find / + // Array#some / Array#every — where `x` is a *value* (the result of the + // method), not a function. The arrow inside borrows the const's name and + // its inner calls attribute to it. + // + // This is documented in `languages/typescript/query.ts` and on the HOC branch of + // `tsExtractFunctionName` in `languages/typescript.ts` as an accepted trade-off because: + // 1. `x` is never invoked as a function, so no incoming CALLS edge is + // ever created — the spurious `Function:x` is graph-isolated on the + // incoming side. + // 2. The outgoing edge `Function:x → predicate` is a minor mis-attribution + // (the call IS happening, just not from a "function called x"), and + // the alternative — narrowing the pattern to known HOC names — would + // require maintaining a wrapper allowlist that breaks for every new + // HOC factory. + // + // We pin this here so a future change that tightens the pattern is forced + // to update this test and re-evaluate the trade-off explicitly. + const sites = collectCallAttributions(` + const found = items.find((item) => predicate(item)); + `); + expect(findCall(sites, 'predicate')?.attributedTo).toBe('found'); + }); + + it('multi-arrow argument: both arrows resolve to the same const name (legacy DAG path)', () => { + // `const x = call(() => first(), () => second())` — both arrows share the + // same `arguments → call_expression → variable_declarator` ancestor chain, + // so `tsExtractFunctionName`'s third branch returns "x" for each.
Calls + // inside both arrows therefore attribute to "x" on the legacy DAG path. + // + // In the registry-primary pipeline the two arrows produce two candidate + // `Function:x` defs that the qualified-name dedup collapses into one + // (last-write-wins by symbol range). The end result is the same set of + // CALLS edges sourced from "x"; only the def's range changes. We pin the + // legacy attribution here because that's what the unit harness exercises. + // + // The pattern is rare in real code (few APIs take two callbacks both + // worth tracking), but it does exist in some `register(setup, teardown)` + // / `promise.then(onFulfilled, onRejected)` shaped helpers. If a + // future change drops one of the calls or attributes it elsewhere, we + // want to know. + const sites = collectCallAttributions(` + const x = call(() => first(), () => second()); + `); + expect(findCall(sites, 'first')?.attributedTo).toBe('x'); + expect(findCall(sites, 'second')?.attributedTo).toBe('x'); + }); +});