diff --git a/.github/workflows/ci-scope-parity.yml b/.github/workflows/ci-scope-parity.yml deleted file mode 100644 index 039438a15c..0000000000 --- a/.github/workflows/ci-scope-parity.yml +++ /dev/null @@ -1,87 +0,0 @@ -name: Scope Resolution Parity - -# Reusable workflow — called from ci.yml. Does NOT declare concurrency; -# it inherits the caller's concurrency group per the convention documented -# in CONTRIBUTING.md → "GitHub Actions — Concurrency Convention". -# -# ── Purpose (RFC #909 Ring 3, §6.4 "Observability gates") ────────────── -# For every language in `MIGRATED_LANGUAGES` (exported from -# `gitnexus/src/core/ingestion/registry-primary-flag.ts`), run the -# resolver integration test at `test/integration/resolvers/.test.ts` -# TWICE on every PR: -# -# 1. `REGISTRY_PRIMARY_=0` — legacy DAG path (guarantees we haven't -# broken the old path while migrating). Known legacy gaps may be skipped -# through the resolver test helper's expected-failure list. -# 2. `REGISTRY_PRIMARY_=1` — registry-primary path (guarantees the -# new path carries the same behavior — the parity gate). -# -# BOTH must pass. The source of truth is the TypeScript constant — adding -# a language to that `Set` is the ONLY contributor action; CI auto- -# discovers it, runs parity, and the language's default production path -# flips to registry-primary in the same change. -# -# When the set is empty (e.g. mid-Ring-3 for every language), the parity -# matrix is skipped and the workflow reports success — no-op until a -# language is explicitly claimed migrated. -# -# ── Consolidation (chore/vitest-speed-strategy) ──────────────────────── -# Previously each language was a separate GitHub Actions matrix job, -# meaning N languages × 1 checkout+install+build per shard. The build -# cost dwarfed the test cost (~5 min setup for ~15 sec test execution). 
-# -# Now a single job runs `scripts/run-parity.ts` which loops through all -# migrated languages sequentially (2 vitest invocations per language: -# legacy + registry-primary). All failures are collected and reported -# at the end (equivalent to the old fail-fast: false behavior). -# -# Adding a new language to MIGRATED_LANGUAGES still requires no workflow -# edit — the script auto-discovers the set at runtime. - -on: - workflow_call: - -permissions: - contents: read - -jobs: - discover: - name: Discover migrated languages - runs-on: ubuntu-latest - timeout-minutes: 5 - outputs: - has-any: ${{ steps.read.outputs.has-any }} - steps: - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - - uses: ./.github/actions/setup-gitnexus - - - name: Extract MIGRATED_LANGUAGES from registry-primary-flag.ts - id: read - shell: bash - working-directory: gitnexus - run: | - set -euo pipefail - LANGS=$(npx tsx scripts/ci-list-migrated-languages.ts) - COUNT=$(printf '%s' "$LANGS" | jq 'length') - HAS_ANY="false" - if [[ "$COUNT" -gt 0 ]]; then HAS_ANY="true"; fi - echo "has-any=$HAS_ANY" >> "$GITHUB_OUTPUT" - echo "Discovered $COUNT migrated language(s): $LANGS" - echo "Parity will run: $HAS_ANY" - - parity: - name: scope-resolution parity - needs: discover - if: needs.discover.outputs.has-any == 'true' - runs-on: ubuntu-latest - timeout-minutes: 30 - steps: - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - - uses: ./.github/actions/setup-gitnexus - with: - build: 'true' - - - name: Run parity for all migrated languages - shell: bash - working-directory: gitnexus - run: npx tsx scripts/run-parity.ts diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 32777fcfdc..9ea361fb6e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -27,9 +27,8 @@ concurrency: # Each concern lives in its own workflow file for maintainability: # ci-quality.yml — typecheck (tsc --noEmit) # ci-tests.yml — unit + 
integration tests with coverage + cross-platform +# (includes the scope-resolution resolver tests) # ci-e2e.yml — E2E tests (only when gitnexus-web/ changes) -# ci-scope-parity.yml — RFC #909 Ring 3 parity gate: legacy DAG + registry-primary -# both pass, per migrated language in the JSON registry # # Shared setup is DRY via .github/actions/setup-gitnexus composite action. @@ -49,11 +48,6 @@ jobs: permissions: contents: read - scope-parity: - uses: ./.github/workflows/ci-scope-parity.yml - permissions: - contents: read - # ── Save PR metadata for the reporting workflow ───────────────── # The ci-report.yml workflow (triggered by workflow_run) needs the # PR number and job results to post a comment. We save them as an @@ -62,7 +56,7 @@ jobs: save-pr-meta: name: Save PR Metadata if: always() && github.event_name == 'pull_request' - needs: [quality, tests, e2e, scope-parity] + needs: [quality, tests, e2e] runs-on: ubuntu-latest timeout-minutes: 5 steps: @@ -73,14 +67,12 @@ jobs: QUALITY: ${{ needs.quality.result }} TESTS: ${{ needs.tests.result }} E2E: ${{ needs.e2e.result }} - SCOPE_PARITY: ${{ needs.scope-parity.result }} run: | mkdir -p pr-meta echo "$PR_NUMBER" > pr-meta/pr_number echo "$QUALITY" > pr-meta/quality_result echo "$TESTS" > pr-meta/tests_result echo "$E2E" > pr-meta/e2e_result - echo "$SCOPE_PARITY" > pr-meta/scope_parity_result # TODO(post-merge): remove backward-compat copies once ci-report.yml # on main reads underscore names. # Backward-compat: ci-report.yml on main still reads hyphenated @@ -103,7 +95,7 @@ jobs: # Single required check for branch protection. ci-status: name: CI Gate - needs: [quality, tests, e2e, scope-parity] + needs: [quality, tests, e2e] if: always() runs-on: ubuntu-latest timeout-minutes: 5 @@ -117,14 +109,15 @@ jobs: # reusable workflow, so `needs.tests.result` below blocks the merge # on an ABI mismatch. (`jobs..result` cannot be exposed as a # workflow_call output, so the gate is enforced transitively here.) 
+ # The scope-resolution resolver tests also run inside the `tests` + # workflow (RING4-1 #942 removed the separate scope-parity gate), + # so a resolver regression makes TESTS != success and blocks here. TESTS: ${{ needs.tests.result }} E2E: ${{ needs.e2e.result }} - SCOPE_PARITY: ${{ needs.scope-parity.result }} run: | echo "Quality: $QUALITY" echo "Tests: $TESTS" echo "E2E: $E2E" - echo "Scope parity: $SCOPE_PARITY" # A failed `abi-assert` job (#1922) inside the tests reusable # workflow makes TESTS != success, so this clause also blocks the # merge on a tree-sitter ABI mismatch. @@ -137,14 +130,3 @@ jobs: echo "::error::E2E job failed" exit 1 fi - # scope-parity is a reusable workflow. With an empty migrated- - # languages list, its parity matrix is skipped and the outer - # workflow still reports `success`. If any entry's legacy-DAG or - # registry-primary run fails, the workflow reports `failure`. - # Accept only `success`; `skipped` would mean the entire - # discover job was skipped too (upstream failure), which should - # still block. - if [[ "$SCOPE_PARITY" != "success" ]]; then - echo "::error::Scope-resolution parity gate failed (RFC #909 Ring 3)" - exit 1 - fi diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 122130310f..a5265d073e 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -257,7 +257,7 @@ jobs: # ── Phase 3: reusable CI gate ────────────────────────────────────────────── # Runs for both rc (when guard says go) and stable. No `secrets:` passed — # ci.yml and its entire reusable-workflow chain (ci-quality, ci-tests, - # ci-e2e, ci-scope-parity, ci-report) reference zero `secrets.*` values; + # ci-e2e, ci-report) reference zero `secrets.*` values; # passing any would be unused surface. GITHUB_TOKEN is implicit. 
ci: needs: [route, rc-guard] diff --git a/AGENTS.md b/AGENTS.md index 286fbc14f8..1e31004e42 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -39,8 +39,7 @@ Commands and gotchas live under **Repo reference** below and in **[CONTRIBUTING. ## Reference docs - **[ARCHITECTURE.md](ARCHITECTURE.md)**, **[CONTRIBUTING.md](CONTRIBUTING.md)**, **[GUARDRAILS.md](GUARDRAILS.md)** -- **Call-resolution DAG (legacy path):** See ARCHITECTURE.md § Call-Resolution DAG. Typed 6-stage DAG inside the `parse` phase; language-specific behavior behind `inferImplicitReceiver` / `selectDispatch` hooks on `LanguageProvider`. Shared code in `gitnexus/src/core/ingestion/` must not name languages. Types: `gitnexus/src/core/ingestion/call-types.ts`. -- **Scope-resolution pipeline (RFC #909 Ring 3):** See ARCHITECTURE.md § Scope-Resolution Pipeline. Replaces the legacy DAG for languages in `MIGRATED_LANGUAGES` (see `registry-primary-flag.ts`). A language plugs in by implementing `ScopeResolver` (`scope-resolution/contract/scope-resolver.ts`) and registering it in `SCOPE_RESOLVERS`. CI parity gate runs BOTH paths per migrated language on every PR. +- **Call & inheritance resolution (RFC #909 Ring 3):** See ARCHITECTURE.md § Scope-Resolution Pipeline. All languages resolve calls and inheritance through the scope-resolution pipeline (`Registry.lookup`, `preEmitInheritanceEdges`, `emitHeritageEdges`, `buildMro` → `MethodDispatchIndex`). **Shared code in `gitnexus/src/core/ingestion/` must not name languages** — plug language behavior in via `LanguageProvider` / `ScopeResolver` hooks. A language plugs in by implementing `ScopeResolver` (`scope-resolution/contract/scope-resolver.ts`) and registering it in `SCOPE_RESOLVERS`. (The legacy call-resolution DAG + `@heritage` capture path were removed in RING4-1 #942.) - **Cursor:** `.cursor/index.mdc` (always-on); `.cursor/rules/*.mdc` (glob-scoped). Legacy `.cursorrules` deprecated. 
- **GitNexus:** skills in `.claude/skills/gitnexus/`; MCP rules in `gitnexus:start` block below. diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index d39fef3a16..f65c175f9c 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -65,7 +65,7 @@ Monorepo: **CLI/MCP** (`gitnexus/`) + **browser UI** (`gitnexus-web/`). | Wiki generation | `src/core/wiki/` | | Language support | `src/core/ingestion/languages/` + `tree-sitter-queries.ts` + `gitnexus-shared/src/languages.ts` | | Import resolution | `src/core/ingestion/import-processor.ts` + `import-resolvers/configs/` + `model/resolution-context.ts` | -| Call resolution/MRO | `src/core/ingestion/call-processor.ts` + `model/resolve.ts` | +| Call resolution/inheritance/MRO | `src/core/ingestion/scope-resolution/` (pipeline, passes, graph-bridge) | | Type extraction | `src/core/ingestion/type-extractors/` | | Worker pool | `src/core/ingestion/workers/` | | Web UI | `gitnexus-web/src/` | @@ -147,105 +147,18 @@ export const myPhase: PipelinePhase = { --- -## Call-Resolution DAG +## Semantic model -Typed 6-stage pipeline in `call-processor.ts` (inside the `parse` phase) that resolves method/function calls and emits CALLS edges. Language behavior plugs in at two `LanguageProvider` hook points (stages 3–4); shared code names no languages. Scope: call resolution only — import resolution, type extraction, heritage, and symbol-table population live in other phases. +`SemanticModel` (`gitnexus/src/core/ingestion/model/semantic-model.ts`) is the authoritative store for every symbol-indexed lookup (by `nodeId`, `simpleName`, `qualifiedName`, or `filePath`). The scope-resolution pipeline reads from here: `findOwnedMember`, `pickOverload`, and `findExportedDefByName` all consult `model.methods` / `model.fields` / `model.symbols`. 
-### Stages - -``` -extract-call ──▶ classify-form ──▶ infer-receiver ──▶ select-dispatch ──▶ resolve-target ──▶ emit-edge - (1) (2) (3) [hook] (4) [hook] (5) (6) -``` - -| Stage | Produces | Location | -|-------|----------|----------| -| **extract-call** | `ExtractedCallSite` (name, form, receiver, argCount) | `call-extractors/` (per-language); runs in worker | -| **classify-form** | callForm (`free`/`member`/`constructor`) + arity | `call-analysis.ts` → `inferCallForm`; shared, runs in worker | -| **infer-receiver** | `ReceiverEnriched` (receiver type finalized) | `call-processor.ts`; shared default chain, then `inferImplicitReceiver` hook | -| **select-dispatch** | `DispatchDecision` (primary, fallback, ancestryView) | `selectDispatch` hook, falls back to shared default | -| **resolve-target** | `TieredCandidates` | `model/resolve.ts` → `lookupMethodByOwnerWithMRO` (MRO walk) | -| **emit-edge** | CALLS edge in graph | `call-processor.ts`; writes edge with confidence tier | - -### Provider hooks - -Both hooks are optional on `LanguageProvider`. Ruby is the only current implementer. - -**`inferImplicitReceiver`** — called after shared infer-receiver defaults. Returns `ImplicitReceiverOverride | null`. - -| | | -|---|---| -| Inputs | `calledName`, `callForm`, `receiverName`, `receiverTypeName`, `callNode` (AST), `filePath` | -| Non-null fields | `callForm`, `receiverName`, `receiverTypeName` (required); `receiverSource: 'implicit-self'` (fixed); `hint?` (opaque, passed to `selectDispatch`) | -| Null | Keep existing `ReceiverEnriched` state | - -**`selectDispatch`** — called after infer-receiver (including hook). Returns `DispatchDecision | null`; null uses shared default (constructor → `primary:'constructor'`; typed receiver → `primary:'owner-scoped'`; else → `primary:'free'`). 
- -| | | -|---|---| -| Inputs | `calledName`, `callForm`, `receiverName`, `receiverTypeName`, `receiverSource`, `hint` | -| Non-null fields | `primary: 'owner-scoped' \| 'free' \| 'constructor'`; `fallback?: 'free-arity-narrowed'`; `ancestryView?: 'instance' \| 'singleton'`; `hint?` | - -**`DispatchDecision` field semantics:** -- `primary: 'owner-scoped'` — MRO walk from receiver's type; used when receiver type is known. -- `fallback: 'free-arity-narrowed'` — after owner-scoped miss, search free-call candidates by arity only (Ruby uses this for implicit-self calls that miss their owner's MRO). -- `ancestryView: 'singleton'` — walk singleton/class ancestry instead of instance ancestry (Ruby `def self.foo` bodies, so `extend`-ed methods are found). - -### Adding language behavior - -1. **Implicit receivers** — implement `inferImplicitReceiver`: return null if call already has a receiver; otherwise use `findEnclosingClassInfo` (`ast-helpers.ts`) to find the enclosing context, return `ImplicitReceiverOverride` with `receiverSource: 'implicit-self'`, and optionally set `hint` for `selectDispatch`. -2. **Custom dispatch** — implement `selectDispatch`: inspect `receiverSource` and `hint`, return `DispatchDecision` with `primary`, optional `fallback`, optional `ancestryView`; return null to keep shared defaults. -3. **MRO strategy** — confirm `mroStrategy` is `'first-wins'`, `'c3'`, `'ruby-mixin'`, or `'none'`; consumed by `lookupMethodByOwnerWithMRO`. - -**Ruby example** (`languages/ruby.ts` + `utils/ruby-self-call.ts`): `inferImplicitReceiver` rewrites bare-identifier calls to `self.method` and sets `hint` to `'instance'`/`'singleton'`; `selectDispatch` uses hint for `ancestryView` and adds `fallback: 'free-arity-narrowed'` for implicit-self calls. 
- -### Code references - -| Module | Purpose | -|--------|---------| -| `core/ingestion/call-types.ts` | DAG types: `ReceiverEnriched`, `DispatchDecision`, `ImplicitReceiverOverride` | -| `core/ingestion/language-provider.ts` | Hook signatures: `inferImplicitReceiver`, `selectDispatch` | -| `core/ingestion/call-processor.ts` | `processCalls`: stages 3–6 | -| `core/ingestion/model/resolve.ts` | `lookupMethodByOwnerWithMRO`: stage 5 MRO walk | -| `core/ingestion/languages/ruby.ts` | Both hooks + `mroStrategy: 'ruby-mixin'` | -| `core/ingestion/utils/ruby-self-call.ts` | Bare-call rewrite for `inferImplicitReceiver` | - -### Coexistence with the scope-resolution pipeline - -The Call-Resolution DAG is the **legacy path**. RFC #909 Ring 3 introduces a parallel **scope-resolution pipeline** (next section) that replaces stages 1–6 with a scope-indexed registry lookup. Both paths ship side-by-side and are gated per-language via `MIGRATED_LANGUAGES` + the `REGISTRY_PRIMARY_` env var. - -- **Unmigrated language** → Call-Resolution DAG runs; scope-resolution phase is a no-op. -- **Migrated language** (currently: Python, C#) → scope-resolution owns CALLS/ACCESSES/USES emission; the legacy DAG gates off for that language via `isRegistryPrimary(lang)` checks in `call-processor.ts` and `import-processor.ts`. -- `import-processor` still populates `importMap` for migrated languages — heritage's `ctx.resolve` reads it to disambiguate parent classes. Only edge emission is gated. -- CI runs BOTH paths for every migrated language on every PR (`.github/workflows/ci-scope-parity.yml`); both must pass. 
- -#### Same-graph guarantee - -Edges emitted by the scope-resolution pipeline and edges emitted by the legacy DAG are indistinguishable to downstream consumers (MCP tools, HTTP API, embeddings, group bridge): - -- **Node identity** — both paths use `generateId(...)` from `lib/utils.ts`, the same qualified-name keyspace, and the same node labels (`File`, `Folder`, `Class`, `Method`, `Function`, …). Overload disambiguation suffixes `parameterTypes` into the id consistently — see `scope-resolution/graph-bridge/ids.ts` and the legacy emitter in `call-processor.ts`. -- **Edge vocabulary** — both paths emit the same reasons: `'import-resolved' | 'global' | 'local-call' | 'same-file' | 'interface-dispatch' | 'read' | 'write'`. Migrating a language must not change which reasons consumers see for previously-resolved edges. -- **Confidence tier** — both paths attach a numeric `confidence` to each edge using the same scale. - -The CI parity workflow (`.github/workflows/ci-scope-parity.yml`) runs both paths against every migrated language's fixture corpus and fails on any divergence. - -#### Semantic-model source of truth - -Two independent invariants. - -**ParsedFile = the AST-level truth.** `ParsedFile` (`gitnexus-shared/src/scope-resolution/parsed-file.ts`) is the single per-file artifact both resolution paths consume. Scope-resolution passes MUST NOT build a parallel parse representation. If a per-language hook needs AST-level facts that `ParsedFile` doesn't expose, it should reuse the orchestrator's `treeCache` (`RunScopeResolutionInput.treeCache`) rather than re-invoking `parser.parse(...)` on its own — the C# `populateNamespaceSiblings` hook is the reference implementation of this pattern. - -**SemanticModel = the symbol-level truth.** `SemanticModel` (`gitnexus/src/core/ingestion/model/semantic-model.ts`) is the authoritative store for every symbol-indexed lookup (by `nodeId`, `simpleName`, `qualifiedName`, or `filePath`). 
Both paths read from here: - -- Legacy Call-Resolution DAG → `call-processor` Tier 1/2/3 via `model.symbols.lookupExactAll`, `model.methods.lookupMethodByName`, `model.types.lookupClassByName`, `lookupMethodByOwnerWithMRO`. -- Scope-resolution pipeline → `findOwnedMember`, `pickOverload`, `findExportedDefByName` all consult `model.methods` / `model.fields` / `model.symbols`. +`ParsedFile` (`gitnexus-shared/src/scope-resolution/parsed-file.ts`) is the single per-file artifact the scope-resolution pipeline consumes. Scope-resolution passes MUST NOT build a parallel parse representation. If a per-language hook needs AST-level facts that `ParsedFile` doesn't expose, it should reuse the orchestrator's `treeCache` (`RunScopeResolutionInput.treeCache`) rather than re-invoking `parser.parse(...)` on its own — the C# `populateNamespaceSiblings` hook is the reference implementation of this pattern. The scope-resolution pipeline additionally carries `WorkspaceResolutionIndex` for `Scope`-valued lookups (`classScopeByDefId`, `moduleScopeByFile`) that `SemanticModel` structurally cannot hold. No symbol-indexed duplicates exist outside `SemanticModel`. **Write / read phase contract.** The model is mutable during three ordered phases and read-only afterward: ``` - Phase 1: legacy parse ──► symbolTable.add fans into types/methods/fields + Phase 1: parse ──► symbolTable.add fans into types/methods/fields Phase 2: scope-resolution ──► reconcileOwnership() registers corrected ownerIds Phase 3: finalize ──► model.attachScopeIndexes(bundle) — one-shot freeze ─────────────────────────── phase boundary ─────────────────────────── @@ -255,7 +168,7 @@ The scope-resolution pipeline additionally carries `WorkspaceResolutionIndex` fo `runScopeResolution` narrows `MutableSemanticModel` → `SemanticModel` at the phase boundary so downstream passes physically cannot mutate the model even accidentally. 
-**Transitional: reconciliation pass.** `reconcileOwnership` (`scope-resolution/pipeline/reconcile-ownership.ts`) is a shim for languages whose legacy extractor doesn't resolve `enclosingClassId` at parse time (Python class-body methods are the canonical case). It walks `parsed.localDefs[i].ownerId` after `populateOwners` and registers any missed methods/fields into the model. Idempotent — safe to re-run, safe alongside languages whose legacy extractor already carries `ownerId` (C#). +**Reconciliation pass.** `reconcileOwnership` (`scope-resolution/pipeline/reconcile-ownership.ts`) is a shim for languages whose extractor doesn't resolve `enclosingClassId` at parse time (Python class-body methods are the canonical case). It walks `parsed.localDefs[i].ownerId` after `populateOwners` and registers any missed methods/fields into the model. Idempotent — safe to re-run, safe alongside languages whose extractor already carries `ownerId` (C#). The architectural end state is for every language's parse-time extractor to emit the correct `ownerId` directly, making reconciliation a no-op (tracked as a follow-up refactor). The dev-mode validator `validateOwnershipParity` surfaces any drift via `onWarn` under `NODE_ENV !== 'production' && VALIDATE_SEMANTIC_MODEL !== '0'`. @@ -265,7 +178,7 @@ References: `semantic-model.ts` file-head (full write/read contract); `contract/ ## Scope-Resolution Pipeline (RFC #909 Ring 3) -Language-agnostic registry-primary resolver. Replaces the Call-Resolution DAG for migrated languages. Adding a language is one interface implementation (`ScopeResolver`) plus two registrations — no changes to shared code, no new pipeline phase. +Language-agnostic scope resolver. This is the resolution path for every language — it owns CALLS/ACCESSES/USES emission and inheritance edges. 
Adding a language is one interface implementation (`ScopeResolver`) plus one registration in the `SCOPE_RESOLVERS` map — no changes to shared code, no new pipeline phase. (RING4-1 #942 removed the legacy call-resolution DAG and the per-language `MIGRATED_LANGUAGES` flag, so `SCOPE_RESOLVERS` registration is all that's needed.) ### Pipeline stages @@ -286,7 +199,7 @@ Language-agnostic registry-primary resolver. Replaces the Call-Resolution DAG fo ``` Orchestrator: `runScopeResolution(input, provider)` in `scope-resolution/pipeline/run.ts`. -Pipeline phase: `scopeResolutionPhase` in `scope-resolution/pipeline/phase.ts` — iterates `SCOPE_RESOLVERS ∩ MIGRATED_LANGUAGES`, reads per-file Trees from the parse phase's `scopeTreeCache`, disposes the cache at the end. +Pipeline phase: `scopeResolutionPhase` in `scope-resolution/pipeline/phase.ts` — iterates the registered `SCOPE_RESOLVERS`, reads per-file Trees from the parse phase's `scopeTreeCache`, disposes the cache at the end. ### `ScopeResolver` contract @@ -312,7 +225,6 @@ Single interface a language implements to plug into the pipeline. Contract fully 1. Implement `ScopeResolver` in `languages/<lang>/scope-resolver.ts`. 2. Add entry to `SCOPE_RESOLVERS` in `scope-resolution/pipeline/registry.ts`. -3. Add the language to `MIGRATED_LANGUAGES` in `registry-primary-flag.ts` when the shadow-harness corpus parity ≥ 99% fixtures / ≥ 98% corpus. -CI auto-discovers the set via `tsx`. No workflow edit required. +No workflow edit required. @@ -328,7 +240,6 @@ CI auto-discovers the set via `tsx`. No workflow edit required. 
| `scope-resolution/graph-bridge/*.ts` | CLI-local translation from resolved references → `KnowledgeGraph` edges | | `scope-resolution/scope/*.ts` | Generic scope-chain walkers + namespace targets | | `scope-resolution/workspace-index.ts` | Build-once O(1) lookup index | -| `registry-primary-flag.ts` | `MIGRATED_LANGUAGES` set + `isRegistryPrimary(lang)` | | `languages/python/index.ts` | Python `ScopeResolver` hooks + known-limitation docs | | `languages/python/captures.ts` | `emitPythonScopeCaptures` (honors cross-phase Tree cache) | | `languages/csharp/index.ts` | C# `ScopeResolver` hooks + known-limitation docs | @@ -351,7 +262,7 @@ CI auto-discovers the set via `tsx`. No workflow edit required. ``` Unified Graph Schema (44 node types, 21 relationship types) ↑ - Unified Resolution (3-tier name lookup + MRO walk) + Scope-Resolution Pipeline (registry lookup + 3-tier import resolution + MRO) ↑ Language Providers (import semantics, type config, export checker, MRO strategy) ↑ @@ -376,7 +287,7 @@ Each language implements `LanguageProvider` (`language-provider.ts`). Key fields ### Unified capture tags -Per-language tree-sitter queries use different AST node names but produce the **same semantic capture tags**: `@definition.class`, `@definition.function`, `@call.name`, `@import.source`, `@heritage.extends`. Downstream extraction needs no language branching. Defined in `tree-sitter-queries.ts`. +Per-language tree-sitter queries use different AST node names but produce the **same semantic capture tags**: `@definition.class`, `@definition.function`, `@call.name`, `@import.source`, `@reference.inherits`. Downstream extraction needs no language branching. Defined in `tree-sitter-queries.ts`. ### Import resolution @@ -403,20 +314,21 @@ Unified 3-tier algorithm (`model/resolution-context.ts`), per-language `importSe 1. Worker pool dispatches files (or sequential fallback via `skipWorkers`) 2. 
Each worker: detect language → load grammar → run queries → return unified `ParseWorkerResult` 3. Synthesize wildcard bindings (`wildcard-synthesis.ts`) -4. Resolve imports and heritage +4. Resolve imports 5. Collect `BindingAccumulator` entries for cross-file propagation +Inheritance edges are emitted later, by the scope-resolution phase (`preEmitInheritanceEdges` + `emitHeritageEdges`), not during `parse`. + Workers: `workers/worker-pool.ts`, `workers/parse-worker.ts`. -### Heritage and MRO +### Inheritance and MRO -All languages emit unified `ExtractedHeritage` (child, parent, `EXTENDS`/`IMPLEMENTS`). MRO phase walks the heritage graph using per-language strategy: +Inheritance is captured by the `@reference.inherits` tag and emitted by the scope-resolution phase: `preEmitInheritanceEdges` resolves each base in scope, then `emitHeritageEdges` writes the `EXTENDS`/`IMPLEMENTS` edges. The phase then computes method resolution order via each `ScopeResolver`'s `buildMro` hook, feeding a `MethodDispatchIndex` used for owner-scoped lookups. Per-language strategy: -- **`first-wins`** — Java, C#, C++, TS, Ruby, Go +- **`first-wins`** — Java, C#, C++, TS, Go - **`c3`** — Python (C3 linearization) +- **`ruby-mixin`** — Ruby (mixin-aware linearization) - **`none`** — single-inheritance languages -Unified walk: `lookupMethodByOwnerWithMRO()` in `model/resolve.ts`. - --- ## Full analysis flow diff --git a/CLAUDE.md b/CLAUDE.md index e3815af765..bbb9915897 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -35,7 +35,7 @@ If always-on instructions grow, load deep conventions via conditional reads (e.g ## Reference Documentation - **This repository:** [AGENTS.md](AGENTS.md) (Cursor + monorepo notes), [ARCHITECTURE.md](ARCHITECTURE.md), [CONTRIBUTING.md](CONTRIBUTING.md), [GUARDRAILS.md](GUARDRAILS.md). -- **Call-resolution DAG:** See ARCHITECTURE.md § Call-Resolution DAG. Shared pipeline code in `gitnexus/src/core/ingestion/` must not name languages — use `LanguageProvider` hooks instead (see AGENTS.md). 
+- **Call & inheritance resolution:** See ARCHITECTURE.md § Scope-Resolution Pipeline. Shared pipeline code in `gitnexus/src/core/ingestion/` must not name languages — use `LanguageProvider` / `ScopeResolver` hooks instead (see AGENTS.md). (The legacy call-resolution DAG was removed in #942.) - **GitNexus:** `.claude/skills/gitnexus/`; MCP and indexed-repo rules live only in [AGENTS.md](AGENTS.md) (`gitnexus:start` … `gitnexus:end`). See **GitNexus rules** below. ## Changelog diff --git a/TESTING.md b/TESTING.md index e69a4b8e9c..72833d3cab 100644 --- a/TESTING.md +++ b/TESTING.md @@ -4,11 +4,11 @@ How we structure tests and which commands to run locally and in CI. ## Packages -| Package | Path | Runner | Notes | -| -------------- | -------------- | -------- | ------------------------------ | -| CLI + MCP core | `gitnexus/` | Vitest | Primary test surface in CI | -| Web UI | `gitnexus-web/`| Vitest | Unit/component tests | -| Web UI E2E | `gitnexus-web/`| Playwright | Run when changing UI flows | +| Package | Path | Runner | Notes | +| -------------- | --------------- | ---------- | -------------------------- | +| CLI + MCP core | `gitnexus/` | Vitest | Primary test surface in CI | +| Web UI | `gitnexus-web/` | Vitest | Unit/component tests | +| Web UI E2E | `gitnexus-web/` | Playwright | Run when changing UI flows | ## Test lanes @@ -16,25 +16,25 @@ How we structure tests and which commands to run locally and in CI. 
From `gitnexus/`: -| Command | What it runs | When to use | -| ------------------------ | ---------------------------------------------------- | ------------------------------- | -| `npm test` | Full suite (all 3 vitest projects) | Before opening a PR | -| `npm run test:unit` | Unit tests only (`test/unit/`) | Tight development loop | -| `npm run test:integration` | Integration tests (`test/integration/`) | After changing pipelines, DB, workers | -| `npm run test:coverage` | Full suite + v8 coverage with thresholds | Checking coverage impact | -| `npm run test:parity` | Scope-resolution parity for all migrated languages | After changing resolver or scope code | -| `npm run test:cross-platform` | Platform-sensitive subset only | Debugging a Windows/macOS issue | -| `npm run test:watch` | Vitest in watch mode | Active development | +| Command | What it runs | When to use | +| ----------------------------- | -------------------------------------------------- | ------------------------------------- | +| `npm test` | Full suite (all 3 vitest projects) | Before opening a PR | +| `npm run test:unit` | Unit tests only (`test/unit/`) | Tight development loop | +| `npm run test:integration` | Integration tests (`test/integration/`) | After changing pipelines, DB, workers | +| `npm run test:coverage` | Full suite + v8 coverage with thresholds | Checking coverage impact | +| `npm run test:parity` | Scope-resolution parity for all migrated languages | After changing resolver or scope code | +| `npm run test:cross-platform` | Platform-sensitive subset only | Debugging a Windows/macOS issue | +| `npm run test:watch` | Vitest in watch mode | Active development | ### `gitnexus-web/` commands From `gitnexus-web/`: -| Command | What it runs | When to use | -| ---------------------- | --------------------------------- | ------------------------------ | -| `npm test` | Unit/component tests (vitest) | After changing web code | -| `npm run test:coverage`| Unit tests + coverage | Checking 
coverage impact | -| `npm run test:e2e` | Playwright browser tests | After changing UI flows (requires `gitnexus serve` + `npm run dev`) | +| Command | What it runs | When to use | +| ----------------------- | ----------------------------- | ------------------------------------------------------------------- | +| `npm test` | Unit/component tests (vitest) | After changing web code | +| `npm run test:coverage` | Unit tests + coverage | Checking coverage impact | +| `npm run test:e2e` | Playwright browser tests | After changing UI flows (requires `gitnexus serve` + `npm run dev`) | ### Before opening a PR @@ -59,11 +59,11 @@ Skip with `git commit --no-verify` (use sparingly). `gitnexus/vitest.config.ts` defines three projects for safety isolation: -| Project | Files | Parallelism | Purpose | -| ---------- | ----------------------------- | ----------- | ---------------------------------------------- | -| `lbug-db` | Native LadybugDB integration tests (explicit list) | Sequential | Prevents file-lock conflicts from native mmap addon | -| `cli-e2e` | `skills-e2e.test.ts` | Sequential | CLI process spawning requires serial execution | -| `default` | Everything else | Parallel | Fast execution for pure logic and parser tests | +| Project | Files | Parallelism | Purpose | +| --------- | -------------------------------------------------- | ----------- | --------------------------------------------------- | +| `lbug-db` | Native LadybugDB integration tests (explicit list) | Sequential | Prevents file-lock conflicts from native mmap addon | +| `cli-e2e` | `skills-e2e.test.ts` | Sequential | CLI process spawning requires serial execution | +| `default` | Everything else | Parallel | Fast execution for pure logic and parser tests | When adding a new test that uses native LadybugDB (`@ladybugdb/core`), add it to the `lbug-db` project's explicit include list and the `default` project's exclude list. 
@@ -74,21 +74,11 @@ When adding a new test that uses native LadybugDB (`@ladybugdb/core`), add it to - **Resolver / parity** — Language-specific call-resolution tests in `test/integration/resolvers/`. - **E2E (web)** — Critical user paths only; prefer `data-testid` attributes for stable selectors. Tests run against real backend (`gitnexus serve`) and Vite dev server. -## Scope-resolution parity +## Scope-resolution tests -Migrated languages (listed in `MIGRATED_LANGUAGES` in `src/core/ingestion/registry-primary-flag.ts`) are tested in both legacy and registry-primary modes on every PR. +Every language resolves calls and inheritance through the scope-resolution pipeline — the legacy call-resolution DAG and the per-language `REGISTRY_PRIMARY_<LANG>` flag were removed in RING4-1 (#942). Each language's resolver test lives at `test/integration/resolvers/<lang>.test.ts` and runs once, on the single scope-resolution path, as part of the normal `tests` job (`vitest test/**/*.test.ts`). -For each migrated language, CI runs the resolver test file twice: -1. `REGISTRY_PRIMARY_<LANG>=0` — legacy DAG path -2. `REGISTRY_PRIMARY_<LANG>=1` — registry-primary path - -Both must pass. Known legacy gaps are listed in `LEGACY_RESOLVER_PARITY_EXPECTED_FAILURES` in `test/integration/resolvers/helpers.ts` and are automatically skipped in legacy mode. - -Adding a language to `MIGRATED_LANGUAGES` automatically enrolls it in parity — no workflow or config edit needed. The test file must exist at `test/integration/resolvers/<lang>.test.ts`. - -Run parity locally: `cd gitnexus && npm run test:parity` - -Run for a single language: `cd gitnexus && npx tsx scripts/run-parity.ts --language python` +Adding a language: register its `ScopeResolver` in `scope-resolution/pipeline/registry.ts` (`SCOPE_RESOLVERS`) and add the resolver test file — no workflow or config edit needed. 
## Cross-platform testing @@ -120,12 +110,12 @@ To check the cross-platform list is up to date, run `npm run test:cross-platform GitHub Actions (`.github/workflows/ci.yml`) orchestrate: -| Workflow | Jobs | Purpose | -| --------------------- | ------------------------------ | ------------------------------------------------ | -| `ci-quality.yml` | format, lint, typecheck, typecheck-web, workflow-convention | Code quality gates | +| Workflow | Jobs | Purpose | +| --------------------- | ----------------------------------------------------------------- | --------------------------------------------------------------------- | +| `ci-quality.yml` | format, lint, typecheck, typecheck-web, workflow-convention | Code quality gates | | `ci-tests.yml` | ubuntu/coverage, cross-platform (Win/Mac), packaged-install-smoke | Full suite + coverage on Ubuntu; platform-sensitive subset on Win/Mac | -| `ci-scope-parity.yml` | discover, parity | Scope-resolution parity for all migrated languages | -| `ci-e2e.yml` | e2e (chromium) | Playwright E2E, gated on `gitnexus-web/**` changes | +| `ci-scope-parity.yml` | discover, parity | Scope-resolution parity for all migrated languages | +| `ci-e2e.yml` | e2e (chromium) | Playwright E2E, gated on `gitnexus-web/**` changes | The `CI Gate` job in `ci.yml` is the single required check for branch protection. It requires quality, tests, e2e, and scope-parity to all pass. 
diff --git a/gitnexus/bench/python-scope/baseline-fingerprint.txt b/gitnexus/bench/python-scope/baseline-fingerprint.txt index e8337a6d90..13a0b5ee3e 100644 --- a/gitnexus/bench/python-scope/baseline-fingerprint.txt +++ b/gitnexus/bench/python-scope/baseline-fingerprint.txt @@ -1 +1 @@ -06687dff942d531c4d453b5906a8666c90db4867eb43ed18304aa59a8a93ef9d +c03f87cd8cd1ee716dea93cceb27109ee5b4584bc9fe426eea3f18fd6f9854cc diff --git a/gitnexus/bench/scope-capture/baselines.json b/gitnexus/bench/scope-capture/baselines.json index cef734fa84..e838bfa4f8 100644 --- a/gitnexus/bench/scope-capture/baselines.json +++ b/gitnexus/bench/scope-capture/baselines.json @@ -20,18 +20,18 @@ "scaling_budget": 1.5, "_added": "#1956: cpp added to the scope-capture bench (was UNBENCHED). Heritage-bearing scale source (: public Base, public Mixin) drives emitCppInheritanceCaptures at scale. Adding it exposed + fixed a pre-existing O(n^2) findNodeAtRange root-walk in cpp/captures.ts (~12 sites, threaded c.node, byte-identical over 263 cpp-* fixtures); scaling 2.30 -> 1.12.", "_rebaselined": "#1965 / #1923 F4: uninitialized non-leading multi-declarators now emit @declaration.variable captures; cpp-adl-inner-callable-outer-noncallable data::Pair a, b adds the legitimate fixture drift. Linear (~1.06).", - "_note": "#1975: + cpp-out-of-line-class fixture, fixture_count 263->265. #1990: + cpp-adl-ns-plus-hidden-friend-same-name fixture (ADL hidden-friend + namespace-callable merge parity test). Pure fixture-corpus drift — no scope-extractor change; existing fixtures' captures byte-identical. fixture_count 265->267. #1995: + cpp-union-nested-tail-collision and cpp-anon-ns-tail-collision fixtures — pure fixture-corpus drift; fixture_count 270->272, fingerprint 538e8be->d63ded6. #1993: + cpp-cross-namespace-same-tail fixture — pure fixture-corpus drift; fixture_count 272->273, fingerprint d63ded6->6d6207ae." + "_note": "#1975: + cpp-out-of-line-class fixture, fixture_count 263->265. 
#1990: + cpp-adl-ns-plus-hidden-friend-same-name fixture (ADL hidden-friend + namespace-callable merge parity test). Pure fixture-corpus drift \u2014 no scope-extractor change; existing fixtures' captures byte-identical. fixture_count 265->267. #1995: + cpp-union-nested-tail-collision and cpp-anon-ns-tail-collision fixtures \u2014 pure fixture-corpus drift; fixture_count 270->272, fingerprint 538e8be->d63ded6. #1993: + cpp-cross-namespace-same-tail fixture \u2014 pure fixture-corpus drift; fixture_count 272->273, fingerprint d63ded6->6d6207ae." }, "csharp": { - "_rebaselined": "#1956 synth-widening: + csharp-qualified-base fixture; the synth now walks record_declaration + struct_declaration base_lists and handles alias_qualified_name (matching the #1940 legacy leg), so record/struct heritage now emits. csharp-record-base gains a record inherits capture. (record->record SAME-namespace EXTENDS is a separate registry resolution gap, tracked as follow-up.) Linear (~1.00). (Earlier #1956: heritage-bearing scale source.)", - "fingerprint": "68ef32c126d5c6de5d8184c6ad0a6104043036daf9805947db8b21741b883f43", + "_rebaselined": "#1956 synth-widening: + csharp-qualified-base fixture; the synth now walks record_declaration + struct_declaration base_lists and handles alias_qualified_name (matching the #1940 legacy leg), so record/struct heritage now emits. csharp-record-base gains a record inherits capture. (record->record SAME-namespace EXTENDS is a separate registry resolution gap, tracked as follow-up.) Linear (~1.00). (Earlier #1956: heritage-bearing scale source.) 
| #942: scope-resolution-only cleanup reworded fixture comments; capture byte-positions shift, capture LOGIC unchanged.", + "fingerprint": "7e8845040540ae69ef564ebf597305ade31e4480cee1dc910ea0fcfc26794910", "scaling_budget": 1.5 }, "rust": { - "fingerprint": "b00aea0f2dbff6a77d3aa709f7f90e8a70649f7e789a8de725d9b1958ebe12bc", + "fingerprint": "ac610bbe97666bf285923479dd7b43a2fe4c5354aae8df1bcbafdc04fb220f82", "scaling_budget": 1.5, - "_rebaselined": "#1956 tri-review U1: rust-qualified-trait fixture (scoped + generic-of-scoped impl trait paths); bareTypeIdentifier now resolves scoped_type_identifier bases by their name: tail (additive, no existing-fixture drift); linear (~1.04). #1975: + rust-scoped-impl fixture (impl a::Inner / b::Inner inherent scoped impls) — legacy @definition.impl scoped arm + findEnclosingClassInfo inherent-impl scoped target; rust scope-extractor captures byte-identical.", - "_note": "PR #1934: F66/F68 let-binding pattern narrowing; F71 union (Struct-labeled, now materialized via legacy @definition.struct + resolvable); F72 macro FULLY WIRED — @declaration.macro/@reference.macro + MacroRegistry → USES edges to Macro nodes (never a same-named fn). + rust-macro / rust-union fixtures and merged with origin/main #1975 rust-scoped-impl; fingerprint re-baselined (scaling ~0.99, fixture_count 126). #1992: + rust-nested-tail-collision-generic and rust-generic-impl-same-method-name (F3) fixtures — pure fixture-corpus drift, no scope-extractor change; fixture_count 127->129, fingerprint 56ffc1c0->b00aea0f." + "_rebaselined": "#1956 tri-review U1: rust-qualified-trait fixture (scoped + generic-of-scoped impl trait paths); bareTypeIdentifier now resolves scoped_type_identifier bases by their name: tail (additive, no existing-fixture drift); linear (~1.04). 
#1975: + rust-scoped-impl fixture (impl a::Inner / b::Inner inherent scoped impls) \u2014 legacy @definition.impl scoped arm + findEnclosingClassInfo inherent-impl scoped target; rust scope-extractor captures byte-identical. | #942: scope-resolution-only cleanup reworded fixture comments; capture byte-positions shift, capture LOGIC unchanged.", + "_note": "PR #1934: F66/F68 let-binding pattern narrowing; F71 union (Struct-labeled, now materialized via legacy @definition.struct + resolvable); F72 macro FULLY WIRED \u2014 @declaration.macro/@reference.macro + MacroRegistry \u2192 USES edges to Macro nodes (never a same-named fn). + rust-macro / rust-union fixtures and merged with origin/main #1975 rust-scoped-impl; fingerprint re-baselined (scaling ~0.99, fixture_count 126). #1992: + rust-nested-tail-collision-generic and rust-generic-impl-same-method-name (F3) fixtures \u2014 pure fixture-corpus drift, no scope-extractor change; fixture_count 127->129, fingerprint 56ffc1c0->b00aea0f." }, "php": { "fingerprint": "f9c8eaf6d1084f9b95a9fb97ccce5e618a24d936c85fb8af4b96c73a560f7a7f", @@ -39,10 +39,10 @@ "_rebaselined": "#1956: heritage-bearing scale source (class extends Base + use trait); both forms gated at scale; linear (~1.04)." }, "ruby": { - "fingerprint": "bf6b13a366e4116da3772f9a9fdd50517eb11da73918451392e014a2c905b2dd", + "fingerprint": "61d6e5f049e5e6c4871c210d28d15348f2396345751a98ccfff2f4b54f727aff", "scaling_budget": 1.5, - "_rebaselined": "#1956 synth-widening: + ruby-qualified-base fixture; synth now reduces a scope_resolution superclass (class C < Mod::Super) to its trailing constant (matching the #1940 legacy leg), at parity. Linear (~1.03). (Earlier #1956: heritage-bearing scale source.)", - "_note": "F62: + scope_resolution class/module declaration captures — fixture count 78→81, fingerprint drift expected. 
#1975: + ruby-tail-collision fixture (Foo::Bar vs Baz::Bar stay distinct nodes) — pure fixture-corpus drift, scope-extractor captures unchanged; 81→82." + "_rebaselined": "#1956 synth-widening: + ruby-qualified-base fixture; synth now reduces a scope_resolution superclass (class C < Mod::Super) to its trailing constant (matching the #1940 legacy leg), at parity. Linear (~1.03). (Earlier #1956: heritage-bearing scale source.) | #942: scope-resolution-only cleanup reworded fixture comments; capture byte-positions shift, capture LOGIC unchanged.", + "_note": "F62: + scope_resolution class/module declaration captures \u2014 fixture count 78\u219281, fingerprint drift expected. #1975: + ruby-tail-collision fixture (Foo::Bar vs Baz::Bar stay distinct nodes) \u2014 pure fixture-corpus drift, scope-extractor captures unchanged; 81\u219282." }, "swift": { "fingerprint": "53325c6345161c5a495f997297af5a24fb718fd3e6647040160f8ab2a2c8e4c0", @@ -56,9 +56,9 @@ "_rebaselined": "#1970 review + tri-review follow-ups: constructor-call retag, cascade calls, built-in suppression, enum scope, #1926 F24/F25, named-ctor dedup (crash fix), container-name binding suppression; heritage file-affinity resolution. Fixtures: member-call-contexts, constructor-body, named-constructor-body, heritage-name-collision, construct-cascade." }, "java": { - "fingerprint": "b63f9be458f7ece854e7b007159d7bf65b4b66a86e83a6c0656fc93ebd5d83da", + "fingerprint": "d5cf68e9faf92fffd928c1ee6e584c72cc65918d1f1b5078abb3bfe09ac699bf", "scaling_budget": 1.5, - "_rebaselined": "#1956 synth-widening: + java-iface-extends fixture; synthesizeJavaInheritanceReferences now ALSO walks interface_declaration extends_interfaces (interface IA extends IB, IC), matching the #1940 legacy leg. (Earlier U2+review: java-qualified-base fixture covers 2- AND 3-segment qualified bases guarding the legacy end-anchor; synth tail-resolves scoped bases.) Linear (~1.03). 
(Earliest: java added to bench, exposed+fixed the O(n^2) findNodeAtRange root-walk; 3.09 -> ~0.99.)" + "_rebaselined": "#1956 synth-widening: + java-iface-extends fixture; synthesizeJavaInheritanceReferences now ALSO walks interface_declaration extends_interfaces (interface IA extends IB, IC), matching the #1940 legacy leg. (Earlier U2+review: java-qualified-base fixture covers 2- AND 3-segment qualified bases guarding the legacy end-anchor; synth tail-resolves scoped bases.) Linear (~1.03). (Earliest: java added to bench, exposed+fixed the O(n^2) findNodeAtRange root-walk; 3.09 -> ~0.99.) | #942: scope-resolution-only cleanup reworded fixture comments; capture byte-positions shift, capture LOGIC unchanged." }, "typescript": { "fingerprint": "3f44a4a6892698df2d145c8ff2812c3b318807648983c88aca28fbd694f172f9", @@ -67,15 +67,15 @@ "_note": "#1968: F44, F85, F87 \u2014 fingerprint drift expected." }, "javascript": { - "fingerprint": "a8ddfb15620ae55e50651fc21ab14c4a1f874d9b19e208cc6cbf0a8daac8ec5b", + "fingerprint": "d72f03c6c502235d2d4b74d66baa5c7d361f040d7a1b72e84acad61210d05ae8", "scaling_budget": 1.5, "_added": "#1951: bench coverage added (was ungated); scale source heritage-bearing (extends Base); js/kotlin O(n^2) findNodeAtRange-per-match fixed to threaded captured node, now linear.", - "_rebaselined": "#1956 synth-widening: + javascript-qualified-base fixture; synthesizeJsInheritanceReferences now handles a member_expression base (class S extends ns.Base -> Base), matching the #1940 legacy leg + the TS terminalTsTypeNameNode property_identifier case, at parity. Linear (~1.05)." + "_rebaselined": "#1956 synth-widening: + javascript-qualified-base fixture; synthesizeJsInheritanceReferences now handles a member_expression base (class S extends ns.Base -> Base), matching the #1940 legacy leg + the TS terminalTsTypeNameNode property_identifier case, at parity. Linear (~1.05). 
| #942: scope-resolution-only cleanup reworded fixture comments; capture byte-positions shift, capture LOGIC unchanged." }, "kotlin": { - "fingerprint": "5121a11855cd9cc44a357ae3ff50953de80cdd743f00e8924c31503b132bcd84", + "fingerprint": "9b212eca24959cc1705933df213f4ec739c5c1b4580c7929347d37bf4d72ba8a", "scaling_budget": 1.5, "_added": "#1951: bench coverage added (was ungated); scale source heritage-bearing (: Base()); js/kotlin O(n^2) findNodeAtRange-per-match fixed to threaded captured node, now linear.", - "_rebaselined": "#1956 synth-widening: + kotlin-qualified-base fixture; synthesizeKotlinInheritanceReferences now handles the explicit_delegation form (class F : Iface by d -> Iface), matching the #1940 legacy leg, at parity. Linear (~0.87)." + "_rebaselined": "#1956 synth-widening: + kotlin-qualified-base fixture; synthesizeKotlinInheritanceReferences now handles the explicit_delegation form (class F : Iface by d -> Iface), matching the #1940 legacy leg, at parity. Linear (~0.87). | #942: scope-resolution-only cleanup reworded fixture comments; capture byte-positions shift, capture LOGIC unchanged." 
} } diff --git a/gitnexus/package.json b/gitnexus/package.json index 1e8a458124..53eb7dedd9 100644 --- a/gitnexus/package.json +++ b/gitnexus/package.json @@ -48,7 +48,6 @@ "test:integration": "vitest run test/integration", "test:watch": "vitest", "test:coverage": "vitest run --coverage", - "test:parity": "tsx scripts/run-parity.ts", "test:cross-platform": "tsx scripts/run-cross-platform.ts", "postinstall": "node scripts/materialize-vendor-grammars.cjs && node scripts/build-tree-sitter-dart.cjs && node scripts/build-tree-sitter-proto.cjs && node scripts/build-tree-sitter-swift.cjs", "prepare": "node scripts/build.js", diff --git a/gitnexus/scripts/bench-scope-resolution.ts b/gitnexus/scripts/bench-scope-resolution.ts index 399d44fcfe..32020a2c09 100644 --- a/gitnexus/scripts/bench-scope-resolution.ts +++ b/gitnexus/scripts/bench-scope-resolution.ts @@ -4,10 +4,8 @@ * isolating the resolution cost from parse / heritage / pipeline * overhead. * - * Usage: REGISTRY_PRIMARY_PYTHON=1 npx tsx scripts/bench-scope-resolution.ts + * Usage: npx tsx scripts/bench-scope-resolution.ts */ -process.env.REGISTRY_PRIMARY_PYTHON = '1'; - import { generateId } from '../src/lib/utils.js'; import { createKnowledgeGraph } from '../src/core/graph/graph.js'; import { runScopeResolution } from '../src/core/ingestion/scope-resolution/index.js'; diff --git a/gitnexus/scripts/ci-list-migrated-languages.ts b/gitnexus/scripts/ci-list-migrated-languages.ts deleted file mode 100644 index 732ce861c7..0000000000 --- a/gitnexus/scripts/ci-list-migrated-languages.ts +++ /dev/null @@ -1,24 +0,0 @@ -/** - * CI helper — emits the `MIGRATED_LANGUAGES` set as a JSON matrix array for - * GitHub Actions (`.github/workflows/ci-scope-parity.yml`). - * - * Consumed by the `discover` job in that workflow. Each entry has: - * - `slug`: lowercase language id, matching `test/integration/resolvers/.test.ts`. - * - `envvar`: uppercase suffix used to build the `REGISTRY_PRIMARY_` toggle. 
- * - * Run with `npx tsx scripts/ci-list-migrated-languages.ts`. The script - * writes a single JSON array to stdout (no wrapper object) so the - * workflow can pipe it straight into `$GITHUB_OUTPUT`. - */ - -import { MIGRATED_LANGUAGES } from '../src/core/ingestion/registry-primary-flag.js'; - -const entries = [...MIGRATED_LANGUAGES].map((slug) => { - const s = String(slug); - return { - slug: s, - envvar: s.toUpperCase().replace(/-/g, '_'), - }; -}); - -process.stdout.write(JSON.stringify(entries)); diff --git a/gitnexus/scripts/run-parity.ts b/gitnexus/scripts/run-parity.ts deleted file mode 100644 index 6fbf35dc60..0000000000 --- a/gitnexus/scripts/run-parity.ts +++ /dev/null @@ -1,139 +0,0 @@ -/** - * Consolidated scope-resolution parity runner. - * - * Replaces the per-language matrix in ci-scope-parity.yml with a single - * job that runs all migrated languages sequentially in one process. This - * eliminates 8× redundant checkout + npm ci + build cycles (the old - * workflow created a separate GitHub Actions job per language). - * - * For each language in MIGRATED_LANGUAGES: - * 1. Run its resolver test with REGISTRY_PRIMARY_=0 (legacy DAG) - * 2. Run its resolver test with REGISTRY_PRIMARY_=1 (registry-primary) - * - * Both modes must pass. Failures are collected and reported at the end - * so all regressions are visible in a single CI run (equivalent to the - * old workflow's fail-fast: false behavior). - * - * Vitest output streams to the console in real time (stdio: 'inherit') - * so CI logs show the actual test output directly. No per-invocation - * timeout — the CI job-level timeout (30 min) is the outer guard. 
- * - * Usage: - * npx tsx scripts/run-parity.ts - * npx tsx scripts/run-parity.ts --language python # single language - */ - -import { execFileSync } from 'child_process'; -import fs from 'fs'; -import path from 'path'; -import { fileURLToPath } from 'url'; -import { MIGRATED_LANGUAGES } from '../src/core/ingestion/registry-primary-flag.js'; - -const __dirname = path.dirname(fileURLToPath(import.meta.url)); -const ROOT = path.resolve(__dirname, '..'); - -interface ParityFailure { - lang: string; - mode: 'legacy' | 'registry-primary'; -} - -function envVarName(slug: string): string { - return `REGISTRY_PRIMARY_${slug.toUpperCase().replace(/-/g, '_')}`; -} - -function testFilePaths(slug: string): string[] { - const resolverDir = path.resolve(ROOT, 'test/integration/resolvers'); - const files = fs.readdirSync(resolverDir); - const direct = `${slug}.test.ts`; - const prefixed = `${slug}-`; - return files - .filter((name) => name === direct || (name.startsWith(prefixed) && name.endsWith('.test.ts'))) - .sort() - .map((name) => `test/integration/resolvers/${name}`); -} - -function runVitest(testFile: string, env: Record): boolean { - try { - execFileSync('npx', ['vitest', 'run', testFile], { - cwd: ROOT, - env: { ...process.env, ...env }, - stdio: 'inherit', - shell: true, - }); - return true; - } catch { - return false; - } -} - -// Parse CLI args -const args = process.argv.slice(2); -const langFlag = args.indexOf('--language'); -const singleLang = langFlag >= 0 ? args[langFlag + 1] : undefined; - -if (langFlag >= 0 && singleLang === undefined) { - console.error('--language requires a value'); - process.exit(1); -} - -const languages = singleLang ? 
[singleLang] : [...MIGRATED_LANGUAGES].map(String); - -// Verify test files exist before running -const missingFiles: string[] = []; -const filesByLanguage = new Map(); -for (const lang of languages) { - const files = testFilePaths(lang); - filesByLanguage.set(lang, files); - if (files.length === 0) { - missingFiles.push(`test/integration/resolvers/${lang}*.test.ts (${lang})`); - } -} - -if (missingFiles.length > 0) { - console.error('Missing resolver test files:'); - for (const f of missingFiles) console.error(` ${f}`); - process.exit(1); -} - -console.log(`Scope-resolution parity: ${languages.length} language(s)`); -console.log(`Languages: ${languages.join(', ')}\n`); - -const failures: ParityFailure[] = []; - -for (const lang of languages) { - const files = filesByLanguage.get(lang) ?? []; - const envVar = envVarName(lang); - - console.log(`\n── ${lang} — legacy DAG (${envVar}=0) ──`); - for (const file of files) { - if (!runVitest(file, { [envVar]: '0' })) { - failures.push({ lang, mode: 'legacy' }); - } - } - - console.log(`\n── ${lang} — registry-primary (${envVar}=1) ──`); - for (const file of files) { - if (!runVitest(file, { [envVar]: '1' })) { - failures.push({ lang, mode: 'registry-primary' }); - } - } -} - -// Summary -const total = [...filesByLanguage.values()].reduce((sum, files) => sum + files.length * 2, 0); -const passed = total - failures.length; - -console.log('\n═══════════════════════════════════════'); -console.log('PARITY SUMMARY'); -console.log('═══════════════════════════════════════'); -console.log(`Passed: ${passed}/${total}`); - -if (failures.length > 0) { - console.log(`\nFAILURES (${failures.length}):`); - for (const f of failures) { - console.log(` ✗ ${f.lang} [${f.mode}]`); - } - process.exit(1); -} - -console.log('\nAll parity checks passed.'); diff --git a/gitnexus/src/core/ingestion/call-processor.ts b/gitnexus/src/core/ingestion/call-processor.ts index a3ca9c8986..d7e186e11b 100644 --- 
a/gitnexus/src/core/ingestion/call-processor.ts +++ b/gitnexus/src/core/ingestion/call-processor.ts @@ -1,3287 +1,84 @@ -import { KnowledgeGraph } from '../graph/types.js'; -import { ASTCache } from './ast-cache.js'; -import type { SymbolDefinition } from 'gitnexus-shared'; -import type { SymbolTableReader, HeritageMap, ExtractedHeritage } from './model/index.js'; -import { CLASS_TYPES, CALL_TARGET_TYPES, lookupMethodByOwnerWithMRO } from './model/index.js'; -import type { DispatchDecision, ReceiverEnriched } from './call-types.js'; - -/** Shorthand for the receiver-source discriminant shared across the DAG. */ -type ReceiverSource = ReceiverEnriched['receiverSource']; - -/** - * DAG stage 4 fallback: used when `selectDispatch` is absent or returns null. - * Preserves pre-DAG dispatch semantics: - * - 'constructor' → constructor branch - * - 'free' → free branch (admits class-target fast path) - * - 'member' or undefined → owner-scoped branch - * - * `undefined` callForm MUST route through owner-scoped (not free) so bare - * identifiers without a classified shape do NOT trigger `resolveFreeCall`'s - * class-target fast path. Without a `receiverTypeName`, the owner-scoped - * branch falls through to `resolveModuleAliasedCall` + `singleCandidate`, - * matching legacy behavior where non-callable symbols (Class, Interface) - * null-route instead of producing spurious Constructor edges. 
- */ -const defaultDispatchDecision = ( - callForm: 'free' | 'member' | 'constructor' | undefined, -): DispatchDecision => { - if (callForm === 'constructor') return { primary: 'constructor' }; - if (callForm === 'free') return { primary: 'free' }; - return { primary: 'owner-scoped' }; -}; -import Parser from 'tree-sitter'; -import type { ResolutionContext } from './model/resolution-context.js'; -import { TIER_CONFIDENCE, type ResolutionTier } from './model/resolution-context.js'; -import type { TieredCandidates } from './model/resolution-context.js'; -import { isLanguageAvailable, loadParser, loadLanguage } from '../tree-sitter/parser-loader.js'; -import { getProvider } from './languages/index.js'; -import { generateId } from '../../lib/utils.js'; -import { getLanguageFromFilename, SupportedLanguages } from 'gitnexus-shared'; -import { isRegistryPrimary } from './registry-primary-flag.js'; -import { isVerboseIngestionEnabled } from './utils/verbose.js'; -import { - ALWAYS_ON_SLOW_FILE_WARN_THROTTLE_MS, - alwaysOnSlowFileWarnMs, - deferredCallFileSlowMs, - deferredCallLogEveryN, - getDeferredProfileDroppedCount, - isDeferredResolutionProfileEnabled, - logDeferredProfile, - profileElapsedMs, - resetDeferredProfileDroppedCount, - startTimer, -} from './utils/deferred-resolution-profile.js'; -import { yieldToEventLoop } from './utils/event-loop.js'; -import { parseSourceSafe } from '../tree-sitter/safe-parse.js'; -import { - CLASS_CONTAINER_TYPES, - FUNCTION_NODE_TYPES, - findEnclosingClassInfo, - genericFuncName, - inferFunctionLabel, -} from './utils/ast-helpers.js'; -import type { FieldInfo, FieldExtractorContext } from './field-types.js'; -import type { LanguageProvider } from './language-provider.js'; -import { typeTagForId, constTagForId, buildCollisionGroups } from './utils/method-props.js'; -import type { MethodInfo } from './method-types.js'; -import { - countCallArguments, - inferCallForm, - extractReceiverName, - extractReceiverNode, - extractMixedChain, - 
extractCallArgTypes, - type MixedChainStep, -} from './utils/call-analysis.js'; -import { buildTypeEnv, isSubclassOf } from './type-env.js'; -import type { ConstructorBinding, TypeEnvironment } from './type-env.js'; -import type { BindingAccumulator } from './binding-accumulator.js'; -import { getTreeSitterBufferSize } from './constants.js'; -import type { - ExtractedCall, - ExtractedAssignment, - ExtractedRoute, - ExtractedFetchCall, - FileConstructorBindings, -} from './workers/parse-worker.js'; -import { normalizeFetchURL, routeMatches } from './route-extractors/nextjs.js'; -import { extractTemplateComponents } from './vue-sfc-extractor.js'; -import { extractReturnTypeName, stripNullable } from './type-extractors/shared.js'; -import type { LiteralTypeInferrer } from './type-extractors/types.js'; -import type { SyntaxNode } from './utils/ast-helpers.js'; - -import { logger } from '../logger.js'; - -// ── Property-prepass helpers (parity with parse-worker.ts) ── -// These mirror the sequential-path equivalents in parse-worker.ts so the main- -// thread `processCalls` pre-pass produces byte-identical Property nodes/symbols -// to the worker pool. Drift between the two paths breaks the -// `incremental ≡ --force` invariant the moment a repo crosses the worker -// threshold between runs. - -/** Walk up to the nearest enclosing class/struct/interface AST node. */ -const findEnclosingClassNode = (node: SyntaxNode): SyntaxNode | null => { - let current = node.parent; - while (current) { - if (CLASS_CONTAINER_TYPES.has(current.type)) return current; - current = current.parent; - } - return null; -}; - -/** No-op SymbolTable stub for FieldExtractorContext — matches parse-worker. 
*/ -const NOOP_SYMBOL_TABLE: SymbolTableReader = { - lookupExact: () => undefined, - lookupExactFull: () => undefined, - lookupExactAll: () => [], - lookupCallableByName: () => [], - getFiles: () => [][Symbol.iterator](), - getStats: () => ({ fileCount: 0 }), -}; - -/** - * Extract (and cache) field info for a class node. Cache is passed in so it - * stays scoped to a single `processCalls` invocation rather than leaking - * across analyze runs (worker uses module-level caching because each worker - * process is short-lived; the main thread is not). - * - * Cache key is `${filePath}:${classNode.startIndex}` — startIndex alone is a - * per-file byte offset, so almost every Ruby/Python file's leading class lands - * at byte 0 and would collide across files in the shared map. - */ -const getFieldInfo = ( - classNode: SyntaxNode, - provider: LanguageProvider, - context: FieldExtractorContext, - cache: Map>, -): Map | undefined => { - if (!provider.fieldExtractor) return undefined; - const cacheKey = `${context.filePath}:${classNode.startIndex}`; - const cached = cache.get(cacheKey); - if (cached) return cached; - const result = provider.fieldExtractor.extract(classNode, context); - if (!result?.fields?.length) return undefined; - const map = new Map(); - for (const field of result.fields) map.set(field.name, field); - cache.set(cacheKey, map); - return map; -}; - -/** Per-file resolved type bindings for exported symbols. - * Populated during call processing, consumed by Phase 14 re-resolution pass. */ -export type ExportedTypeMap = Map>; - -/** - * Type labels treated as class-like **method-dispatch receivers** by the call - * resolver — the set walked by the MRO / heritage path for member and static - * method calls. 
- * - * Derived from `CLASS_TYPES` (the heritage-index set in symbol-table) plus - * `Impl` — Rust `impl` blocks are the definition site of methods for a struct - * and must be walkable as receiver-type candidates even though they are not - * indexed by `lookupClassByName` (which keys off struct/trait names). Keeping - * this set a strict superset of `CLASS_TYPES` guarantees that anything - * reachable via `lookupClassByName` also passes this filter, so the two call - * paths cannot diverge silently. - * - * `Interface` is included even though interfaces cannot be directly - * instantiated in Java/C#/TypeScript: the resolver still needs to reach - * interface nodes for static-method dispatch (`Interface.staticMethod()`) and - * default-method resolution via the MRO walker. - * - * **Do not reuse this set for constructor-fallback filtering.** Constructors - * can only instantiate a narrower subset — see `INSTANTIABLE_CLASS_TYPES` - * below. `resolveStaticCall`'s step-5 class-node fallback uses the narrower - * set to prevent false `CALLS` edges from constructor-shaped calls to - * `Interface`, `Trait`, or `Impl` nodes. - */ -const CLASS_LIKE_TYPES = new Set([...CLASS_TYPES, 'Impl']); - -/** - * Type labels that can be the target of a constructor-shaped call when no - * explicit `Constructor` symbol is indexed — the "return the type itself as - * the call target" fallback set. - * - * Strict subset of both `CLASS_LIKE_TYPES` and `CONSTRUCTOR_TARGET_TYPES`. - * Excludes: - * - `Interface` / `Trait` — not instantiable by definition in any - * supported language. - * - `Impl` — Rust `impl` blocks are method-definition containers, not - * the type itself; the owning `Struct` is the correct target. - * - `Enum` — excluded pending language-specific support with motivating - * test fixtures (matches `CONSTRUCTOR_TARGET_TYPES`). - * - * Used exclusively by `resolveStaticCall`'s step-5 class-node fallback. 
- * Keep in sync with `CONSTRUCTOR_TARGET_TYPES` (which additionally contains - * `'Constructor'` for explicit-constructor-node filtering) when extending. - */ -const INSTANTIABLE_CLASS_TYPES = new Set(['Class', 'Struct', 'Record']); - -const MAX_EXPORTS_PER_FILE = 500; -const MAX_TYPE_NAME_LENGTH = 256; - -/** Build a map of imported callee names → return types for cross-file call-result binding. - * Consulted ONLY when SymbolTable has no unambiguous local match (local-first principle). - * - * Overlapping mechanism (1 of 3): this is the SymbolTable-backed path. - * See also: - * 2. collectExportedBindings (~line 168) / enrichExportedTypeMap — TypeEnv + graph isExported - * 3. Phase 9 fallback in verifyConstructorBindings (~line 563) — namedImportMap + BindingAccumulator - * A future cleanup should merge these into a single resolution pass. */ -export function buildImportedReturnTypes( - filePath: string, - namedImportMap: ReadonlyMap< - string, - ReadonlyMap - >, - symbolTable: { - lookupExactFull(filePath: string, name: string): { returnType?: string } | undefined; - }, -): ReadonlyMap { - const result = new Map(); - const fileImports = namedImportMap.get(filePath); - if (!fileImports) return result; - - for (const [localName, binding] of fileImports) { - const def = symbolTable.lookupExactFull(binding.sourcePath, binding.exportedName); - if (!def?.returnType) continue; - const simpleReturn = extractReturnTypeName(def.returnType); - if (simpleReturn) result.set(localName, simpleReturn); - } - return result; -} - -/** Build cross-file RAW return types for imported callables. - * Unlike buildImportedReturnTypes (which stores extractReturnTypeName output), - * this stores the raw declared return type string (e.g., 'User[]', 'List'). - * Used by lookupRawReturnType for for-loop element extraction via extractElementTypeFromString. 
*/ -export function buildImportedRawReturnTypes( - filePath: string, - namedImportMap: ReadonlyMap< - string, - ReadonlyMap - >, - symbolTable: { - lookupExactFull(filePath: string, name: string): { returnType?: string } | undefined; - }, -): ReadonlyMap { - const result = new Map(); - const fileImports = namedImportMap.get(filePath); - if (!fileImports) return result; - - for (const [localName, binding] of fileImports) { - const def = symbolTable.lookupExactFull(binding.sourcePath, binding.exportedName); - if (!def?.returnType) continue; - result.set(localName, def.returnType); - } - return result; -} - -/** Collect resolved type bindings for exported file-scope symbols. - * Uses graph node isExported flag — does NOT require isExported on SymbolDefinition. - * - * **Counterpart**: the worker path populates `exportedTypeMap` via the - * accumulator enrichment loop in `pipeline.ts` (search for "Worker path - * quality enrichment"). Both sites populate the same map with subtly - * different export-check semantics — this site uses SymbolTable + - * graph lookup, the worker loop uses three-candidate-ID graph lookup. - * They must stay in sync until unified. If you edit one, check the other. - * - * Overlapping mechanism (2 of 3): this is the TypeEnv + graph isExported path. - * See also: - * 1. buildImportedReturnTypes (~line 109) — namedImportMap + SymbolTable - * 3. Phase 9 fallback in verifyConstructorBindings (~line 563) — namedImportMap + BindingAccumulator - * A future cleanup should merge these into a single resolution pass. 
*/ -function collectExportedBindings( - typeEnv: { fileScope(): ReadonlyMap }, - filePath: string, - symbolTable: { lookupExact(filePath: string, name: string): string | undefined }, - graph: { getNode(id: string): { properties?: { isExported?: boolean } } | undefined }, -): Map | null { - const fileScope = typeEnv.fileScope(); - if (!fileScope || fileScope.size === 0) return null; - - const exported = new Map(); - for (const [varName, typeName] of fileScope) { - if (exported.size >= MAX_EXPORTS_PER_FILE) break; - if (!typeName || typeName.length > MAX_TYPE_NAME_LENGTH) continue; - const nodeId = symbolTable.lookupExact(filePath, varName); - if (!nodeId) continue; - const node = graph.getNode(nodeId); - if (node?.properties?.isExported) { - exported.set(varName, typeName); - } - } - return exported.size > 0 ? exported : null; -} - -/** Build ExportedTypeMap from graph nodes — used for worker path where TypeEnv - * is not available in the main thread. Collects returnType/declaredType from - * exported symbols that have callables with known return types. */ -export function buildExportedTypeMapFromGraph( - graph: KnowledgeGraph, - symbolTable: SymbolTableReader, -): ExportedTypeMap { - const result: ExportedTypeMap = new Map(); - graph.forEachNode((node) => { - if (!node.properties?.isExported) return; - if (!node.properties?.filePath || !node.properties?.name) return; - const filePath = node.properties.filePath as string; - const name = node.properties.name as string; - if (!name || name.length > MAX_TYPE_NAME_LENGTH) return; - // For callable symbols, use returnType; for properties/variables, use declaredType. - // Use lookupExactAll + nodeId match to handle same-name methods in different classes. - const defs = symbolTable.lookupExactAll(filePath, name); - const def = defs.find((d) => d.nodeId === node.id) ?? defs[0]; - if (!def) return; - const typeName = def.returnType ?? 
def.declaredType; - if (!typeName || typeName.length > MAX_TYPE_NAME_LENGTH) return; - // Extract simple type name (strip Promise<>, etc.) — reuse shared utility - const simpleType = extractReturnTypeName(typeName) ?? typeName; - if (!simpleType) return; - let fileExports = result.get(filePath); - if (!fileExports) { - fileExports = new Map(); - result.set(filePath, fileExports); - } - if (fileExports.size < MAX_EXPORTS_PER_FILE) { - fileExports.set(name, simpleType); - } - }); - return result; -} - -/** Seed cross-file receiver types into pre-extracted call records. - * Fills missing receiverTypeName for single-hop imported variables - * using ExportedTypeMap + namedImportMap — zero disk I/O, zero AST re-parsing. - * Mutates calls in-place. Runs BEFORE processCallsFromExtracted. */ -export function seedCrossFileReceiverTypes( - calls: ExtractedCall[], - namedImportMap: ReadonlyMap< - string, - ReadonlyMap - >, - exportedTypeMap: ReadonlyMap>, -): { enrichedCount: number } { - if (namedImportMap.size === 0 || exportedTypeMap.size === 0) { - return { enrichedCount: 0 }; - } - let enrichedCount = 0; - for (const call of calls) { - if (call.receiverTypeName || !call.receiverName) continue; - if (call.callForm !== 'member') continue; - - const fileImports = namedImportMap.get(call.filePath); - if (!fileImports) continue; - - const binding = fileImports.get(call.receiverName); - if (!binding) continue; - - const upstream = exportedTypeMap.get(binding.sourcePath); - if (!upstream) continue; - - const type = upstream.get(binding.exportedName); - if (type) { - call.receiverTypeName = type; - enrichedCount++; - } - } - return { enrichedCount }; -} - -// Stdlib methods that preserve the receiver's type identity. When TypeEnv already -// strips nullable wrappers (Option → User), these chain steps are no-ops -// for type resolution — the current type passes through unchanged. 
-const TYPE_PRESERVING_METHODS = new Set([ - 'unwrap', - 'expect', - 'unwrap_or', - 'unwrap_or_default', - 'unwrap_or_else', // Rust Option/Result - 'clone', - 'to_owned', - 'as_ref', - 'as_mut', - 'borrow', - 'borrow_mut', // Rust clone/borrow - 'get', // Kotlin/Java Optional.get() - 'orElseThrow', // Java Optional -]); - -/** Cache for method extraction results in findEnclosingFunction fallback path. - * Keyed by classNode.id to avoid re-extracting the same class body per call site. - * Cleared between files at line ~611 in the processCalls file loop. */ -const enclosingFnExtractCache = new Map< - number, - import('./method-types.js').ExtractedMethods | null ->(); - -/** - * Walk up the AST from a node to find the enclosing function/method. - * Returns null if the call is at module/file level (top-level code). - */ -const findEnclosingFunction = ( - node: SyntaxNode, - filePath: string, - ctx: ResolutionContext, - provider: import('./language-provider.js').LanguageProvider, -): string | null => { - let current = node.parent; - - while (current) { - if (FUNCTION_NODE_TYPES.has(current.type)) { - const efnResult = provider.methodExtractor?.extractFunctionName?.(current, filePath); - const funcName = efnResult?.funcName ?? genericFuncName(current); - const label = efnResult?.label ?? 
inferFunctionLabel(current.type); - - if (funcName) { - const resolved = ctx.resolve(funcName, filePath); - if (resolved?.tier === 'same-file' && resolved.candidates.length > 0) { - // Disambiguate by enclosing class when multiple candidates - if (resolved.candidates.length === 1) { - return resolved.candidates[0].nodeId; - } - const classInfo = findEnclosingClassInfo(current, filePath); - if (classInfo) { - const classMatches = resolved.candidates.filter((c) => c.ownerId === classInfo.classId); - // Unique class match — return it (no same-arity ambiguity) - if (classMatches.length === 1) return classMatches[0].nodeId; - // Multiple same-class candidates (same-arity overloads) — fall through - // to the fallback path which computes the exact ID with type-hash. - if (classMatches.length > 1) { - /* fall through to manual ID construction below */ - } else { - // No class match — return first candidate as before - return resolved.candidates[0].nodeId; - } - } else { - return resolved.candidates[0].nodeId; - } - } - - // Fallback: qualify the generated ID to match definition-phase node IDs - let finalLabel = label; - if (provider.labelOverride) { - const override = provider.labelOverride(current, label); - if (override !== null) finalLabel = override; - } - const classInfo2 = findEnclosingClassInfo(current, filePath); - const qualifiedName = classInfo2 ? `${classInfo2.className}.${funcName}` : funcName; - // Include # and ~typeTag suffix to match definition-phase Method/Constructor IDs. - const language = getLanguageFromFilename(filePath); - let arity: number | undefined; - let encTypeTag = ''; - if ( - (finalLabel === 'Method' || finalLabel === 'Constructor') && - provider.methodExtractor && - language - ) { - // Get class method map (cached per classNode.id) and look up current method - // by funcName:line. This avoids per-call-site extractFromNode AST walks. 
- let classNode = current.parent; - while (classNode && !provider.methodExtractor.isTypeDeclaration(classNode)) { - classNode = classNode.parent; - } - let info: MethodInfo | undefined; - if (classNode) { - let extracted = enclosingFnExtractCache.get(classNode.id); - if (extracted === undefined) { - extracted = - provider.methodExtractor.extract(classNode, { filePath, language }) ?? null; - enclosingFnExtractCache.set(classNode.id, extracted); - } - if (extracted?.methods?.length) { - const defLine = current.startPosition.row + 1; - info = extracted.methods.find((m) => m.name === funcName && m.line === defLine); - if (info) { - arity = info.parameters.some((p) => p.isVariadic) - ? undefined - : info.parameters.length; - } - if (arity !== undefined && info) { - const methodMap = new Map(); - for (const m of extracted.methods) methodMap.set(`${m.name}:${m.line}`, m); - const groups = buildCollisionGroups(methodMap); - encTypeTag = - typeTagForId(methodMap, funcName, arity, info, language, groups) + - constTagForId(methodMap, funcName, arity, info, groups); - } - } - } - // Fallback: extractFromNode for top-level methods without a class - if (!info && provider.methodExtractor.extractFromNode) { - const nodeInfo = provider.methodExtractor.extractFromNode(current, { - filePath, - language, - }); - if (nodeInfo) { - arity = nodeInfo.parameters.some((p) => p.isVariadic) - ? undefined - : nodeInfo.parameters.length; - } - } - } - const arityTag = arity !== undefined ? `#${arity}${encTypeTag}` : ''; - return generateId(finalLabel, `${filePath}:${qualifiedName}${arityTag}`); - } - } - - // Language-specific enclosing function resolution (e.g., Dart where - // function_body is a sibling of function_signature, not a child). 
- if (provider.enclosingFunctionFinder) { - const customResult = provider.enclosingFunctionFinder(current); - if (customResult) { - const resolved = ctx.resolve(customResult.funcName, filePath); - if (resolved?.tier === 'same-file' && resolved.candidates.length > 0) { - if (resolved.candidates.length === 1) { - return resolved.candidates[0].nodeId; - } - const classInfo = findEnclosingClassInfo(current.previousSibling ?? current, filePath); - if (classInfo) { - const classMatches = resolved.candidates.filter((c) => c.ownerId === classInfo.classId); - if (classMatches.length === 1) return classMatches[0].nodeId; - if (classMatches.length > 1) { - /* fall through to manual ID construction below */ - } else { - return resolved.candidates[0].nodeId; - } - } else { - return resolved.candidates[0].nodeId; - } - } - let finalLabel = customResult.label; - if (provider.labelOverride) { - const override = provider.labelOverride(current.previousSibling!, finalLabel); - if (override !== null) finalLabel = override; - } - const classInfo2 = findEnclosingClassInfo(current.previousSibling ?? current, filePath); - const qualifiedName = classInfo2 - ? `${classInfo2.className}.${customResult.funcName}` - : customResult.funcName; - // Include # and ~typeTag suffix to match definition-phase Method/Constructor IDs. - const sigNode = current.previousSibling ?? current; - const language2 = getLanguageFromFilename(filePath); - let arity2: number | undefined; - let encTypeTag2 = ''; - if ( - (finalLabel === 'Method' || finalLabel === 'Constructor') && - provider.methodExtractor && - language2 - ) { - let classNode2 = (current.previousSibling ?? 
current).parent; - while (classNode2 && !provider.methodExtractor.isTypeDeclaration(classNode2)) { - classNode2 = classNode2.parent; - } - let info2: MethodInfo | undefined; - if (classNode2) { - let extracted2 = enclosingFnExtractCache.get(classNode2.id); - if (extracted2 === undefined) { - extracted2 = - provider.methodExtractor.extract(classNode2, { filePath, language: language2 }) ?? - null; - enclosingFnExtractCache.set(classNode2.id, extracted2); - } - if (extracted2?.methods?.length) { - const defLine2 = sigNode.startPosition.row + 1; - info2 = extracted2.methods.find( - (m) => m.name === customResult.funcName && m.line === defLine2, - ); - if (info2) { - arity2 = info2.parameters.some((p) => p.isVariadic) - ? undefined - : info2.parameters.length; - } - if (arity2 !== undefined && info2) { - const methodMap = new Map(); - for (const m of extracted2.methods) methodMap.set(`${m.name}:${m.line}`, m); - const groups2 = buildCollisionGroups(methodMap); - encTypeTag2 = - typeTagForId( - methodMap, - customResult.funcName, - arity2, - info2, - language2, - groups2, - ) + constTagForId(methodMap, customResult.funcName, arity2, info2, groups2); - } - } - } - if (!info2 && provider.methodExtractor.extractFromNode) { - const nodeInfo = provider.methodExtractor.extractFromNode(sigNode, { - filePath, - language: language2, - }); - if (nodeInfo) { - arity2 = nodeInfo.parameters.some((p) => p.isVariadic) - ? undefined - : nodeInfo.parameters.length; - } - } - } - const arityTag2 = arity2 !== undefined ? `#${arity2}${encTypeTag2}` : ''; - return generateId(finalLabel, `${filePath}:${qualifiedName}${arityTag2}`); - } - } - - current = current.parent; - } - - return null; -}; - -/** - * Verify constructor bindings against SymbolTable and infer receiver types. - * Shared between sequential (processCalls) and worker (processCallsFromExtracted) paths. 
- */ -const verifyConstructorBindings = ( - bindings: readonly ConstructorBinding[], - filePath: string, - ctx: ResolutionContext, - graph?: KnowledgeGraph, - bindingAccumulator?: BindingAccumulator, -): Map => { - const verified = new Map(); - - for (const { scope, varName, calleeName, receiverClassName } of bindings) { - const tiered = ctx.resolve(calleeName, filePath); - const isClass = tiered?.candidates.some((def) => def.type === 'Class') ?? false; - - if (isClass) { - verified.set(receiverKey(scope, varName), calleeName); - } else { - let callableDefs = tiered?.candidates.filter( - (d) => d.type === 'Function' || d.type === 'Method', - ); - - // When receiver class is known (e.g. $this->method() in PHP), narrow - // candidates to methods owned by that class to avoid false disambiguation failures. - if (callableDefs && callableDefs.length > 1 && receiverClassName) { - if (graph) { - // Worker path: use graph.getNode (fast, already in-memory) - const narrowed = callableDefs.filter((d) => { - if (!d.ownerId) return false; - const owner = graph.getNode(d.ownerId); - return owner?.properties.name === receiverClassName; - }); - if (narrowed.length > 0) callableDefs = narrowed; - } else { - // Sequential path: use ctx.resolve (no graph available) - const classResolved = ctx.resolve(receiverClassName, filePath); - if (classResolved && classResolved.candidates.length > 0) { - const classNodeIds = new Set(classResolved.candidates.map((c) => c.nodeId)); - const narrowed = callableDefs.filter((d) => d.ownerId && classNodeIds.has(d.ownerId)); - if (narrowed.length > 0) callableDefs = narrowed; - } - } - } - - let typeName: string | undefined; - if (callableDefs && callableDefs.length === 1 && callableDefs[0].returnType) { - typeName = extractReturnTypeName(callableDefs[0].returnType); - } - - // Phase 9: BindingAccumulator fallback for cross-file return types. 
- // Used when the SymbolTable has no return type for a cross-file callee - // (e.g., a return type that TypeEnv resolved via fixpoint in the source - // file but was not stored as a SymbolTable returnType annotation). - // namedImportMap tells us which source file exported the callee so we - // can look up its file-scope binding via the O(1) fileScopeGet method. - // - // Tier gating: only fall back to the accumulator when resolution is - // unambiguously import-scoped or global. When tiered.tier is 'same-file', - // the local definition is authoritative even without a return type - // annotation — using the accumulator here would let an imported callee - // with the same name shadow the local one, producing false CALLS edges. - // When multiple callable candidates exist, the accumulator would pick - // arbitrarily — skip to avoid fabricated edges. - // - // Quality note: worker-path accumulator entries are Tier 0/1 only - // (annotation-declared + same-file constructor inference) — see the - // BindingAccumulator class JSDoc. For large repos where the worker - // path dominates, Phase 9 binding accuracy is structurally lower - // than for sequential-path repos where Tier 2 cross-file propagation - // is available. - // - // Overlapping mechanism note: this is one of three cross-file - // return-type resolution paths in the codebase: - // 1. buildImportedReturnTypes (~line 109) — namedImportMap + - // SymbolTable.lookupExactFull (structure-processor captured) - // 2. collectExportedBindings (~line 168) / enrichExportedTypeMap - // — TypeEnv + graph isExported flag - // 3. This fallback — namedImportMap + BindingAccumulator - // A future cleanup should merge these into a single resolution pass. 
- const shouldFallback = - tiered?.tier !== 'same-file' && (!callableDefs || callableDefs.length <= 1); - if (!typeName && bindingAccumulator && shouldFallback) { - const namedImports = ctx.namedImportMap.get(filePath); - const importBinding = namedImports?.get(calleeName); - if (importBinding) { - const rawType = bindingAccumulator.fileScopeGet( - importBinding.sourcePath, - importBinding.exportedName, - ); - if (rawType) { - typeName = extractReturnTypeName(rawType); - } - } - } - - if (typeName) { - verified.set(receiverKey(scope, varName), typeName); - } - } - } - - return verified; -}; - -/** - * Resolution result with confidence scoring - */ -interface ResolveResult { - nodeId: string; - confidence: number; - reason: string; - returnType?: string; -} - -/** - * After resolving a call to an interface method, find additional targets - * in classes implementing that interface. Returns implementation method - * results with lower confidence ('interface-dispatch'). - */ -function findInterfaceDispatchTargets( - calledName: string, - receiverTypeName: string, - currentFile: string, - ctx: ResolutionContext, - heritageMap: HeritageMap, - primaryNodeId: string, -): ResolveResult[] { - const implFiles = heritageMap.getImplementorFiles(receiverTypeName); - if (implFiles.size === 0) return []; - - const typeResolved = ctx.resolve(receiverTypeName, currentFile); - if (!typeResolved) return []; - if (!typeResolved.candidates.some((c) => c.type === 'Interface')) return []; - - const results: ResolveResult[] = []; - for (const implFile of implFiles) { - const methods = ctx.model.symbols.lookupExactAll(implFile, calledName); - for (const method of methods) { - if (method.nodeId !== primaryNodeId) { - results.push({ - nodeId: method.nodeId, - confidence: 0.7, - reason: 'interface-dispatch', - }); - } - } - } - return results; -} - -export const processCalls = async ( - graph: KnowledgeGraph, - files: { path: string; content: string }[], - astCache: ASTCache, - ctx: 
ResolutionContext, - onProgress?: (current: number, total: number) => void, - exportedTypeMap?: ExportedTypeMap, - /** Phase 14: pre-resolved cross-file bindings to seed into buildTypeEnv. Keyed by filePath → Map. */ - importedBindingsMap?: ReadonlyMap>, - /** Phase 14 E3: cross-file return types for imported callables. Keyed by filePath → Map. - * Consulted ONLY when SymbolTable has no unambiguous match (local-first principle). */ - importedReturnTypesMap?: ReadonlyMap>, - /** Phase 14 E3: cross-file RAW return types for for-loop element extraction. Keyed by filePath → Map. */ - importedRawReturnTypesMap?: ReadonlyMap>, - heritageMap?: HeritageMap, - bindingAccumulator?: BindingAccumulator, - /** - * Optional cache for compiled `Parser.Query` objects keyed by language name. - * When provided, compiled queries are reused across calls instead of being - * re-compiled from the query string for every file. Callers that invoke - * `processCalls` many times with single-file batches (e.g. the cross-file - * propagation phase) should pass a long-lived map here to avoid O(N) - * query recompilation overhead. - */ - compiledQueryCache?: Map, -): Promise => { - const parser = await loadParser(); - const collectedHeritage: ExtractedHeritage[] = []; - const pendingWrites: { - receiverTypeName: string; - propertyName: string; - filePath: string; - srcId: string; - line?: number; - }[] = []; - // Phase P cross-file: accumulate heritage across files for cross-file isSubclassOf. - // Used as a secondary check when per-file parentMap lacks the relationship — helps - // when the heritage-declaring file is processed before the call site file. - // For remaining cases (reverse file order), the SymbolTable class-type fallback applies. - const globalParentMap = new Map(); - const globalParentSeen = new Map>(); - const logSkipped = isVerboseIngestionEnabled(); - const skippedByLang = logSkipped ? 
new Map() : null; - - // ── Prepare-then-resolve: single preparation loop, deferred resolution ── - // All files are prepared (parse → query → heritage → TypeEnv) in one loop, - // then resolved (verifyConstructorBindings → call edges) in a second loop. - // This ensures: - // 1. When bindingAccumulator is present, ALL files flush their TypeEnv - // bindings before ANY verifyConstructorBindings reads — fixing the - // consumer-before-provider ordering bug on the sequential path. - // 2. globalParentMap is fully populated before resolution, improving - // cross-file isSubclassOf accuracy regardless of file order. - // For the sequential path (<15 files), buffering per-file state is negligible. - interface PreparedFile { - file: { path: string; content: string }; - language: SupportedLanguages; - provider: ReturnType; - tree: ReturnType; - matches: ReturnType; - parentMap: ReadonlyMap; - typeEnv: ReturnType; - } - const prepared: PreparedFile[] = []; - - for (let i = 0; i < files.length; i++) { - const file = files[i]; - if (i % 20 === 0) await yieldToEventLoop(); - - const language = getLanguageFromFilename(file.path); - if (!language) continue; - // Registry-primary gate: scope-based phase owns CALLS for this lang. - if (isRegistryPrimary(language)) continue; - if (!isLanguageAvailable(language)) { - if (skippedByLang) { - skippedByLang.set(language, (skippedByLang.get(language) ?? 0) + 1); - } - continue; - } - - const provider = getProvider(language); - const queryStr = provider.treeSitterQueries; - if (!queryStr) continue; - - await loadLanguage(language, file.path); - - let tree = astCache.get(file.path); - if (!tree) { - const parseContent = provider.preprocessSource?.(file.content, file.path) ?? 
file.content; - try { - tree = parseSourceSafe(parser, parseContent, undefined, { - bufferSize: getTreeSitterBufferSize(parseContent), - }); - } catch (parseError) { - continue; - } - astCache.set(file.path, tree); - } - - let matches; - try { - const lang = parser.getLanguage(); - let query = compiledQueryCache?.get(language); - if (!query) { - query = new Parser.Query(lang, queryStr); - compiledQueryCache?.set(language, query); - } - matches = query.matches(tree.rootNode); - } catch (queryError) { - logger.warn({ queryError }, `Query error for ${file.path}:`); - continue; - } - - // Extract heritage from query matches to build parentMap for buildTypeEnv. - // Heritage-processor runs in PARALLEL, so graph edges don't exist when buildTypeEnv runs. - const fileParentMap = new Map(); - if (provider.heritageExtractor) { - for (const match of matches) { - const captureMap: Record = {}; - match.captures.forEach((c) => (captureMap[c.name] = c.node)); - if (captureMap['heritage.class']) { - const heritageItems = provider.heritageExtractor.extract(captureMap, { - filePath: file.path, - language, - }); - for (const item of heritageItems) { - if (item.kind === 'extends') { - let parents = fileParentMap.get(item.className); - if (!parents) { - parents = []; - fileParentMap.set(item.className, parents); - } - if (!parents.includes(item.parentName)) parents.push(item.parentName); - } - } - } - } - } - const parentMap: ReadonlyMap = fileParentMap; - // Merge per-file heritage into globalParentMap for cross-file isSubclassOf lookups. 
- for (const [cls, parents] of fileParentMap) { - let global = globalParentMap.get(cls); - let seen = globalParentSeen.get(cls); - if (!global) { - global = []; - globalParentMap.set(cls, global); - } - if (!seen) { - seen = new Set(); - globalParentSeen.set(cls, seen); - } - for (const p of parents) { - if (!seen.has(p)) { - seen.add(p); - global.push(p); - } - } - } - - const importedBindings = importedBindingsMap?.get(file.path); - const importedReturnTypes = importedReturnTypesMap?.get(file.path); - const importedRawReturnTypes = importedRawReturnTypesMap?.get(file.path); - const typeEnv = buildTypeEnv(tree, language, { - filePath: file.path, - model: ctx.model, - parentMap, - importedBindings, - importedReturnTypes, - importedRawReturnTypes, - enclosingFunctionFinder: provider?.enclosingFunctionFinder, - extractFunctionName: provider?.methodExtractor?.extractFunctionName, - }); - if (typeEnv && exportedTypeMap) { - const fileExports = collectExportedBindings(typeEnv, file.path, ctx.model.symbols, graph); - if (fileExports) exportedTypeMap.set(file.path, fileExports); - } - if (bindingAccumulator) { - typeEnv.flush(file.path, bindingAccumulator); - } - - prepared.push({ file, language, provider, tree, matches, parentMap, typeEnv }); - } - - // ── Property-registration pre-pass ── - // Register all routed properties (e.g. Ruby attr_accessor) BEFORE the - // resolution loop so cross-file field-type lookups (e.g. - // `user.address.save → Address#save`) succeed regardless of file - // processing order. This MUST stay in lockstep with the equivalent - // worker-path block in parse-worker.ts (kind === 'properties') — any - // divergence between the two paths breaks the `incremental ≡ --force` - // invariant once a repo crosses the worker threshold between runs. 
- const fieldInfoCache = new Map>(); - for (const { file, language, provider, matches, typeEnv } of prepared) { - const callRouter = provider.callRouter; - if (!callRouter) continue; - matches.forEach((match) => { - const captureMap: Record = {}; - match.captures.forEach((c) => (captureMap[c.name] = c.node)); - if (!captureMap['call']) return; - const callNameNode = captureMap['call.name']; - if (!callNameNode) return; - const routed = callRouter(callNameNode.text, captureMap['call']); - if (!routed || routed.kind !== 'properties') return; - - // #1978: thread the qualifier so a routed property's owner edge points at - // the *qualified* nested-class node (Shapes.Circle) instead of a now-nonexistent - // simple `Class:file:Circle` id. Gated on the flag → byte-identical when off. - // MUST stay in lockstep with the worker `kind === 'properties'` block. - const propGetQualifiedOwnerName = - provider.classExtractor?.qualifiedNodeId === true - ? (node: SyntaxNode, simpleName: string): string | null => - provider.classExtractor!.extractQualifiedName(node, simpleName) - : undefined; - const propEnclosingInfo = findEnclosingClassInfo( - captureMap['call'], - file.path, - provider.resolveEnclosingOwner, - propGetQualifiedOwnerName, - ); - const propEnclosingClassId = - propEnclosingInfo?.qualifiedClassId ?? propEnclosingInfo?.classId ?? null; - - // Enrich routed properties with FieldExtractor metadata so types - // discovered from constructor assignments (e.g. `@address = Address.new`) - // are propagated even when the routing payload itself lacks declaredType. 
- let routedFieldMap: Map | undefined; - if (provider.fieldExtractor && typeEnv) { - const classNode = findEnclosingClassNode(captureMap['call']); - if (classNode) { - routedFieldMap = getFieldInfo( - classNode, - provider, - { - typeEnv, - symbolTable: NOOP_SYMBOL_TABLE, - filePath: file.path, - language, - }, - fieldInfoCache, - ); - } - } - - const fileId = generateId('File', file.path); - for (const item of routed.items) { - const routedFieldInfo = routedFieldMap?.get(item.propName); - const propQualifiedName = propEnclosingInfo - ? `${propEnclosingInfo.className}.${item.propName}` - : item.propName; - const nodeId = generateId('Property', `${file.path}:${propQualifiedName}`); - graph.addNode({ - id: nodeId, - label: 'Property', - properties: { - name: item.propName, - filePath: file.path, - startLine: item.startLine, - endLine: item.endLine, - language, - isExported: true, - description: item.accessorType, - ...(item.declaredType - ? { declaredType: item.declaredType } - : routedFieldInfo?.type - ? { declaredType: routedFieldInfo.type } - : {}), - ...(routedFieldInfo?.visibility !== undefined - ? { visibility: routedFieldInfo.visibility } - : {}), - ...(routedFieldInfo?.isStatic !== undefined - ? { isStatic: routedFieldInfo.isStatic } - : {}), - ...(routedFieldInfo?.isReadonly !== undefined - ? { isReadonly: routedFieldInfo.isReadonly } - : {}), - }, - }); - ctx.model.symbols.add(file.path, item.propName, nodeId, 'Property', { - ...(propEnclosingClassId ? { ownerId: propEnclosingClassId } : {}), - ...(item.declaredType - ? { declaredType: item.declaredType } - : routedFieldInfo?.type - ? { declaredType: routedFieldInfo.type } - : {}), - }); - // Only emit File -> Property DEFINES for top-level properties (issue #1944). 
- if (!propEnclosingClassId) { - const relId = generateId('DEFINES', `${fileId}->${nodeId}`); - graph.addRelationship({ - id: relId, - sourceId: fileId, - targetId: nodeId, - type: 'DEFINES', - confidence: 1.0, - reason: '', - }); - } - if (propEnclosingClassId) { - graph.addRelationship({ - id: generateId('HAS_PROPERTY', `${propEnclosingClassId}->${nodeId}`), - sourceId: propEnclosingClassId, - targetId: nodeId, - type: 'HAS_PROPERTY', - confidence: 1.0, - reason: '', - }); - } - } - }); - } - - // ── Resolution loop: verify constructor bindings and resolve calls ── - // The accumulator (if present) is now fully populated from the preparation - // loop above, so verifyConstructorBindings sees all provider bindings - // regardless of file processing order. - for (let i = 0; i < prepared.length; i++) { - const { file, language, provider, tree, matches, parentMap, typeEnv } = prepared[i]; - - enclosingFnExtractCache.clear(); - onProgress?.(i + 1, files.length); - if (i % 20 === 0) await yieldToEventLoop(); - - const callRouter = provider.callRouter; - - const verifiedReceivers = - typeEnv.constructorBindings.length > 0 - ? 
verifyConstructorBindings( - typeEnv.constructorBindings, - file.path, - ctx, - undefined, // graph not available on the sequential path here - bindingAccumulator, // Phase 9 fallback — same as worker path (R3 parity) - ) - : new Map(); - const receiverIndex = buildReceiverTypeIndex(verifiedReceivers); - - ctx.enableCache(file.path); - const widenCache: WidenCache = new Map(); - - matches.forEach((match) => { - const captureMap: Record = {}; - match.captures.forEach((c) => (captureMap[c.name] = c.node)); - // ── Write access: emit ACCESSES {reason: 'write'} for assignments to member fields ── - if ( - captureMap['assignment'] && - captureMap['assignment.receiver'] && - captureMap['assignment.property'] - ) { - const receiverNode = captureMap['assignment.receiver']; - const propertyName: string = captureMap['assignment.property'].text; - // Resolve receiver type: simple identifier → TypeEnv lookup or class resolution - let receiverTypeName: string | undefined; - const receiverText = receiverNode.text; - if (receiverText && typeEnv) { - receiverTypeName = typeEnv.lookup(receiverText, captureMap['assignment']); - } - // Fall back to verified constructor bindings (mirrors CALLS resolution tier 2) - if (!receiverTypeName && receiverText && receiverIndex.size > 0) { - const enclosing = findEnclosingFunction( - captureMap['assignment'], - file.path, - ctx, - provider, - ); - const funcName = enclosing ? 
extractFuncNameFromSourceId(enclosing) : ''; - receiverTypeName = lookupReceiverType(receiverIndex, funcName, receiverText); - } - if (!receiverTypeName && receiverText) { - const resolved = ctx.resolve(receiverText, file.path); - if (resolved?.candidates.some((d) => CLASS_LIKE_TYPES.has(d.type))) { - receiverTypeName = receiverText; - } - } - if (receiverTypeName) { - const enclosing = findEnclosingFunction( - captureMap['assignment'], - file.path, - ctx, - provider, - ); - const srcId = enclosing || generateId('File', file.path); - // Defer resolution: Ruby attr_accessor properties are registered during - // this same loop, so cross-file lookups fail if the declaring file hasn't - // been processed yet. Collect now, resolve after all files are done. - pendingWrites.push({ - receiverTypeName, - propertyName, - filePath: file.path, - srcId, - line: captureMap['assignment'].startPosition.row + 1, - }); - } - // Assignment-only capture (no @call sibling): skip the rest of this - // forEach iteration — this acts as a `continue` in the match loop. - if (!captureMap['call']) return; - } - - if (!captureMap['call']) return; - - const callNode = captureMap['call']; - const callExtractor = provider.callExtractor; - - // ── Language-specific call site (e.g. Java :: method references) ── - if (callExtractor) { - const langCallSite = callExtractor.extract(callNode, undefined); - if (langCallSite) { - if (provider.isBuiltInName(langCallSite.calledName)) return; - - const sourceId = - findEnclosingFunction(callNode, file.path, ctx, provider) || - generateId('File', file.path); - const receiverName = - langCallSite.callForm === 'member' ? langCallSite.receiverName : undefined; - let receiverTypeName = - receiverName && typeEnv ? 
typeEnv.lookup(receiverName, callNode) : undefined; - - if ( - langCallSite.typeAsReceiverHeuristic && - receiverName !== undefined && - receiverTypeName === undefined && - langCallSite.callForm === 'member' - ) { - const c0 = receiverName.charCodeAt(0); - if (c0 >= 65 && c0 <= 90) receiverTypeName = receiverName; - } - - const resolved = resolveCallTarget( - { - calledName: langCallSite.calledName, - callForm: langCallSite.callForm, - ...(receiverTypeName !== undefined ? { receiverTypeName } : {}), - ...(receiverName !== undefined ? { receiverName } : {}), - }, - file.path, - ctx, - undefined, - widenCache, - undefined, - heritageMap, - ); - - if (!resolved) return; - graph.addRelationship({ - id: generateId('CALLS', `${sourceId}:${langCallSite.calledName}->${resolved.nodeId}`), - sourceId, - targetId: resolved.nodeId, - type: 'CALLS', - confidence: resolved.confidence, - reason: resolved.reason, - }); - - if (heritageMap && langCallSite.callForm === 'member' && receiverTypeName) { - const implTargets = findInterfaceDispatchTargets( - langCallSite.calledName, - receiverTypeName, - file.path, - ctx, - heritageMap, - resolved.nodeId, - ); - for (const impl of implTargets) { - graph.addRelationship({ - id: generateId('CALLS', `${sourceId}:${langCallSite.calledName}->${impl.nodeId}`), - sourceId, - targetId: impl.nodeId, - type: 'CALLS', - confidence: impl.confidence, - reason: impl.reason, - }); - } - } - return; - } - } - - const nameNode = captureMap['call.name']; - if (!nameNode) return; - - const calledName = nameNode.text; - - // Check heritage extractor for call-based heritage (e.g., Ruby include/extend/prepend) - if (provider.heritageExtractor?.extractFromCall) { - const heritageItems = provider.heritageExtractor.extractFromCall( - calledName, - captureMap['call'], - { filePath: file.path, language }, - ); - if (heritageItems !== null) { - for (const item of heritageItems) { - collectedHeritage.push({ - filePath: file.path, - className: item.className, - 
parentName: item.parentName, - kind: item.kind, - }); - } - return; - } - } - - // Dispatch: route language-specific calls (properties, imports) - // Heritage routing is handled by heritageExtractor.extractFromCall above. - const routed = callRouter?.(calledName, captureMap['call']); - if (routed) { - switch (routed.kind) { - case 'skip': - case 'import': - return; - - case 'properties': { - // Properties already registered in the pre-pass above. - // Skip to avoid duplicate nodes/edges. - return; - } - - case 'call': - break; - } - } - - if (provider.isBuiltInName(calledName)) return; - - // --- DAG stage 2-3: classify-form + infer-receiver (shared defaults) --- - // These stages run the shared inference chain. Language providers can - // customize infer-receiver (stage 3) via the inferImplicitReceiver hook - // which runs AFTER this default chain (typed-binding → constructor-map → - // module-alias → class-as-receiver → mixed-chain), and selectDispatch - // (stage 4) which picks the resolver branch. - let callForm = inferCallForm(callNode, nameNode); - let receiverName = callForm === 'member' ? extractReceiverName(nameNode) : undefined; - let receiverTypeName = - receiverName && typeEnv ? typeEnv.lookup(receiverName, callNode) : undefined; - let receiverSource: ReceiverSource = receiverTypeName ? 'typed-binding' : 'none'; - // Phase P: virtual dispatch override — when the declared type is a base class but - // the constructor created a known subclass, prefer the more specific type. - // Checks per-file parentMap first, then falls back to globalParentMap for - // cross-file heritage (e.g. Dog extends Animal declared in a different file). - // Reconstructs the exact scope key (funcName@startIndex\0varName) from the - // enclosing function AST node for a correct, O(1) map lookup. 
- if (receiverTypeName && receiverName && typeEnv && typeEnv.constructorTypeMap.size > 0) { - // Reconstruct scope key to match constructorTypeMap's scope\0varName format - let scope = ''; - let p = callNode.parent; - while (p) { - if (FUNCTION_NODE_TYPES.has(p.type)) { - const funcName = - provider.methodExtractor?.extractFunctionName?.(p, file.path)?.funcName ?? - genericFuncName(p); - if (funcName) { - scope = `${funcName}@${p.startIndex}`; - break; - } - } - p = p.parent; - } - const ctorType = typeEnv.constructorTypeMap.get(`${scope}\0${receiverName}`); - if (ctorType && ctorType !== receiverTypeName) { - // Verify subclass relationship: per-file parentMap first, then cross-file - // globalParentMap, then fall back to SymbolTable class verification. - // The SymbolTable fallback handles cross-file cases where heritage is declared - // in a file not yet processed (e.g. Dog extends Animal in models/Dog.kt when - // processing services/App.kt). Since constructorTypeMap only records entries - // when a type annotation AND constructor are both present (val x: Base = Sub()), - // confirming both are class-like types is sufficient — the original code would - // not compile if Sub didn't extend Base. - if ( - isSubclassOf(ctorType, receiverTypeName, parentMap) || - isSubclassOf(ctorType, receiverTypeName, globalParentMap) || - (ctx.model.types.lookupClassByName(ctorType).length > 0 && - ctx.model.types.lookupClassByName(receiverTypeName).length > 0) - ) { - receiverTypeName = ctorType; - receiverSource = 'constructor-map'; - } - } - } - // Fall back to verified constructor bindings for return type inference - if (!receiverTypeName && receiverName && receiverIndex.size > 0) { - const enclosingFunc = findEnclosingFunction(callNode, file.path, ctx, provider); - const funcName = enclosingFunc ? 
extractFuncNameFromSourceId(enclosingFunc) : ''; - receiverTypeName = lookupReceiverType(receiverIndex, funcName, receiverName); - if (receiverTypeName) receiverSource = 'constructor-map'; - } - // Fall back to class-as-receiver for static method calls (e.g. UserService.find_user(), - // Greetable.format()). When the receiver name is not a variable in TypeEnv but - // resolves to a class-like symbol (Class / Interface / Struct / Enum / Trait) via - // tiered resolution, use it directly as the receiver type. `Trait` is included so - // Ruby module class-method calls flow through the class-as-receiver path and reach - // the `selectDispatch` hook's singleton branch. - if (!receiverTypeName && receiverName && callForm === 'member') { - const typeResolved = ctx.resolve(receiverName, file.path); - if ( - typeResolved && - typeResolved.candidates.some( - (d) => - d.type === 'Class' || - d.type === 'Interface' || - d.type === 'Struct' || - d.type === 'Enum' || - d.type === 'Trait', - ) - ) { - receiverTypeName = receiverName; - receiverSource = 'class-as-receiver'; - } - } - // Hoist sourceId so it's available for ACCESSES edge emission during chain walk. - const enclosingFuncId = findEnclosingFunction(callNode, file.path, ctx, provider); - const sourceId = enclosingFuncId || generateId('File', file.path); - - // Fall back to mixed chain resolution when the receiver is a complex expression - // (field chain, call chain, or interleaved — e.g. user.address.city.save() or - // svc.getUser().address.save()). Handles all cases with a single unified walk. - if (callForm === 'member' && !receiverTypeName && !receiverName) { - const receiverNode = extractReceiverNode(nameNode); - if (receiverNode) { - const extracted = extractMixedChain(receiverNode); - if (extracted && extracted.chain.length > 0) { - let currentType = - extracted.baseReceiverName && typeEnv - ? 
typeEnv.lookup(extracted.baseReceiverName, callNode) - : undefined; - if (!currentType && extracted.baseReceiverName && receiverIndex.size > 0) { - const funcName = enclosingFuncId ? extractFuncNameFromSourceId(enclosingFuncId) : ''; - currentType = lookupReceiverType(receiverIndex, funcName, extracted.baseReceiverName); - } - if (!currentType && extracted.baseReceiverName) { - const cr = ctx.resolve(extracted.baseReceiverName, file.path); - if ( - cr?.candidates.some( - (d) => - d.type === 'Class' || - d.type === 'Interface' || - d.type === 'Struct' || - d.type === 'Enum', - ) - ) { - currentType = extracted.baseReceiverName; - } - } - if (currentType) { - receiverTypeName = walkMixedChain( - extracted.chain, - currentType, - file.path, - ctx, - makeAccessEmitter(graph, sourceId), - heritageMap, - ); - if (receiverTypeName) receiverSource = 'mixed-chain'; - } - } - } - } - - // --- DAG stage 3: infer-receiver (provider hook) --- - // Synthesize implicit receivers for languages that omit them (e.g., Ruby bare-call). - // This hook runs AFTER the shared inference chain so explicit receivers / - // typed bindings always take precedence. Output (if non-null) overlays onto - // the ReceiverEnriched for the next stage. - let dispatchHint: string | undefined; - if (provider.inferImplicitReceiver) { - const override = provider.inferImplicitReceiver({ - calledName, - callForm, - receiverName, - receiverTypeName, - callNode, - filePath: file.path, - }); - if (override) { - callForm = override.callForm; - receiverName = override.receiverName; - receiverTypeName = override.receiverTypeName; - receiverSource = override.receiverSource; - dispatchHint = override.hint; - } - } - - // --- DAG stage 4: select-dispatch (provider hook + default fallback) --- - // Decide which resolver path to try first (primary) and fallback strategy. - // Language providers can customize dispatch via selectDispatch hook; all - // others use the shared defaultDispatchDecision. 
Always non-null after this - // block so downstream resolvers are table-driven. - const dispatchDecision: DispatchDecision = - provider.selectDispatch?.({ - calledName, - callForm, - receiverName, - receiverTypeName, - receiverSource, - hint: dispatchHint, - }) ?? defaultDispatchDecision(callForm); - - // Build overload hints for languages with inferLiteralType (Java/Kotlin/C#/C++). - // Only used when multiple candidates survive arity filtering — ~1-3% of calls. - const langConfig = provider.typeConfig; - const hints: OverloadHints | undefined = langConfig?.inferLiteralType - ? { callNode, inferLiteralType: langConfig.inferLiteralType, typeEnv } - : undefined; - - const resolved = resolveCallTarget( - { - calledName, - argCount: countCallArguments(callNode), - callForm, - receiverTypeName, - receiverName, - }, - file.path, - ctx, - hints, - widenCache, - undefined, - heritageMap, - dispatchDecision, - ); - - if (!resolved) return; - const relId = generateId('CALLS', `${sourceId}:${calledName}->${resolved.nodeId}`); - - graph.addRelationship({ - id: relId, - sourceId, - targetId: resolved.nodeId, - type: 'CALLS', - confidence: resolved.confidence, - reason: resolved.reason, - }); - - if (heritageMap && callForm === 'member' && receiverTypeName) { - const implTargets = findInterfaceDispatchTargets( - calledName, - receiverTypeName, - file.path, - ctx, - heritageMap, - resolved.nodeId, - ); - for (const impl of implTargets) { - graph.addRelationship({ - id: generateId('CALLS', `${sourceId}:${calledName}->${impl.nodeId}`), - sourceId, - targetId: impl.nodeId, - type: 'CALLS', - confidence: impl.confidence, - reason: impl.reason, - }); - } - } - }); - - // Vue: emit CALLS edges for PascalCase components used in