diff --git a/gitnexus-shared/src/scope-resolution/types.ts b/gitnexus-shared/src/scope-resolution/types.ts index 1cff701156..c3d8b80fbc 100644 --- a/gitnexus-shared/src/scope-resolution/types.ts +++ b/gitnexus-shared/src/scope-resolution/types.ts @@ -10,8 +10,16 @@ * Lifecycle contract (RFC §2.8): scopes are **constructed during extraction, * linked during finalize, immutable after finalize**. All fields are * `readonly` at the type level; `Object.freeze` is applied at runtime in dev - * builds. `ReferenceIndex` is the sole structure populated after freeze — by - * resolution, before emission. + * builds. + * + * Two structures are populated after freeze: + * 1. `ReferenceIndex` — by resolution, before emission. + * 2. `ScopeResolutionIndexes.bindingAugmentations` — the dedicated + * append-only post-finalize binding channel (e.g. C# same-namespace + * cross-file fanout). The companion `indexes.bindings` is the + * finalize-output channel and is deep-frozen by `materializeBindings`; + * walkers consult both via `lookupBindingsAt`. See `ScopeResolver` + * Invariant I8 for the full lifecycle contract. */ import type { NodeLabel } from '../graph/types.js'; diff --git a/gitnexus/src/core/ingestion/call-processor.ts b/gitnexus/src/core/ingestion/call-processor.ts index 2debe56b31..4eb7ea6ceb 100644 --- a/gitnexus/src/core/ingestion/call-processor.ts +++ b/gitnexus/src/core/ingestion/call-processor.ts @@ -770,7 +770,7 @@ export const processCalls = async ( if (!tree) { try { tree = parser.parse(file.content, undefined, { - bufferSize: getTreeSitterBufferSize(file.content.length), + bufferSize: getTreeSitterBufferSize(file.content), }); } catch (parseError) { continue; @@ -3257,7 +3257,7 @@ export const extractFetchCallsFromFiles = async ( if (!tree) { try { tree = parser.parse(file.content, undefined, { - bufferSize: getTreeSitterBufferSize(file.content.length), + bufferSize: getTreeSitterBufferSize(file.content), }); } catch { continue; diff --git a/gitnexus/src/core/ingestion/constants.ts b/gitnexus/src/core/ingestion/constants.ts index 81f1107e14..4862d31a0f 100644 --- a/gitnexus/src/core/ingestion/constants.ts +++ b/gitnexus/src/core/ingestion/constants.ts @@ -1,3 +1,5 @@ +import { Buffer } from 'node:buffer'; + /** * Default minimum buffer size for tree-sitter parsing (512 KB). * tree-sitter requires bufferSize >= file size in bytes. @@ -12,8 +14,13 @@ export const TREE_SITTER_MAX_BUFFER = 32 * 1024 * 1024; /** * Compute adaptive buffer size for tree-sitter parsing. - * Uses 2× file size, clamped between 512 KB and 32 MB. - * Previous 256 KB fixed limit silently skipped files > ~200 KB (e.g., imgui.h at 411 KB). + * Uses 2x UTF-8 byte size, clamped between 512 KB and 32 MB. + * Keeps tree-sitter's byte-sized buffer above large ASCII and multibyte sources. */ -export const getTreeSitterBufferSize = (contentLength: number): number => - Math.min(Math.max(contentLength * 2, TREE_SITTER_BUFFER_SIZE), TREE_SITTER_MAX_BUFFER); +export const getTreeSitterContentByteLength = (sourceText: string): number => + Buffer.byteLength(sourceText, 'utf8'); + +export const getTreeSitterBufferSize = (sourceText: string): number => { + const byteLength = getTreeSitterContentByteLength(sourceText); + return Math.min(Math.max(byteLength * 2, TREE_SITTER_BUFFER_SIZE), TREE_SITTER_MAX_BUFFER); +}; diff --git a/gitnexus/src/core/ingestion/finalize-orchestrator.ts b/gitnexus/src/core/ingestion/finalize-orchestrator.ts index 8541e58aaf..942faa1c96 100644 --- a/gitnexus/src/core/ingestion/finalize-orchestrator.ts +++ b/gitnexus/src/core/ingestion/finalize-orchestrator.ts @@ -141,6 +141,11 @@ export function finalizeScopeModel( methodDispatch, imports: finalizeOut.imports, bindings: finalizeOut.bindings, + // Empty post-finalize augmentation channel. Populated (if at all) + // by language hooks like `populateCsharpNamespaceSiblings` running + // AFTER `finalizeScopeModel` returns, before `resolveReferenceSites` + // consumes the bundle. Most languages leave it empty. + bindingAugmentations: new Map(), referenceSites: Object.freeze([...allReferenceSites]), sccs: finalizeOut.sccs, stats: finalizeOut.stats, diff --git a/gitnexus/src/core/ingestion/heritage-processor.ts b/gitnexus/src/core/ingestion/heritage-processor.ts index 6692d1c950..12e59a19ac 100644 --- a/gitnexus/src/core/ingestion/heritage-processor.ts +++ b/gitnexus/src/core/ingestion/heritage-processor.ts @@ -220,7 +220,7 @@ export const processHeritage = async ( // Use larger bufferSize for files > 32KB try { tree = parser.parse(file.content, undefined, { - bufferSize: getTreeSitterBufferSize(file.content.length), + bufferSize: getTreeSitterBufferSize(file.content), }); } catch (parseError) { // Skip files that can't be parsed @@ -414,7 +414,7 @@ export async function extractExtractedHeritageFromFiles( if (!tree) { try { tree = parser.parse(file.content, undefined, { - bufferSize: getTreeSitterBufferSize(file.content.length), + bufferSize: getTreeSitterBufferSize(file.content), }); } catch { continue; diff --git a/gitnexus/src/core/ingestion/import-processor.ts b/gitnexus/src/core/ingestion/import-processor.ts index d1b039d932..b669d27445 100644 --- a/gitnexus/src/core/ingestion/import-processor.ts +++ b/gitnexus/src/core/ingestion/import-processor.ts @@ -306,7 +306,7 @@ export const processImports = async ( if (!tree) { try { tree = parser.parse(file.content, undefined, { - bufferSize: getTreeSitterBufferSize(file.content.length), + bufferSize: getTreeSitterBufferSize(file.content), }); } catch (parseError) { continue; diff --git a/gitnexus/src/core/ingestion/languages/csharp/captures.ts b/gitnexus/src/core/ingestion/languages/csharp/captures.ts index dc8356346f..2f913a14e3 100644 --- a/gitnexus/src/core/ingestion/languages/csharp/captures.ts +++ b/gitnexus/src/core/ingestion/languages/csharp/captures.ts @@ -23,6 +23,7 @@ import { computeCsharpArityMetadata } from './arity-metadata.js'; import { synthesizeCsharpReceiverBinding } from './receiver-binding.js'; import { getCsharpParser, getCsharpScopeQuery } from './query.js'; import { recordCacheHit, recordCacheMiss } from './cache-stats.js'; +import { getTreeSitterBufferSize } from '../../constants.js'; /** Declaration anchors that carry function-like arity metadata. */ const FUNCTION_DECL_TAGS = [ @@ -52,7 +53,9 @@ export function emitCsharpScopeCaptures( // the LanguageProvider contract layer; cast here at the use site. let tree = cachedTree as ReturnType['parse']> | undefined; if (tree === undefined) { - tree = getCsharpParser().parse(sourceText); + tree = getCsharpParser().parse(sourceText, undefined, { + bufferSize: getTreeSitterBufferSize(sourceText), + }); recordCacheMiss(); } else { recordCacheHit(); diff --git a/gitnexus/src/core/ingestion/languages/csharp/namespace-siblings.ts b/gitnexus/src/core/ingestion/languages/csharp/namespace-siblings.ts index 3fdaad41ed..2cd2cf7241 100644 --- a/gitnexus/src/core/ingestion/languages/csharp/namespace-siblings.ts +++ b/gitnexus/src/core/ingestion/languages/csharp/namespace-siblings.ts @@ -11,17 +11,18 @@ * field-chain resolution fails at `findClassBindingInScope('User')` * in the Service.cs scope chain. * - * Implementation: after the finalize pass populates `indexes.bindings` - * (from explicit `using` directives), walk each file's tree-sitter - * AST for `namespace_declaration` / `file_scoped_namespace_declaration` - * and `using_directive` nodes. The orchestrator hands us its - * `treeCache` so files already parsed by `extractParsedFile` are - * re-used instead of re-parsed — `ParsedFile`'s underlying tree is - * the single source of truth. Group classes by namespace, and inject - * cross-file sibling classes into each Namespace scope's finalized - * bindings with `origin: 'namespace'` — a tier below `local` so a - * local declaration still shadows a cross-file sibling with the same - * name. + * Implementation: after the finalize pass populates immutable + * `indexes.bindings` (from explicit `using` directives), walk each + * file's tree-sitter AST for `namespace_declaration` / + * `file_scoped_namespace_declaration` and `using_directive` nodes. + * The orchestrator hands us its `treeCache` so files already parsed + * by `extractParsedFile` are re-used instead of re-parsed — + * `ParsedFile`'s underlying tree is the single source of truth. + * Group classes by namespace, and append cross-file sibling classes + * into each Namespace scope's `bindingAugmentations` bucket with + * `origin: 'namespace'`. Finalized bindings remain first in + * `lookupBindingsAt`, and local lexical `Scope.bindings` remains the + * first-tier shadowing channel. * * The tree-sitter walk is authoritative: it sees `global using static`, * aliased `using static X = Y.Z;`, attributed namespace declarations, @@ -34,6 +35,7 @@ import type { SyntaxNode } from 'tree-sitter'; import type { BindingRef, ParsedFile, Scope, ScopeId, SymbolDefinition } from 'gitnexus-shared'; import type { ScopeResolutionIndexes } from '../../model/scope-resolution-indexes.js'; import { getCsharpParser } from './query.js'; +import { getTreeSitterBufferSize } from '../../constants.js'; interface CsharpFileStructure { /** Declared namespace names in file source order. Empty array means @@ -52,7 +54,11 @@ interface CsharpFileStructure { * shared across calls. */ function extractFileStructure(content: string, cachedTree: unknown): CsharpFileStructure { type CsharpTree = ReturnType['parse']>; - const tree = (cachedTree as CsharpTree | undefined) ?? getCsharpParser().parse(content); + const tree = + (cachedTree as CsharpTree | undefined) ?? + getCsharpParser().parse(content, undefined, { + bufferSize: getTreeSitterBufferSize(content), + }); const namespaces: string[] = []; const usingStaticPaths: string[] = []; @@ -106,8 +112,8 @@ export interface CsharpSiblingInputs { } /** - * Mutate `indexes.bindings` in-place, adding cross-file sibling class - * defs to each Namespace scope. Class-like defs (Class / Interface / + * Append cross-file sibling class defs to each Namespace scope's + * `bindingAugmentations` bucket. Class-like defs (Class / Interface / * Struct / Record / Enum) are visible cross-file; method / field * members are not. */ @@ -198,12 +204,15 @@ export function populateCsharpNamespaceSiblings( } } - // Inject cross-file siblings into each namespace scope's finalized - // bindings. `indexes.bindings` is typed `ReadonlyMap` - // but is a plain Map at runtime; mutating here is the established - // pattern (see `propagateImportedReturnTypes` which does the same - // for module-scope typeBindings). - const finalized = indexes.bindings as Map>; + // Inject cross-file siblings into each namespace scope's + // post-finalize augmentation channel (per I8). The + // `indexes.bindingAugmentations` map is the dedicated mutable + // append-only buffer for post-finalize hooks: inner `BindingRef[]` + // arrays here are NEVER frozen (unlike `indexes.bindings`, which + // `materializeBindings` freezes). Walkers consult both channels + // via `lookupBindingsAt`; we never need to consult or mutate + // `indexes.bindings`. + const augmentations = indexes.bindingAugmentations as Map>; // Cross-namespace type-binding propagation: for each file, mirror // method return-type bindings from same-namespace sibling files and @@ -301,17 +310,13 @@ export function populateCsharpNamespaceSiblings( const simpleName = mq.includes('.') ? mq.slice(mq.lastIndexOf('.') + 1) : mq; if (simpleName === '') continue; - // Add to `indexes.bindings[moduleScope]` so - // `findCallableBindingInScope` picks it up. - let scopeBindings = finalized.get(moduleScope.id); - if (scopeBindings === undefined) { - scopeBindings = new Map(); - finalized.set(moduleScope.id, scopeBindings); - } - const existing = scopeBindings.get(simpleName) ?? []; - if (existing.some((b) => b.def.nodeId === memberDef.nodeId)) continue; - existing.push({ def: memberDef, origin: 'import' }); - scopeBindings.set(simpleName, existing); + // Append to the augmentation bucket for the importer's module + // scope. `findCallableBindingInScope` reads via + // `lookupBindingsAt`, which fans out across `bindings` + + // `bindingAugmentations`. + const bucketArr = getAugmentationBucket(augmentations, moduleScope.id, simpleName); + if (bucketArr.some((b) => b.def.nodeId === memberDef.nodeId)) continue; + bucketArr.push({ def: memberDef, origin: 'import' }); } } } @@ -337,15 +342,9 @@ export function populateCsharpNamespaceSiblings( const q = def.qualifiedName ?? ''; const simpleName = q.includes('.') ? q.slice(q.lastIndexOf('.') + 1) : q; if (simpleName === '') continue; - let scopeBindings = finalized.get(moduleScope.id); - if (scopeBindings === undefined) { - scopeBindings = new Map(); - finalized.set(moduleScope.id, scopeBindings); - } - const existing = scopeBindings.get(simpleName) ?? []; - if (existing.some((b) => b.def.nodeId === def.nodeId)) continue; - existing.push({ def, origin: 'namespace' }); - scopeBindings.set(simpleName, existing); + const bucketArr = getAugmentationBucket(augmentations, moduleScope.id, simpleName); + if (bucketArr.some((b) => b.def.nodeId === def.nodeId)) continue; + bucketArr.push({ def, origin: 'namespace' }); } } } @@ -366,11 +365,6 @@ export function populateCsharpNamespaceSiblings( } for (const { scopeId, filePath } of bucket.scopes) { - let scopeBindings = finalized.get(scopeId); - if (scopeBindings === undefined) { - scopeBindings = new Map(); - finalized.set(scopeId, scopeBindings); - } for (const [name, defs] of defsByName) { // Skip names already present locally — `origin: 'local'` in // scope.bindings would naturally shadow the cross-file @@ -378,18 +372,42 @@ export function populateCsharpNamespaceSiblings( const local = bucket.scopes.find((s) => s.filePath === filePath)?.scope.bindings.get(name); if (local !== undefined && local.some((b) => b.origin === 'local')) continue; - const existing = scopeBindings.get(name) ?? []; + let bucketArr: BindingRef[] | null = null; for (const def of defs) { if (def.filePath === filePath) continue; // don't self-reference - if (existing.some((b) => b.def.nodeId === def.nodeId)) continue; - existing.push({ def, origin: 'namespace' }); + if (bucketArr === null) bucketArr = getAugmentationBucket(augmentations, scopeId, name); + if (bucketArr.some((b) => b.def.nodeId === def.nodeId)) continue; + bucketArr.push({ def, origin: 'namespace' }); } - if (existing.length > 0) scopeBindings.set(name, existing); } } } } +/** Get-or-create a mutable inner bucket inside the `bindingAugmentations` + * channel. The inner arrays here are mutable by contract (see + * `ScopeResolutionIndexes.bindingAugmentations` doc + scope-resolver I8); + * callers may `push` directly. Allocating the outer/inner Maps lazily + * keeps the augmentation footprint zero for files with no cross-file + * fanout. */ +function getAugmentationBucket( + augmentations: Map>, + scopeId: ScopeId, + name: string, +): BindingRef[] { + let scopeBindings = augmentations.get(scopeId); + if (scopeBindings === undefined) { + scopeBindings = new Map(); + augmentations.set(scopeId, scopeBindings); + } + let bucketArr = scopeBindings.get(name); + if (bucketArr === undefined) { + bucketArr = []; + scopeBindings.set(name, bucketArr); + } + return bucketArr; +} + function isTypeDef(def: SymbolDefinition): boolean { return ( def.type === 'Class' || diff --git a/gitnexus/src/core/ingestion/languages/python/captures.ts b/gitnexus/src/core/ingestion/languages/python/captures.ts index e96a60644f..9653c7b9bf 100644 --- a/gitnexus/src/core/ingestion/languages/python/captures.ts +++ b/gitnexus/src/core/ingestion/languages/python/captures.ts @@ -23,6 +23,7 @@ import { getPythonParser, getPythonScopeQuery } from './query.js'; import { synthesizeReceiverTypeBinding } from './receiver-binding.js'; import { computePythonArityMetadata } from './arity-metadata.js'; import { recordCacheHit, recordCacheMiss } from './cache-stats.js'; +import { getTreeSitterBufferSize } from '../../constants.js'; export function emitPythonScopeCaptures( sourceText: string, @@ -36,7 +37,9 @@ export function emitPythonScopeCaptures( // here at the use site. let tree = cachedTree as ReturnType['parse']> | undefined; if (tree === undefined) { - tree = getPythonParser().parse(sourceText); + tree = getPythonParser().parse(sourceText, undefined, { + bufferSize: getTreeSitterBufferSize(sourceText), + }); recordCacheMiss(); } else { recordCacheHit(); diff --git a/gitnexus/src/core/ingestion/languages/typescript/captures.ts b/gitnexus/src/core/ingestion/languages/typescript/captures.ts index 1b60fb6bdc..e7bb916988 100644 --- a/gitnexus/src/core/ingestion/languages/typescript/captures.ts +++ b/gitnexus/src/core/ingestion/languages/typescript/captures.ts @@ -37,6 +37,7 @@ import { getTsParser, getTsScopeQuery, tsCachedTreeMatchesGrammar } from './quer import { recordCacheHit, recordCacheMiss } from './cache-stats.js'; import { synthesizeTsReceiverBinding } from './receiver-binding.js'; import { computeTsArityMetadata } from './arity-metadata.js'; +import { getTreeSitterBufferSize } from '../../constants.js'; /** tree-sitter-typescript node types for function-like scopes that may * carry a synthesized `this` binding. Kept in sync with the @@ -125,7 +126,9 @@ export function emitTsScopeCaptures( tree = undefined; } if (tree === undefined) { - tree = getTsParser(filePath).parse(sourceText); + tree = getTsParser(filePath).parse(sourceText, undefined, { + bufferSize: getTreeSitterBufferSize(sourceText), + }); recordCacheMiss(); } else { recordCacheHit(); diff --git a/gitnexus/src/core/ingestion/model/scope-resolution-indexes.ts b/gitnexus/src/core/ingestion/model/scope-resolution-indexes.ts index 67e32fc0e8..a3fba7b302 100644 --- a/gitnexus/src/core/ingestion/model/scope-resolution-indexes.ts +++ b/gitnexus/src/core/ingestion/model/scope-resolution-indexes.ts @@ -62,8 +62,21 @@ export interface ScopeResolutionIndexes { readonly methodDispatch: MethodDispatchIndex; /** Finalized `ImportEdge[]` per module scope. */ readonly imports: ReadonlyMap; - /** Merged bindings (local + imports + wildcards) per module scope. */ + /** Finalize-output bindings (local + imports + wildcards) per module scope. + * Inner `BindingRef[]` arrays are frozen by `materializeBindings`; + * this channel is permanently immutable post-finalize. Consumers + * MUST read via `lookupBindingsAt` so the augmentation channel is + * consulted alongside. See I8 in `contract/scope-resolver.ts`. */ readonly bindings: ReadonlyMap>; + /** Append-only post-finalize augmentation channel. Populated by + * language hooks such as `populateNamespaceSiblings` for cross-file + * bindings synthesized after finalize (e.g. C# same-namespace + * visibility, `using static` member exposure). Inner arrays are + * NOT frozen — hooks `push()` directly. Walkers must consult both + * this map and `bindings` via `lookupBindingsAt`; finalized refs + * are returned first and win duplicate `def.nodeId` metadata, with + * unique augmentations appended after. See I8. */ + readonly bindingAugmentations: ReadonlyMap>; /** Pre-resolution usage facts; consumed by the resolution phase. */ readonly referenceSites: readonly ReferenceSite[]; /** SCC condensation of the file-level import graph — callers that want diff --git a/gitnexus/src/core/ingestion/parsing-processor.ts b/gitnexus/src/core/ingestion/parsing-processor.ts index d3395e782b..4756bcd659 100644 --- a/gitnexus/src/core/ingestion/parsing-processor.ts +++ b/gitnexus/src/core/ingestion/parsing-processor.ts @@ -48,7 +48,11 @@ import type { FileScopeBindings, ExtractedORMQuery, } from './workers/parse-worker.js'; -import { getTreeSitterBufferSize, TREE_SITTER_MAX_BUFFER } from './constants.js'; +import { + getTreeSitterBufferSize, + getTreeSitterContentByteLength, + TREE_SITTER_MAX_BUFFER, +} from './constants.js'; export type FileProgressCallback = (current: number, total: number, filePath: string) => void; @@ -352,7 +356,7 @@ const processParsingSequential = async ( } // Skip files larger than the max tree-sitter buffer (32 MB) - if (file.content.length > TREE_SITTER_MAX_BUFFER) continue; + if (getTreeSitterContentByteLength(file.content) > TREE_SITTER_MAX_BUFFER) continue; // Vue SFC preprocessing: extract