Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions gitnexus-shared/src/scope-resolution/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,16 @@
* Lifecycle contract (RFC §2.8): scopes are **constructed during extraction,
* linked during finalize, immutable after finalize**. All fields are
* `readonly` at the type level; `Object.freeze` is applied at runtime in dev
* builds. `ReferenceIndex` is the sole structure populated after freeze — by
* resolution, before emission.
* builds.
*
* Two structures are populated after freeze:
* 1. `ReferenceIndex` — by resolution, before emission.
* 2. `ScopeResolutionIndexes.bindingAugmentations` — the dedicated
* append-only post-finalize binding channel (e.g. C# same-namespace
* cross-file fanout). The companion `indexes.bindings` is the
* finalize-output channel and is deep-frozen by `materializeBindings`;
* walkers consult both via `lookupBindingsAt`. See `ScopeResolver`
* Invariant I8 for the full lifecycle contract.
*/

import type { NodeLabel } from '../graph/types.js';
Expand Down
4 changes: 2 additions & 2 deletions gitnexus/src/core/ingestion/call-processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -770,7 +770,7 @@ export const processCalls = async (
if (!tree) {
try {
tree = parser.parse(file.content, undefined, {
bufferSize: getTreeSitterBufferSize(file.content.length),
bufferSize: getTreeSitterBufferSize(file.content),
});
} catch (parseError) {
continue;
Expand Down Expand Up @@ -3257,7 +3257,7 @@ export const extractFetchCallsFromFiles = async (
if (!tree) {
try {
tree = parser.parse(file.content, undefined, {
bufferSize: getTreeSitterBufferSize(file.content.length),
bufferSize: getTreeSitterBufferSize(file.content),
});
} catch {
continue;
Expand Down
15 changes: 11 additions & 4 deletions gitnexus/src/core/ingestion/constants.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import { Buffer } from 'node:buffer';

/**
* Default minimum buffer size for tree-sitter parsing (512 KB).
* tree-sitter requires bufferSize >= file size in bytes.
Expand All @@ -12,8 +14,13 @@ export const TREE_SITTER_MAX_BUFFER = 32 * 1024 * 1024;

/**
* Compute adaptive buffer size for tree-sitter parsing.
* Uses 2× file size, clamped between 512 KB and 32 MB.
* Previous 256 KB fixed limit silently skipped files > ~200 KB (e.g., imgui.h at 411 KB).
* Uses 2x UTF-8 byte size, clamped between 512 KB and 32 MB.
* Keeps tree-sitter's byte-sized buffer above large ASCII and multibyte sources.
*/
export const getTreeSitterBufferSize = (contentLength: number): number =>
Math.min(Math.max(contentLength * 2, TREE_SITTER_BUFFER_SIZE), TREE_SITTER_MAX_BUFFER);
/**
 * Measure how many bytes `sourceText` occupies when encoded as UTF-8.
 *
 * This differs from `sourceText.length` (UTF-16 code units) whenever the
 * text contains non-ASCII characters.
 *
 * @param sourceText - Source text to measure.
 * @returns The UTF-8 encoded size of `sourceText`, in bytes.
 */
export const getTreeSitterContentByteLength = (sourceText: string): number => {
  const utf8Bytes = Buffer.byteLength(sourceText, 'utf8');
  return utf8Bytes;
};

/**
 * Choose the `bufferSize` option to pass to a tree-sitter parse of
 * `sourceText`.
 *
 * The size is twice the UTF-8 byte length of the text, raised to at least
 * `TREE_SITTER_BUFFER_SIZE` and capped at `TREE_SITTER_MAX_BUFFER`.
 *
 * @param sourceText - Source text that is about to be parsed.
 * @returns The clamped buffer size, in bytes.
 */
export const getTreeSitterBufferSize = (sourceText: string): number => {
  const doubledByteLength = getTreeSitterContentByteLength(sourceText) * 2;
  // Apply the lower bound first, then the upper bound.
  const atLeastMinimum = Math.max(doubledByteLength, TREE_SITTER_BUFFER_SIZE);
  return Math.min(atLeastMinimum, TREE_SITTER_MAX_BUFFER);
};
5 changes: 5 additions & 0 deletions gitnexus/src/core/ingestion/finalize-orchestrator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,11 @@ export function finalizeScopeModel(
methodDispatch,
imports: finalizeOut.imports,
bindings: finalizeOut.bindings,
// Empty post-finalize augmentation channel. Populated (if at all)
// by language hooks like `populateCsharpNamespaceSiblings` running
// AFTER `finalizeScopeModel` returns, before `resolveReferenceSites`
// consumes the bundle. Most languages leave it empty.
bindingAugmentations: new Map(),
referenceSites: Object.freeze([...allReferenceSites]),
sccs: finalizeOut.sccs,
stats: finalizeOut.stats,
Expand Down
4 changes: 2 additions & 2 deletions gitnexus/src/core/ingestion/heritage-processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ export const processHeritage = async (
// Use larger bufferSize for files > 32KB
try {
tree = parser.parse(file.content, undefined, {
bufferSize: getTreeSitterBufferSize(file.content.length),
bufferSize: getTreeSitterBufferSize(file.content),
});
} catch (parseError) {
// Skip files that can't be parsed
Expand Down Expand Up @@ -414,7 +414,7 @@ export async function extractExtractedHeritageFromFiles(
if (!tree) {
try {
tree = parser.parse(file.content, undefined, {
bufferSize: getTreeSitterBufferSize(file.content.length),
bufferSize: getTreeSitterBufferSize(file.content),
});
} catch {
continue;
Expand Down
2 changes: 1 addition & 1 deletion gitnexus/src/core/ingestion/import-processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ export const processImports = async (
if (!tree) {
try {
tree = parser.parse(file.content, undefined, {
bufferSize: getTreeSitterBufferSize(file.content.length),
bufferSize: getTreeSitterBufferSize(file.content),
});
} catch (parseError) {
continue;
Expand Down
5 changes: 4 additions & 1 deletion gitnexus/src/core/ingestion/languages/csharp/captures.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import { computeCsharpArityMetadata } from './arity-metadata.js';
import { synthesizeCsharpReceiverBinding } from './receiver-binding.js';
import { getCsharpParser, getCsharpScopeQuery } from './query.js';
import { recordCacheHit, recordCacheMiss } from './cache-stats.js';
import { getTreeSitterBufferSize } from '../../constants.js';

/** Declaration anchors that carry function-like arity metadata. */
const FUNCTION_DECL_TAGS = [
Expand Down Expand Up @@ -52,7 +53,9 @@ export function emitCsharpScopeCaptures(
// the LanguageProvider contract layer; cast here at the use site.
let tree = cachedTree as ReturnType<ReturnType<typeof getCsharpParser>['parse']> | undefined;
if (tree === undefined) {
tree = getCsharpParser().parse(sourceText);
tree = getCsharpParser().parse(sourceText, undefined, {
bufferSize: getTreeSitterBufferSize(sourceText),
});
recordCacheMiss();
} else {
recordCacheHit();
Expand Down
116 changes: 67 additions & 49 deletions gitnexus/src/core/ingestion/languages/csharp/namespace-siblings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,18 @@
* field-chain resolution fails at `findClassBindingInScope('User')`
* in the Service.cs scope chain.
*
* Implementation: after the finalize pass populates `indexes.bindings`
* (from explicit `using` directives), walk each file's tree-sitter
* AST for `namespace_declaration` / `file_scoped_namespace_declaration`
* and `using_directive` nodes. The orchestrator hands us its
* `treeCache` so files already parsed by `extractParsedFile` are
* re-used instead of re-parsed — `ParsedFile`'s underlying tree is
* the single source of truth. Group classes by namespace, and inject
* cross-file sibling classes into each Namespace scope's finalized
* bindings with `origin: 'namespace'` — a tier below `local` so a
* local declaration still shadows a cross-file sibling with the same
* name.
* Implementation: after the finalize pass populates immutable
* `indexes.bindings` (from explicit `using` directives), walk each
* file's tree-sitter AST for `namespace_declaration` /
* `file_scoped_namespace_declaration` and `using_directive` nodes.
* The orchestrator hands us its `treeCache` so files already parsed
* by `extractParsedFile` are re-used instead of re-parsed —
* `ParsedFile`'s underlying tree is the single source of truth.
* Group classes by namespace, and append cross-file sibling classes
* into each Namespace scope's `bindingAugmentations` bucket with
* `origin: 'namespace'`. Finalized bindings remain first in
* `lookupBindingsAt`, and local lexical `Scope.bindings` remains the
* first-tier shadowing channel.
*
* The tree-sitter walk is authoritative: it sees `global using static`,
* aliased `using static X = Y.Z;`, attributed namespace declarations,
Expand All @@ -34,6 +35,7 @@ import type { SyntaxNode } from 'tree-sitter';
import type { BindingRef, ParsedFile, Scope, ScopeId, SymbolDefinition } from 'gitnexus-shared';
import type { ScopeResolutionIndexes } from '../../model/scope-resolution-indexes.js';
import { getCsharpParser } from './query.js';
import { getTreeSitterBufferSize } from '../../constants.js';

interface CsharpFileStructure {
/** Declared namespace names in file source order. Empty array means
Expand All @@ -52,7 +54,11 @@ interface CsharpFileStructure {
* shared across calls. */
function extractFileStructure(content: string, cachedTree: unknown): CsharpFileStructure {
type CsharpTree = ReturnType<ReturnType<typeof getCsharpParser>['parse']>;
const tree = (cachedTree as CsharpTree | undefined) ?? getCsharpParser().parse(content);
const tree =
(cachedTree as CsharpTree | undefined) ??
getCsharpParser().parse(content, undefined, {
bufferSize: getTreeSitterBufferSize(content),
});
const namespaces: string[] = [];
const usingStaticPaths: string[] = [];

Expand Down Expand Up @@ -106,8 +112,8 @@ export interface CsharpSiblingInputs {
}

/**
* Mutate `indexes.bindings` in-place, adding cross-file sibling class
* defs to each Namespace scope. Class-like defs (Class / Interface /
* Append cross-file sibling class defs to each Namespace scope's
* `bindingAugmentations` bucket. Class-like defs (Class / Interface /
* Struct / Record / Enum) are visible cross-file; method / field
* members are not.
*/
Expand Down Expand Up @@ -198,12 +204,15 @@ export function populateCsharpNamespaceSiblings(
}
}

// Inject cross-file siblings into each namespace scope's finalized
// bindings. `indexes.bindings` is typed `ReadonlyMap<ScopeId, ...>`
// but is a plain Map at runtime; mutating here is the established
// pattern (see `propagateImportedReturnTypes` which does the same
// for module-scope typeBindings).
const finalized = indexes.bindings as Map<ScopeId, Map<string, BindingRef[]>>;
// Inject cross-file siblings into each namespace scope's
// post-finalize augmentation channel (per I8). The
// `indexes.bindingAugmentations` map is the dedicated mutable
// append-only buffer for post-finalize hooks: inner `BindingRef[]`
// arrays here are NEVER frozen (unlike `indexes.bindings`, which
// `materializeBindings` freezes). Walkers consult both channels
// via `lookupBindingsAt`; we never need to consult or mutate
// `indexes.bindings`.
const augmentations = indexes.bindingAugmentations as Map<ScopeId, Map<string, BindingRef[]>>;

// Cross-namespace type-binding propagation: for each file, mirror
// method return-type bindings from same-namespace sibling files and
Expand Down Expand Up @@ -301,17 +310,13 @@ export function populateCsharpNamespaceSiblings(
const simpleName = mq.includes('.') ? mq.slice(mq.lastIndexOf('.') + 1) : mq;
if (simpleName === '') continue;

// Add to `indexes.bindings[moduleScope]` so
// `findCallableBindingInScope` picks it up.
let scopeBindings = finalized.get(moduleScope.id);
if (scopeBindings === undefined) {
scopeBindings = new Map<string, BindingRef[]>();
finalized.set(moduleScope.id, scopeBindings);
}
const existing = scopeBindings.get(simpleName) ?? [];
if (existing.some((b) => b.def.nodeId === memberDef.nodeId)) continue;
existing.push({ def: memberDef, origin: 'import' });
scopeBindings.set(simpleName, existing);
// Append to the augmentation bucket for the importer's module
// scope. `findCallableBindingInScope` reads via
// `lookupBindingsAt`, which fans out across `bindings` +
// `bindingAugmentations`.
const bucketArr = getAugmentationBucket(augmentations, moduleScope.id, simpleName);
if (bucketArr.some((b) => b.def.nodeId === memberDef.nodeId)) continue;
bucketArr.push({ def: memberDef, origin: 'import' });
}
}
}
Expand All @@ -337,15 +342,9 @@ export function populateCsharpNamespaceSiblings(
const q = def.qualifiedName ?? '';
const simpleName = q.includes('.') ? q.slice(q.lastIndexOf('.') + 1) : q;
if (simpleName === '') continue;
let scopeBindings = finalized.get(moduleScope.id);
if (scopeBindings === undefined) {
scopeBindings = new Map<string, BindingRef[]>();
finalized.set(moduleScope.id, scopeBindings);
}
const existing = scopeBindings.get(simpleName) ?? [];
if (existing.some((b) => b.def.nodeId === def.nodeId)) continue;
existing.push({ def, origin: 'namespace' });
scopeBindings.set(simpleName, existing);
const bucketArr = getAugmentationBucket(augmentations, moduleScope.id, simpleName);
if (bucketArr.some((b) => b.def.nodeId === def.nodeId)) continue;
bucketArr.push({ def, origin: 'namespace' });
}
}
}
Expand All @@ -366,30 +365,49 @@ export function populateCsharpNamespaceSiblings(
}

for (const { scopeId, filePath } of bucket.scopes) {
let scopeBindings = finalized.get(scopeId);
if (scopeBindings === undefined) {
scopeBindings = new Map<string, BindingRef[]>();
finalized.set(scopeId, scopeBindings);
}
for (const [name, defs] of defsByName) {
// Skip names already present locally — `origin: 'local'` in
// scope.bindings would naturally shadow the cross-file
// namespace entry, but we also keep this index lean.
const local = bucket.scopes.find((s) => s.filePath === filePath)?.scope.bindings.get(name);
if (local !== undefined && local.some((b) => b.origin === 'local')) continue;

const existing = scopeBindings.get(name) ?? [];
let bucketArr: BindingRef[] | null = null;
for (const def of defs) {
if (def.filePath === filePath) continue; // don't self-reference
if (existing.some((b) => b.def.nodeId === def.nodeId)) continue;
existing.push({ def, origin: 'namespace' });
if (bucketArr === null) bucketArr = getAugmentationBucket(augmentations, scopeId, name);
if (bucketArr.some((b) => b.def.nodeId === def.nodeId)) continue;
bucketArr.push({ def, origin: 'namespace' });
}
if (existing.length > 0) scopeBindings.set(name, existing);
}
}
}
}

/**
 * Look up the `BindingRef[]` stored under `scopeId` → `name` in the
 * `bindingAugmentations` channel, creating the per-scope map and/or the
 * array on first access.
 *
 * The returned array is the instance held by the map, so callers may
 * `push` onto it directly (the channel's inner arrays are mutable by
 * contract — see the `ScopeResolutionIndexes.bindingAugmentations` doc and
 * scope-resolver I8). Nothing is allocated for scope/name pairs that are
 * never requested.
 *
 * @param augmentations - Mutable view of the augmentation channel.
 * @param scopeId - Scope whose bucket map is consulted or created.
 * @param name - Binding name within that scope.
 * @returns The stored array for `scopeId`/`name`.
 */
function getAugmentationBucket(
  augmentations: Map<ScopeId, Map<string, BindingRef[]>>,
  scopeId: ScopeId,
  name: string,
): BindingRef[] {
  const perScope = augmentations.get(scopeId);
  if (perScope === undefined) {
    // First touch of this scope: register the scope map already holding
    // the new, empty bucket for `name`.
    const fresh: BindingRef[] = [];
    augmentations.set(scopeId, new Map<string, BindingRef[]>([[name, fresh]]));
    return fresh;
  }

  const known = perScope.get(name);
  if (known !== undefined) return known;

  // Scope is known but this name has not been requested before.
  const fresh: BindingRef[] = [];
  perScope.set(name, fresh);
  return fresh;
}

function isTypeDef(def: SymbolDefinition): boolean {
return (
def.type === 'Class' ||
Expand Down
5 changes: 4 additions & 1 deletion gitnexus/src/core/ingestion/languages/python/captures.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import { getPythonParser, getPythonScopeQuery } from './query.js';
import { synthesizeReceiverTypeBinding } from './receiver-binding.js';
import { computePythonArityMetadata } from './arity-metadata.js';
import { recordCacheHit, recordCacheMiss } from './cache-stats.js';
import { getTreeSitterBufferSize } from '../../constants.js';

export function emitPythonScopeCaptures(
sourceText: string,
Expand All @@ -36,7 +37,9 @@ export function emitPythonScopeCaptures(
// here at the use site.
let tree = cachedTree as ReturnType<ReturnType<typeof getPythonParser>['parse']> | undefined;
if (tree === undefined) {
tree = getPythonParser().parse(sourceText);
tree = getPythonParser().parse(sourceText, undefined, {
bufferSize: getTreeSitterBufferSize(sourceText),
});
recordCacheMiss();
} else {
recordCacheHit();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import { getTsParser, getTsScopeQuery, tsCachedTreeMatchesGrammar } from './quer
import { recordCacheHit, recordCacheMiss } from './cache-stats.js';
import { synthesizeTsReceiverBinding } from './receiver-binding.js';
import { computeTsArityMetadata } from './arity-metadata.js';
import { getTreeSitterBufferSize } from '../../constants.js';

/** tree-sitter-typescript node types for function-like scopes that may
* carry a synthesized `this` binding. Kept in sync with the
Expand Down Expand Up @@ -125,7 +126,9 @@ export function emitTsScopeCaptures(
tree = undefined;
}
if (tree === undefined) {
tree = getTsParser(filePath).parse(sourceText);
tree = getTsParser(filePath).parse(sourceText, undefined, {
bufferSize: getTreeSitterBufferSize(sourceText),
});
recordCacheMiss();
} else {
recordCacheHit();
Expand Down
15 changes: 14 additions & 1 deletion gitnexus/src/core/ingestion/model/scope-resolution-indexes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,21 @@ export interface ScopeResolutionIndexes {
readonly methodDispatch: MethodDispatchIndex;
/** Finalized `ImportEdge[]` per module scope. */
readonly imports: ReadonlyMap<ScopeId, readonly ImportEdge[]>;
/** Merged bindings (local + imports + wildcards) per module scope. */
/** Finalize-output bindings (local + imports + wildcards) per module scope.
* Inner `BindingRef[]` arrays are frozen by `materializeBindings`;
* this channel is permanently immutable post-finalize. Consumers
* MUST read via `lookupBindingsAt` so the augmentation channel is
* consulted alongside. See I8 in `contract/scope-resolver.ts`. */
readonly bindings: ReadonlyMap<ScopeId, ReadonlyMap<string, readonly BindingRef[]>>;
/** Append-only post-finalize augmentation channel. Populated by
* language hooks such as `populateNamespaceSiblings` for cross-file
* bindings synthesized after finalize (e.g. C# same-namespace
* visibility, `using static` member exposure). Inner arrays are
* NOT frozen — hooks `push()` directly. Walkers must consult both
* this map and `bindings` via `lookupBindingsAt`; finalized refs
* are returned first and win duplicate `def.nodeId` metadata, with
* unique augmentations appended after. See I8. */
readonly bindingAugmentations: ReadonlyMap<ScopeId, ReadonlyMap<string, readonly BindingRef[]>>;
/** Pre-resolution usage facts; consumed by the resolution phase. */
readonly referenceSites: readonly ReferenceSite[];
/** SCC condensation of the file-level import graph — callers that want
Expand Down
10 changes: 7 additions & 3 deletions gitnexus/src/core/ingestion/parsing-processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,11 @@ import type {
FileScopeBindings,
ExtractedORMQuery,
} from './workers/parse-worker.js';
import { getTreeSitterBufferSize, TREE_SITTER_MAX_BUFFER } from './constants.js';
import {
getTreeSitterBufferSize,
getTreeSitterContentByteLength,
TREE_SITTER_MAX_BUFFER,
} from './constants.js';

export type FileProgressCallback = (current: number, total: number, filePath: string) => void;

Expand Down Expand Up @@ -352,7 +356,7 @@ const processParsingSequential = async (
}

// Skip files larger than the max tree-sitter buffer (32 MB)
if (file.content.length > TREE_SITTER_MAX_BUFFER) continue;
if (getTreeSitterContentByteLength(file.content) > TREE_SITTER_MAX_BUFFER) continue;

// Vue SFC preprocessing: extract <script> block content
let parseContent = file.content;
Expand All @@ -375,7 +379,7 @@ const processParsingSequential = async (
let tree: Parser.Tree;
try {
tree = parser.parse(parseContent, undefined, {
bufferSize: getTreeSitterBufferSize(parseContent.length),
bufferSize: getTreeSitterBufferSize(parseContent),
});
} catch (parseError) {
console.warn(`Skipping unparseable file: ${file.path}`);
Expand Down
Loading
Loading