diff --git a/gitnexus/src/core/ingestion/language-provider.ts b/gitnexus/src/core/ingestion/language-provider.ts index 770d15aeeb..351b339a7c 100644 --- a/gitnexus/src/core/ingestion/language-provider.ts +++ b/gitnexus/src/core/ingestion/language-provider.ts @@ -321,12 +321,15 @@ interface LanguageProviderConfig { * Providers that have not yet migrated continue to run through the * legacy DAG path (feature-flagged per `REGISTRY_PRIMARY_`). * + * **Sync return.** Tree-sitter query execution and COBOL's regex + * tagger are both synchronous; no current or foreseeable provider + * needs async work inside this hook. The sync signature lets + * `parse-worker.ts` (#920) invoke it inline in its already-sync + * per-file loop without cascading `async` through the batch pipeline. + * * Default: undefined (language continues to use legacy DAG). */ - readonly emitScopeCaptures?: ( - sourceText: string, - filePath: string, - ) => Promise; + readonly emitScopeCaptures?: (sourceText: string, filePath: string) => readonly CaptureMatch[]; /** * Interpret a raw `@import.statement` capture group into a `ParsedImport`. diff --git a/gitnexus/src/core/ingestion/parsing-processor.ts b/gitnexus/src/core/ingestion/parsing-processor.ts index 90186c6595..b7b143039a 100644 --- a/gitnexus/src/core/ingestion/parsing-processor.ts +++ b/gitnexus/src/core/ingestion/parsing-processor.ts @@ -31,6 +31,7 @@ import { buildCollisionGroups, } from './utils/method-props.js'; import type { LanguageProvider } from './language-provider.js'; +import type { ParsedFile } from 'gitnexus-shared'; import { WorkerPool } from './workers/worker-pool.js'; import type { ParseWorkerResult, @@ -62,6 +63,14 @@ export interface WorkerExtractedData { ormQueries: ExtractedORMQuery[]; constructorBindings: FileConstructorBindings[]; fileScopeBindings: FileScopeBindings[]; + /** + * Per-file `ParsedFile` artifacts from the new scope-based resolution + * pipeline (RFC #909 Ring 2). 
Empty until a provider implements + * `emitScopeCaptures` — additive to the legacy DAG path. Aggregated + * from every worker chunk; consumed downstream by #921's + * finalize-orchestrator. + */ + parsedFiles: ParsedFile[]; } // ============================================================================ @@ -96,6 +105,7 @@ const processParsingWithWorkers = async ( ormQueries: [], constructorBindings: [], fileScopeBindings: [], + parsedFiles: [], }; const total = files.length; @@ -120,6 +130,7 @@ const processParsingWithWorkers = async ( const allORMQueries: ExtractedORMQuery[] = []; const allConstructorBindings: FileConstructorBindings[] = []; const fileScopeBindingsByFile: FileScopeBindings[] = []; + const allParsedFiles: ParsedFile[] = []; for (const result of chunkResults) { for (const node of result.nodes) { graph.addNode({ @@ -157,6 +168,11 @@ const processParsingWithWorkers = async ( for (const item of result.constructorBindings) allConstructorBindings.push(item); if (result.fileScopeBindings) for (const item of result.fileScopeBindings) fileScopeBindingsByFile.push(item); + // RFC #909 Ring 2: aggregate per-file scope artifacts. Tolerant of + // workers that don't emit the field yet (older worker builds or + // partial rollouts), since the additive contract means undefined = + // "this worker produced no ParsedFiles for this chunk". 
+ if (result.parsedFiles) for (const item of result.parsedFiles) allParsedFiles.push(item); } // Merge and log skipped languages from workers @@ -187,6 +203,7 @@ const processParsingWithWorkers = async ( ormQueries: allORMQueries, constructorBindings: allConstructorBindings, fileScopeBindings: fileScopeBindingsByFile, + parsedFiles: allParsedFiles, }; }; diff --git a/gitnexus/src/core/ingestion/scope-extractor-bridge.ts b/gitnexus/src/core/ingestion/scope-extractor-bridge.ts new file mode 100644 index 0000000000..dfa3b9b4fe --- /dev/null +++ b/gitnexus/src/core/ingestion/scope-extractor-bridge.ts @@ -0,0 +1,54 @@ +/** + * Bridge between a language provider's `emitScopeCaptures` hook and the + * `ScopeExtractor` (RFC #909 Ring 2 PKG #920). + * + * Extracted into its own module so it can be imported by test code + * without pulling in `parse-worker.ts` — which has a top-level + * `parentPort!.on('message', ...)` call that assumes a worker-thread + * context and throws on direct import. + * + * The bridge: + * + * 1. Short-circuits when the provider has NOT implemented + * `emitScopeCaptures`. Returns `undefined`; zero work done. This is + * the state of every language today — `ParsedFile` production stays + * dormant until a language migrates. + * 2. Invokes the hook + feeds its output to `ScopeExtractor.extract`. + * 3. **Swallows exceptions from either side.** A failure here returns + * `undefined` and emits a warning via `onWarn`; legacy parsing on + * the same file continues unaffected by the scope-extraction miss. + * Scope-based resolution is the new path under construction — it + * must not destabilize the legacy DAG. + */ + +import type { ParsedFile } from 'gitnexus-shared'; +import { extract as extractScope } from './scope-extractor.js'; +import type { LanguageProvider } from './language-provider.js'; + +/** Callback used to report scope-extraction warnings to the host (worker or direct). 
/** Warning sink: called with one human-readable message when scope extraction fails. */
export type ScopeBridgeWarn = (message: string) => void;

/**
 * Render a caught value as text for the warning message.
 *
 * `String(value)` itself throws for some values (e.g. an object created with
 * `Object.create(null)`), so the conversion is guarded to keep the caller's
 * "never throws" contract intact.
 */
function describeThrown(thrown: unknown): string {
  if (thrown instanceof Error) return thrown.message;
  try {
    return String(thrown);
  } catch {
    return 'unprintable thrown value';
  }
}

/**
 * Produce a `ParsedFile` for the given file, or `undefined` when the
 * provider hasn't migrated / the hook or the extractor throws. Never
 * propagates exceptions — including ones raised while reporting the
 * failure (a throwing `onWarn`, or a thrown value that cannot be
 * stringified).
 *
 * @param provider - Provider for the file's language. Its optional
 *   `emitScopeCaptures` hook is invoked here; the provider is then passed
 *   through to the scope extractor unchanged.
 * @param sourceText - Source text forwarded to `emitScopeCaptures`.
 * @param filePath - Path forwarded to the hook and the extractor, and
 *   embedded in the warning message.
 * @param onWarn - Optional sink for the failure message. `console.warn` is
 *   used when it is omitted, and as a fallback when the sink itself throws.
 * @returns The extractor's result, or `undefined` when the hook is absent
 *   or anything on the extraction path throws.
 */
export function extractParsedFile(
  provider: LanguageProvider,
  sourceText: string,
  filePath: string,
  onWarn?: ScopeBridgeWarn,
): ParsedFile | undefined {
  // Provider has not migrated: return before the try block so that no
  // extractor work happens and no warning is emitted.
  if (provider.emitScopeCaptures === undefined) return undefined;
  try {
    const captures = provider.emitScopeCaptures(sourceText, filePath);
    return extractScope(captures, filePath, provider);
  } catch (err) {
    const message = `scope extraction failed for ${filePath}: ${describeThrown(err)}`;
    if (onWarn === undefined) {
      console.warn(message);
    } else {
      try {
        onWarn(message);
      } catch {
        // A broken sink must not turn a swallowed failure back into a
        // thrown one; fall back to the console so the warning is not lost.
        console.warn(message);
      }
    }
    return undefined;
  }
}
Consumed by #921's + * finalize-orchestrator. + */ + parsedFiles: ParsedFile[]; skippedLanguages: Record; fileCount: number; } @@ -711,6 +721,7 @@ const processBatch = ( ormQueries: [], constructorBindings: [], fileScopeBindings: [], + parsedFiles: [], skippedLanguages: {}, fileCount: 0, }; @@ -1396,11 +1407,24 @@ const processFileGroup = ( continue; } + const provider = getProvider(language); + + // RFC #909 Ring 2: produce a `ParsedFile` for the new scope-based + // resolution pipeline. No-op (returns undefined) for every language + // today — only fires once a provider implements `emitScopeCaptures`. + // Runs BEFORE legacy extraction and its result is independent: a + // failure here is caught inside `extractParsedFile` and does NOT + // affect the legacy DAG path that follows. + const parsedFile = extractParsedFile(provider, parseContent, file.path, (message) => { + if (parentPort) parentPort.postMessage({ type: 'warning', message }); + else console.warn(message); + }); + if (parsedFile !== undefined) result.parsedFiles.push(parsedFile); + // Pre-pass: extract heritage from query matches to build parentMap for buildTypeEnv. // Heritage edges (EXTENDS/IMPLEMENTS) are created by heritage-processor which runs // in PARALLEL with call-processor, so the graph edges don't exist when buildTypeEnv // runs. This pre-pass makes parent class information available for type resolution. 
- const provider = getProvider(language); const fileParentMap = new Map(); if (provider.heritageExtractor) { for (const match of matches) { @@ -2282,6 +2306,7 @@ let accumulated: ParseWorkerResult = { ormQueries: [], constructorBindings: [], fileScopeBindings: [], + parsedFiles: [], skippedLanguages: {}, fileCount: 0, }; @@ -2309,6 +2334,7 @@ const mergeResult = (target: ParseWorkerResult, src: ParseWorkerResult) => { appendAll(target.ormQueries, src.ormQueries); appendAll(target.constructorBindings, src.constructorBindings); appendAll(target.fileScopeBindings, src.fileScopeBindings); + appendAll(target.parsedFiles, src.parsedFiles); for (const [lang, count] of Object.entries(src.skippedLanguages)) { target.skippedLanguages[lang] = (target.skippedLanguages[lang] || 0) + count; } @@ -2360,6 +2386,7 @@ parentPort!.on('message', (msg: WorkerIncomingMessage) => { ormQueries: [], constructorBindings: [], fileScopeBindings: [], + parsedFiles: [], skippedLanguages: {}, fileCount: 0, }; diff --git a/gitnexus/test/unit/scope-resolution/parse-worker-scope-integration.test.ts b/gitnexus/test/unit/scope-resolution/parse-worker-scope-integration.test.ts new file mode 100644 index 0000000000..5d6959298c --- /dev/null +++ b/gitnexus/test/unit/scope-resolution/parse-worker-scope-integration.test.ts @@ -0,0 +1,166 @@ +/** + * Unit tests for `extractParsedFile` — the parse-worker → ScopeExtractor + * bridge (RFC #909 Ring 2 PKG #920). + * + * The goal is to pin three invariants: + * + * 1. When a provider does NOT implement `emitScopeCaptures`, the helper + * returns `undefined` silently. This is the state of every language + * today — `ParseWorkerResult.parsedFiles` stays empty and the legacy + * DAG continues unaffected. + * 2. When a provider DOES implement the hook, the helper threads its + * output through `ScopeExtractor.extract` and returns a `ParsedFile`. + * 3. Exceptions from either the hook or the extractor are caught + * locally. 
/**
 * The helper returns `undefined` — scope-extraction
 * failures must NEVER break legacy parsing on the same file.
 */

import { describe, it, expect } from 'vitest';
import type { Capture, CaptureMatch } from 'gitnexus-shared';
import { extractParsedFile } from '../../../src/core/ingestion/scope-extractor-bridge.js';
import type { LanguageProvider } from '../../../src/core/ingestion/language-provider.js';

// ─── Capture helpers ────────────────────────────────────────────────────────

/** Build a `Capture` from a name, a start/end position and optional text. */
const cap = (
  name: string,
  startLine: number,
  startCol: number,
  endLine: number,
  endCol: number,
  text = '',
): Capture => ({ name, range: { startLine, startCol, endLine, endCol }, text });

/** A match holding only a `@scope.module` capture spanning lines 1–100. */
const moduleScopeMatch = (): CaptureMatch => ({
  '@scope.module': cap('@scope.module', 1, 0, 100, 0),
});

/**
 * Build a `LanguageProvider` whose shape is only as narrow as
 * `extractParsedFile` reads. Tests cast to the full provider type since
 * `extractParsedFile` is typed against `LanguageProvider` (not the narrow
 * `ScopeExtractorHooks`); the real worker always has a full provider.
 *
 * The accepted keys are the two hooks the tests below exercise.
 */
function fakeProvider(
  hooks: Partial<Pick<LanguageProvider, 'emitScopeCaptures' | 'shouldCreateScope'>>,
): LanguageProvider {
  return hooks as unknown as LanguageProvider;
}

// ─── Tests ─────────────────────────────────────────────────────────────────

describe('extractParsedFile', () => {
  describe('provider has NOT migrated (no emitScopeCaptures)', () => {
    it('returns undefined — silent no-op for legacy languages', () => {
      const provider = fakeProvider({}); // no hook
      const result = extractParsedFile(provider, 'source text', 'src/file.ts');
      expect(result).toBeUndefined();
    });

    it('never calls the scope extractor when the hook is absent — cannot throw', () => {
      // `extractParsedFile` swallows every failure on the extraction path,
      // so `not.toThrow()` alone would still pass if the short-circuit were
      // removed. Entering that path without a hook would fail inside the
      // helper and be reported through `onWarn` — an empty warning list is
      // what actually proves the short-circuit fired.
      const warnings: string[] = [];
      const provider = fakeProvider({});
      expect(() =>
        extractParsedFile(provider, '', 'x.ts', (msg) => {
          warnings.push(msg);
        }),
      ).not.toThrow();
      expect(warnings).toHaveLength(0);
    });
  });

  describe('provider HAS migrated', () => {
    it('threads emitScopeCaptures output through ScopeExtractor', () => {
      const provider = fakeProvider({
        emitScopeCaptures: () => [moduleScopeMatch()],
      });
      const result = extractParsedFile(provider, 'source text', 'src/file.ts');
      expect(result).toBeDefined();
      expect(result!.filePath).toBe('src/file.ts');
      expect(result!.scopes).toHaveLength(1);
      expect(result!.scopes[0]!.kind).toBe('Module');
    });

    it('forwards the correct arguments to emitScopeCaptures', () => {
      let seenText: string | undefined;
      let seenPath: string | undefined;
      const provider = fakeProvider({
        emitScopeCaptures: (text, path) => {
          seenText = text;
          seenPath = path;
          return [moduleScopeMatch()];
        },
      });
      extractParsedFile(provider, 'the real text', 'deep/path/file.ts');
      expect(seenText).toBe('the real text');
      expect(seenPath).toBe('deep/path/file.ts');
    });

    it('honors provider hooks beyond emitScopeCaptures (shouldCreateScope)', () => {
      // A Block scope the provider declines to create — the resulting
      // ParsedFile should have only the Module scope, not the Block.
      const provider = fakeProvider({
        emitScopeCaptures: () => [
          moduleScopeMatch(),
          { '@scope.block': cap('@scope.block', 10, 0, 20, 0) },
        ],
        shouldCreateScope: (match) => match['@scope.block'] === undefined,
      });
      const result = extractParsedFile(provider, 'src', 'a.ts');
      expect(result!.scopes).toHaveLength(1);
      expect(result!.scopes[0]!.kind).toBe('Module');
    });
  });

  describe('error resilience — never breaks legacy parsing', () => {
    it('returns undefined when emitScopeCaptures throws', () => {
      const provider = fakeProvider({
        emitScopeCaptures: () => {
          throw new Error('provider boom');
        },
      });
      const result = extractParsedFile(provider, 'src', 'a.ts');
      expect(result).toBeUndefined();
    });

    it('routes errors through the onWarn callback when provided', () => {
      const warnings: string[] = [];
      const provider = fakeProvider({
        emitScopeCaptures: () => {
          throw new Error('provider boom');
        },
      });
      const result = extractParsedFile(provider, 'src', 'path/to/file.ts', (msg) => {
        warnings.push(msg);
      });
      expect(result).toBeUndefined();
      expect(warnings).toHaveLength(1);
      expect(warnings[0]).toContain('path/to/file.ts');
      expect(warnings[0]).toContain('provider boom');
    });

    it('returns undefined when ScopeExtractor throws (missing Module scope)', () => {
      // Emits a Class scope but no Module — extractor throws; helper
      // swallows and returns undefined. Legacy parsing on the same file
      // continues unaffected by this failure.
      const provider = fakeProvider({
        emitScopeCaptures: () => [{ '@scope.class': cap('@scope.class', 5, 0, 10, 0) }],
      });
      const result = extractParsedFile(provider, 'src', 'a.ts');
      expect(result).toBeUndefined();
    });

    it('returns undefined when ScopeExtractor throws on malformed captures (overlap)', () => {
      // Siblings with overlapping ranges trip the ScopeTreeInvariantError
      // from #912. The helper catches it and returns undefined.
      const provider = fakeProvider({
        emitScopeCaptures: () => [
          moduleScopeMatch(),
          { '@scope.function': cap('@scope.function', 10, 0, 20, 0) },
          { '@scope.function': cap('@scope.function', 15, 0, 25, 0) }, // overlap
        ],
      });
      const result = extractParsedFile(provider, 'src', 'a.ts');
      expect(result).toBeUndefined();
    });
  });
});