Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions gitnexus/src/core/ingestion/language-provider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -321,12 +321,15 @@ interface LanguageProviderConfig {
* Providers that have not yet migrated continue to run through the
* legacy DAG path (feature-flagged per `REGISTRY_PRIMARY_<LANG>`).
*
* **Sync return.** Tree-sitter query execution and COBOL's regex
* tagger are both synchronous; no current or foreseeable provider
* needs async work inside this hook. The sync signature lets
* `parse-worker.ts` (#920) invoke it inline in its already-sync
* per-file loop without cascading `async` through the batch pipeline.
*
* Default: undefined (language continues to use legacy DAG).
*/
readonly emitScopeCaptures?: (
sourceText: string,
filePath: string,
) => Promise<readonly CaptureMatch[]>;
readonly emitScopeCaptures?: (sourceText: string, filePath: string) => readonly CaptureMatch[];

/**
* Interpret a raw `@import.statement` capture group into a `ParsedImport`.
Expand Down
17 changes: 17 additions & 0 deletions gitnexus/src/core/ingestion/parsing-processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import {
buildCollisionGroups,
} from './utils/method-props.js';
import type { LanguageProvider } from './language-provider.js';
import type { ParsedFile } from 'gitnexus-shared';
import { WorkerPool } from './workers/worker-pool.js';
import type {
ParseWorkerResult,
Expand Down Expand Up @@ -62,6 +63,14 @@ export interface WorkerExtractedData {
ormQueries: ExtractedORMQuery[];
constructorBindings: FileConstructorBindings[];
fileScopeBindings: FileScopeBindings[];
/**
* Per-file `ParsedFile` artifacts from the new scope-based resolution
* pipeline (RFC #909 Ring 2). Empty until a provider implements
* `emitScopeCaptures` — additive to the legacy DAG path. Aggregated
* from every worker chunk; consumed downstream by #921's
* finalize-orchestrator.
*/
parsedFiles: ParsedFile[];
}

// ============================================================================
Expand Down Expand Up @@ -96,6 +105,7 @@ const processParsingWithWorkers = async (
ormQueries: [],
constructorBindings: [],
fileScopeBindings: [],
parsedFiles: [],
};

const total = files.length;
Expand All @@ -120,6 +130,7 @@ const processParsingWithWorkers = async (
const allORMQueries: ExtractedORMQuery[] = [];
const allConstructorBindings: FileConstructorBindings[] = [];
const fileScopeBindingsByFile: FileScopeBindings[] = [];
const allParsedFiles: ParsedFile[] = [];
for (const result of chunkResults) {
for (const node of result.nodes) {
graph.addNode({
Expand Down Expand Up @@ -157,6 +168,11 @@ const processParsingWithWorkers = async (
for (const item of result.constructorBindings) allConstructorBindings.push(item);
if (result.fileScopeBindings)
for (const item of result.fileScopeBindings) fileScopeBindingsByFile.push(item);
// RFC #909 Ring 2: aggregate per-file scope artifacts. Tolerant of
// workers that don't emit the field yet (older worker builds or
// partial rollouts), since the additive contract means undefined =
// "this worker produced no ParsedFiles for this chunk".
if (result.parsedFiles) for (const item of result.parsedFiles) allParsedFiles.push(item);
}

// Merge and log skipped languages from workers
Expand Down Expand Up @@ -187,6 +203,7 @@ const processParsingWithWorkers = async (
ormQueries: allORMQueries,
constructorBindings: allConstructorBindings,
fileScopeBindings: fileScopeBindingsByFile,
parsedFiles: allParsedFiles,
};
};

Expand Down
54 changes: 54 additions & 0 deletions gitnexus/src/core/ingestion/scope-extractor-bridge.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/**
* Bridge between a language provider's `emitScopeCaptures` hook and the
* `ScopeExtractor` (RFC #909 Ring 2 PKG #920).
*
* Extracted into its own module so it can be imported by test code
* without pulling in `parse-worker.ts` — which has a top-level
* `parentPort!.on('message', ...)` call that assumes a worker-thread
* context and throws on direct import.
*
* The bridge:
*
* 1. Short-circuits when the provider has NOT implemented
* `emitScopeCaptures`. Returns `undefined`; zero work done. This is
* the state of every language today — `ParsedFile` production stays
* dormant until a language migrates.
* 2. Invokes the hook + feeds its output to `ScopeExtractor.extract`.
* 3. **Swallows exceptions from either side.** A failure here returns
* `undefined` and emits a warning via `onWarn`; legacy parsing on
* the same file continues unaffected by the scope-extraction miss.
* Scope-based resolution is the new path under construction — it
* must not destabilize the legacy DAG.
*/

import type { ParsedFile } from 'gitnexus-shared';
import { extract as extractScope } from './scope-extractor.js';
import type { LanguageProvider } from './language-provider.js';

/** Callback used to report scope-extraction warnings to the host (worker or direct). */
export type ScopeBridgeWarn = (message: string) => void;

/**
 * Produce a `ParsedFile` for one source file by running the provider's
 * `emitScopeCaptures` hook and feeding its captures to the scope extractor.
 *
 * Never propagates exceptions. A throw from the hook or the extractor is
 * converted into a warning and an `undefined` result. A throw while
 * building or delivering that warning (a failing `onWarn`, or a thrown
 * value that cannot be stringified) is also contained, so the caller's
 * per-file loop is never interrupted by this path.
 *
 * @param provider - Language provider; when it has no `emitScopeCaptures`
 *   hook the function returns `undefined` immediately without doing any work.
 * @param sourceText - Source text passed to the hook.
 * @param filePath - Path passed to the hook and the extractor, and embedded
 *   in the warning message.
 * @param onWarn - Optional warning sink. When omitted, `console.warn` is used.
 * @returns The extracted `ParsedFile`, or `undefined` when the hook is
 *   absent or extraction failed.
 */
export function extractParsedFile(
  provider: LanguageProvider,
  sourceText: string,
  filePath: string,
  onWarn?: ScopeBridgeWarn,
): ParsedFile | undefined {
  if (provider.emitScopeCaptures === undefined) return undefined;
  try {
    const captures = provider.emitScopeCaptures(sourceText, filePath);
    return extractScope(captures, filePath, provider);
  } catch (err) {
    // Reporting is best-effort: it runs inside its own guard so that a
    // misbehaving reporter (or an exotic thrown value whose string
    // conversion throws) cannot break the "never propagates" contract.
    try {
      const message = `scope extraction failed for ${filePath}: ${
        err instanceof Error ? err.message : String(err)
      }`;
      if (onWarn !== undefined) onWarn(message);
      else console.warn(message);
    } catch {
      // Nothing further can be done safely; the extraction miss is still
      // signalled to the caller through the `undefined` return value.
    }
    return undefined;
  }
}
29 changes: 28 additions & 1 deletion gitnexus/src/core/ingestion/workers/parse-worker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ import {
buildCollisionGroups,
} from '../utils/method-props.js';
import type { LanguageProvider } from '../language-provider.js';
import type { ParsedFile } from 'gitnexus-shared';
import { extractParsedFile } from '../scope-extractor-bridge.js';

// ============================================================================
// Types for serializable results
Expand Down Expand Up @@ -269,6 +271,14 @@ export interface ParseWorkerResult {
constructorBindings: FileConstructorBindings[];
/** All-scope type bindings from TypeEnv for BindingAccumulator (includes function-local). */
fileScopeBindings: FileScopeBindings[];
/**
* Per-file `ParsedFile` artifacts from the new scope-based resolution
* pipeline (RFC #909 Ring 2). Empty unless the file's provider implements
* `emitScopeCaptures` — default for every language today, so this is
* additive and leaves the legacy DAG untouched. Consumed by #921's
* finalize-orchestrator.
*/
parsedFiles: ParsedFile[];
skippedLanguages: Record<string, number>;
fileCount: number;
}
Expand Down Expand Up @@ -711,6 +721,7 @@ const processBatch = (
ormQueries: [],
constructorBindings: [],
fileScopeBindings: [],
parsedFiles: [],
skippedLanguages: {},
fileCount: 0,
};
Expand Down Expand Up @@ -1396,11 +1407,24 @@ const processFileGroup = (
continue;
}

const provider = getProvider(language);

// RFC #909 Ring 2: produce a `ParsedFile` for the new scope-based
// resolution pipeline. No-op (returns undefined) for every language
// today — only fires once a provider implements `emitScopeCaptures`.
// Runs BEFORE legacy extraction and its result is independent: a
// failure here is caught inside `extractParsedFile` and does NOT
// affect the legacy DAG path that follows.
const parsedFile = extractParsedFile(provider, parseContent, file.path, (message) => {
if (parentPort) parentPort.postMessage({ type: 'warning', message });
else console.warn(message);
});
if (parsedFile !== undefined) result.parsedFiles.push(parsedFile);

// Pre-pass: extract heritage from query matches to build parentMap for buildTypeEnv.
// Heritage edges (EXTENDS/IMPLEMENTS) are created by heritage-processor which runs
// in PARALLEL with call-processor, so the graph edges don't exist when buildTypeEnv
// runs. This pre-pass makes parent class information available for type resolution.
const provider = getProvider(language);
const fileParentMap = new Map<string, string[]>();
if (provider.heritageExtractor) {
for (const match of matches) {
Expand Down Expand Up @@ -2282,6 +2306,7 @@ let accumulated: ParseWorkerResult = {
ormQueries: [],
constructorBindings: [],
fileScopeBindings: [],
parsedFiles: [],
skippedLanguages: {},
fileCount: 0,
};
Expand Down Expand Up @@ -2309,6 +2334,7 @@ const mergeResult = (target: ParseWorkerResult, src: ParseWorkerResult) => {
appendAll(target.ormQueries, src.ormQueries);
appendAll(target.constructorBindings, src.constructorBindings);
appendAll(target.fileScopeBindings, src.fileScopeBindings);
appendAll(target.parsedFiles, src.parsedFiles);
for (const [lang, count] of Object.entries(src.skippedLanguages)) {
target.skippedLanguages[lang] = (target.skippedLanguages[lang] || 0) + count;
}
Expand Down Expand Up @@ -2360,6 +2386,7 @@ parentPort!.on('message', (msg: WorkerIncomingMessage) => {
ormQueries: [],
constructorBindings: [],
fileScopeBindings: [],
parsedFiles: [],
skippedLanguages: {},
fileCount: 0,
};
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
/**
* Unit tests for `extractParsedFile` — the parse-worker → ScopeExtractor
* bridge (RFC #909 Ring 2 PKG #920).
*
* The goal is to pin three invariants:
*
* 1. When a provider does NOT implement `emitScopeCaptures`, the helper
* returns `undefined` silently. This is the state of every language
* today — `ParseWorkerResult.parsedFiles` stays empty and the legacy
* DAG continues unaffected.
* 2. When a provider DOES implement the hook, the helper threads its
* output through `ScopeExtractor.extract` and returns a `ParsedFile`.
* 3. Exceptions from either the hook or the extractor are caught
* locally. The helper returns `undefined` — scope-extraction
* failures must NEVER break legacy parsing on the same file.
*/

import { describe, it, expect } from 'vitest';
import type { Capture, CaptureMatch } from 'gitnexus-shared';
import { extractParsedFile } from '../../../src/core/ingestion/scope-extractor-bridge.js';
import type { LanguageProvider } from '../../../src/core/ingestion/language-provider.js';

// ─── Capture helpers ────────────────────────────────────────────────────────

/**
 * Build a `Capture` fixture with the given name, source range, and text.
 * `text` defaults to the empty string.
 */
function cap(
  name: string,
  startLine: number,
  startCol: number,
  endLine: number,
  endCol: number,
  text = '',
): Capture {
  const range = { startLine, startCol, endLine, endCol };
  return { name, range, text };
}

/**
 * Build a fresh `CaptureMatch` holding a single `@scope.module` capture
 * that spans lines 1 through 100.
 */
function moduleScopeMatch(): CaptureMatch {
  const key = '@scope.module';
  return { [key]: cap(key, 1, 0, 100, 0) };
}

/** The subset of `LanguageProvider` hooks a test may supply to `fakeProvider`. */
type FakeProviderHooks = Partial<
  Pick<LanguageProvider, 'emitScopeCaptures' | 'shouldCreateScope' | 'resolveScopeKind'>
>;

/**
 * Present a partial set of hooks as a full `LanguageProvider`.
 *
 * The very same object is returned, only re-typed: `extractParsedFile` is
 * declared against `LanguageProvider`, so tests that supply just a few
 * hooks need the cast to call it.
 */
function fakeProvider(hooks: FakeProviderHooks): LanguageProvider {
  const untyped: unknown = hooks;
  return untyped as LanguageProvider;
}

// ─── Tests ─────────────────────────────────────────────────────────────────

describe('extractParsedFile', () => {
  /** Provider whose `emitScopeCaptures` hook always throws `Error('provider boom')`. */
  const explodingProvider = (): LanguageProvider =>
    fakeProvider({
      emitScopeCaptures: () => {
        throw new Error('provider boom');
      },
    });

  /** Single-capture match for a `@scope.function` spanning the given lines. */
  const functionScopeMatch = (startLine: number, endLine: number): CaptureMatch => ({
    '@scope.function': cap('@scope.function', startLine, 0, endLine, 0),
  });

  describe('provider has NOT migrated (no emitScopeCaptures)', () => {
    it('returns undefined — silent no-op for legacy languages', () => {
      // A provider with no hooks at all.
      const legacyOnly = fakeProvider({});
      expect(extractParsedFile(legacyOnly, 'source text', 'src/file.ts')).toBeUndefined();
    });

    it('never calls the scope extractor when the hook is absent — cannot throw', () => {
      // Were the extractor reached with no captures it would be expected to
      // reject the missing Module scope; not throwing shows the early
      // return was taken instead.
      const legacyOnly = fakeProvider({});
      const invoke = () => extractParsedFile(legacyOnly, '', 'x.ts');
      expect(invoke).not.toThrow();
    });
  });

  describe('provider HAS migrated', () => {
    it('threads emitScopeCaptures output through ScopeExtractor', () => {
      const migrated = fakeProvider({
        emitScopeCaptures: () => [moduleScopeMatch()],
      });
      const parsed = extractParsedFile(migrated, 'source text', 'src/file.ts');
      expect(parsed).toBeDefined();
      expect(parsed!.filePath).toBe('src/file.ts');
      expect(parsed!.scopes).toHaveLength(1);
      expect(parsed!.scopes[0]!.kind).toBe('Module');
    });

    it('forwards the correct arguments to emitScopeCaptures', () => {
      // Records whatever the hook was last invoked with.
      const seen: { text?: string; path?: string } = {};
      const recording = fakeProvider({
        emitScopeCaptures: (text, path) => {
          seen.text = text;
          seen.path = path;
          return [moduleScopeMatch()];
        },
      });
      extractParsedFile(recording, 'the real text', 'deep/path/file.ts');
      expect(seen.text).toBe('the real text');
      expect(seen.path).toBe('deep/path/file.ts');
    });

    it('honors provider hooks beyond emitScopeCaptures (shouldCreateScope)', () => {
      // The provider vetoes the Block scope, so only the Module scope
      // should survive into the resulting ParsedFile.
      const blockScopeMatch = (): CaptureMatch => ({
        '@scope.block': cap('@scope.block', 10, 0, 20, 0),
      });
      const selective = fakeProvider({
        emitScopeCaptures: () => [moduleScopeMatch(), blockScopeMatch()],
        shouldCreateScope: (match) => match['@scope.block'] === undefined,
      });
      const parsed = extractParsedFile(selective, 'src', 'a.ts');
      expect(parsed!.scopes).toHaveLength(1);
      expect(parsed!.scopes[0]!.kind).toBe('Module');
    });
  });

  describe('error resilience — never breaks legacy parsing', () => {
    it('returns undefined when emitScopeCaptures throws', () => {
      const outcome = extractParsedFile(explodingProvider(), 'src', 'a.ts');
      expect(outcome).toBeUndefined();
    });

    it('routes errors through the onWarn callback when provided', () => {
      const collected: string[] = [];
      const collect = (msg: string): void => {
        collected.push(msg);
      };
      const outcome = extractParsedFile(explodingProvider(), 'src', 'path/to/file.ts', collect);
      expect(outcome).toBeUndefined();
      expect(collected).toHaveLength(1);
      // The single warning names both the file and the underlying error.
      expect(collected[0]).toContain('path/to/file.ts');
      expect(collected[0]).toContain('provider boom');
    });

    it('returns undefined when ScopeExtractor throws (missing Module scope)', () => {
      // Only a Class scope is emitted, with no enclosing Module; the
      // extractor is expected to throw and the helper to absorb it.
      const classOnly = fakeProvider({
        emitScopeCaptures: () => [{ '@scope.class': cap('@scope.class', 5, 0, 10, 0) }],
      });
      expect(extractParsedFile(classOnly, 'src', 'a.ts')).toBeUndefined();
    });

    it('returns undefined when ScopeExtractor throws on malformed captures (overlap)', () => {
      // Two sibling function scopes with overlapping line ranges (10-20 and
      // 15-25) are expected to trip the extractor's scope-tree invariant
      // check (#912); the helper absorbs the error.
      const overlapping = fakeProvider({
        emitScopeCaptures: () => [
          moduleScopeMatch(),
          functionScopeMatch(10, 20),
          functionScopeMatch(15, 25), // overlap
        ],
      });
      expect(extractParsedFile(overlapping, 'src', 'a.ts')).toBeUndefined();
    });
  });
});
Loading