Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
a46eb85
fix(parse): survive non-cloneable worker results so large-repo analyz…
magyargergo Jun 10, 2026
e162ea5
Merge branch 'main' into fix/2112-large-repo-degradation
magyargergo Jun 10, 2026
b18585b
Merge branch 'main' into fix/2112-large-repo-degradation
magyargergo Jun 10, 2026
14f79c3
fix(parse): fail-closed clone-safety recovery + bound recursion depth…
magyargergo Jun 10, 2026
45c9f34
fix(parse): harden clone-safety against throwing getters and detached…
magyargergo Jun 10, 2026
fb8093e
fix(parse): memoize stripped copies so DAG-aliased records aren't ove…
magyargergo Jun 10, 2026
9c440b9
perf(parse): single-pass clone-safety scan preserving array identity …
magyargergo Jun 10, 2026
873938a
refactor(parse): drop unused generic + pin clone-safe field names to …
magyargergo Jun 10, 2026
04bbe0c
fix(parse): keep the per-file reason in the clone-safety skip log (#2…
magyargergo Jun 10, 2026
1840f1a
fix(parse): deterministic findFilePath attribution for ParsedNode (#2…
magyargergo Jun 10, 2026
c3f9c4a
fix(parse): zero skippedPaths in the slim cache result (#2135 review)
magyargergo Jun 10, 2026
bc4a0a0
test(parse): exercise real postResultCloneSafe wiring + tighten RED c…
magyargergo Jun 10, 2026
567684c
Merge branch 'main' into fix/2112-large-repo-degradation
magyargergo Jun 10, 2026
335b31a
Merge branch 'main' into fix/2112-large-repo-degradation
magyargergo Jun 10, 2026
c33e045
fix(parse): recover the clone-safety net from any post failure, not o…
magyargergo Jun 10, 2026
4327421
feat(parse): name the exact offending key path in the clone-skip diag…
magyargergo Jun 10, 2026
baa2382
test(parse): clone contract — a representative ParseWorkerResult is s…
magyargergo Jun 10, 2026
1c0a425
feat(parse): strict-mode clone gate (GITNEXUS_STRICT_CLONE) — fail lo…
magyargergo Jun 10, 2026
bd1756a
Merge branch 'main' into fix/2112-large-repo-degradation
magyargergo Jun 10, 2026
3a0985b
Merge branch 'main' into fix/2112-large-repo-degradation
magyargergo Jun 10, 2026
dd93e0e
fix(server): don't ship pipelineResult across the analyze-worker IPC …
magyargergo Jun 10, 2026
1922d50
feat(ingestion): Cloneable<T> + assertCloneable() compile-time clone-…
magyargergo Jun 10, 2026
2de7037
feat(ingestion): guard provider clone-boundary hooks with assertClone…
magyargergo Jun 10, 2026
106bb54
Merge branch 'main' into fix/2112-large-repo-degradation
magyargergo Jun 10, 2026
2402ada
fix(parse): scan an array's non-index own properties in the clone san…
magyargergo Jun 10, 2026
19c36ed
fix(parse): contain a throw inside the clone sanitizer instead of esc…
magyargergo Jun 10, 2026
a7f9dd4
fix(parse): add a final cloneable postcondition gate to the clone san…
magyargergo Jun 10, 2026
b304f1a
feat(parse): reject an `any`-typed member in the Cloneable<T> compile…
magyargergo Jun 10, 2026
0687ba6
fix(server): type the analyze-worker IPC projection as a Pick allowli…
magyargergo Jun 10, 2026
772ea75
refactor(parse): remove the now-dead isDataCloneError export (#2135 r…
magyargergo Jun 10, 2026
08afb92
refactor(parse): use the exported SkippedPath type in parsing-process…
magyargergo Jun 10, 2026
c0372fa
docs(parse): document the cloneable-return contract on the worker-bou…
magyargergo Jun 10, 2026
4f396b7
test(parse): assert the clone-skip telemetry surfaces in the GREEN in…
magyargergo Jun 10, 2026
920d988
test(server): cover the IPC projection against a real KnowledgeGraph …
magyargergo Jun 10, 2026
68e7543
test(parse): cover the unsalvageable-drop branch and the skippedPaths…
magyargergo Jun 10, 2026
73188af
style(parse): root-prettier format the clone-safety review-fix files …
magyargergo Jun 10, 2026
b500cdb
test(parse): avoid js/trivial-conditional in the type-level clone ass…
magyargergo Jun 10, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions gitnexus/src/core/ingestion/language-provider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,12 @@ interface LanguageProviderConfig {
* `undefined` when no constraints exist / the node isn't a templated
* function. Languages without SFINAE / concept semantics leave this
* undefined and the disambiguation is a pass-through.
*
* Cloneability contract: the returned payload crosses the worker boundary
* via structured clone, so it MUST be structured-clone-safe (no functions,
* symbols, or tree-sitter `SyntaxNode`s — only plain data). Wrap the return
* with `assertCloneable` from `workers/clone-safety.ts` so a future leak is a
* compile error at the source instead of a runtime DataCloneError (#2143).
*/
readonly extractTemplateConstraints?: (definitionNode: SyntaxNode) => unknown;

Expand Down Expand Up @@ -343,8 +349,12 @@ interface LanguageProviderConfig {
* disk store WITHOUT a main-thread re-parse. The main thread restores them
* via the matching `ScopeResolver.applyCaptureSideChannel` hook.
*
* MUST return plain data (objects / arrays / primitives) so it round-trips
* through `JSON.stringify` + the parsedfile-store interning reviver.
* Cloneability contract: MUST return plain data (objects / arrays /
* primitives — no functions, symbols, or tree-sitter `SyntaxNode`s) so it
* survives BOTH the worker→main structured clone AND `JSON.stringify` + the
* parsedfile-store interning reviver. Wrap the return with `assertCloneable`
* from `workers/clone-safety.ts` so a future non-serializable leak is a
* compile error at the source instead of a runtime DataCloneError (#2143).
*
* Default: undefined (provider has no capture-time module-level side effects).
*/
Expand Down
23 changes: 18 additions & 5 deletions gitnexus/src/core/ingestion/languages/c-cpp.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,11 @@ import {
cppReceiverBinding,
collectCppCaptureSideChannel,
} from './cpp/index.js';
import { extractCppTemplateConstraints } from './cpp/constraint-extractor.js';
import {
extractCppTemplateConstraints,
type CppConstraintPayload,
} from './cpp/constraint-extractor.js';
import { assertCloneable } from '../workers/clone-safety.js';

const C_BUILT_INS: ReadonlySet<string> = new Set([
'printf',
Expand Down Expand Up @@ -405,7 +409,11 @@ export const cProvider = defineLanguage({
// `static` functions look non-file-local on the main thread and leak into
// cross-file global free-call resolution / wildcard imports. See
// `c/capture-side-channel.ts`.
collectCaptureSideChannel: collectCStaticLinkageSideChannel,
// `assertCloneable` is an identity function at runtime; its only effect is at
// compile time, where a future non-serializable value in the side-channel
// payload becomes a compile error here, at the source, rather than a
// DataCloneError at the worker boundary (#2143).
collectCaptureSideChannel: (filePath) =>
assertCloneable(collectCStaticLinkageSideChannel(filePath)),
interpretImport: interpretCImport,
interpretTypeBinding: interpretCTypeBinding,
bindingScopeFor: cBindingScopeFor,
Expand Down Expand Up @@ -480,7 +488,7 @@ export const cppProvider = defineLanguage({
// just populated for this file into plain data on `ParsedFile.captureSideChannel`,
// so the main thread can restore them via `applyCaptureSideChannel` WITHOUT a
// re-parse (#1983). See `cpp/capture-side-channel.ts`.
collectCaptureSideChannel: collectCppCaptureSideChannel,
collectCaptureSideChannel: (filePath) => assertCloneable(collectCppCaptureSideChannel(filePath)),
interpretImport: interpretCppImport,
interpretTypeBinding: interpretCppTypeBinding,
bindingScopeFor: cppBindingScopeFor,
Expand All @@ -501,7 +509,9 @@ export const cppProvider = defineLanguage({
* functions whose constraints the extractor can't model — both cases
* result in no constraint suffix on the node ID.
*/
function extractCppTemplateConstraintsForProvider(definitionNode: SyntaxNode): unknown {
function extractCppTemplateConstraintsForProvider(
definitionNode: SyntaxNode,
): CppConstraintPayload | undefined {
// Walk up to the enclosing template_declaration. Bound the walk so we
// can't accidentally land on a far-ancestor template_declaration that
// wraps an unrelated function.
Expand Down Expand Up @@ -530,5 +540,8 @@ function extractCppTemplateConstraintsForProvider(definitionNode: SyntaxNode): u
}
break;
}
return extractCppTemplateConstraints(templateDecl, declarator);
// Guard the boundary at the source: a future non-cloneable member of the
// constraint payload becomes a compile error here, not a runtime
// DataCloneError at the worker post (#2143).
return assertCloneable(extractCppTemplateConstraints(templateDecl, declarator));
}
7 changes: 6 additions & 1 deletion gitnexus/src/core/ingestion/languages/kotlin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import { SupportedLanguages } from 'gitnexus-shared';
import { createClassExtractor } from '../class-extractors/generic.js';
import { kotlinClassConfig } from '../class-extractors/configs/jvm.js';
import { defineLanguage } from '../language-provider.js';
import { assertCloneable } from '../workers/clone-safety.js';
import { kotlinTypeConfig } from '../type-extractors/jvm.js';
import { kotlinExportChecker } from '../export-detection.js';
import { createImportResolver } from '../import-resolvers/resolver-factory.js';
Expand Down Expand Up @@ -182,7 +183,11 @@ export const kotlinProvider = defineLanguage({
// so the main thread can restore them via `applyCaptureSideChannel` WITHOUT a
// re-parse (#1983). Without this, companion/static dispatch emits no CALLS
// edges on the worker path. See `kotlin/capture-side-channel.ts`.
collectCaptureSideChannel: collectKotlinCaptureSideChannel,
// `assertCloneable` is an identity function at runtime; its only effect is at
// compile time, where a future non-serializable value in the side-channel
// payload becomes a compile error here, at the source, rather than a
// DataCloneError at the worker boundary (#2143).
collectCaptureSideChannel: (filePath) =>
assertCloneable(collectKotlinCaptureSideChannel(filePath)),
interpretImport: interpretKotlinImport,
interpretTypeBinding: interpretKotlinTypeBinding,
bindingScopeFor: kotlinBindingScopeFor,
Expand Down
24 changes: 24 additions & 0 deletions gitnexus/src/core/ingestion/parsing-processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import { accumulateExportedTypesFromParsedNode, type ExportedTypeMap } from './c

import type { ParsedFile } from 'gitnexus-shared';
import { WorkerPool } from './workers/worker-pool.js';
import type { SkippedPath } from './workers/clone-safety.js';
import { logger } from '../logger.js';
import type {
ParseWorkerResult,
Expand Down Expand Up @@ -196,6 +197,29 @@ export const dispatchChunkParse = async (
logger.warn(` Skipped unsupported languages: ${summary}`);
}

// Clone-safety telemetry (#2112): files whose parse output carried a value
// the structured-clone algorithm couldn't serialize across the worker
// boundary. The worker sanitized/dropped the offending value so the run
// could complete; surface the (rare) data loss so it's visible and the
// offending extractor can be fixed at source.
const skippedPaths: SkippedPath[] = [];
for (const result of chunkResults) {
for (const entry of result.skippedPaths ?? []) skippedPaths.push(entry);
}
if (skippedPaths.length > 0) {
// Keep the per-file reason ("stripped N value(s) from nodes" /
// "dropped non-serializable parsedFiles entry") — it distinguishes a
// recoverable strip from a whole-record drop, which a path-only line loses.
const shown = skippedPaths
.slice(0, 10)
.map((e) => `${e.path} (${e.reason})`)
.join(', ');
const more = skippedPaths.length > 10 ? ` …and ${skippedPaths.length - 10} more` : '';
logger.warn(
` Sanitized ${skippedPaths.length} file(s) with non-serializable parse output: ${shown}${more}`,
);
}

onFileProgress?.(total, total, 'done');
return chunkResults;
};
Expand Down
Loading
Loading