Skip to content
Merged
4 changes: 2 additions & 2 deletions gitnexus/bench/scope-capture/baselines.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@
"_added": "#1956: c added to the scope-capture bench (was UNBENCHED). C has no inheritance \u2014 flat scale source. Adding it exposed + fixed a pre-existing O(n^2) findNodeAtRange root-walk in c/captures.ts (threaded c.node, byte-identical over c-* fixtures); scaling 3.475 -> 0.96."
},
"cpp": {
"fingerprint": "931bf7af55dc1480d1a5d3c479ea3803003a6a2e2c4406447bd96f3e312e88de",
"fingerprint": "e21e05c92870b82468b5d73f04d205b6aafad4143331cf718131f0517ba34e0a",
"scaling_budget": 1.5,
"_added": "#1956: cpp added to the scope-capture bench (was UNBENCHED). Heritage-bearing scale source (: public Base, public Mixin) drives emitCppInheritanceCaptures at scale. Adding it exposed + fixed a pre-existing O(n^2) findNodeAtRange root-walk in cpp/captures.ts (~12 sites, threaded c.node, byte-identical over 263 cpp-* fixtures); scaling 2.30 -> 1.12.",
"_rebaselined": "#1965 / #1923 F4: uninitialized non-leading multi-declarators now emit @declaration.variable captures; cpp-adl-inner-callable-outer-noncallable data::Pair a, b adds the legitimate fixture drift. Linear (~1.06).",
"_note": "#1975: + cpp-out-of-line-class fixture (out-of-line struct Outer::Inner / Other::Inner). Pure fixture-corpus drift — the fix is the legacy structure-query qualified_identifier arm, NOT the cpp scope-extractor; existing fixtures' captures byte-identical. fixture_count 263->265."
"_note": "#1975: + cpp-out-of-line-class fixture, fixture_count 263->265. #1990: + cpp-adl-ns-plus-hidden-friend-same-name fixture (ADL hidden-friend + namespace-callable merge parity test). Pure fixture-corpus drift — no scope-extractor change; existing fixtures' captures byte-identical. fixture_count 265->267."
},
"csharp": {
"_rebaselined": "#1956 synth-widening: + csharp-qualified-base fixture; the synth now walks record_declaration + struct_declaration base_lists and handles alias_qualified_name (matching the #1940 legacy leg), so record/struct heritage now emits. csharp-record-base gains a record inherits capture. (record->record SAME-namespace EXTENDS is a separate registry resolution gap, tracked as follow-up.) Linear (~1.00). (Earlier #1956: heritage-bearing scale source.)",
Expand Down
454 changes: 312 additions & 142 deletions gitnexus/src/core/ingestion/languages/cpp/adl.ts

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#include "lib.h"

// Both call sites use an unqualified name with a lib::T argument, so ordinary
// lookup fails and ADL fires via T's associated namespace `lib`. `combine` is
// only reachable as a hidden friend (friendCandidates); `process` only as a
// namespace member (nsCandidates). Both must resolve — that is what proves
// pickCppAdlCandidates consults BOTH buckets when merging.

// Calls `combine` unqualified with two lib::T arguments. In this fixture
// `combine` is declared only as a friend inside lib::T, so the call is the
// hidden-friend half of the test.
void call_friend() {
  lib::T a;
  lib::T b;
  combine(a, b);
}

// Calls `process` unqualified with a lib::T argument. In this fixture
// `process` is declared at namespace scope inside `lib`, so the call is the
// namespace-member half of the test.
void call_ns() {
  lib::T t;
  process(t);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// Fixture library: one type `T` plus two callables taking `T&`, each declared
// in a different way so that each one exercises a different candidate bucket.
namespace lib {

struct T {
  // Hidden friend: a namespace-scope member of `lib` visible ONLY via ADL.
  // Exercises the friendCandidates bucket.
  friend void combine(T& a, T& b) {}
};

// Ordinary namespace-level callable. Exercises the nsCandidates bucket.
void process(T& x) {}

}
169 changes: 169 additions & 0 deletions gitnexus/test/integration/cpp-adl-benchmark.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
/**
* C++ ADL (argument-dependent lookup) emit-scaling benchmark.
*
* Guards the optimization in PR #1990: `pickCppAdlCandidates` used to rescan all
* parsed files (and all workspace defs) once PER unresolved ADL call site —
* O(sites × files). It now queries a once-built index — O(sites). This benchmark
* reproduces the pathological shape (many unresolved ADL sites) and asserts the
* scope-resolution EMIT phase scales sub-quadratically.
*
* Run: GITNEXUS_BENCH=1 npx vitest run test/integration/cpp-adl-benchmark.test.ts
*
* WHY EMIT MS, NOT WALL TIME: the fixture is parsed single-threaded
* (workerPoolSize: 0, so no dist build is needed), and parse dominates total
* wall time — masking the ADL cost. We isolate the scope-resolution `emit` ms
* from the profiler log (captured in-process via the logger test destination).
*
* WHY CO-SCALE FILES AND SITES: the regression is O(sites × files). At fixed
* files, both the old and new code are linear in sites and indistinguishable.
* Scaling both with N makes the OLD cost O(N²) and the NEW cost O(N); the
* end-to-end emit ratio then separates them cleanly (linear ≈ Nratio,
* quadratic ≈ Nratio²). The guard sits at Nratio^1.5.
*/
import { describe, it, expect } from 'vitest';
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import { runPipelineFromRepo } from '../../src/core/ingestion/pipeline.js';
import { _captureLogger } from '../../src/core/logger.js';

// Opt-in switch: true only when the GITNEXUS_BENCH environment variable is
// exactly the string '1'.
const BENCH_ENABLED = process.env.GITNEXUS_BENCH === '1';

/** Measurements collected from one benchmark run at a single scale. */
interface BenchResult {
  /** Number of generated `lib_<k>.h` headers requested for the run. */
  fileCount: number;
  /** Number of generated `call_<i>` caller functions requested for the run. */
  siteCount: number;
  /** Wall-clock milliseconds (`Date.now()` delta) around the pipeline call. */
  elapsedMs: number;
  /** Largest `emit=<n>ms` value found in the captured log records; NaN when none was captured. */
  emitMs: number;
  /** Highest sampled `heapUsed`, in MiB, rounded to an integer. */
  peakHeapMB: number;
  /** `nodeCount` reported by the resulting graph. */
  nodeCount: number;
  /** Number of relationships in the resulting graph whose type is 'CALLS'. */
  callsResolved: number;
}

/**
 * Builds a throwaway C++ workspace under the OS temp directory for the ADL
 * benchmark and returns its path. The directory is not removed here.
 *
 * Files written:
 * - `lib_<k>.h` for each k in [0, fileCount): a namespace `lib_<k>` holding an
 *   empty struct `T<k>` and three `helper<k>_<j>` functions taking `T<k>&`.
 * - `app.cpp`: one `#include` per header, followed by `siteCount` functions
 *   `call_<i>`. Each declares a local of type `lib_<k>::T<k>` (k = i % fileCount)
 *   and calls `ghost(t)`.
 *
 * `ghost` is never declared in any generated file, so every call site is meant
 * to stay unresolved after a class-typed-argument lookup — the worst-case scan
 * shape the benchmark wants to stress.
 *
 * @param fileCount number of headers (and namespaces) to generate
 * @param siteCount number of caller functions to generate in app.cpp
 * @returns an object carrying the created directory path
 */
function generateCppAdlFixture(fileCount: number, siteCount: number): { dir: string } {
  const dir = fs.mkdtempSync(path.join(os.tmpdir(), `cpp-adl-bench-${fileCount}-`));

  // One header per namespace; the trailing '' yields the final newline on join.
  for (let k = 0; k < fileCount; k++) {
    const helperDecls = [0, 1, 2].map((j) => `void helper${k}_${j}(T${k}& x) {}`);
    const headerLines = [`namespace lib_${k} {`, `struct T${k} {};`, ...helperDecls, '}', ''];
    fs.writeFileSync(path.join(dir, `lib_${k}.h`), headerLines.join('\n'));
  }

  const includeBlock = Array.from(
    { length: fileCount },
    (_unused, k) => `#include "lib_${k}.h"`,
  ).join('\n');

  // Call sites cycle through the namespaces so every header's type is used.
  const callerBlock = Array.from({ length: siteCount }, (_unused, i) => {
    const k = i % fileCount;
    return [`void call_${i}() {`, ` lib_${k}::T${k} t;`, ' ghost(t);', '}'].join('\n');
  }).join('\n');

  fs.writeFileSync(path.join(dir, 'app.cpp'), `${includeBlock}\n\n${callerBlock}\n`);
  return { dir };
}

/**
 * Scans captured log records for `[scope-resolution prof]` lines and returns
 * the largest `emit=<n>ms` figure among them.
 *
 * Records without a `msg`, or whose message does not match, are ignored. When
 * a single line carries several `emit=` fields, the last one on that line is
 * the one read (the pattern's `.*` is greedy).
 *
 * @param records captured log records; only `msg` is inspected
 * @returns the maximum emit time in milliseconds, or NaN when no line matched
 */
function extractEmitMs(records: { msg?: string }[]): number {
  const emitPattern = /\[scope-resolution prof\].*emit=(\d+(?:\.\d+)?)ms/;
  return records.reduce((best, record) => {
    const hit = emitPattern.exec(record.msg ?? '');
    if (hit === null) return best;
    const value = Number(hit[1]);
    // NaN is the "nothing seen yet" marker, so the first hit replaces it.
    return Number.isNaN(best) ? value : Math.max(best, value);
  }, NaN);
}

/**
 * Runs the ingestion pipeline once over a freshly generated ADL fixture and
 * returns the measurements for that run.
 *
 * Steps:
 * 1. Generate the fixture directory for the requested scale.
 * 2. Turn on `PROF_SCOPE_RESOLUTION` and capture logger output in-process so
 *    the emit time can be read back from the profiler lines.
 * 3. Run the pipeline with `workerPoolSize: 0` and time it with `Date.now()`.
 * 4. Count the 'CALLS' relationships in the resulting graph.
 *
 * Cleanup (logger capture, env var, heap timer, temp directory) always runs,
 * including when enabling the capture itself throws.
 *
 * @param fileCount number of headers to generate
 * @param siteCount number of call sites to generate
 * @returns the measurements for this run; `emitMs` is NaN when no profiler
 *   line was captured
 */
async function runBenchmark(fileCount: number, siteCount: number): Promise<BenchResult> {
  const { dir } = generateCppAdlFixture(fileCount, siteCount);

  let peakHeapMB = 0;
  const sampleHeap = (): void => {
    const heap = process.memoryUsage().heapUsed / 1024 / 1024;
    if (heap > peakHeapMB) peakHeapMB = heap;
  };
  const heapSampler = setInterval(sampleHeap, 50);

  const prevProf = process.env.PROF_SCOPE_RESOLUTION;
  let cap: ReturnType<typeof _captureLogger> | undefined;
  try {
    // Set up inside the try so a failure here still reaches the cleanup below.
    process.env.PROF_SCOPE_RESOLUTION = '1';
    cap = _captureLogger();

    // The interval only fires when the event loop is free, so also sample at
    // the run boundaries; otherwise a short or loop-hogging run reports 0.
    sampleHeap();
    const start = Date.now();
    const result = await runPipelineFromRepo(dir, () => {}, { workerPoolSize: 0 });
    const elapsedMs = Date.now() - start;
    sampleHeap();

    const emitMs = extractEmitMs(cap.records());

    let callsResolved = 0;
    for (const rel of result.graph.iterRelationships()) {
      if (rel.type === 'CALLS') callsResolved++;
    }

    return {
      fileCount,
      siteCount,
      elapsedMs,
      emitMs,
      peakHeapMB: Math.round(peakHeapMB),
      nodeCount: result.graph.nodeCount,
      callsResolved,
    };
  } finally {
    cap?.restore();
    // Put the env var back exactly as it was, including "unset".
    if (prevProf === undefined) delete process.env.PROF_SCOPE_RESOLUTION;
    else process.env.PROF_SCOPE_RESOLUTION = prevProf;
    clearInterval(heapSampler);
    fs.rmSync(dir, { recursive: true, force: true });
  }
}

/**
 * Prints the benchmark measurements as a box-drawn table on stdout, one
 * `console.log` call per table line and one row per result.
 *
 * Every cell is right-aligned with `padStart`; values wider than their column
 * are printed in full. A NaN emit time is shown as `n/a`, otherwise it is
 * rounded to the nearest millisecond.
 *
 * @param results the runs to print, in display order
 */
function printResults(results: BenchResult[]) {
  const cell = (value: unknown, width: number): string => String(value).padStart(width);

  console.log('\nC++ ADL emit-scaling benchmark (unresolved-site pattern)');
  console.log('┌────────┬────────┬───────────┬──────────┬──────────┬───────┬───────────┐');
  console.log('│ Files │ Sites │ Wall (ms) │ Emit (ms)│ Heap MB │ Nodes │ CALLS res │');
  console.log('├────────┼────────┼───────────┼──────────┼──────────┼───────┼───────────┤');

  results.forEach((row) => {
    const emit = Number.isNaN(row.emitMs) ? 'n/a' : Math.round(row.emitMs);
    const cells = [
      cell(row.fileCount, 6),
      cell(row.siteCount, 6),
      cell(row.elapsedMs, 9),
      cell(emit, 8),
      cell(row.peakHeapMB, 8),
      cell(row.nodeCount, 5),
      cell(row.callsResolved, 9),
    ];
    console.log(`│ ${cells.join(' │ ')} │`);
  });

  console.log('└────────┴────────┴───────────┴──────────┴──────────┴───────┴───────────┘');
}

describe.skipIf(!BENCH_ENABLED)('C++ ADL emit benchmark', () => {
  it(
    'emit phase scales sub-quadratically with co-scaled files and sites',
    async () => {
      // Files and sites grow together (sites = 6 × files): a per-site rescan
      // of all files would then cost O(N²), an indexed lookup only O(N).
      const sitesPerFile = 6;
      const results: BenchResult[] = [];
      // Runs are sequential on purpose; each one is timed in this process.
      for (const fileCount of [40, 80, 160]) {
        results.push(await runBenchmark(fileCount, fileCount * sitesPerFile));
      }
      printResults(results);

      const smallest = results[0];
      const largest = results[results.length - 1];
      const fileRatio = largest.fileCount / smallest.fileCount;

      // The emit guard is usable only when both end points captured a profiler
      // line and the small run reported a non-zero emit time.
      const emitCaptured =
        !Number.isNaN(smallest.emitMs) && !Number.isNaN(largest.emitMs) && smallest.emitMs > 0;

      if (emitCaptured) {
        // Primary guard on the isolated emit time: linear growth lands near
        // fileRatio, quadratic near fileRatio²; fileRatio^1.5 sits between
        // the two and leaves room for timer and GC noise.
        expect(largest.emitMs / smallest.emitMs).toBeLessThan(fileRatio ** 1.5);
      } else {
        // Fallback on total wall time, which is dominated by parsing, so it
        // can only flag a gross blow-up.
        expect(largest.elapsedMs / smallest.elapsedMs).toBeLessThan(fileRatio ** 2);
      }

      // `ghost` is declared nowhere, so no CALLS edge may be emitted; this
      // confirms the run exercised the lookup path rather than edge emission.
      expect(largest.callsResolved).toBe(0);
    },
    600_000,
  );
});
Loading
Loading