abhigyanpatwari · magyargergo · Jun 3, 2026 · Jun 3, 2026 · Jun 3, 2026 · Jun 3, 2026
@@ -16,11 +16,11 @@
     "_added": "#1956: c added to the scope-capture bench (was UNBENCHED). C has no inheritance \u2014 flat scale source. Adding it exposed + fixed a pre-existing O(n^2) findNodeAtRange root-walk in c/captures.ts (threaded c.node, byte-identical over c-* fixtures); scaling 3.475 -> 0.96."
   },
   "cpp": {
-    "fingerprint": "931bf7af55dc1480d1a5d3c479ea3803003a6a2e2c4406447bd96f3e312e88de",
+    "fingerprint": "e21e05c92870b82468b5d73f04d205b6aafad4143331cf718131f0517ba34e0a",
     "scaling_budget": 1.5,
     "_added": "#1956: cpp added to the scope-capture bench (was UNBENCHED). Heritage-bearing scale source (: public Base, public Mixin) drives emitCppInheritanceCaptures at scale. Adding it exposed + fixed a pre-existing O(n^2) findNodeAtRange root-walk in cpp/captures.ts (~12 sites, threaded c.node, byte-identical over 263 cpp-* fixtures); scaling 2.30 -> 1.12.",
     "_rebaselined": "#1965 / #1923 F4: uninitialized non-leading multi-declarators now emit @declaration.variable captures; cpp-adl-inner-callable-outer-noncallable data::Pair a, b adds the legitimate fixture drift. Linear (~1.06).",
-    "_note": "#1975: + cpp-out-of-line-class fixture (out-of-line struct Outer::Inner / Other::Inner). Pure fixture-corpus drift — the fix is the legacy structure-query qualified_identifier arm, NOT the cpp scope-extractor; existing fixtures' captures byte-identical. fixture_count 263->265."
+    "_note": "#1975: + cpp-out-of-line-class fixture, fixture_count 263->265. #1990: + cpp-adl-ns-plus-hidden-friend-same-name fixture (ADL hidden-friend + namespace-callable merge parity test). Pure fixture-corpus drift — no scope-extractor change; existing fixtures' captures byte-identical. fixture_count 265->267."
   },
   "csharp": {
     "_rebaselined": "#1956 synth-widening: + csharp-qualified-base fixture; the synth now walks record_declaration + struct_declaration base_lists and handles alias_qualified_name (matching the #1940 legacy leg), so record/struct heritage now emits. csharp-record-base gains a record inherits capture. (record->record SAME-namespace EXTENDS is a separate registry resolution gap, tracked as follow-up.) Linear (~1.00). (Earlier #1956: heritage-bearing scale source.)",

@@ -0,0 +1,18 @@
+#include "lib.h"
+
+// Neither `combine` nor `process` is declared at global scope, so ordinary
+// unqualified lookup fails; the lib::T argument makes ADL search T's associated
+// namespace `lib`. `combine` is only reachable as a hidden friend
+// (friendCandidates); `process` only as a namespace member (nsCandidates). Both
+// must resolve — that proves pickCppAdlCandidates consults BOTH buckets when merging.
+
+void call_friend() {
+  lib::T a;
+  lib::T b;
+  combine(a, b);  // unqualified call; `combine` is the hidden friend declared inside lib::T
+}
+
+void call_ns() {
+  lib::T t;
+  process(t);  // unqualified call; `process` is the namespace-scope function in `lib`
+}
@@ -0,0 +1,12 @@
+namespace lib {
+
+struct T {
+  // Hidden friend: defined inside T, so it is a member of namespace `lib` but
+  // is found ONLY via ADL on a T argument. Exercises the friendCandidates bucket.
+  friend void combine(T& a, T& b) {}
+};
+
+// Ordinary namespace-scope function of `lib`. Exercises the nsCandidates bucket.
+void process(T& x) {}
+
+}  // namespace lib
@@ -0,0 +1,169 @@
+/**
+ * C++ ADL (argument-dependent lookup) emit-scaling benchmark.
+ *
+ * Guards the optimization in PR #1990: `pickCppAdlCandidates` used to rescan all
+ * parsed files (and all workspace defs) once PER unresolved ADL call site —
+ * O(sites × files). It now queries a once-built index — O(sites). This benchmark
+ * reproduces the pathological shape (many unresolved ADL sites) and asserts the
+ * scope-resolution EMIT phase scales sub-quadratically.
+ *
+ * Run: GITNEXUS_BENCH=1 npx vitest run test/integration/cpp-adl-benchmark.test.ts
+ *
+ * WHY EMIT MS, NOT WALL TIME: the fixture is parsed single-threaded
+ * (workerPoolSize: 0, so no dist build is needed), and parse dominates total
+ * wall time — masking the ADL cost. We isolate the scope-resolution `emit` ms
+ * from the profiler log (captured in-process via the logger test destination).
+ *
+ * WHY CO-SCALE FILES AND SITES: the regression is O(sites × files). At fixed
+ * files, both the old and new code are linear in sites and indistinguishable.
+ * Scaling both with N makes the OLD cost O(N²) and the NEW cost O(N); the
+ * end-to-end emit ratio then separates them cleanly (linear ≈ Nratio,
+ * quadratic ≈ Nratio²). The guard sits at Nratio^1.5.
+ */
+import { describe, it, expect } from 'vitest';
+import fs from 'node:fs';
+import os from 'node:os';
+import path from 'node:path';
+import { runPipelineFromRepo } from '../../src/core/ingestion/pipeline.js';
+import { _captureLogger } from '../../src/core/logger.js';
+
+// Opt-in switch: true only when the GITNEXUS_BENCH environment variable is exactly '1'.
+const BENCH_ENABLED = process.env.GITNEXUS_BENCH === '1';
+
+/** Measurements collected by one `runBenchmark` invocation. */
+interface BenchResult {
+  /** Number of generated `lib_<k>.h` headers requested for this run. */
+  fileCount: number;
+  /** Number of generated caller functions (ADL call sites) requested for this run. */
+  siteCount: number;
+  /** Wall-clock milliseconds spent inside `runPipelineFromRepo` (via `Date.now()`). */
+  elapsedMs: number;
+  /** Largest `emit=<n>ms` value found in captured profiler log lines; NaN if none was captured. */
+  emitMs: number;
+  /** Highest sampled `heapUsed`, in MiB, rounded to an integer. */
+  peakHeapMB: number;
+  /** `nodeCount` of the graph returned by the pipeline. */
+  nodeCount: number;
+  /** Number of relationships in the returned graph whose `type` is `'CALLS'`. */
+  callsResolved: number;
+}
+
+/**
+ * Create a throwaway C++ workspace in the OS temp directory for the ADL
+ * benchmark.
+ *
+ * Layout:
+ *  - `fileCount` headers `lib_<k>.h`, each declaring namespace `lib_<k>` with
+ *    an empty struct `T<k>` and three `helper<k>_<j>(T<k>&)` functions.
+ *  - one `app.cpp` that includes every header and defines `siteCount` callers.
+ *    Caller `i` declares a local of type `lib_<k>::T<k>` (k = i % fileCount)
+ *    and calls `ghost(t)`.
+ *
+ * `ghost` is declared NOWHERE in the generated files. The intent is that
+ * ordinary lookup fails, ADL fires (the argument is class-typed), the index is
+ * consulted, and the site stays UNRESOLVED — the maximal-scan shape the
+ * optimization targets. Per-file content is constant-size; sites scale
+ * independently of files.
+ *
+ * @param fileCount Number of headers to generate. Must be a non-negative
+ *   integer, and at least 1 whenever `siteCount > 0` (callers pick their header
+ *   with `i % fileCount`, which is NaN for zero files).
+ * @param siteCount Number of caller functions to generate. Must be a
+ *   non-negative integer.
+ * @returns `{ dir }`, the path of the created directory. The caller owns it
+ *   and is responsible for removing it.
+ * @throws RangeError if the counts are invalid. Filesystem errors are
+ *   rethrown after the partially written directory has been removed.
+ */
+function generateCppAdlFixture(fileCount: number, siteCount: number): { dir: string } {
+  if (!Number.isInteger(fileCount) || fileCount < 0) {
+    throw new RangeError(`fileCount must be a non-negative integer, got ${fileCount}`);
+  }
+  if (!Number.isInteger(siteCount) || siteCount < 0) {
+    throw new RangeError(`siteCount must be a non-negative integer, got ${siteCount}`);
+  }
+  if (siteCount > 0 && fileCount === 0) {
+    throw new RangeError('fileCount must be at least 1 when siteCount > 0');
+  }
+
+  const dir = fs.mkdtempSync(path.join(os.tmpdir(), `cpp-adl-bench-${fileCount}-`));
+  try {
+    for (let k = 0; k < fileCount; k++) {
+      const helpers = Array.from(
+        { length: 3 },
+        (_, j) => `void helper${k}_${j}(T${k}& x) {}`,
+      ).join('\n');
+      fs.writeFileSync(
+        path.join(dir, `lib_${k}.h`),
+        `namespace lib_${k} {\nstruct T${k} {};\n${helpers}\n}\n`,
+      );
+    }
+    const includes = Array.from({ length: fileCount }, (_, k) => `#include "lib_${k}.h"`).join(
+      '\n',
+    );
+    const callers = Array.from({ length: siteCount }, (_, i) => {
+      // Round-robin over the headers so every namespace gets call sites.
+      const k = i % fileCount;
+      return `void call_${i}() {\n  lib_${k}::T${k} t;\n  ghost(t);\n}`;
+    }).join('\n');
+    fs.writeFileSync(path.join(dir, 'app.cpp'), `${includes}\n\n${callers}\n`);
+  } catch (err: unknown) {
+    // Do not leave a half-written workspace behind in the temp directory.
+    fs.rmSync(dir, { recursive: true, force: true });
+    throw err;
+  }
+  return { dir };
+}
+
+/**
+ * Scan captured log records for scope-resolution profiler lines and return the
+ * biggest `emit=<n>ms` figure among them (taking the max is meant to select the
+ * C++ pass, which is expected to dominate).
+ *
+ * @param records Captured log records; only the optional `msg` field is read.
+ * @returns The largest emit duration in milliseconds, or NaN when no record
+ *   carries a matching profiler line.
+ */
+function extractEmitMs(records: { msg?: string }[]): number {
+  const emitPattern = /\[scope-resolution prof\].*emit=(\d+(?:\.\d+)?)ms/;
+  return records.reduce((best, record) => {
+    const hit = emitPattern.exec(record.msg ?? '');
+    if (hit === null) return best;
+    const ms = Number(hit[1]);
+    // NaN is the "nothing seen yet" seed, so the first hit always wins.
+    return Number.isNaN(best) || ms > best ? ms : best;
+  }, NaN);
+}
+
+/**
+ * Generate a fixture, run the ingestion pipeline over it once, and collect
+ * timing, heap and graph measurements.
+ *
+ * While the pipeline runs, `PROF_SCOPE_RESOLUTION` is forced to `'1'` and the
+ * logger is captured in-process so the profiler's `emit=<n>ms` line can be read
+ * back. Both are process-wide, so invocations must not overlap.
+ *
+ * Every resource is acquired inside the `try`, so the `finally` always undoes
+ * whatever was set up (logger capture, env var, heap timer, temp directory),
+ * even if setup itself throws.
+ *
+ * @param fileCount Number of headers in the generated fixture.
+ * @param siteCount Number of call sites in the generated fixture.
+ * @returns The measurements for this run; `emitMs` is NaN when no profiler
+ *   line was captured.
+ */
+async function runBenchmark(fileCount: number, siteCount: number): Promise<BenchResult> {
+  const { dir } = generateCppAdlFixture(fileCount, siteCount);
+
+  let peakHeapMB = 0;
+  const sampleHeap = (): void => {
+    const heap = process.memoryUsage().heapUsed / 1024 / 1024;
+    if (heap > peakHeapMB) peakHeapMB = heap;
+  };
+
+  const prevProf = process.env.PROF_SCOPE_RESOLUTION;
+  let heapSampler: ReturnType<typeof setInterval> | undefined;
+  let cap: ReturnType<typeof _captureLogger> | undefined;
+  try {
+    process.env.PROF_SCOPE_RESOLUTION = '1';
+    cap = _captureLogger();
+    heapSampler = setInterval(sampleHeap, 50);
+    // The 50 ms timer may never fire if the pipeline keeps the event loop busy,
+    // so sample explicitly at both ends to guarantee a non-zero reading.
+    sampleHeap();
+
+    const start = Date.now();
+    const result = await runPipelineFromRepo(dir, () => {}, { workerPoolSize: 0 });
+    const elapsedMs = Date.now() - start;
+    sampleHeap();
+    const emitMs = extractEmitMs(cap.records());
+
+    let callsResolved = 0;
+    for (const rel of result.graph.iterRelationships()) {
+      if (rel.type === 'CALLS') callsResolved++;
+    }
+
+    return {
+      fileCount,
+      siteCount,
+      elapsedMs,
+      emitMs,
+      peakHeapMB: Math.round(peakHeapMB),
+      nodeCount: result.graph.nodeCount,
+      callsResolved,
+    };
+  } finally {
+    cap?.restore();
+    if (prevProf === undefined) delete process.env.PROF_SCOPE_RESOLUTION;
+    else process.env.PROF_SCOPE_RESOLUTION = prevProf;
+    if (heapSampler !== undefined) clearInterval(heapSampler);
+    fs.rmSync(dir, { recursive: true, force: true });
+  }
+}
+
+/**
+ * Print the benchmark results to stdout as a box-drawn table, one row per run.
+ * The emit column shows `n/a` when no profiler figure was captured (NaN);
+ * otherwise the value is rounded to whole milliseconds.
+ *
+ * @param results Runs to print, in display order.
+ */
+function printResults(results: BenchResult[]) {
+  const cell = (value: unknown, width: number): string => String(value).padStart(width);
+
+  const lines: string[] = [
+    '\nC++ ADL emit-scaling benchmark (unresolved-site pattern)',
+    '┌────────┬────────┬───────────┬──────────┬──────────┬───────┬───────────┐',
+    '│ Files  │ Sites  │ Wall (ms) │ Emit (ms)│ Heap MB  │ Nodes │ CALLS res │',
+    '├────────┼────────┼───────────┼──────────┼──────────┼───────┼───────────┤',
+  ];
+  for (const run of results) {
+    const emit = Number.isNaN(run.emitMs) ? 'n/a' : Math.round(run.emitMs);
+    const cells = [
+      cell(run.fileCount, 6),
+      cell(run.siteCount, 6),
+      cell(run.elapsedMs, 9),
+      cell(emit, 8),
+      cell(run.peakHeapMB, 8),
+      cell(run.nodeCount, 5),
+      cell(run.callsResolved, 9),
+    ];
+    lines.push(`│ ${cells.join(' │ ')} │`);
+  }
+  lines.push('└────────┴────────┴───────────┴──────────┴──────────┴───────┴───────────┘');
+
+  // One console.log call per table line.
+  for (const line of lines) {
+    console.log(line);
+  }
+}
+
+describe.skipIf(!BENCH_ENABLED)('C++ ADL emit benchmark', () => {
+  it('emit phase scales sub-quadratically with co-scaled files and sites', async () => {
+    // Each step uses N files and 6N call sites, so the pre-fix emit cost
+    // O(sites × files) grows as O(6N²) while the indexed version grows as O(N).
+    // Runs are awaited one at a time, never in parallel.
+    const results: BenchResult[] = [];
+    for (const fileCount of [40, 80, 160]) {
+      results.push(await runBenchmark(fileCount, fileCount * 6));
+    }
+    printResults(results);
+
+    const smallest = results[0];
+    const largest = results[results.length - 1];
+    const fileRatio = largest.fileCount / smallest.fileCount;
+
+    // The emit-based guard is only meaningful when the profiler line was
+    // captured at both ends and the baseline is a usable, non-zero divisor.
+    const emitCaptured =
+      !Number.isNaN(smallest.emitMs) && !Number.isNaN(largest.emitMs) && smallest.emitMs > 0;
+
+    if (emitCaptured) {
+      // Primary guard on the isolated emit time: linear growth ≈ fileRatio,
+      // quadratic ≈ fileRatio². The bound fileRatio^1.5 sits between the two,
+      // leaving margin for wall-clock and GC noise.
+      expect(largest.emitMs / smallest.emitMs).toBeLessThan(fileRatio ** 1.5);
+    } else {
+      // Fallback when in-process capture is unavailable: a coarse guard on
+      // total wall time. Parse dominates wall time, so this only catches gross
+      // blow-ups, not the ADL regression itself.
+      expect(largest.elapsedMs / smallest.elapsedMs).toBeLessThan(fileRatio ** 2);
+    }
+
+    // Sanity: `ghost` is declared nowhere, so no call may resolve. The
+    // benchmark must be stressing the scan path, not edge emission.
+    expect(largest.callsResolved).toBe(0);
+  }, 600_000);
+});