diff --git a/.github/workflows/benchmark.yaml b/.github/workflows/benchmark.yaml index aa16f3c6b..73b23b468 100644 --- a/.github/workflows/benchmark.yaml +++ b/.github/workflows/benchmark.yaml @@ -1,15 +1,24 @@ name: benchmark # Runs the benchmark matrix on every PR targeting main, on pushes to main -# (baseline refresh), and on manual dispatch. The job compares PR ops/s -# against the latest main baseline and flags >5% throughput regressions -# and >10% memory regressions. +# (baseline refresh), and on manual dispatch. +# +# Variance control: the PR job benchmarks BOTH origin/main and the PR +# branch on the SAME runner within the SAME workflow invocation. This +# eliminates cross-host variance (different physical CPUs, SMT +# neighbours, thermal states) that otherwise leaks past taskset pinning +# plus median-of-5 and produces 5-7% false-positive regressions. The +# comparison is then host-identical — every factor except the code under +# test is held constant. # # Storage model (see benchmarks/baselines/README.md): -# - Artifacts hold the authoritative JSON for trend history (90-day -# retention for PRs, 365 days for the main baseline). -# - `benchmarks/baselines/main.json` is an in-repo quick-reference copy -# that gets refreshed by a follow-up chore PR after a merge to main. +# - `bench-baseline-main` artifact is still uploaded on every push to +# main so external consumers / trend dashboards keep working. PR +# comparison no longer reads from it. +# - `bench-results-` artifact holds the PR's own numbers +# with 90-day retention for post-hoc investigation. +# - `benchmarks/baselines/main.json` remains the in-repo +# quick-reference copy refreshed by a follow-up chore PR. on: pull_request: @@ -41,14 +50,18 @@ jobs: bench-matrix: name: bench-matrix (${{ github.event_name }}) runs-on: ubuntu-latest - timeout-minutes: 25 + # Two full bench passes (baseline + current) with median-of-5 take + # roughly 2x the single-pass time. 
Previous single-pass runs landed + # around 9 min; 40 min buffer covers the doubled work plus install + + # build overhead even on a slow runner. + timeout-minutes: 40 steps: - name: Checkout uses: actions/checkout@v6 with: - # Enough history that we can also check out the base branch for - # the baseline comparison pass if the artifact download fails. + # Enough history that we can also check out origin/main for the + # same-runner baseline pass. fetch-depth: 0 - name: Setup Node ${{ env.NODE_VERSION }} @@ -57,18 +70,66 @@ jobs: node-version: ${{ env.NODE_VERSION }} cache: "npm" - - name: Install + - name: Record commit SHAs + id: shas + run: | + set -euo pipefail + echo "current=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT" + if [[ "${{ github.event_name }}" == "pull_request" ]]; then + # fetch-depth: 0 above should already have origin/main, but + # an explicit fetch makes the resolve idempotent on shallow + # edge cases. + git fetch --no-tags --depth=1 origin main + echo "main=$(git rev-parse origin/main)" >> "$GITHUB_OUTPUT" + fi + + # ---------------------------------------------------------------- + # Same-runner baseline (PR only). Check out origin/main, install, + # build, and run the matrix. Output is `baseline-results.json`. 
+ # ---------------------------------------------------------------- + - name: Checkout main (for baseline) + if: github.event_name == 'pull_request' + run: git -c advice.detachedHead=false checkout ${{ steps.shas.outputs.main }} -- + + - name: Install (main) + if: github.event_name == 'pull_request' run: npm ci --ignore-scripts env: HUSKY: 0 - - name: Build @bufbuild/protobuf + - name: Build @bufbuild/protobuf (main) + if: github.event_name == 'pull_request' run: npx turbo run build --filter=@bufbuild/protobuf - - name: Generate benchmark code (proto + pbjs) + - name: Generate benchmark code (main) + if: github.event_name == 'pull_request' run: npx turbo run generate --filter=@bufbuild/protobuf-benchmarks - - name: Run benchmark matrix + - name: Run benchmark matrix (baseline / main) + if: github.event_name == 'pull_request' + working-directory: benchmarks + run: bash scripts/run-matrix-ci.sh baseline-results.json + + - name: Return to PR head + if: github.event_name == 'pull_request' + run: git -c advice.detachedHead=false checkout ${{ steps.shas.outputs.current }} -- + + # ---------------------------------------------------------------- + # Current run. For PRs this is the PR merge commit; for push-to-main + # it is main itself (and becomes the new baseline artifact). + # ---------------------------------------------------------------- + - name: Install (current) + run: npm ci --ignore-scripts + env: + HUSKY: 0 + + - name: Build @bufbuild/protobuf (current) + run: npx turbo run build --filter=@bufbuild/protobuf + + - name: Generate benchmark code (current) + run: npx turbo run generate --filter=@bufbuild/protobuf-benchmarks + + - name: Run benchmark matrix (current) working-directory: benchmarks run: bash scripts/run-matrix-ci.sh bench-results.json @@ -79,59 +140,22 @@ jobs: path: benchmarks/bench-results.json retention-days: 90 - # ------------------------------------------------------------------ - # Baseline acquisition (PR only). 
For `push` to main, the PR run - # becomes the new baseline — the artifact upload below is sufficient. - # ------------------------------------------------------------------ - - - name: Download latest main baseline artifact - if: github.event_name == 'pull_request' - id: dl-baseline - uses: dawidd6/action-download-artifact@v6 - continue-on-error: true - with: - workflow: benchmark.yaml - branch: main - name: bench-baseline-main - path: benchmarks/baseline-download - search_artifacts: true - if_no_artifact_found: warn - - - name: Resolve baseline source - if: github.event_name == 'pull_request' - run: | - set -euo pipefail - if [[ -f benchmarks/baseline-download/bench-results.json ]]; then - cp benchmarks/baseline-download/bench-results.json benchmarks/baseline-results.json - echo "Using downloaded main artifact as baseline." - elif [[ -f benchmarks/baselines/main.json ]]; then - cp benchmarks/baselines/main.json benchmarks/baseline-results.json - echo "Using in-repo benchmarks/baselines/main.json as baseline." - else - echo "No baseline available — compare step will emit an informational report." - fi - + # ---------------------------------------------------------------- + # Compare and comment (PR only). Baseline is the JSON we just + # produced on this same runner a few minutes ago. 
+ # ---------------------------------------------------------------- - name: Compare PR against baseline if: github.event_name == 'pull_request' id: compare working-directory: benchmarks run: | set -euo pipefail - if [[ -f baseline-results.json ]]; then - npx tsx scripts/compare-results.ts \ - --baseline=baseline-results.json \ - --current=bench-results.json \ - --output=bench-report.md \ - --threshold-ops=5 \ - --threshold-mem=10 - else - npx tsx scripts/compare-results.ts \ - --current=bench-results.json \ - --output=bench-report.md \ - --threshold-ops=5 \ - --threshold-mem=10 \ - --no-baseline - fi + npx tsx scripts/compare-results.ts \ + --baseline=baseline-results.json \ + --current=bench-results.json \ + --output=bench-report.md \ + --threshold-ops=5 \ + --threshold-mem=10 if grep -q "REGRESSION" bench-report.md 2>/dev/null; then echo "status=regression" >> "$GITHUB_OUTPUT" else @@ -150,12 +174,10 @@ jobs: run: | echo "::warning::Benchmark matrix flagged a regression. See the PR comment for the full table." - # ------------------------------------------------------------------ - # Baseline refresh (push-to-main only). The PR run becomes the new - # authoritative baseline and gets uploaded as a stable-named artifact - # so subsequent PR jobs can pull it. - # ------------------------------------------------------------------ - + # ---------------------------------------------------------------- + # Baseline refresh (push-to-main only). Uploaded for external / + # historical consumers; PR jobs no longer read from it. 
+ # ---------------------------------------------------------------- - name: Upload baseline artifact (main only) if: github.event_name == 'push' && github.ref == 'refs/heads/main' uses: actions/upload-artifact@v4 diff --git a/benchmarks/.gitignore b/benchmarks/.gitignore index 7c9a233e5..2b72def35 100644 --- a/benchmarks/.gitignore +++ b/benchmarks/.gitignore @@ -3,5 +3,7 @@ src/gen-protobufjs node_modules dist bench-results.json +baseline-results.json +bench-report.md bench-streaming-results.json .heap-profs diff --git a/benchmarks/README.md b/benchmarks/README.md index 5863de41c..c0400ed7b 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -42,7 +42,7 @@ npm run bench:heap-prof -w @bufbuild/protobuf-benchmarks | `bench-comparison-protobufjs.ts` | Cross-library comparison: protobuf-es vs `protobufjs` (pbjs static codegen) on the same `.proto` fixture. Covers full roundtrip, encode-only, decode-only | | `bench-matrix.ts` | `toBinary` + `fromBinary` across the full realistic-fixture matrix (OTel traces/metrics/logs, K8s Pod list, GraphQL request/response, RPC envelope, stress). Emits a JSON summary on stdout for CI diffing | | `bench-memory.ts` | Heap allocations per operation (`heapUsed` delta after forced GC) for both libraries. Requires `--expose-gc` | -| `bench-streaming.ts` | gRPC-style streaming encode throughput via `sizeDelimitedEncode`. Three stream shapes (small/medium/large) × three encoders (`toBinary`, `toBinaryFast`, `protobufjs encodeDelimited`). Emits `bench-streaming-results.json` with ops/sec + MB/s | +| `bench-streaming.ts` | gRPC-style streaming encode throughput via `sizeDelimitedEncode`. Three stream shapes (small/medium/large) × two encoders (`toBinary`, `protobufjs encodeDelimited`). Emits `bench-streaming-results.json` with ops/sec + MB/s | | `heap-prof-driver.ts` + `scripts/analyze-heap-prof.ts` | Per-call-site allocation attribution via V8's sampling heap profiler. 
Replaces the coarse `heapUsed` delta in `bench-memory.ts` with function/file-level bytes | ## Methodology @@ -121,34 +121,68 @@ table below. See `src/report.ts` for the generator, for the per-fixture protobufjs adapters. The log-scale chart below shows absolute throughput per fixture across -`toBinary`, `toBinaryFast`, and `protobufjs` (pbjs static-module codegen). -Numeric labels above each bar carry the ops/sec figure so the legend -cross-references the table without requiring a second lookup. +three encoders: + +- **`upstream-protobuf-es`** — unmodified `@bufbuild/protobuf@latest` + published on npm, installed via the `upstream-protobuf-es` alias. This + is the pre-L0 baseline; the red column represents how original + protobuf-es performs. +- **`toBinary` (fork)** — this fork's in-tree `toBinary`, which already + ships the L0 contiguous-writer optimisation (PR #8). Byte-identical + output with upstream; public API unchanged. +- **`protobufjs`** — ahead-of-time codegen via pbjs static-module, included + as a cross-library reference. + +The experimental L1+L2 schema-plan encoder (`toBinaryFast`) is intentionally +not shown here — it lives on branch +`archive/l1-l2-schema-plans-experimental` for future iteration (see the +"Current state" note below). Numeric labels above each bar carry the +ops/sec figure so the legend cross-references the table without requiring +a second lookup. ![chart](./chart.svg) -The linear-scale delta chart shows `toBinaryFast`'s percentage speedup over -each baseline per fixture — this is the view to read for "how much faster, -in plain terms". Positive bars indicate `toBinaryFast` is faster; negative -bars indicate the baseline is ahead on that fixture. Both baselines -(`toBinary`, `protobufjs`) are drawn where available. +The linear-scale delta chart shows the fork's `toBinary` percentage +speedup over each baseline per fixture — this is the view to read for +"how much faster, in plain terms". 
Positive bars indicate the fork's +`toBinary` is faster; negative bars indicate the baseline is ahead on +that fixture. "vs upstream" is the honest cumulative gain over original +`@bufbuild/protobuf@latest`; "vs protobufjs" is the cross-library +reference. ![chart-delta](./chart-delta.svg) +### Current state + +Main ships `toBinary` only (L0 contiguous writer, PR #8). The +experimental L1+L2 schema-plan work (`toBinaryFast`) was prototyped on +branches `feat/l1-l2-schema-plans` and +`feat/merge-l1-l2-into-tobinary` — merging it into the public `toBinary` +broke three extension-related unit tests and six conformance cases (the +plan walk does not cover extension encoding). That work is preserved on +branch `archive/l1-l2-schema-plans-experimental` for future iteration +and is intentionally absent from this chart and from the public API on +main. + +The `toBinary` column here is this fork's L0-optimised writer, compared +against upstream `@bufbuild/protobuf@latest` (the red column) which is +the pre-L0 reflective baseline. That comparison is the honest measure +of what main delivers today. 
+ -| Fixture | Bytes | toBinary | toBinaryFast | protobufjs | Best | -| ---------------------------------- | -----: | -------: | -----------: | ---------: | -------------------- | -| SimpleMessage | 19 | 923,094 | 1.02M | 2.17M | protobufjs (2.13x) | -| ExportTraceRequest (100 spans) | 32,926 | 1,299 | 1,352 | 1,194 | toBinaryFast (1.04x) | -| ExportMetricsRequest (50 series) | 17,696 | 1,722 | 2,081 | 1,939 | toBinaryFast (1.07x) | -| ExportLogsRequest (100 records) | 21,319 | 1,434 | 1,617 | 1,768 | protobufjs (1.09x) | -| K8sPodList (20 pods) | 28,900 | 1,191 | 1,185 | 1,834 | protobufjs (1.54x) | -| GraphQLRequest | 624 | 91,755 | 79,812 | 318,278 | protobufjs (3.47x) | -| GraphQLResponse | 1,366 | 94,188 | 137,500 | 482,558 | protobufjs (3.51x) | -| RpcRequest | 501 | 159,562 | 135,941 | 209,268 | protobufjs (1.31x) | -| RpcResponse | 602 | 269,090 | 256,630 | 343,420 | protobufjs (1.28x) | -| StressMessage (depth=8, width=200) | 12,868 | 4,590 | 6,005 | 7,547 | protobufjs (1.26x) | +| Fixture | Bytes | upstream | toBinary (fork) | protobufjs | Best | +| ---------------------------------- | -----: | -------: | --------------: | ---------: | ------------------ | +| SimpleMessage | 19 | 1.27M | 1.35M | 2.99M | protobufjs (2.21x) | +| ExportTraceRequest (100 spans) | 32,926 | 536 | 1,634 | 1,849 | protobufjs (1.13x) | +| ExportMetricsRequest (50 series) | 17,696 | 872 | 2,701 | 3,107 | protobufjs (1.15x) | +| ExportLogsRequest (100 records) | 21,319 | 910 | 2,973 | 3,094 | protobufjs (1.04x) | +| K8sPodList (20 pods) | 28,900 | 837 | 3,140 | 3,819 | protobufjs (1.22x) | +| GraphQLRequest | 624 | 145,574 | 213,975 | 612,621 | protobufjs (2.86x) | +| GraphQLResponse | 1,366 | 149,786 | 315,185 | 869,456 | protobufjs (2.76x) | +| RpcRequest | 501 | 133,777 | 408,083 | 415,477 | protobufjs (1.02x) | +| RpcResponse | 602 | 184,283 | 629,814 | 531,720 | toBinary (1.18x) | +| StressMessage (depth=8, width=200) | 12,868 | 2,873 | 10,476 | 12,371 | protobufjs (1.18x) | @@ 
-156,26 +190,29 @@ bars indicate the baseline is ahead on that fixture. Both baselines The authoritative numbers live in the auto-generated table and charts above — regenerate via `npm run bench:report -w -@bufbuild/protobuf-benchmarks`. The writer stack that produces those -numbers is: +@bufbuild/protobuf-benchmarks` (median of 5 runs by default; override +via `BENCH_REPORT_RUNS`). The public `toBinary` on main ships a single +optimisation layer: - **L0 (contiguous BinaryWriter).** Single growable `Uint8Array` + `pos` - cursor. Replaced the fork/join chunk-list writer. Direct gain from - eliminating per-submessage allocations. -- **L1 (schema plan).** Each `DescMessage` compiles once into a plan that - pre-computes tag bytes, field numbers, and wire types; subsequent - encodes walk the plan instead of the descriptor. -- **L2 (specialized field writers).** Per-scalar-type inlined writers - keyed by `ScalarType`, skipping the reflective dispatch on the hot - path. ASCII fast-path in the string writer. - -Combined, the stack brings `toBinaryFast` to **roughly 0.80x protobufjs -on the OTel 100-span fixture** without codegen, up from the baseline -reflective path at ~0.18x. Fixtures where protobufjs is dramatically -ahead (SimpleMessage, GraphQLRequest, RpcRequest) are dominated by -per-call overhead on small payloads — pbjs static-module codegen wins on -those because its generated encoder inlines the entire write without a -single function-pointer indirection. + cursor. Replaced upstream's fork/join chunk-list writer. Eliminates + per-submessage allocations on nested messages, preserves byte-identical + output vs upstream, and keeps the public `toBinary` signature unchanged. + +On the OTel 100-span fixture the fork's `toBinary` runs at **roughly +0.88x protobufjs** — `toBinary` 1,634 ops/s vs protobufjs 1,849 ops/s — +up from upstream's reflective baseline at ~0.29x protobufjs (536 ops/s). 
+The cumulative gain over upstream ranges from **+7%** on tiny flat +messages (SimpleMessage) to **+275%** on map-heavy configs (K8sPodList) +and **+265%** on deeply-nested synthetic stress messages. Fixtures where +protobufjs is dramatically ahead (SimpleMessage, GraphQLRequest, +GraphQLResponse) are dominated by per-call overhead on small payloads — +pbjs static-module codegen wins on those because its generated encoder +inlines the entire write without a single function-pointer indirection. + +Higher-level optimisations (L1 schema plans, L2 specialized field +writers) were prototyped as `toBinaryFast` but removed from main — see +"Archived work" below. ### Known-pathological case: `fromJsonString + toBinary` @@ -214,10 +251,9 @@ Three stream shapes cover the realistic distribution: | medium | 10 × `ExportTraceRequest` (100 spans each, ~33 KB each) | OTel export: batched uploads | | large | 5 × `K8sPodList` (20 pods each, ~29 KB each) | kubelet list pagination | -Three encoders are compared per shape: +Two encoders are compared per shape: -- `toBinary` — reflective encoder (baseline) -- `toBinaryFast` — L0 contiguous writer + L1 tag caching + L2 field dispatch +- `toBinary` — reflective encoder with L0 contiguous writer (shipped on main) - `protobufjs encodeDelimited` — ahead-of-time codegen (not available on the large stream; pbjs init-shape lives with the main report in `report-pbjs.ts`) @@ -235,7 +271,7 @@ is responsible for those bytes. Run: ```bash -# Default: OTel 100-span workload, 1000 iterations, toBinaryFast encoder +# Default: OTel 100-span workload, 1000 iterations, toBinary encoder (L0) npm run bench:heap-prof -w @bufbuild/protobuf-benchmarks # Narrow to the protobuf encoder source tree (drops one-time schema @@ -258,10 +294,13 @@ Pipeline: `(function, file, line)`, and prints a markdown table of the top-N allocation sites plus a per-file summary. -Fixtures: `otel100` (default), `metrics50`, `k8s20`, `rpc`. Encoders: -`toBinary`, `toBinaryFast`. 
+Fixtures: `otel100` (default), `metrics50`, `k8s20`, `rpc`. Encoder: +`toBinary` (the L0 contiguous writer shipped on main). -Example output (`--focus-encoder`, OTel 100-span, `toBinaryFast`, 5000 iters): +Example output (`--focus-encoder`, OTel 100-span, 5000 iters — historical +snapshot captured with `toBinaryFast` on the L1+L2 prototype branch; kept as +a reference for how the per-call-site report reads. Current main produces a +different profile dominated by the L0 reflective walk): ``` ## Top 14 allocation sites (by self bytes) @@ -298,13 +337,32 @@ The `.heapprofile` file is also directly openable in Chrome DevTools - **`ts-proto` comparison** on the same fixtures (separate package, opt-in dependency). Would round out the "ahead-of-time codegen" comparison group alongside protobufjs. -- **Multi-shape benchmark in CI matrix.** `bench-multishape.ts` exists - but only runs locally; CI currently measures single-shape repeated - encode, which underweights scenarios where the same schema is encoded - with multiple distinct field-presence patterns (RPC request/response - variants, oneof arms). A future pass should integrate multi-shape - rows into `bench-matrix.ts` so regressions on that axis surface in - PR reports. -- **Decoder fast path.** `toBinaryFast` ships; an equivalent - `fromBinaryFast` would close the remaining gap to protobufjs on the - decode column of the matrix. +- **Multi-shape benchmark in CI matrix.** CI currently measures + single-shape repeated encode, which underweights scenarios where the + same schema is encoded with multiple distinct field-presence patterns + (RPC request/response variants, oneof arms). A future pass should + add multi-shape rows into `bench-matrix.ts` so regressions on that + axis surface in PR reports. +- **Decoder optimisation.** `fromBinary` on main is still the reflective + walk — a contiguous-reader equivalent of L0 would close the remaining + gap to protobufjs on the decode column of the matrix. 
+ +## Archived work + +Earlier passes prototyped two higher-level optimisations on top of L0. +Both were removed from main and preserved for future iteration: + +- **L1 (schema plan) + L2 (specialized field writers)** — compile each + `DescMessage` into an opcode plan that pre-computes tag bytes and + field wire types, then walk the plan instead of the descriptor on each + encode. Prototyped as an opt-in `toBinaryFast` export; merging it into + the public `toBinary` broke three extension-related unit tests and + six conformance cases because the plan walk does not cover extension + encoding. Preserved on branch + [`archive/l1-l2-schema-plans-experimental`](../../../tree/archive/l1-l2-schema-plans-experimental). +- **L3 (runtime monomorphization)** — observe shape of messages handed + to the encoder, graduate frequently-seen shapes into specialised plan + variants that skip field-presence checks. Draft PR showed a 4-variant + cap with seal-on-breach; CI revealed a net regression on single-shape + workloads once the observation/lookup overhead was added to the hot + path. Code also archived on the same branch. 
diff --git a/benchmarks/chart-delta.svg b/benchmarks/chart-delta.svg index 6a020b410..a2bf83e0e 100644 --- a/benchmarks/chart-delta.svg +++ b/benchmarks/chart-delta.svg @@ -9,111 +9,111 @@ .zero { stroke: #333; stroke-width: 1.5; } ]]> - toBinaryFast speedup vs baselines (linear %) - higher is better — "+300%" means 4x throughput + Fork toBinary (L0) speedup vs baselines (linear %) + higher is better — "+300%" means 4x throughput; "vs upstream" is the cumulative gain over @bufbuild/protobuf@latest - - vs toBinary - - vs protobufjs + + vs upstream (@bufbuild/protobuf@latest) + + vs protobufjs - - 0% - - 100% - - 200% + + 0% + + 100% + + 200% 300% - + SimpleMessage - - SimpleMessage: toBinaryFast is 10.1% faster than toBinary + + SimpleMessage: fork toBinary vs upstream = 6.8% - 10% - - SimpleMessage: toBinaryFast vs protobufjs = -53.1% + 7% + + SimpleMessage: fork toBinary vs protobufjs = -54.8% - -53% + -55% ExportTraceRequest (100 spans) - - ExportTraceRequest (100 spans): toBinaryFast is 4.1% faster than toBinary + + ExportTraceRequest (100 spans): fork toBinary vs upstream = 204.7% - 4% - - ExportTraceRequest (100 spans): toBinaryFast vs protobufjs = 13.2% + 205% + + ExportTraceRequest (100 spans): fork toBinary vs protobufjs = -11.6% - 13% + -12% ExportMetricsRequest (50 series) - - ExportMetricsRequest (50 series): toBinaryFast is 20.8% faster than toBinary + + ExportMetricsRequest (50 series): fork toBinary vs upstream = 209.9% - 21% - - ExportMetricsRequest (50 series): toBinaryFast vs protobufjs = 7.3% + 210% + + ExportMetricsRequest (50 series): fork toBinary vs protobufjs = -13.1% - 7% + -13% ExportLogsRequest (100 records) - - ExportLogsRequest (100 records): toBinaryFast is 12.7% faster than toBinary + + ExportLogsRequest (100 records): fork toBinary vs upstream = 226.6% - 13% - - ExportLogsRequest (100 records): toBinaryFast vs protobufjs = -8.6% + 227% + + ExportLogsRequest (100 records): fork toBinary vs protobufjs = -3.9% - -9% + -4% K8sPodList (20 
pods) - - K8sPodList (20 pods): toBinaryFast is -0.5% faster than toBinary + + K8sPodList (20 pods): fork toBinary vs upstream = 275.0% - -1% - - K8sPodList (20 pods): toBinaryFast vs protobufjs = -35.4% + 275% + + K8sPodList (20 pods): fork toBinary vs protobufjs = -17.8% - -35% + -18% GraphQLRequest - - GraphQLRequest: toBinaryFast is -13.0% faster than toBinary + + GraphQLRequest: fork toBinary vs upstream = 47.0% - -13% - - GraphQLRequest: toBinaryFast vs protobufjs = -74.9% + 47% + + GraphQLRequest: fork toBinary vs protobufjs = -65.1% - -75% + -65% GraphQLResponse - - GraphQLResponse: toBinaryFast is 46.0% faster than toBinary + + GraphQLResponse: fork toBinary vs upstream = 110.4% - 46% - - GraphQLResponse: toBinaryFast vs protobufjs = -71.5% + 110% + + GraphQLResponse: fork toBinary vs protobufjs = -63.7% - -72% + -64% RpcRequest - - RpcRequest: toBinaryFast is -14.8% faster than toBinary + + RpcRequest: fork toBinary vs upstream = 205.0% - -15% - - RpcRequest: toBinaryFast vs protobufjs = -35.0% + 205% + + RpcRequest: fork toBinary vs protobufjs = -1.8% - -35% + -2% RpcResponse - - RpcResponse: toBinaryFast is -4.6% faster than toBinary + + RpcResponse: fork toBinary vs upstream = 241.8% - -5% - - RpcResponse: toBinaryFast vs protobufjs = -25.3% + 242% + + RpcResponse: fork toBinary vs protobufjs = 18.4% - -25% + 18% StressMessage (depth=8, width=200) - - StressMessage (depth=8, width=200): toBinaryFast is 30.8% faster than toBinary + + StressMessage (depth=8, width=200): fork toBinary vs upstream = 264.6% - 31% - - StressMessage (depth=8, width=200): toBinaryFast vs protobufjs = -20.4% + 265% + + StressMessage (depth=8, width=200): fork toBinary vs protobufjs = -15.3% - -20% + -15% diff --git a/benchmarks/chart.svg b/benchmarks/chart.svg index b01758c32..b0d740ef6 100644 --- a/benchmarks/chart.svg +++ b/benchmarks/chart.svg @@ -28,222 +28,222 @@ 10M - - toBinary: 923,094 ops/sec (SimpleMessage) + + upstream-protobuf-es: 1,267,579 ops/sec (SimpleMessage) - 
- 923K + + 1.27M - - toBinaryFast: 1,016,337 ops/sec (SimpleMessage) + + toBinary: 1,353,495 ops/sec (SimpleMessage) - - 1.02M + + 1.35M - - protobufjs: 2,168,541 ops/sec (SimpleMessage) + + protobufjs: 2,991,471 ops/sec (SimpleMessage) - - 2.17M + + 2.99M SimpleMessage - - toBinary: 1,299 ops/sec (ExportTraceRequest (100 spans)) + + upstream-protobuf-es: 536 ops/sec (ExportTraceRequest (100 spans)) - - 1.30K + + 536 - - toBinaryFast: 1,352 ops/sec (ExportTraceRequest (100 spans)) + + toBinary: 1,634 ops/sec (ExportTraceRequest (100 spans)) - - 1.35K + + 1.63K - - protobufjs: 1,194 ops/sec (ExportTraceRequest (100 spans)) + + protobufjs: 1,849 ops/sec (ExportTraceRequest (100 spans)) - - 1.19K + + 1.85K ExportTraceRequest (100 spans) - - toBinary: 1,722 ops/sec (ExportMetricsRequest (50 series)) + + upstream-protobuf-es: 872 ops/sec (ExportMetricsRequest (50 series)) - - 1.72K + + 872 - - toBinaryFast: 2,081 ops/sec (ExportMetricsRequest (50 series)) + + toBinary: 2,701 ops/sec (ExportMetricsRequest (50 series)) - - 2.08K + + 2.70K - - protobufjs: 1,939 ops/sec (ExportMetricsRequest (50 series)) + + protobufjs: 3,107 ops/sec (ExportMetricsRequest (50 series)) - - 1.94K + + 3.11K ExportMetricsRequest (50 series) - - toBinary: 1,434 ops/sec (ExportLogsRequest (100 records)) + + upstream-protobuf-es: 910 ops/sec (ExportLogsRequest (100 records)) - - 1.43K + + 910 - - toBinaryFast: 1,617 ops/sec (ExportLogsRequest (100 records)) + + toBinary: 2,973 ops/sec (ExportLogsRequest (100 records)) - - 1.62K + + 2.97K - - protobufjs: 1,768 ops/sec (ExportLogsRequest (100 records)) + + protobufjs: 3,094 ops/sec (ExportLogsRequest (100 records)) - - 1.77K + + 3.09K ExportLogsRequest (100 records) - - toBinary: 1,191 ops/sec (K8sPodList (20 pods)) + + upstream-protobuf-es: 837 ops/sec (K8sPodList (20 pods)) - - 1.19K + + 837 - - toBinaryFast: 1,185 ops/sec (K8sPodList (20 pods)) + + toBinary: 3,140 ops/sec (K8sPodList (20 pods)) - - 1.18K + + 3.14K - - protobufjs: 1,834 ops/sec 
(K8sPodList (20 pods)) + + protobufjs: 3,819 ops/sec (K8sPodList (20 pods)) - - 1.83K + + 3.82K K8sPodList (20 pods) - - toBinary: 91,755 ops/sec (GraphQLRequest) + + upstream-protobuf-es: 145,574 ops/sec (GraphQLRequest) - - 91.8K + + 146K - - toBinaryFast: 79,812 ops/sec (GraphQLRequest) + + toBinary: 213,975 ops/sec (GraphQLRequest) - - 79.8K + + 214K - - protobufjs: 318,278 ops/sec (GraphQLRequest) + + protobufjs: 612,621 ops/sec (GraphQLRequest) - - 318K + + 613K GraphQLRequest - - toBinary: 94,188 ops/sec (GraphQLResponse) + + upstream-protobuf-es: 149,786 ops/sec (GraphQLResponse) - - 94.2K + + 150K - - toBinaryFast: 137,500 ops/sec (GraphQLResponse) + + toBinary: 315,185 ops/sec (GraphQLResponse) - - 137K + + 315K - - protobufjs: 482,558 ops/sec (GraphQLResponse) + + protobufjs: 869,456 ops/sec (GraphQLResponse) - - 483K + + 869K GraphQLResponse - - toBinary: 159,562 ops/sec (RpcRequest) + + upstream-protobuf-es: 133,777 ops/sec (RpcRequest) - - 160K + + 134K - - toBinaryFast: 135,941 ops/sec (RpcRequest) + + toBinary: 408,083 ops/sec (RpcRequest) - - 136K + + 408K - - protobufjs: 209,268 ops/sec (RpcRequest) + + protobufjs: 415,477 ops/sec (RpcRequest) - - 209K + + 415K RpcRequest - - toBinary: 269,090 ops/sec (RpcResponse) + + upstream-protobuf-es: 184,283 ops/sec (RpcResponse) - - 269K + + 184K - - toBinaryFast: 256,630 ops/sec (RpcResponse) + + toBinary: 629,814 ops/sec (RpcResponse) - - 257K + + 630K - - protobufjs: 343,420 ops/sec (RpcResponse) + + protobufjs: 531,720 ops/sec (RpcResponse) - - 343K + + 532K RpcResponse - - toBinary: 4,590 ops/sec (StressMessage (depth=8, width=200)) + + upstream-protobuf-es: 2,873 ops/sec (StressMessage (depth=8, width=200)) - - 4.59K + + 2.87K - - toBinaryFast: 6,005 ops/sec (StressMessage (depth=8, width=200)) + + toBinary: 10,476 ops/sec (StressMessage (depth=8, width=200)) - - 6.00K + + 10.5K - - protobufjs: 7,547 ops/sec (StressMessage (depth=8, width=200)) + + protobufjs: 12,371 ops/sec (StressMessage (depth=8, 
width=200)) - - 7.55K + + 12.4K StressMessage (depth=8, width=200) - - toBinary - - toBinaryFast - - protobufjs + + upstream-protobuf-es + + toBinary + + protobufjs diff --git a/benchmarks/package.json b/benchmarks/package.json index 8057bdff5..711c306a5 100644 --- a/benchmarks/package.json +++ b/benchmarks/package.json @@ -35,5 +35,8 @@ "tinybench": "^4.0.1", "tsx": "^4.21.0", "typescript": "^5.6.3" + }, + "devDependencies": { + "upstream-protobuf-es": "npm:@bufbuild/protobuf@^2.11.0" } } diff --git a/benchmarks/scripts/compare-results.ts b/benchmarks/scripts/compare-results.ts index a40c9308d..cf5800d61 100644 --- a/benchmarks/scripts/compare-results.ts +++ b/benchmarks/scripts/compare-results.ts @@ -233,7 +233,8 @@ function renderMarkdown( out.push(""); out.push( `Thresholds: throughput regression \`>${opts.thresholdOps}%\`, memory regression \`>${opts.thresholdMem}%\`. ` + - `Runner pinned to CPU 0 via taskset. Current run on \`${opts.current.platform}\`, Node \`${opts.current.node}\`, captured \`${opts.current.timestamp}\`.`, + `Runner pinned to CPU 0 via taskset; baseline and current are benchmarked on the same runner within one workflow invocation. ` + + `Current run on \`${opts.current.platform}\`, Node \`${opts.current.node}\`, captured \`${opts.current.timestamp}\`.`, ); if (opts.baseline) { out.push( diff --git a/benchmarks/scripts/run-heap-prof.sh b/benchmarks/scripts/run-heap-prof.sh index c60ee7a49..fa2dbab27 100755 --- a/benchmarks/scripts/run-heap-prof.sh +++ b/benchmarks/scripts/run-heap-prof.sh @@ -22,7 +22,7 @@ set -euo pipefail # Positional args are passed through to the driver (e.g. --fixture=otel100). # Defaults match the README example: OTel 100-span workload, 1000 iterations. 
FIXTURE="${FIXTURE:-otel100}" -ENCODER="${ENCODER:-toBinaryFast}" +ENCODER="${ENCODER:-toBinary}" ITERATIONS="${ITERATIONS:-1000}" # Allow overriding from the CLI: `npm run bench:heap-prof -- --fixture=k8s20` diff --git a/benchmarks/src/bench-comparison-protobufjs.ts b/benchmarks/src/bench-comparison-protobufjs.ts index d609df6af..fc8d0c622 100644 --- a/benchmarks/src/bench-comparison-protobufjs.ts +++ b/benchmarks/src/bench-comparison-protobufjs.ts @@ -27,7 +27,7 @@ // future protobuf-es changes can be tracked against a stable baseline. import { Bench } from "tinybench"; -import { create, toBinary, toBinaryFast, fromBinary } from "@bufbuild/protobuf"; +import { create, toBinary, fromBinary } from "@bufbuild/protobuf"; import { ExportTraceRequestSchema } from "./gen/nested_pb.js"; import { SPAN_COUNT } from "./fixtures.js"; @@ -163,14 +163,6 @@ export async function runComparisonBench() { }, ); - bench.add( - `protobuf-es: create+toBinaryFast (${SPAN_COUNT} spans, OTel-like)`, - () => { - const msg = create(ExportTraceRequestSchema, initEs); - toBinaryFast(ExportTraceRequestSchema, msg); - }, - ); - bench.add( `protobufjs: create+encode (${SPAN_COUNT} spans, OTel-like)`, () => { @@ -187,10 +179,6 @@ export async function runComparisonBench() { toBinary(ExportTraceRequestSchema, esPrebuilt); }); - bench.add(`protobuf-es: toBinaryFast pre-built (${SPAN_COUNT} spans)`, () => { - toBinaryFast(ExportTraceRequestSchema, esPrebuilt); - }); - bench.add(`protobufjs: encode pre-built (${SPAN_COUNT} spans)`, () => { ExportTraceRequestJs.encode(pbjsPrebuilt).finish(); }); diff --git a/benchmarks/src/bench-create-toBinary.ts b/benchmarks/src/bench-create-toBinary.ts index 9fd129d9e..b15815464 100644 --- a/benchmarks/src/bench-create-toBinary.ts +++ b/benchmarks/src/bench-create-toBinary.ts @@ -18,7 +18,7 @@ // of an OTLP trace export call made once per batch. 
import { Bench } from "tinybench"; -import { create, toBinary, toBinaryFast } from "@bufbuild/protobuf"; +import { create, toBinary } from "@bufbuild/protobuf"; import { SimpleMessageSchema } from "./gen/small_pb.js"; import { AnyValueSchema, @@ -44,15 +44,6 @@ export async function runCreateToBinaryBench() { toBinary(SimpleMessageSchema, m); }); - bench.add("create() + toBinaryFast() SimpleMessage", () => { - const m = create(SimpleMessageSchema, { - name: "bench-message", - value: 42, - enabled: true, - }); - toBinaryFast(SimpleMessageSchema, m); - }); - bench.add( `create() + toBinary() ExportTraceRequest (${SPAN_COUNT} spans, OTel-like)`, () => { @@ -97,50 +88,6 @@ export async function runCreateToBinaryBench() { }, ); - bench.add( - `create() + toBinaryFast() ExportTraceRequest (${SPAN_COUNT} spans, OTel-like)`, - () => { - const spans = [] as ReturnType>[]; - for (let i = 0; i < SPAN_COUNT; i++) { - const attrs = [] as ReturnType>[]; - for (let j = 0; j < 10; j++) { - attrs.push( - create(KeyValueSchema, { - key: `k${j}`, - value: create(AnyValueSchema, { - value: { case: "stringValue", value: `v${i}-${j}` }, - }), - }), - ); - } - spans.push( - create(SpanSchema, { - traceId: new Uint8Array(16), - spanId: new Uint8Array(8), - name: `span-${i}`, - startTimeUnixNano: 1_700_000_000_000_000_000n, - endTimeUnixNano: 1_700_000_000_000_001_000n, - attributes: attrs, - }), - ); - } - const scope = create(InstrumentationScopeSchema, { - name: "@example/tracer", - version: "1.0.0", - }); - const resource = create(ResourceSchema, { attributes: [] }); - const req = create(ExportTraceRequestSchema, { - resourceSpans: [ - create(ResourceSpansSchema, { - resource, - scopeSpans: [create(ScopeSpansSchema, { scope, spans })], - }), - ], - }); - toBinaryFast(ExportTraceRequestSchema, req); - }, - ); - await bench.run(); return bench; } diff --git a/benchmarks/src/bench-memory.ts b/benchmarks/src/bench-memory.ts index 0ab8588af..494f553f2 100644 --- 
a/benchmarks/src/bench-memory.ts +++ b/benchmarks/src/bench-memory.ts @@ -26,7 +26,7 @@ // (or) // npm run bench:memory (package.json wires --expose-gc) -import { create, toBinary, toBinaryFast, fromBinary } from "@bufbuild/protobuf"; +import { create, toBinary, fromBinary } from "@bufbuild/protobuf"; import { ExportTraceRequestSchema } from "./gen/nested_pb.js"; import { SPAN_COUNT } from "./fixtures.js"; @@ -188,13 +188,6 @@ async function main() { }), ); - samples.push( - measure(`protobuf-es: create + toBinaryFast (${SPAN_COUNT} spans)`, () => { - const msg = create(ExportTraceRequestSchema, initEs); - toBinaryFast(ExportTraceRequestSchema, msg); - }), - ); - samples.push( measure(`protobufjs: create + encode (${SPAN_COUNT} spans)`, () => { const msg = ExportTraceRequestJs.create(initPbjs); diff --git a/benchmarks/src/bench-multishape.ts b/benchmarks/src/bench-multishape.ts deleted file mode 100644 index 6aeb9b5af..000000000 --- a/benchmarks/src/bench-multishape.ts +++ /dev/null @@ -1,277 +0,0 @@ -// Copyright 2021-2026 Buf Technologies, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// L3 multi-shape benchmark. -// -// Purpose: verify the core L3 claim — that alternating 3+ shapes through -// one schema is faster on the adaptive plan (variants graduate and each -// one is monomorphic on `msg[name]`) than on the generic L1+L2 plan (a -// single polymorphic plan that sees every shape's hidden class). 
-// -// Fixture: the `SimpleMessage` schema has 3 fields, which lets us sweep -// through exactly 3 distinct presence patterns (`{field1}`, `{field2}`, -// `{field3}`). We alternate them in strict round-robin to force the -// polymorphic property-read site to see all three shapes; V8 pushes it -// to 3-way polymorphic in the L1+L2 run, while in the L3 run each variant -// keeps its own monomorphic IC. -// -// The driver writes a JSON summary to stdout so `scripts/median-results.ts` -// and `scripts/compare-results.ts` can line-diff against `baselines/main.json` -// without shape-specific tooling. - -import { create, toBinary, toBinaryFast } from "@bufbuild/protobuf"; -import { Bench } from "tinybench"; -import { AnyValueSchema, KeyValueSchema, SpanSchema } from "./gen/nested_pb.js"; -import { SimpleMessageSchema } from "./gen/small_pb.js"; - -const ITERATIONS_WARMUP = 40; // well past L3_WARMUP=10 × 3 shapes - -function buildShapes(): ReturnType< - typeof create ->[] { - return [ - create(SimpleMessageSchema, { - name: "the quick brown fox", - value: 0, - enabled: false, - }), - create(SimpleMessageSchema, { - name: "", - value: 0x6bad_f00d, - enabled: false, - }), - create(SimpleMessageSchema, { - name: "", - value: 0, - enabled: true, - }), - ]; -} - -/** - * Three Span shapes with different presence patterns — mirrors the - * three-shape OTel pattern called out in the L3 design spec (full / - * event / error). - */ -function buildSpanShapes(): ReturnType>[] { - const kv = (k: string, v: string) => - create(KeyValueSchema, { - key: k, - value: create(AnyValueSchema, { - value: { case: "stringValue", value: v }, - }), - }); - const traceId = new Uint8Array(16).fill(0x11); - const spanId = new Uint8Array(8).fill(0x22); - return [ - // Full-shape: all scalar + a short attrs list. 
- create(SpanSchema, { - traceId, - spanId, - name: "GET /v1/users", - startTimeUnixNano: 1_700_000_000_000_000_000n, - endTimeUnixNano: 1_700_000_001_000_000_000n, - attributes: [kv("http.method", "GET"), kv("http.status_code", "200")], - }), - // Short-shape: no attrs, only IDs + timestamps (status/health spans). - create(SpanSchema, { - traceId, - spanId, - name: "healthcheck", - startTimeUnixNano: 1_700_000_000_000_000_000n, - endTimeUnixNano: 1_700_000_000_500_000_000n, - attributes: [], - }), - // Error-shape: IDs + timestamp + attrs, no name (empty string omitted). - create(SpanSchema, { - traceId, - spanId, - name: "", - startTimeUnixNano: 1_700_000_000_000_000_000n, - endTimeUnixNano: 1_700_000_000_100_000_000n, - attributes: [ - kv("error", "true"), - kv("error.type", "timeout"), - kv("error.message", "upstream deadline exceeded"), - ], - }), - ]; -} - -async function main(): Promise { - const time = Number(process.env.BENCH_MATRIX_TIME ?? 1500); - const warmupTime = Number(process.env.BENCH_MATRIX_WARMUP ?? 300); - const bench = new Bench({ time, warmupTime }); - - const shapes = buildShapes(); - const spans = buildSpanShapes(); - - // Byte-parity sanity (fail fast before the measurement phase). 
- for (const s of shapes) { - const ref = toBinary(SimpleMessageSchema, s); - const adaptive = toBinaryFast(SimpleMessageSchema, s, { adaptive: true }); - const generic = toBinaryFast(SimpleMessageSchema, s); - if ( - ref.length !== adaptive.length || - ref.length !== generic.length || - !ref.every((b, i) => b === adaptive[i] && b === generic[i]) - ) { - throw new Error( - "bench-multishape: SimpleMessage byte parity check failed", - ); - } - } - for (const s of spans) { - const ref = toBinary(SpanSchema, s); - const adaptive = toBinaryFast(SpanSchema, s, { adaptive: true }); - if ( - ref.length !== adaptive.length || - !ref.every((b, i) => b === adaptive[i]) - ) { - throw new Error("bench-multishape: Span byte parity check failed"); - } - } - - // Prime the observer: ensures variants are graduated before measurement. - for (let i = 0; i < ITERATIONS_WARMUP; i++) { - for (const s of shapes) { - toBinaryFast(SimpleMessageSchema, s, { adaptive: true }); - } - for (const s of spans) { - toBinaryFast(SpanSchema, s, { adaptive: true }); - } - } - - bench.add("SimpleMessage multi-shape :: L1+L2 generic", () => { - toBinaryFast(SimpleMessageSchema, shapes[0]); - toBinaryFast(SimpleMessageSchema, shapes[1]); - toBinaryFast(SimpleMessageSchema, shapes[2]); - }); - bench.add("SimpleMessage multi-shape :: L3 adaptive", () => { - toBinaryFast(SimpleMessageSchema, shapes[0], { adaptive: true }); - toBinaryFast(SimpleMessageSchema, shapes[1], { adaptive: true }); - toBinaryFast(SimpleMessageSchema, shapes[2], { adaptive: true }); - }); - // Single-shape regression gate: run the same shape 3× per op so the - // per-op cost comparison stays apples-to-apples. 
- bench.add("SimpleMessage single-shape :: L1+L2 generic", () => { - toBinaryFast(SimpleMessageSchema, shapes[0]); - toBinaryFast(SimpleMessageSchema, shapes[0]); - toBinaryFast(SimpleMessageSchema, shapes[0]); - }); - bench.add("SimpleMessage single-shape :: L3 adaptive", () => { - toBinaryFast(SimpleMessageSchema, shapes[0], { adaptive: true }); - toBinaryFast(SimpleMessageSchema, shapes[0], { adaptive: true }); - toBinaryFast(SimpleMessageSchema, shapes[0], { adaptive: true }); - }); - - // Span multi-shape. More fields + repeated attrs give the L3 variant - // more `isFieldSet` checks to skip per op. - bench.add("Span multi-shape :: L1+L2 generic", () => { - toBinaryFast(SpanSchema, spans[0]); - toBinaryFast(SpanSchema, spans[1]); - toBinaryFast(SpanSchema, spans[2]); - }); - bench.add("Span multi-shape :: L3 adaptive", () => { - toBinaryFast(SpanSchema, spans[0], { adaptive: true }); - toBinaryFast(SpanSchema, spans[1], { adaptive: true }); - toBinaryFast(SpanSchema, spans[2], { adaptive: true }); - }); - bench.add("Span single-shape :: L1+L2 generic", () => { - toBinaryFast(SpanSchema, spans[0]); - toBinaryFast(SpanSchema, spans[0]); - toBinaryFast(SpanSchema, spans[0]); - }); - bench.add("Span single-shape :: L3 adaptive", () => { - toBinaryFast(SpanSchema, spans[0], { adaptive: true }); - toBinaryFast(SpanSchema, spans[0], { adaptive: true }); - toBinaryFast(SpanSchema, spans[0], { adaptive: true }); - }); - - await bench.run(); - - const rows = bench.tasks.map((t) => ({ - name: t.name, - opsPerSec: t.result?.hz ?? 0, - rme: t.result?.rme ?? 0, - samples: t.result?.samples.length ?? 0, - })); - - // Emit table for eyeballing. - console.table( - rows.map((r) => ({ - name: r.name, - "ops/s": r.opsPerSec.toFixed(0), - "rme %": r.rme.toFixed(2), - samples: r.samples, - })), - ); - - // Emit JSON for scripts/compare-results.ts. 
- console.log( - JSON.stringify( - { - fixture: "multishape", - generatedAt: new Date().toISOString(), - node: process.version, - rows, - }, - null, - 2, - ), - ); - - // Compute deltas so the run self-reports its gates. - const get = (name: string): number => - rows.find((r) => r.name === name)?.opsPerSec ?? 0; - const delta = (baseline: string, current: string): number => { - const b = get(baseline); - const c = get(current); - return b > 0 ? c / b - 1 : 0; - }; - const multiSimple = delta( - "SimpleMessage multi-shape :: L1+L2 generic", - "SimpleMessage multi-shape :: L3 adaptive", - ); - const singleSimple = delta( - "SimpleMessage single-shape :: L1+L2 generic", - "SimpleMessage single-shape :: L3 adaptive", - ); - const multiSpan = delta( - "Span multi-shape :: L1+L2 generic", - "Span multi-shape :: L3 adaptive", - ); - const singleSpan = delta( - "Span single-shape :: L1+L2 generic", - "Span single-shape :: L3 adaptive", - ); - - console.log( - `\nSimpleMessage multi-shape: ${(multiSimple * 100).toFixed(2)} % (target >= +10%)`, - ); - console.log( - `SimpleMessage single-shape: ${(singleSimple * 100).toFixed(2)} % (regression <= 3%)`, - ); - console.log( - `Span multi-shape: ${(multiSpan * 100).toFixed(2)} % (target >= +10%)`, - ); - console.log( - `Span single-shape: ${(singleSpan * 100).toFixed(2)} % (regression <= 3%)`, - ); -} - -main().catch((err) => { - console.error(err); - process.exit(1); -}); diff --git a/benchmarks/src/bench-streaming.ts b/benchmarks/src/bench-streaming.ts index 892438391..55fab2b79 100644 --- a/benchmarks/src/bench-streaming.ts +++ b/benchmarks/src/bench-streaming.ts @@ -33,9 +33,8 @@ // - medium: 10 ExportTraceRequest batches × 100 spans each (OTel export) // - large: 5 K8sPodList chunks × 20 pods each (kubelet list pagination) // -// Three encoders are compared per shape: -// - reflective (toBinary + length prefix) -// - fast (toBinaryFast + length prefix, L0+L1+L2 stack) +// Two encoders are compared per shape: +// - 
reflective (toBinary + length prefix, fork's L0 contiguous writer) // - protobufjs (ctor.encodeDelimited, ahead-of-time codegen, where loaded) // // The output is: @@ -49,7 +48,7 @@ import { createRequire } from "node:module"; import { dirname, resolve } from "node:path"; import { fileURLToPath } from "node:url"; -import { toBinary, toBinaryFast } from "@bufbuild/protobuf"; +import { toBinary } from "@bufbuild/protobuf"; import { BinaryWriter } from "@bufbuild/protobuf/wire"; import { Bench } from "tinybench"; @@ -304,9 +303,9 @@ function buildLargeStream(): StreamShape { ); // K8s has a deeper init shape for pbjs; we skip pbjs on the large stream // rather than duplicate hundreds of lines from report-pbjs.ts here. The - // intent of the large stream is primarily protobuf-es self-comparison - // (toBinary vs toBinaryFast) on big payloads; pbjs parity is covered by - // the main report for the same fixture, just without the streaming wrap. + // intent of the large stream is primarily protobuf-es encode throughput + // on big payloads; pbjs parity is covered by the main report for the + // same fixture, just without the streaming wrap. 
return { label: `large stream (${LARGE_STREAM_LEN} × K8sPodList, ${K8S_POD_COUNT} pods each)`, shape: "kubelet list pagination — map-heavy configuration payloads", @@ -339,14 +338,6 @@ function encodeStreamReflective( return writer.finish(); } -function encodeStreamFast(schema: AnySchema, messages: AnyMsg[]): Uint8Array { - const writer = new BinaryWriter(); - for (let i = 0; i < messages.length; i++) { - writer.bytes(toBinaryFast(schema, messages[i])); - } - return writer.finish(); -} - function encodeStreamPbjs( ctor: PbjsCtor, messages: Record[], @@ -370,7 +361,7 @@ function encodeStreamPbjs( interface StreamingResult { fixture: string; - encoder: "toBinary" | "toBinaryFast" | "protobufjs"; + encoder: "toBinary" | "protobufjs"; streamLen: number; bytes: number; opsPerSec: number; @@ -398,14 +389,6 @@ async function runStreamingBench() { // Measure stream byte size once (all iterations produce the same bytes). const prepared = streams.map((s) => { const reflectiveBytes = encodeStreamReflective(s.schema, s.esMessages); - const fastBytes = encodeStreamFast(s.schema, s.esMessages); - // Parity check — if toBinary and toBinaryFast disagree on stream bytes - // we are measuring different workloads. Log so CI flags it. - if (reflectiveBytes.byteLength !== fastBytes.byteLength) { - console.warn( - `stream ${s.label}: toBinary=${reflectiveBytes.byteLength}B vs toBinaryFast=${fastBytes.byteLength}B — byte counts differ, investigate`, - ); - } const pbjsBytes = s.pbjs ? 
encodeStreamPbjs(s.pbjs.ctor, s.pbjs.messages).byteLength : null; @@ -420,14 +403,6 @@ async function runStreamingBench() { }, ); } - for (const p of prepared) { - bench.add( - `${p.label} :: sizeDelimitedEncode via toBinaryFast (${p.streamBytes} B)`, - () => { - encodeStreamFast(p.schema, p.esMessages); - }, - ); - } for (const p of prepared) { const pbjs = p.pbjs; if (!pbjs) continue; @@ -457,13 +432,9 @@ function collectResults( const [, label, kind, bytesStr] = match; const stream = streamByLabel.get(label); if (!stream) continue; - const encoder: StreamingResult["encoder"] = kind.includes( - "via toBinaryFast", - ) - ? "toBinaryFast" - : kind.includes("via toBinary") - ? "toBinary" - : "protobufjs"; + const encoder: StreamingResult["encoder"] = kind.includes("via toBinary") + ? "toBinary" + : "protobufjs"; const bytes = Number(bytesStr); const opsPerSec = task.result?.hz ?? 0; out.push({ diff --git a/benchmarks/src/bench-toBinary.ts b/benchmarks/src/bench-toBinary.ts index 5f80db6cb..07924c78a 100644 --- a/benchmarks/src/bench-toBinary.ts +++ b/benchmarks/src/bench-toBinary.ts @@ -17,7 +17,7 @@ // reflects the reflective binary encoder cost in isolation. 
import { Bench } from "tinybench"; -import { toBinary, toBinaryFast } from "@bufbuild/protobuf"; +import { toBinary } from "@bufbuild/protobuf"; import { SimpleMessageSchema } from "./gen/small_pb.js"; import { ExportTraceRequestSchema } from "./gen/nested_pb.js"; import { @@ -43,17 +43,6 @@ export async function runToBinaryBench() { }, ); - bench.add("toBinaryFast() SimpleMessage (pre-built)", () => { - toBinaryFast(SimpleMessageSchema, small); - }); - - bench.add( - `toBinaryFast() ExportTraceRequest (pre-built, ${SPAN_COUNT} spans)`, - () => { - toBinaryFast(ExportTraceRequestSchema, traceRequest); - }, - ); - await bench.run(); return bench; } diff --git a/benchmarks/src/heap-prof-driver.ts b/benchmarks/src/heap-prof-driver.ts index 2ee54213a..a31fbb58e 100644 --- a/benchmarks/src/heap-prof-driver.ts +++ b/benchmarks/src/heap-prof-driver.ts @@ -32,7 +32,7 @@ // // Or via the wrapper script: `npm run bench:heap-prof`. -import { toBinary, toBinaryFast } from "@bufbuild/protobuf"; +import { toBinary } from "@bufbuild/protobuf"; import { ExportTraceRequestSchema } from "./gen/nested_pb.js"; import { K8sPodListSchema } from "./gen/k8s-pod_pb.js"; @@ -51,7 +51,7 @@ import { // Keep argument parsing minimal — a shell script wraps this driver so we // don't need a full CLI framework. 
Accepted args: // --fixture= fixture key (default: otel100) -// --encoder= toBinary | toBinaryFast (default: toBinaryFast) +// --encoder= toBinary (only encoder shipped on main) // --iterations= iterations to run (default: 1000) function parseArg(name: string, fallback: string): string { @@ -61,7 +61,7 @@ function parseArg(name: string, fallback: string): string { } const fixtureKey = parseArg("fixture", "otel100"); -const encoderName = parseArg("encoder", "toBinaryFast"); +const encoderName = parseArg("encoder", "toBinary"); const iterations = Number(parseArg("iterations", "1000")); // biome-ignore lint/suspicious/noExplicitAny: dispatch is intentionally loose @@ -113,12 +113,8 @@ function resolveEncoder( switch (name) { case "toBinary": return toBinary; - case "toBinaryFast": - return toBinaryFast; default: - throw new Error( - `unknown encoder '${name}' — use 'toBinary' or 'toBinaryFast'`, - ); + throw new Error(`unknown encoder '${name}' — use 'toBinary'`); } } diff --git a/benchmarks/src/report-helpers.ts b/benchmarks/src/report-helpers.ts index 7ee097cbe..367a385f4 100644 --- a/benchmarks/src/report-helpers.ts +++ b/benchmarks/src/report-helpers.ts @@ -42,14 +42,25 @@ export interface BenchmarkResult { * Encoders we plot. Order matters — it drives the legend and bar ordering * within a fixture group. Kept small and fixed so the chart is legible; * when a new encoder is added, extend this array and the colors map. + * + * `upstream-protobuf-es` is `@bufbuild/protobuf@latest` published on npm, + * installed under an alias so the fork's in-tree `toBinary` (which ships + * the L0 contiguous-writer optimisation from PR #8) and the unmodified + * upstream implementation live side-by-side in the same process. It is + * the honest baseline against which cumulative fork improvements should + * be measured. 
*/ -export const ENCODERS = ["toBinary", "toBinaryFast", "protobufjs"] as const; +export const ENCODERS = [ + "upstream-protobuf-es", + "toBinary", + "protobufjs", +] as const; export type Encoder = (typeof ENCODERS)[number]; export const ENCODER_COLORS: Record = { - toBinary: "#8b8b8b", - toBinaryFast: "#ffa600", - protobufjs: "#347fc4", + "upstream-protobuf-es": "#e55137", + toBinary: "#347fc4", + protobufjs: "#6a8e7f", }; // --- Markdown table -------------------------------------------------------- @@ -138,8 +149,8 @@ function groupByFixture(results: BenchmarkResult[]): Array<{ fixture: r.fixture, encodedSize: r.encodedSize, perEncoder: { + "upstream-protobuf-es": undefined, toBinary: undefined, - toBinaryFast: undefined, protobufjs: undefined, }, }; @@ -172,16 +183,16 @@ export function generateBenchmarkMarkdownTable( const header = [ "Fixture", "Bytes", - "toBinary", - "toBinaryFast", + "upstream", + "toBinary (fork)", "protobufjs", "Best", ]; const rows: string[][] = groups.map((g) => [ g.fixture, formatBytes(g.encodedSize), + formatOps(g.perEncoder["upstream-protobuf-es"]?.opsPerSec), formatOps(g.perEncoder.toBinary?.opsPerSec), - formatOps(g.perEncoder.toBinaryFast?.opsPerSec), formatOps(g.perEncoder.protobufjs?.opsPerSec), bestEncoderRatio(g.perEncoder), ]); @@ -438,42 +449,50 @@ export function generateBenchmarkChart(results: BenchmarkResult[]): string { // --- SVG delta chart ------------------------------------------------------- /** - * Per-fixture speed improvement of `toBinaryFast` over the two baselines we - * track: protobuf-es's reflective `toBinary`, and `protobufjs` where the - * pbjs static-module codegen is available. The main chart is log-scale, so - * a 5x improvement looks almost identical to a 1.5x improvement — the - * delta chart restores the linear comparison. + * Per-fixture speed improvement of the fork's `toBinary` (L0 contiguous + * writer, already shipped on main via PR #8) over the two baselines we + * track: + * + * 1. 
`upstream-protobuf-es` — the unmodified `@bufbuild/protobuf@latest` + * published on npm. This is the honest "how much faster is the fork + * than the original protobuf-es?" number. It measures the cumulative + * fork improvement shipped to main. + * 2. `protobufjs` — cross-library reference where the pbjs static-module + * codegen is available. + * + * The main chart is log-scale, so a 5x improvement looks almost identical + * to a 1.5x improvement — this chart restores the linear comparison. * - * Bars render the ratio minus one, i.e. "toBinaryFast is N% faster than - * baseline". Negative values (fast encoder slower than baseline) cross the - * axis. We cap bar length but keep the numeric label honest — overflowing - * fixtures (e.g. 500%+) still show the full percentage in the label. + * Bars render the ratio minus one, i.e. "fork's toBinary is N% faster than + * baseline". Negative values (fork slower than baseline) cross the axis. + * We cap bar length at a floor of 300% so small gains on tiny fixtures + * still render visibly; the numeric label stays honest regardless of cap. */ export function generateBenchmarkDeltaChart( results: BenchmarkResult[], ): string { const groups = groupByFixture(results); - // Two delta series per fixture (one row per baseline). We plot only - // fixtures that have at least a toBinary+toBinaryFast pair, because the - // protobuf-es delta is the headline number. The protobufjs delta is - // drawn on top when available — missing pbjs stubs leave that row empty - // without shrinking the chart. + // Two delta series per fixture. We plot only fixtures that have a fork + // `toBinary` measurement — that is the common subject of every bar. + // Missing individual baselines (e.g. protobufjs has no stub for this + // fixture) leave that series' sub-bar empty, without shrinking the row.
interface DeltaRow { fixture: string; - vsToBinaryPct?: number; + vsUpstreamPct?: number; vsProtobufjsPct?: number; } const rows: DeltaRow[] = []; for (const g of groups) { - const fast = g.perEncoder.toBinaryFast?.opsPerSec; - if (!fast || fast <= 0) continue; - const slow = g.perEncoder.toBinary?.opsPerSec; + const fork = g.perEncoder.toBinary?.opsPerSec; + if (!fork || fork <= 0) continue; + const upstream = g.perEncoder["upstream-protobuf-es"]?.opsPerSec; const pbjs = g.perEncoder.protobufjs?.opsPerSec; rows.push({ fixture: g.fixture, - vsToBinaryPct: slow && slow > 0 ? (fast / slow - 1) * 100 : undefined, - vsProtobufjsPct: pbjs && pbjs > 0 ? (fast / pbjs - 1) * 100 : undefined, + vsUpstreamPct: + upstream && upstream > 0 ? (fork / upstream - 1) * 100 : undefined, + vsProtobufjsPct: pbjs && pbjs > 0 ? (fork / pbjs - 1) * 100 : undefined, }); } @@ -495,17 +514,15 @@ export function generateBenchmarkDeltaChart( // Cap bar length at the largest positive delta, with a floor of 300% // so small-fixture gains (e.g. 30% on SimpleMessage) don't render as // a pixel-wide sliver just because one outlier fixture is 500%+. - const maxPct = Math.max( - 300, - ...rows.flatMap((r) => [r.vsToBinaryPct ?? 0, r.vsProtobufjsPct ?? 0]), - ); + const allPcts = rows.flatMap((r) => [ + r.vsUpstreamPct ?? 0, + r.vsProtobufjsPct ?? 0, + ]); + const maxPct = Math.max(300, ...allPcts); // Include the most negative delta so bars can grow leftward across the // zero baseline without clipping. If everything is positive we keep the // zero-line flush with the left edge. - const minPct = Math.min( - 0, - ...rows.flatMap((r) => [r.vsToBinaryPct ?? 0, r.vsProtobufjsPct ?? 0]), - ); + const minPct = Math.min(0, ...allPcts); const pctRange = maxPct - minPct; // Zero-line X. If minPct < 0 we reserve a slice of the chart width for // negative-bar growth; otherwise the zero-line sits at marginLeft. 
@@ -514,10 +531,10 @@ export function generateBenchmarkDeltaChart( const pctToWidth = (pct: number) => (Math.abs(pct) / pctRange) * chartWidth; // Colors. Re-use the encoder palette so the legend cross-references the - // main chart cleanly: "vs toBinary" uses the toBinary grey, "vs - // protobufjs" uses the protobufjs blue — both describe the baseline, not - // toBinaryFast itself, which is the common subject of both bars. - const colorVsToBinary = ENCODER_COLORS.toBinary; + // main chart cleanly: each baseline's bar uses the baseline's color — + // the bars describe the baseline, not the fork's toBinary itself, which + // is the common subject of both bars. + const colorVsUpstream = ENCODER_COLORS["upstream-protobuf-es"]; const colorVsProtobufjs = ENCODER_COLORS.protobufjs; const parts: string[] = []; @@ -540,25 +557,27 @@ export function generateBenchmarkDeltaChart( // Title. parts.push( ` ` + - `toBinaryFast speedup vs baselines (linear %)` + + `Fork toBinary (L0) speedup vs baselines (linear %)` + `\n`, ); parts.push( ` ` + - `higher is better — "+300%" means 4x throughput` + + `higher is better — "+300%" means 4x throughput; ` + + `"vs upstream" is the cumulative gain over @bufbuild/protobuf@latest` + `\n`, ); // Legend row — placed well above the axis ticks, indented from the - // left margin so the fixture labels underneath never crowd it. + // left margin so the fixture labels underneath never crowd it + // (layout from #19; encoder semantics from #22). const legendY = marginTop - 52; const legendX = marginLeft - 100; parts.push( ` \n` + - ` \n` + - ` vs toBinary\n` + - ` \n` + - ` vs protobufjs\n` + + ` \n` + + ` vs upstream (@bufbuild/protobuf@latest)\n` + + ` \n` + + ` vs protobufjs\n` + ` \n`, ); @@ -588,17 +607,17 @@ export function generateBenchmarkDeltaChart( ` ${escapeXml(row.fixture)}\n`, ); - // vs toBinary bar (upper half of the row). Extra 6px vertical gap + // vs upstream bar (upper half of the row). 
Extra 6px vertical gap // from the row split keeps the bar's numeric label clear of the - // lower bar beneath it. - if (row.vsToBinaryPct !== undefined) { - const pct = row.vsToBinaryPct; + // lower bar beneath it (layout from #19; encoder semantics from #22). + if (row.vsUpstreamPct !== undefined) { + const pct = row.vsUpstreamPct; const w = pctToWidth(pct); const y = rowY + (rowHeight / 2 - barHeight - 5); const x = pct >= 0 ? zeroX : zeroX - w; parts.push( - ` \n` + - ` ${escapeXml(row.fixture)}: toBinaryFast is ${pct.toFixed(1)}% faster than toBinary\n` + + ` \n` + + ` ${escapeXml(row.fixture)}: fork toBinary vs upstream = ${pct.toFixed(1)}%\n` + ` \n`, ); // Label outside bar (right side for positive, left for negative). @@ -619,7 +638,7 @@ export function generateBenchmarkDeltaChart( const x = pct >= 0 ? zeroX : zeroX - w; parts.push( ` \n` + - ` ${escapeXml(row.fixture)}: toBinaryFast vs protobufjs = ${pct.toFixed(1)}%\n` + + ` ${escapeXml(row.fixture)}: fork toBinary vs protobufjs = ${pct.toFixed(1)}%\n` + ` \n`, ); const labelX = pct >= 0 ? x + w + 4 : x - 4; diff --git a/benchmarks/src/report.ts b/benchmarks/src/report.ts index 5a7af70d4..5fcaeb755 100644 --- a/benchmarks/src/report.ts +++ b/benchmarks/src/report.ts @@ -14,15 +14,15 @@ // Benchmark report generator. // -// Runs a multi-encoder matrix (toBinary, toBinaryFast, protobufjs) across -// the fixture set exposed by bench-matrix.ts, then emits: +// Runs a three-encoder matrix (upstream-protobuf-es, fork's toBinary, +// protobufjs) across the fixture set exposed by bench-matrix.ts, then emits: // // 1. bench-results.json — machine-readable raw data for CI diffing. // 2. chart.svg — grouped-bar SVG chart (log ops/sec per fixture) // with numeric labels above each bar. -// 3. chart-delta.svg — linear-scale bar chart showing toBinaryFast's -// percentage speedup over both baselines -// (toBinary, protobufjs) per fixture. +// 3. 
chart-delta.svg — linear-scale bar chart showing the fork's +// `toBinary` percentage speedup over both +// baselines (upstream, protobufjs) per fixture. // 4. README.md — markdown table injected between the // markers. // @@ -40,7 +40,14 @@ // `BENCH_REPORT_READ_ONLY=1`. import { existsSync, readFileSync, writeFileSync } from "node:fs"; -import { toBinary, toBinaryFast } from "@bufbuild/protobuf"; +import { toBinary } from "@bufbuild/protobuf"; +// `upstream-protobuf-es` is an npm alias for `@bufbuild/protobuf@latest` +// installed as a regular devDependency. This gives us the unmodified +// upstream encoder alongside the fork's in-tree copy in the same process, +// so the report can measure the honest cumulative gain from the original +// protobuf-es baseline (which predates the L0 contiguous-writer work in +// PR #8) instead of only showing the fork's current state in isolation. +import { toBinary as upstreamToBinary } from "upstream-protobuf-es"; import { Bench } from "tinybench"; import { @@ -169,13 +176,21 @@ async function runReportBench(): Promise { // protobuf-es encoders. We never change the schema/message references // inside the benchmark function body — that would pull allocation cost // into the measurement. Everything is captured in the closure once. + // + // The `upstream-protobuf-es` bar uses `@bufbuild/protobuf@latest` via + // the aliased devDependency. The fork's generated schemas import from + // the fork's `@bufbuild/protobuf`, but the descriptor protocol is + // wire-compatible between the two v2 versions — upstream.toBinary + // accepts the same schema/message pair and produces identical bytes. + // That lets a single schema drive bars from both libraries, no + // separate codegen needed. 
for (const p of prepared) { + bench.add(`${p.name} :: upstream-protobuf-es`, () => { + upstreamToBinary(p.schema, p.msg); + }); bench.add(`${p.name} :: toBinary`, () => { toBinary(p.schema, p.msg); }); - bench.add(`${p.name} :: toBinaryFast`, () => { - toBinaryFast(p.schema, p.msg); - }); } // protobufjs bars are added per-fixture wherever the pbjs static-module @@ -236,23 +251,65 @@ const readmePath = `${outDir}README.md`; let results: BenchmarkResult[]; if (process.env.BENCH_REPORT_READ_ONLY === "1" && existsSync(resultsPath)) { // Re-render mode: useful while iterating on chart / table layout so the - // author does not pay the ~30s benchmark cost for each rendering tweak. + // author does not pay the benchmark cost for each rendering tweak. const raw = JSON.parse(readFileSync(resultsPath, "utf-8")) as { results: BenchmarkResult[]; }; results = raw.results; console.log(`Loaded ${results.length} results from ${resultsPath}`); } else { - console.log("Running benchmark matrix for report (this takes ~30s)..."); - results = await runReportBench(); + // Median-of-N runs to stabilize per-fixture numbers against host jitter. + // Single-run measurements on small/fast fixtures (SimpleMessage, RPC + // envelopes) easily vary by 2-8x across back-to-back runs on an + // unpinned host; medians cancel that out. Override via + // BENCH_REPORT_RUNS env var (default 5, min 1). + const runsEnv = Number.parseInt(process.env.BENCH_REPORT_RUNS ?? "5", 10); + const runs = Number.isFinite(runsEnv) && runsEnv > 0 ? runsEnv : 5; + console.log( + `Running benchmark matrix for report (${runs} runs × ~30s each, median aggregated)...`, + ); + const runResults: BenchmarkResult[][] = []; + for (let i = 0; i < runs; i++) { + console.log(` run ${i + 1}/${runs}`); + runResults.push(await runReportBench()); + } + // Median per (fixture, encoder) pair. encodedSize is identical across + // runs for the same fixture/encoder, so first occurrence wins. 
+ const keyed = new Map(); + for (const run of runResults) { + for (const r of run) { + const key = `${r.fixture}::${r.encoder}`; + const bucket = keyed.get(key); + if (bucket) { + bucket.ops.push(r.opsPerSec); + } else { + keyed.set(key, { ops: [r.opsPerSec], encodedSize: r.encodedSize }); + } + } + } + const firstRunOrder = runResults[0]; + results = firstRunOrder.map((r) => { + const key = `${r.fixture}::${r.encoder}`; + const bucket = keyed.get(key); + const sorted = bucket ? [...bucket.ops].sort((a, b) => a - b) : []; + const median = + sorted.length === 0 ? 0 : sorted[Math.floor(sorted.length / 2)]; + return { + fixture: r.fixture, + encoder: r.encoder, + opsPerSec: median, + encodedSize: bucket?.encodedSize ?? r.encodedSize, + }; + }); const payload = { node: process.version, platform: `${process.platform}/${process.arch}`, timestamp: new Date().toISOString(), + runs, results, }; writeFileSync(resultsPath, `${JSON.stringify(payload, null, 2)}\n`); - console.log(`Wrote ${resultsPath}`); + console.log(`Wrote ${resultsPath} (median of ${runs} runs)`); } // Build outputs. The chart and table see identical inputs, so any @@ -265,12 +322,13 @@ const chart = generateBenchmarkChart(results); writeFileSync(chartPath, chart); console.log(`Wrote ${chartPath}`); -// Delta chart: linear-scale view of toBinaryFast's % improvement over -// toBinary per fixture, with an optional protobufjs comparison where the -// bar is available. Log-scale charts hide the absolute magnitude of the -// gain on shape-specific bars that already render close to each other on -// the main chart; the delta chart is the one consumers should look at -// when they want "how much faster, in plain terms". +// Delta chart: linear-scale view of the fork's `toBinary` (L0) % +// improvement over the upstream @bufbuild/protobuf baseline, with an +// optional protobufjs comparison where the bar is available. 
Log-scale +// charts hide the absolute magnitude of the gain on shape-specific bars +// that already render close to each other on the main chart; the delta +// chart is the one consumers should look at when they want "how much +// faster than original protobuf-es, in plain terms". const deltaChart = generateBenchmarkDeltaChart(results); writeFileSync(deltaChartPath, deltaChart); console.log(`Wrote ${deltaChartPath}`); diff --git a/benchmarks/src/verify-correctness.ts b/benchmarks/src/verify-correctness.ts deleted file mode 100644 index 78287b56a..000000000 --- a/benchmarks/src/verify-correctness.ts +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright 2021-2026 Buf Technologies, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Correctness check for the experimental `toBinaryFast` encoder. -// -// We don't claim byte-identical output against `toBinary` — repeated -// scalar ordering and presence-zero handling could legitimately differ -// on future descriptors. The load-bearing claim is *semantic* round-trip -// equivalence: decoding either encoding produces structurally-equal -// messages. This file exercises that on the OTel-shaped fixture used by -// the benchmarks. 
- -import assert from "node:assert/strict"; -import { toBinary, toBinaryFast, fromBinary } from "@bufbuild/protobuf"; -import { ExportTraceRequestSchema } from "./gen/nested_pb.js"; -import { SimpleMessageSchema } from "./gen/small_pb.js"; -import { buildExportTraceRequest, buildSmallMessage } from "./fixtures.js"; - -function summarize(label: string, slow: Uint8Array, fast: Uint8Array): void { - const byteMatch = - slow.length === fast.length && slow.every((b, i) => b === fast[i]); - console.log( - `[${label}] slow=${slow.length}B fast=${fast.length}B bytesIdentical=${byteMatch}`, - ); -} - -// OTel-shaped ExportTraceRequest with 100 spans. -{ - const msg = buildExportTraceRequest(); - const slow = toBinary(ExportTraceRequestSchema, msg); - const fast = toBinaryFast(ExportTraceRequestSchema, msg); - - // Decode both — require structural equality of the resulting messages. - const decodedSlow = fromBinary(ExportTraceRequestSchema, slow); - const decodedFast = fromBinary(ExportTraceRequestSchema, fast); - assert.deepStrictEqual( - decodedFast, - decodedSlow, - "toBinaryFast produced a payload that decodes differently than toBinary", - ); - summarize("ExportTraceRequest", slow, fast); -} - -// SimpleMessage (scalars only): ensures the flat-scalar path works. 
-{ - const msg = buildSmallMessage(); - const slow = toBinary(SimpleMessageSchema, msg); - const fast = toBinaryFast(SimpleMessageSchema, msg); - const decodedSlow = fromBinary(SimpleMessageSchema, slow); - const decodedFast = fromBinary(SimpleMessageSchema, fast); - assert.deepStrictEqual(decodedFast, decodedSlow); - summarize("SimpleMessage", slow, fast); -} - -console.log("\nOK — semantic round-trip verified for all fixtures"); diff --git a/package-lock.json b/package-lock.json index 9b8c175ab..d1ec7020a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -48,6 +48,9 @@ "tinybench": "^4.0.1", "tsx": "^4.21.0", "typescript": "^5.6.3" + }, + "devDependencies": { + "upstream-protobuf-es": "npm:@bufbuild/protobuf@^2.11.0" } }, "bun/conformance": { @@ -5569,6 +5572,14 @@ "resolved": "packages/upstream-protobuf", "link": true }, + "node_modules/upstream-protobuf-es": { + "name": "@bufbuild/protobuf", + "version": "2.11.0", + "resolved": "https://registry.npmjs.org/@bufbuild/protobuf/-/protobuf-2.11.0.tgz", + "integrity": "sha512-sBXGT13cpmPR5BMgHE6UEEfEaShh5Ror6rfN3yEK5si7QVrtZg8LEPQb0VVhiLRUslD2yLnXtnRzG035J/mZXQ==", + "dev": true, + "license": "(Apache-2.0 AND BSD-3-Clause)" + }, "node_modules/url": { "version": "0.11.4", "resolved": "https://registry.npmjs.org/url/-/url-0.11.4.tgz", diff --git a/packages/protobuf-test/src/correctness-matrix.test.ts b/packages/protobuf-test/src/correctness-matrix.test.ts index 22181502a..595e9e2b6 100644 --- a/packages/protobuf-test/src/correctness-matrix.test.ts +++ b/packages/protobuf-test/src/correctness-matrix.test.ts @@ -34,9 +34,10 @@ * * Encoder registry: * Add new encoders to the ENCODERS array below as they land on main. - * Currently only `toBinary` is available on main; toBinaryFast and - * schema-plan-specialized encoders will be added once those branches - * merge. + * Currently only `toBinary` ships on main (L0 contiguous writer). 
The + * experimental L1+L2 schema-plan encoder (`toBinaryFast`) lives on the + * `archive/l1-l2-schema-plans-experimental` branch for future iteration + * and is intentionally absent here. */ import { suite, test } from "node:test"; @@ -87,8 +88,8 @@ interface Fixture { const ENCODERS: readonly EncoderEntry[] = [ { name: "toBinary", encode: (schema, message) => toBinary(schema, message) }, - // Future additions (guarded until they land on main): - // { name: "toBinaryFast", encode: (schema, message) => toBinaryFast(schema, message) }, + // Future additions (held on branch until they land on main): + // { name: "toBinaryFast", encode: ... } // archive/l1-l2-schema-plans-experimental // { name: "toBinarySchemaPlan", encode: ... } ]; diff --git a/packages/protobuf-test/src/schema-plan-adaptive.test.ts b/packages/protobuf-test/src/schema-plan-adaptive.test.ts deleted file mode 100644 index 02b75c88d..000000000 --- a/packages/protobuf-test/src/schema-plan-adaptive.test.ts +++ /dev/null @@ -1,362 +0,0 @@ -// Copyright 2021-2026 Buf Technologies, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// L3 runtime monomorphization tests. The load-bearing claims: -// -// 1) The shape observer graduates a variant after `L3_WARMUP` repeats of -// the same shape and that variant produces byte-identical output to -// both `toBinaryFast` (generic L1+L2) and `toBinary` (reflective). 
-// 2) The variant cap (`L3_VARIANT_CAP = 4`) seals the record when a 5th -// distinct shape asks for graduation; subsequent novel shapes never -// re-trigger graduation. -// 3) Shape drift after seal routes back through the generic plan and -// remains byte-parity correct. -// 4) Mode B (`new Function()` executor) is gated behind the opt-in flag -// and produces output byte-identical to Mode A for the same shape. - -import { suite, test } from "node:test"; -import * as assert from "node:assert"; -import { create, toBinary, toBinaryFast, protoInt64 } from "@bufbuild/protobuf"; -import { - getOrCreateVariants, - computeShapeHash, - L3_WARMUP, - L3_VARIANT_CAP, -} from "@bufbuild/protobuf/wire/schema-plan-adaptive"; - -import { ScalarValuesMessageSchema } from "./gen/ts/extra/msg-scalar_pb.js"; -import { - OneofMessageSchema, - OneofMessageFooSchema, -} from "./gen/ts/extra/msg-oneof_pb.js"; - -// Re-use `ScalarValuesMessageSchema` which carries a wide mix of scalar -// types and explicit/implicit presence. Each test must construct a fresh -// schema reference to get a clean observer record — we rely on WeakMap -// keying by schema identity and this file always keys off the imported -// schema object. To reset state between tests we call `getOrCreateVariants` -// and clear the observer in-place via its public mutable fields. 
-function resetObserver(desc: Parameters[0]): void { - const rec = getOrCreateVariants(desc); - (rec as { sealed: boolean }).sealed = false; - (rec as { observationCount: number }).observationCount = 0; - rec.shapeCounter.clear(); - rec.variants.clear(); -} - -void suite("L3 schema-plan-adaptive", () => { - void suite("shape hashing", () => { - test("distinct presence patterns produce distinct bigint signatures", () => { - const a = create(ScalarValuesMessageSchema, { doubleField: 1 }); - const b = create(ScalarValuesMessageSchema, { stringField: "hi" }); - const hA = computeShapeHash( - ScalarValuesMessageSchema, - a as unknown as Record, - ); - const hB = computeShapeHash( - ScalarValuesMessageSchema, - b as unknown as Record, - ); - assert.notStrictEqual(hA, hB); - assert.ok(typeof hA === "bigint"); - assert.ok(typeof hB === "bigint"); - }); - - test("same presence pattern yields same signature regardless of values", () => { - const a = create(ScalarValuesMessageSchema, { int32Field: 1 }); - const b = create(ScalarValuesMessageSchema, { int32Field: 999 }); - assert.strictEqual( - computeShapeHash( - ScalarValuesMessageSchema, - a as unknown as Record, - ), - computeShapeHash( - ScalarValuesMessageSchema, - b as unknown as Record, - ), - ); - }); - - test("oneof arms are distinct signatures", () => { - const strArm = create(OneofMessageSchema, { - scalar: { case: "error", value: "oops" }, - }); - const intArm = create(OneofMessageSchema, { - scalar: { case: "value", value: 7 }, - }); - const hStr = computeShapeHash( - OneofMessageSchema, - strArm as unknown as Record, - ); - const hInt = computeShapeHash( - OneofMessageSchema, - intArm as unknown as Record, - ); - assert.notStrictEqual(hStr, hInt); - }); - }); - - void suite("graduation", () => { - test("same shape graduates after L3_WARMUP encodes", () => { - resetObserver(ScalarValuesMessageSchema); - const msg = create(ScalarValuesMessageSchema, { - doubleField: 1.5, - int32Field: 42, - }); - // Before 
graduation: observationCount accrues, no variants. - for (let i = 0; i < L3_WARMUP - 1; i++) { - toBinaryFast(ScalarValuesMessageSchema, msg, { adaptive: true }); - } - let rec = getOrCreateVariants(ScalarValuesMessageSchema); - assert.strictEqual(rec.variants.size, 0); - assert.strictEqual( - rec.observationCount, - L3_WARMUP - 1, - "pre-graduation observation count", - ); - - // N-th call crosses the threshold and graduates. - toBinaryFast(ScalarValuesMessageSchema, msg, { adaptive: true }); - rec = getOrCreateVariants(ScalarValuesMessageSchema); - assert.strictEqual(rec.variants.size, 1); - assert.strictEqual(rec.shapeCounter.size, 0); - }); - - test("variant encodes byte-identical to generic plan", () => { - resetObserver(ScalarValuesMessageSchema); - const msg = create(ScalarValuesMessageSchema, { - doubleField: 3.14, - stringField: "hello", - int64Field: protoInt64.parse("9000000000"), - }); - // Warmup past graduation. - for (let i = 0; i < L3_WARMUP; i++) { - toBinaryFast(ScalarValuesMessageSchema, msg, { adaptive: true }); - } - // This call lands on the variant plan. - const viaVariant = toBinaryFast(ScalarValuesMessageSchema, msg, { - adaptive: true, - }); - const viaGeneric = toBinaryFast(ScalarValuesMessageSchema, msg); - const viaReflective = toBinary(ScalarValuesMessageSchema, msg); - assert.deepStrictEqual(Array.from(viaVariant), Array.from(viaGeneric)); - assert.deepStrictEqual(Array.from(viaVariant), Array.from(viaReflective)); - }); - }); - - void suite("variant cap", () => { - test("5th distinct shape seals the record", () => { - resetObserver(ScalarValuesMessageSchema); - // Shape 1..4 — graduate each. 
- const shapes = [ - create(ScalarValuesMessageSchema, { doubleField: 1 }), - create(ScalarValuesMessageSchema, { stringField: "a" }), - create(ScalarValuesMessageSchema, { int32Field: 1 }), - create(ScalarValuesMessageSchema, { int64Field: protoInt64.parse(1) }), - ]; - for (const shape of shapes) { - for (let i = 0; i < L3_WARMUP; i++) { - toBinaryFast(ScalarValuesMessageSchema, shape, { adaptive: true }); - } - } - const rec1 = getOrCreateVariants(ScalarValuesMessageSchema); - assert.strictEqual( - rec1.variants.size, - L3_VARIANT_CAP, - "expected 4 graduated variants", - ); - assert.strictEqual(rec1.sealed, false); - - // Shape 5 — attempt to graduate should seal. - const shape5 = create(ScalarValuesMessageSchema, { boolField: true }); - for (let i = 0; i < L3_WARMUP; i++) { - toBinaryFast(ScalarValuesMessageSchema, shape5, { adaptive: true }); - } - const rec2 = getOrCreateVariants(ScalarValuesMessageSchema); - assert.strictEqual(rec2.sealed, true, "record must seal on 5th shape"); - assert.strictEqual( - rec2.variants.size, - L3_VARIANT_CAP, - "no new variant is added on seal", - ); - }); - - test("post-seal novel shapes still encode byte-parity", () => { - resetObserver(ScalarValuesMessageSchema); - // Graduate 4 shapes then trigger seal. - const shapes = [ - create(ScalarValuesMessageSchema, { doubleField: 1 }), - create(ScalarValuesMessageSchema, { stringField: "a" }), - create(ScalarValuesMessageSchema, { int32Field: 1 }), - create(ScalarValuesMessageSchema, { int64Field: protoInt64.parse(1) }), - create(ScalarValuesMessageSchema, { boolField: true }), - ]; - for (const shape of shapes) { - for (let i = 0; i < L3_WARMUP; i++) { - toBinaryFast(ScalarValuesMessageSchema, shape, { adaptive: true }); - } - } - assert.strictEqual( - getOrCreateVariants(ScalarValuesMessageSchema).sealed, - true, - ); - - // Previously-graduated shapes still route to variants (and stay correct). 
- for (const shape of shapes.slice(0, 4)) { - const adaptive = toBinaryFast(ScalarValuesMessageSchema, shape, { - adaptive: true, - }); - const reflective = toBinary(ScalarValuesMessageSchema, shape); - assert.deepStrictEqual(Array.from(adaptive), Array.from(reflective)); - } - - // Novel post-seal shapes go through generic — still correct. - const novel = create(ScalarValuesMessageSchema, { - uint32Field: 12345, - floatField: 2.5, - }); - const adaptive = toBinaryFast(ScalarValuesMessageSchema, novel, { - adaptive: true, - }); - const reflective = toBinary(ScalarValuesMessageSchema, novel); - assert.deepStrictEqual(Array.from(adaptive), Array.from(reflective)); - }); - }); - - void suite("shape drift", () => { - test("value changes within same shape keep variant stable", () => { - resetObserver(ScalarValuesMessageSchema); - // Graduate a shape with two scalars. - const warm = create(ScalarValuesMessageSchema, { - doubleField: 1, - stringField: "one", - }); - for (let i = 0; i < L3_WARMUP; i++) { - toBinaryFast(ScalarValuesMessageSchema, warm, { adaptive: true }); - } - assert.strictEqual( - getOrCreateVariants(ScalarValuesMessageSchema).variants.size, - 1, - ); - - // Drift: same shape, different values. Expect variant hit + parity. - const drift = create(ScalarValuesMessageSchema, { - doubleField: 999, - stringField: "two", - }); - const adaptive = toBinaryFast(ScalarValuesMessageSchema, drift, { - adaptive: true, - }); - const reflective = toBinary(ScalarValuesMessageSchema, drift); - assert.deepStrictEqual(Array.from(adaptive), Array.from(reflective)); - // No new graduation — still 1 variant, no counter entries. 
- const rec = getOrCreateVariants(ScalarValuesMessageSchema); - assert.strictEqual(rec.variants.size, 1); - }); - }); - - void suite("oneof parity under L3", () => { - test("two oneof arms graduate as two variants", () => { - resetObserver(OneofMessageSchema); - const foo = create(OneofMessageSchema, { - scalar: { case: "value", value: 99 }, - }); - const bar = create(OneofMessageSchema, { - scalar: { case: "error", value: "boom" }, - }); - for (let i = 0; i < L3_WARMUP; i++) { - toBinaryFast(OneofMessageSchema, foo, { adaptive: true }); - toBinaryFast(OneofMessageSchema, bar, { adaptive: true }); - } - const rec = getOrCreateVariants(OneofMessageSchema); - assert.strictEqual( - rec.variants.size, - 2, - "expected one variant per oneof arm", - ); - assert.deepStrictEqual( - Array.from(toBinaryFast(OneofMessageSchema, foo, { adaptive: true })), - Array.from(toBinary(OneofMessageSchema, foo)), - ); - assert.deepStrictEqual( - Array.from(toBinaryFast(OneofMessageSchema, bar, { adaptive: true })), - Array.from(toBinary(OneofMessageSchema, bar)), - ); - }); - - test("message oneof arm graduates correctly", () => { - resetObserver(OneofMessageSchema); - const msg = create(OneofMessageSchema, { - message: { - case: "foo", - value: create(OneofMessageFooSchema, { name: "alpha" }), - }, - }); - for (let i = 0; i < L3_WARMUP + 2; i++) { - toBinaryFast(OneofMessageSchema, msg, { adaptive: true }); - } - assert.strictEqual( - getOrCreateVariants(OneofMessageSchema).variants.size, - 1, - ); - assert.deepStrictEqual( - Array.from(toBinaryFast(OneofMessageSchema, msg, { adaptive: true })), - Array.from(toBinary(OneofMessageSchema, msg)), - ); - }); - }); - - void suite("Mode B codegen executor (opt-in)", () => { - test("new Function() variant produces byte-identical output", () => { - const flag = Symbol.for("@bufbuild/protobuf.adaptive-codegen"); - const g = globalThis as Record; - const prev = g[flag]; - g[flag] = true; - try { - resetObserver(ScalarValuesMessageSchema); - 
const msg = create(ScalarValuesMessageSchema, { - doubleField: 2.5, - stringField: "codegen", - int32Field: -7, - }); - // Graduate. - for (let i = 0; i < L3_WARMUP; i++) { - toBinaryFast(ScalarValuesMessageSchema, msg, { adaptive: true }); - } - const rec = getOrCreateVariants(ScalarValuesMessageSchema); - const [variant] = Array.from(rec.variants.values()); - assert.ok(variant); - assert.strictEqual( - variant.codegen, - true, - "Mode B flag must produce a codegen variant", - ); - const viaVariant = toBinaryFast(ScalarValuesMessageSchema, msg, { - adaptive: true, - }); - const viaReflective = toBinary(ScalarValuesMessageSchema, msg); - assert.deepStrictEqual( - Array.from(viaVariant), - Array.from(viaReflective), - ); - } finally { - if (prev === undefined) { - delete g[flag]; - } else { - g[flag] = prev; - } - } - }); - }); -}); diff --git a/packages/protobuf-test/src/to-binary-fast.test.ts b/packages/protobuf-test/src/to-binary-fast.test.ts deleted file mode 100644 index 9bb48630e..000000000 --- a/packages/protobuf-test/src/to-binary-fast.test.ts +++ /dev/null @@ -1,254 +0,0 @@ -// Copyright 2021-2026 Buf Technologies, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Feature-coverage tests for the experimental `toBinaryFast` encoder. 
-// The load-bearing claim is byte-identical output against the reflective -// `toBinary` for the feature surfaces the fast path claims to handle — -// maps (every legal K, every legal V) and oneofs (scalar, message, enum). -// Semantic round-trip is also asserted as a defense-in-depth check. - -import { suite, test } from "node:test"; -import * as assert from "node:assert"; -import { - create, - toBinary, - toBinaryFast, - fromBinary, - protoInt64, -} from "@bufbuild/protobuf"; -import { MapsMessageSchema, MapsEnum } from "./gen/ts/extra/msg-maps_pb.js"; -import { - OneofMessageSchema, - OneofMessageFooSchema, - OneofMessageBarSchema, - OneofEnum, -} from "./gen/ts/extra/msg-oneof_pb.js"; -import { ScalarValuesMessageSchema } from "./gen/ts/extra/msg-scalar_pb.js"; - -void suite("toBinaryFast", () => { - void suite("map field parity", () => { - test("map with scalar/bytes values", () => { - const msg = create(MapsMessageSchema, { - strStrField: { a: "alpha", b: "beta", c: "gamma" }, - strInt32Field: { a: 1, b: -2, c: 0x7fff_ffff }, - strInt64Field: { - a: protoInt64.parse(1), - // Literal `n` requires ES2020; this package is compiled for ES2017. 
- b: protoInt64.parse(BigInt("-9007199254740993")), - }, - strBoolField: { true_key: true, false_key: false }, - strBytesField: { - a: new Uint8Array([0, 1, 2, 3]), - b: new Uint8Array([0xff, 0xfe]), - }, - }); - const slow = toBinary(MapsMessageSchema, msg); - const fast = toBinaryFast(MapsMessageSchema, msg); - assert.deepStrictEqual( - Array.from(fast), - Array.from(slow), - "byte-identical expected for string-keyed maps", - ); - assert.deepStrictEqual( - fromBinary(MapsMessageSchema, fast), - fromBinary(MapsMessageSchema, slow), - ); - }); - - test("map and map keys parse and encode", () => { - const msg = create(MapsMessageSchema, { - int32StrField: { 1: "one", [-2]: "neg-two", 100: "hundred" }, - int64StrField: { - "1": "one", - "-2": "neg-two", - "9007199254740993": "big", - }, - }); - const slow = toBinary(MapsMessageSchema, msg); - const fast = toBinaryFast(MapsMessageSchema, msg); - // Byte-identical requires same field ordering and same map iteration - // order. Both encoders iterate descriptor order + Object.keys order, - // so parity should hold. 
- assert.deepStrictEqual(Array.from(fast), Array.from(slow)); - assert.deepStrictEqual( - fromBinary(MapsMessageSchema, fast), - fromBinary(MapsMessageSchema, slow), - ); - }); - - test("map", () => { - const msg = create(MapsMessageSchema, { - boolStrField: { true: "yes", false: "no" }, - }); - const slow = toBinary(MapsMessageSchema, msg); - const fast = toBinaryFast(MapsMessageSchema, msg); - assert.deepStrictEqual(Array.from(fast), Array.from(slow)); - }); - - test("map<*,message> encodes the value submessage", () => { - const inner = create(MapsMessageSchema, { - strStrField: { nested: "ok" }, - }); - const msg = create(MapsMessageSchema, { - strMsgField: { first: inner, second: inner }, - int32MsgField: { 1: inner, 2: inner }, - }); - const slow = toBinary(MapsMessageSchema, msg); - const fast = toBinaryFast(MapsMessageSchema, msg); - assert.deepStrictEqual(Array.from(fast), Array.from(slow)); - assert.deepStrictEqual( - fromBinary(MapsMessageSchema, fast), - fromBinary(MapsMessageSchema, slow), - ); - }); - - test("map<*,enum>", () => { - const msg = create(MapsMessageSchema, { - strEnuField: { a: MapsEnum.YES, b: MapsEnum.NO }, - int32EnuField: { 1: MapsEnum.YES, 2: MapsEnum.NO }, - }); - const slow = toBinary(MapsMessageSchema, msg); - const fast = toBinaryFast(MapsMessageSchema, msg); - assert.deepStrictEqual(Array.from(fast), Array.from(slow)); - }); - - test("empty maps do not emit anything", () => { - const msg = create(MapsMessageSchema, {}); - const slow = toBinary(MapsMessageSchema, msg); - const fast = toBinaryFast(MapsMessageSchema, msg); - assert.strictEqual(fast.length, 0); - assert.deepStrictEqual(Array.from(fast), Array.from(slow)); - }); - }); - - void suite("oneof parity", () => { - test("scalar oneof — int value case", () => { - const msg = create(OneofMessageSchema, { - scalar: { case: "value", value: 42 }, - }); - const slow = toBinary(OneofMessageSchema, msg); - const fast = toBinaryFast(OneofMessageSchema, msg); - 
assert.deepStrictEqual(Array.from(fast), Array.from(slow)); - }); - - test("scalar oneof — zero value must still be emitted", () => { - // Oneof presence is carried by the discriminator, so a `value: 0` - // case is *still* considered set. This is the tricky corner that - // the fast-path oneof dispatch has to get right (a non-oneof - // IMPLICIT int with value 0 would be omitted). - const msg = create(OneofMessageSchema, { - scalar: { case: "value", value: 0 }, - }); - const slow = toBinary(OneofMessageSchema, msg); - const fast = toBinaryFast(OneofMessageSchema, msg); - assert.deepStrictEqual(Array.from(fast), Array.from(slow)); - assert.ok(fast.length > 0, "expected tag+value for zero-valued oneof"); - }); - - test("scalar oneof — string case with empty string", () => { - const msg = create(OneofMessageSchema, { - scalar: { case: "error", value: "" }, - }); - const slow = toBinary(OneofMessageSchema, msg); - const fast = toBinaryFast(OneofMessageSchema, msg); - assert.deepStrictEqual(Array.from(fast), Array.from(slow)); - }); - - test("scalar oneof — bytes case", () => { - const msg = create(OneofMessageSchema, { - scalar: { case: "bytes", value: new Uint8Array([1, 2, 3, 255]) }, - }); - const slow = toBinary(OneofMessageSchema, msg); - const fast = toBinaryFast(OneofMessageSchema, msg); - assert.deepStrictEqual(Array.from(fast), Array.from(slow)); - }); - - test("message oneof — foo case", () => { - const foo = create(OneofMessageFooSchema, { - name: "hello", - toggle: true, - }); - const msg = create(OneofMessageSchema, { - message: { case: "foo", value: foo }, - }); - const slow = toBinary(OneofMessageSchema, msg); - const fast = toBinaryFast(OneofMessageSchema, msg); - assert.deepStrictEqual(Array.from(fast), Array.from(slow)); - }); - - test("message oneof — bar case", () => { - const bar = create(OneofMessageBarSchema, { a: 3, b: 4 }); - const msg = create(OneofMessageSchema, { - message: { case: "bar", value: bar }, - }); - const slow = 
toBinary(OneofMessageSchema, msg); - const fast = toBinaryFast(OneofMessageSchema, msg); - assert.deepStrictEqual(Array.from(fast), Array.from(slow)); - }); - - test("enum oneof", () => { - const msg = create(OneofMessageSchema, { - enum: { case: "e", value: OneofEnum.A }, - }); - const slow = toBinary(OneofMessageSchema, msg); - const fast = toBinaryFast(OneofMessageSchema, msg); - assert.deepStrictEqual(Array.from(fast), Array.from(slow)); - }); - - test("multiple oneof groups each contribute their selected case", () => { - const foo = create(OneofMessageFooSchema, { name: "n", toggle: false }); - const msg = create(OneofMessageSchema, { - scalar: { case: "value", value: 7 }, - message: { case: "foo", value: foo }, - enum: { case: "e", value: OneofEnum.B }, - }); - const slow = toBinary(OneofMessageSchema, msg); - const fast = toBinaryFast(OneofMessageSchema, msg); - assert.deepStrictEqual(Array.from(fast), Array.from(slow)); - }); - - test("empty oneofs emit nothing", () => { - const msg = create(OneofMessageSchema, {}); - const slow = toBinary(OneofMessageSchema, msg); - const fast = toBinaryFast(OneofMessageSchema, msg); - assert.strictEqual(fast.length, 0); - assert.deepStrictEqual(Array.from(fast), Array.from(slow)); - }); - }); - - void suite("regression — scalars still match", () => { - test("ScalarValuesMessage parity", () => { - const msg = create(ScalarValuesMessageSchema, { - doubleField: 0.75, - floatField: -0.75, - int64Field: protoInt64.parse(-1), - uint64Field: protoInt64.uParse(1), - int32Field: -123, - fixed64Field: protoInt64.uParse(1), - fixed32Field: 123, - boolField: true, - stringField: "hello world", - bytesField: new Uint8Array([1, 2, 3]), - uint32Field: 42, - sfixed32Field: -42, - sfixed64Field: protoInt64.parse(-42), - sint32Field: -42, - sint64Field: protoInt64.parse(-42), - }); - const slow = toBinary(ScalarValuesMessageSchema, msg); - const fast = toBinaryFast(ScalarValuesMessageSchema, msg); - assert.deepStrictEqual(Array.from(fast), 
Array.from(slow)); - }); - }); -}); diff --git a/packages/protobuf/package.json b/packages/protobuf/package.json index 99e59c878..07cd0b85a 100644 --- a/packages/protobuf/package.json +++ b/packages/protobuf/package.json @@ -50,10 +50,6 @@ "./wire": { "import": "./dist/esm/wire/index.js", "require": "./dist/cjs/wire/index.js" - }, - "./wire/schema-plan-adaptive": { - "import": "./dist/esm/wire/schema-plan-adaptive.js", - "require": "./dist/cjs/wire/schema-plan-adaptive.js" } }, "typesVersions": { @@ -62,8 +58,7 @@ "codegenv2": ["./dist/cjs/codegenv2/index.d.ts"], "reflect": ["./dist/cjs/reflect/index.d.ts"], "wkt": ["./dist/cjs/wkt/index.d.ts"], - "wire": ["./dist/cjs/wire/index.d.ts"], - "wire/schema-plan-adaptive": ["./dist/cjs/wire/schema-plan-adaptive.d.ts"] + "wire": ["./dist/cjs/wire/index.d.ts"] } }, "devDependencies": { diff --git a/packages/protobuf/src/index.ts b/packages/protobuf/src/index.ts index f984d3f69..433b3bf7c 100644 --- a/packages/protobuf/src/index.ts +++ b/packages/protobuf/src/index.ts @@ -23,7 +23,6 @@ export * from "./registry.js"; export type { JsonValue, JsonObject } from "./json-value.js"; export { toBinary } from "./to-binary.js"; export type { BinaryWriteOptions } from "./to-binary.js"; -export { toBinaryFast } from "./to-binary-fast.js"; export { fromBinary, mergeFromBinary } from "./from-binary.js"; export type { BinaryReadOptions } from "./from-binary.js"; export * from "./to-json.js"; diff --git a/packages/protobuf/src/to-binary-fast.ts b/packages/protobuf/src/to-binary-fast.ts deleted file mode 100644 index 037b017b7..000000000 --- a/packages/protobuf/src/to-binary-fast.ts +++ /dev/null @@ -1,1059 +0,0 @@ -// Copyright 2021-2026 Buf Technologies, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Experimental opt-in fast-path encoder. -// -// Pattern adapted from open-telemetry/opentelemetry-js#6390 (the -// ProtobufLogsSerializer in @opentelemetry/otlp-transformer), ported to -// protobuf-es' reflective encode. The existing BinaryWriter relies on -// fork/join per length-delimited field — every nested message and every -// packed repeated field pushes its accumulator onto a stack, serializes -// into its own list of chunks, then re-emits the length prefix and -// concatenates. On OTel-shaped workloads (deeply nested ResourceSpans → -// ScopeSpans → Span → KeyValue) that produces a lot of small allocations -// and a double copy on `finish()`. -// -// `toBinaryFast` instead makes two passes: -// 1) estimate the exact encoded size of every field by walking the -// message graph and accumulating bytes-needed; -// 2) allocate a single Uint8Array of that size and write bytes into it -// at fixed offsets. -// -// Because the estimate is exact, the write pass never reallocates, never -// copies, and never needs to stack fork/join state. Length prefixes are -// computed during pass 1 and cached so that pass 2 can write the varint -// before it descends into the submessage. The entire hot path lives in a -// single tight loop with no intermediate `Uint8Array`/`number[]` objects -// per field. 
-// -// Scope: -// - supported: scalar fields (all 15 types), enums, nested messages, -// repeated scalar (packed + unpacked), repeated message, -// map for all legal K and any scalar/enum/message V, -// oneof groups -// - unsupported: extensions, delimited/group encoding, unknown fields -// -// For unsupported schemas `toBinaryFast` falls back to the existing -// reflective `toBinary`. The decision is computed once per `DescMessage` -// and cached in a `WeakMap`, so the fallback check does not dominate the -// hot path after the first call. -// -// Output is semantic-identical to `toBinary`: `fromBinary(schema, -// toBinaryFast(schema, msg))` and `fromBinary(schema, toBinary(schema, -// msg))` produce structurally-equal messages. Byte-identical output is -// not guaranteed (field ordering matches descriptor order, which matches -// `toBinary`'s non-unknown path, but future tweaks may diverge). - -import type { MessageShape } from "./types.js"; -import { - ScalarType, - type DescField, - type DescMessage, - type DescOneof, -} from "./descriptors.js"; -import { protoInt64 } from "./proto-int64.js"; -import { toBinary } from "./to-binary.js"; -import { getTextEncoding } from "./wire/text-encoding.js"; -import { - selectOrObserve, - type VariantHelpers, -} from "./wire/schema-plan-adaptive.js"; - -// ----------------------------------------------------------------------------- -// Support detection -// ----------------------------------------------------------------------------- - -const supportCache = new WeakMap(); - -// `0n` requires target >= ES2020, but this package is compiled for ES2017. -// Materialize the bigint zero once at module load so closures can compare -// against it without the BigInt() call on the hot path. Marked PURE so -// unused-path eliminators (esbuild, Rollup, Terser) can drop this module -// when toBinaryFast is never referenced. 
-const BIGINT_ZERO = /*@__PURE__*/ BigInt(0); - -/** - * Walk the descriptor (including transitive message fields) and return - * true iff every field in the subtree uses an MVP-supported shape. The - * result is cached per `DescMessage` — most schemas have small, bounded - * field trees and the walk is cheap but not free, so we amortize. - */ -function isSupported( - desc: DescMessage, - visiting: Set = new Set(), -): boolean { - const cached = supportCache.get(desc); - if (cached !== undefined) return cached; - // Guard against recursive message types (e.g. google.protobuf.Value). - // While a cycle is in flight we optimistically assume support; if a - // descendant turns out to be unsupported, we overwrite the cache - // entry below. - if (visiting.has(desc)) return true; - visiting.add(desc); - - let ok = true; - for (const field of desc.fields) { - // Delimited (group) encoding is not handled — the legacy wire format - // requires paired start/end tags which don't fit the single-pass - // write model. Map fields and message-typed map values cannot use - // delimited encoding (enforced by the descriptor), so we only need - // to check singular messages and repeated messages. - if ( - (field.fieldKind === "message" || - (field.fieldKind === "list" && field.listKind === "message")) && - (field as { delimitedEncoding?: boolean }).delimitedEncoding === true - ) { - ok = false; - break; - } - // Recurse into message fields. - if (field.fieldKind === "message" && field.message) { - if (!isSupported(field.message, visiting)) { - ok = false; - break; - } - } - if ( - field.fieldKind === "list" && - field.listKind === "message" && - field.message - ) { - if (!isSupported(field.message, visiting)) { - ok = false; - break; - } - } - // Recurse into map value messages. 
- if ( - field.fieldKind === "map" && - field.mapKind === "message" && - field.message - ) { - if (!isSupported(field.message, visiting)) { - ok = false; - break; - } - } - } - visiting.delete(desc); - supportCache.set(desc, ok); - return ok; -} - -// ----------------------------------------------------------------------------- -// Wire format helpers -// ----------------------------------------------------------------------------- - -const WIRE_VARINT = 0; -const WIRE_BIT64 = 1; -const WIRE_LENGTH_DELIMITED = 2; -const WIRE_BIT32 = 5; - -/** Size in bytes of an unsigned 32-bit varint. */ -function varintSize32(v: number): number { - if (v < 0x80) return 1; - if (v < 0x4000) return 2; - if (v < 0x200000) return 3; - if (v < 0x10000000) return 4; - return 5; -} - -/** Size in bytes of an int32 varint (negatives use 10 bytes). */ -function int32Size(v: number): number { - if (v < 0) return 10; - return varintSize32(v); -} - -/** Size of a zigzag-encoded 32-bit signed integer. */ -function sint32Size(v: number): number { - return varintSize32(((v << 1) ^ (v >> 31)) >>> 0); -} - -/** - * Size of a 64-bit varint given its (lo, hi) two's-complement halves. - * The varint writer emits while (hi > 0 || lo > 0x7f) and then one more - * byte, so we count in 7-bit chunks across the 64 bits. - */ -function varintSize64(lo: number, hi: number): number { - // Normalize to uint32. - let l = lo >>> 0; - let h = hi >>> 0; - let bytes = 1; - while (h > 0 || l > 0x7f) { - bytes++; - l = ((l >>> 7) | (h << 25)) >>> 0; - h >>>= 7; - } - return bytes; -} - -function tagSize(fieldNo: number, wireType: number): number { - return varintSize32(((fieldNo << 3) | wireType) >>> 0); -} - -/** - * UTF-8 byte length of a JS string without encoding. Mirrors the helper - * used in opentelemetry-js#6390 — correct for valid UTF-16 input (which - * all JS strings are). Surrogate pairs contribute 4 bytes. 
- */ -function utf8ByteLength(str: string): number { - const len = str.length; - let byteLen = 0; - for (let i = 0; i < len; i++) { - const code = str.charCodeAt(i); - if (code < 0x80) { - byteLen += 1; - } else if (code < 0x800) { - byteLen += 2; - } else if (code < 0xd800 || code >= 0xe000) { - byteLen += 3; - } else { - // Lead of a surrogate pair — skip the trail, account for 4 bytes. - i++; - byteLen += 4; - } - } - return byteLen; -} - -// ----------------------------------------------------------------------------- -// Encoded-size cache -// ----------------------------------------------------------------------------- -// -// We compute the size of each submessage exactly once (pass 1) and reuse -// that number in pass 2 to write the length prefix. A WeakMap keyed by -// the message object isolates this state to the current toBinaryFast call -// without leaking across calls (the map itself is scoped to one encode). - -type SizeMap = Map; - -// ----------------------------------------------------------------------------- -// Pass 1 — size estimation -// ----------------------------------------------------------------------------- - -function scalarSize(type: ScalarType, value: unknown): number { - switch (type) { - case ScalarType.STRING: { - const byteLen = utf8ByteLength(value as string); - return varintSize32(byteLen) + byteLen; - } - case ScalarType.BOOL: - return 1; - case ScalarType.DOUBLE: - return 8; - case ScalarType.FLOAT: - return 4; - case ScalarType.INT32: - return int32Size(value as number); - case ScalarType.UINT32: - return varintSize32((value as number) >>> 0); - case ScalarType.SINT32: - return sint32Size(value as number); - case ScalarType.FIXED32: - case ScalarType.SFIXED32: - return 4; - case ScalarType.INT64: - case ScalarType.UINT64: { - const tc = - type === ScalarType.UINT64 - ? 
protoInt64.uEnc(value as string | number | bigint) - : protoInt64.enc(value as string | number | bigint); - return varintSize64(tc.lo, tc.hi); - } - case ScalarType.SINT64: { - const tc = protoInt64.enc(value as string | number | bigint); - const sign = tc.hi >> 31; - const lo = (tc.lo << 1) ^ sign; - const hi = ((tc.hi << 1) | (tc.lo >>> 31)) ^ sign; - return varintSize64(lo, hi); - } - case ScalarType.FIXED64: - case ScalarType.SFIXED64: - return 8; - case ScalarType.BYTES: { - const b = value as Uint8Array; - return varintSize32(b.length) + b.length; - } - } - // Unreachable for well-formed descriptors; fall back to 0 so that - // misconfigured types don't silently corrupt the buffer — the size/ - // write mismatch assertion will catch it. - return 0; -} - -function scalarWireType(type: ScalarType): number { - switch (type) { - case ScalarType.BYTES: - case ScalarType.STRING: - return WIRE_LENGTH_DELIMITED; - case ScalarType.DOUBLE: - case ScalarType.FIXED64: - case ScalarType.SFIXED64: - return WIRE_BIT64; - case ScalarType.FIXED32: - case ScalarType.SFIXED32: - case ScalarType.FLOAT: - return WIRE_BIT32; - default: - return WIRE_VARINT; - } -} - -/** - * Should this non-oneof field be emitted for the given message? - * Oneof members are dispatched separately and never flow through this - * predicate. - */ -function isFieldSet(field: DescField, value: unknown): boolean { - // Explicit presence (proto2 / proto3 optional): the generated setters - // only assign when the property was set. Missing ⇒ undefined. - if (value === undefined || value === null) return false; - - // Implicit presence (proto3 singular scalar/enum): zero value means - // "not set" and must not be emitted. Lists/maps handled separately - // (empty list/map means "not set" too). - switch (field.fieldKind) { - case "scalar": { - const t = field.scalar; - if (field.presence !== 2 /* IMPLICIT */) { - // Explicit / legacy required: any defined value counts as set. 
- return true; - } - if (t === ScalarType.STRING) return (value as string).length > 0; - if (t === ScalarType.BYTES) return (value as Uint8Array).length > 0; - if (t === ScalarType.BOOL) return value === true; - if ( - t === ScalarType.INT64 || - t === ScalarType.UINT64 || - t === ScalarType.SINT64 || - t === ScalarType.FIXED64 || - t === ScalarType.SFIXED64 - ) { - // bigint zero, numeric zero, "0" string all represent unset. - // Compare via coercion so 0n / 0 / "0" all return false. - // Literal `0n` requires ES2020; see BIGINT_ZERO above. - return value !== 0 && value !== BIGINT_ZERO && value !== "0"; - } - return (value as number) !== 0; - } - case "enum": - if (field.presence !== 2 /* IMPLICIT */) return true; - return (value as number) !== 0; - case "message": - return true; // already filtered by undefined check above - case "list": - return (value as unknown[]).length > 0; - case "map": - // Map fields carry their own "any entry" gate here — empty object - // ⇒ not set ⇒ omit. Same semantics as reflect.unsafeIsSet. - return Object.keys(value as object).length > 0; - } - // Exhaustive switch; unreachable. Return true so unexpected shapes - // surface as a size/write mismatch error rather than silent data loss. - return true; -} - -// ----------------------------------------------------------------------------- -// Map key helpers -// ----------------------------------------------------------------------------- -// -// protobuf-es stores map fields as plain JS objects keyed by the stringified -// map key (see reflectMap.mapKeyToLocal). On the fast path we iterate -// `Object.keys`, so every key we see is a string. For integer and boolean -// map keys we parse back to the typed value before computing the scalar -// size or writing the scalar bytes — matching what the reflective encoder -// does via ReflectMap's iterator. 
- -type MapKeyScalar = Exclude< - ScalarType, - ScalarType.FLOAT | ScalarType.DOUBLE | ScalarType.BYTES ->; - -function coerceMapKey(stringKey: string, keyType: MapKeyScalar): unknown { - switch (keyType) { - case ScalarType.STRING: - return stringKey; - case ScalarType.BOOL: - // Object keys for boolean maps are always "true" / "false" strings. - return stringKey === "true"; - case ScalarType.INT64: - case ScalarType.SINT64: - case ScalarType.SFIXED64: - return protoInt64.parse(stringKey); - case ScalarType.UINT64: - case ScalarType.FIXED64: - return protoInt64.uParse(stringKey); - default: - // INT32, SINT32, FIXED32, SFIXED32, UINT32 — parse back to number. - return Number.parseInt(stringKey, 10); - } -} - -/** - * Body-size of a single map entry message `{ key, value }`, excluding - * the outer field tag and length prefix. Returns both the body size and, - * for message-typed values, the submessage body size (so the writer - * doesn't recompute it). - */ -function estimateMapEntryBody( - field: DescField & { fieldKind: "map" }, - keyTyped: unknown, - value: unknown, - sizes: SizeMap, -): { body: number; valueSubSize: number } { - // Entry key is always field number 1. - const keySize = - tagSize(1, scalarWireType(field.mapKey)) + - scalarSize(field.mapKey, keyTyped); - let valSize: number; - let valueSubSize = 0; - switch (field.mapKind) { - case "scalar": - valSize = - tagSize(2, scalarWireType(field.scalar)) + - scalarSize(field.scalar, value); - break; - case "enum": - valSize = tagSize(2, WIRE_VARINT) + int32Size(value as number); - break; - case "message": { - const sub = value as Record; - valueSubSize = estimateMessageSize(field.message, sub, sizes); - sizes.set(sub, valueSubSize); - valSize = - tagSize(2, WIRE_LENGTH_DELIMITED) + - varintSize32(valueSubSize) + - valueSubSize; - break; - } - } - return { body: keySize + valSize, valueSubSize }; -} - -/** - * Size contribution of a map field: for every entry, an outer tag + length - * prefix + entry body. 
Map entries are always length-delimited — map fields - * cannot use delimited (group) encoding. - */ -function estimateMapFieldSize( - field: DescField & { fieldKind: "map" }, - obj: Record, - sizes: SizeMap, -): number { - const tagBytes = tagSize(field.number, WIRE_LENGTH_DELIMITED); - let size = 0; - for (const strKey of Object.keys(obj)) { - const keyTyped = coerceMapKey(strKey, field.mapKey); - const { body } = estimateMapEntryBody(field, keyTyped, obj[strKey], sizes); - size += tagBytes + varintSize32(body) + body; - } - return size; -} - -/** - * Size contribution of a single non-oneof non-map "regular" field. Broken - * out so that the oneof dispatch can reuse the same switch. - */ -function estimateRegularFieldSize( - field: DescField, - value: unknown, - sizes: SizeMap, -): number { - switch (field.fieldKind) { - case "scalar": - return ( - tagSize(field.number, scalarWireType(field.scalar)) + - scalarSize(field.scalar, value) - ); - case "enum": - return tagSize(field.number, WIRE_VARINT) + int32Size(value as number); - case "message": { - const sub = value as Record; - const subSize = estimateMessageSize(field.message, sub, sizes); - sizes.set(sub, subSize); - return ( - tagSize(field.number, WIRE_LENGTH_DELIMITED) + - varintSize32(subSize) + - subSize - ); - } - case "list": { - const list = value as unknown[]; - let size = 0; - if (field.listKind === "message") { - const tagBytes = tagSize(field.number, WIRE_LENGTH_DELIMITED); - for (let k = 0; k < list.length; k++) { - const sub = list[k] as Record; - const subSize = estimateMessageSize(field.message, sub, sizes); - sizes.set(sub, subSize); - size += tagBytes + varintSize32(subSize) + subSize; - } - return size; - } - if (field.listKind === "enum") { - if (field.packed) { - let body = 0; - for (let k = 0; k < list.length; k++) { - body += int32Size(list[k] as number); - } - return ( - tagSize(field.number, WIRE_LENGTH_DELIMITED) + - varintSize32(body) + - body - ); - } - const tagBytes = 
tagSize(field.number, WIRE_VARINT); - for (let k = 0; k < list.length; k++) { - size += tagBytes + int32Size(list[k] as number); - } - return size; - } - // listKind === "scalar" - const t = field.scalar; - const wt = scalarWireType(t); - if (field.packed && wt !== WIRE_LENGTH_DELIMITED) { - let body = 0; - for (let k = 0; k < list.length; k++) { - body += scalarSize(t, list[k]); - } - return ( - tagSize(field.number, WIRE_LENGTH_DELIMITED) + - varintSize32(body) + - body - ); - } - const tagBytes = tagSize(field.number, wt); - for (let k = 0; k < list.length; k++) { - size += tagBytes + scalarSize(t, list[k]); - } - return size; - } - case "map": - // Map fields flow through estimateMapFieldSize; this branch is - // defensive and never taken on the estimation hot path. - return estimateMapFieldSize( - field as DescField & { fieldKind: "map" }, - value as Record, - sizes, - ); - } - return 0; -} - -function estimateMessageSize( - desc: DescMessage, - message: Record, - sizes: SizeMap, -): number { - let size = 0; - const fields = desc.fields; - for (let i = 0; i < fields.length; i++) { - const field = fields[i]; - // Oneof members are dispatched via the `desc.oneofs` loop below. - if (field.oneof !== undefined) continue; - - if (field.fieldKind === "map") { - const obj = message[field.localName] as - | Record - | undefined; - if (!obj || Object.keys(obj).length === 0) continue; - size += estimateMapFieldSize( - field as DescField & { fieldKind: "map" }, - obj, - sizes, - ); - continue; - } - - const value = message[field.localName]; - if (!isFieldSet(field, value)) continue; - size += estimateRegularFieldSize(field, value, sizes); - } - // Oneof dispatch: at most one field per oneof contributes, identified by - // the `case` discriminator on the oneof ADT object. Zero values are - // emitted when a oneof case is explicitly set — that's the whole point - // of the oneof: presence is carried by the discriminator, not by value. 
- const oneofs = desc.oneofs; - for (let i = 0; i < oneofs.length; i++) { - const oneof = oneofs[i]; - const adt = message[oneof.localName] as - | { case: string | undefined; value?: unknown } - | undefined; - if (!adt || adt.case === undefined) continue; - const selected = findOneofField(oneof, adt.case); - if (!selected) continue; - size += estimateRegularFieldSize(selected, adt.value, sizes); - } - return size; -} - -function findOneofField( - oneof: DescOneof, - caseName: string, -): DescField | undefined { - const fs = oneof.fields; - for (let i = 0; i < fs.length; i++) { - if (fs[i].localName === caseName) return fs[i]; - } - return undefined; -} - -// ----------------------------------------------------------------------------- -// Pass 2 — write into pre-allocated buffer -// ----------------------------------------------------------------------------- - -/** - * Writer state bundled into a plain object so that helper functions can - * mutate `pos` without paying for method-call indirection on a class. - */ -interface Cursor { - buf: Uint8Array; - view: DataView; - pos: number; - encodeUtf8: (s: string) => Uint8Array; -} - -function writeVarint32(c: Cursor, v: number): void { - // Callers pre-coerce to uint32 where needed. - while (v > 0x7f) { - c.buf[c.pos++] = (v & 0x7f) | 0x80; - v = v >>> 7; - } - c.buf[c.pos++] = v; -} - -function writeTag(c: Cursor, fieldNo: number, wireType: number): void { - writeVarint32(c, ((fieldNo << 3) | wireType) >>> 0); -} - -function writeVarint64(c: Cursor, lo: number, hi: number): void { - let l = lo >>> 0; - let h = hi >>> 0; - while (h > 0 || l > 0x7f) { - c.buf[c.pos++] = (l & 0x7f) | 0x80; - l = ((l >>> 7) | (h << 25)) >>> 0; - h >>>= 7; - } - c.buf[c.pos++] = l & 0x7f; -} - -function writeInt32(c: Cursor, v: number): void { - // Negative int32 is sign-extended to 64 bits and written as 10-byte varint. 
- if (v >= 0) { - writeVarint32(c, v); - } else { - writeVarint64(c, v | 0, -1); - } -} - -function writeSInt32(c: Cursor, v: number): void { - writeVarint32(c, ((v << 1) ^ (v >> 31)) >>> 0); -} - -function writeScalar(c: Cursor, type: ScalarType, value: unknown): void { - switch (type) { - case ScalarType.STRING: { - const s = value as string; - // ASCII fast path: write char codes directly; otherwise materialize - // via TextEncoder. Size was already accounted for. - let isAscii = true; - const len = s.length; - for (let i = 0; i < len; i++) { - if (s.charCodeAt(i) > 127) { - isAscii = false; - break; - } - } - if (isAscii) { - writeVarint32(c, len); - for (let i = 0; i < len; i++) { - c.buf[c.pos++] = s.charCodeAt(i); - } - } else { - const bytes = c.encodeUtf8(s); - writeVarint32(c, bytes.length); - c.buf.set(bytes, c.pos); - c.pos += bytes.length; - } - return; - } - case ScalarType.BOOL: - c.buf[c.pos++] = (value as boolean) ? 1 : 0; - return; - case ScalarType.DOUBLE: - c.view.setFloat64(c.pos, value as number, true); - c.pos += 8; - return; - case ScalarType.FLOAT: - c.view.setFloat32(c.pos, value as number, true); - c.pos += 4; - return; - case ScalarType.INT32: - writeInt32(c, value as number); - return; - case ScalarType.UINT32: - writeVarint32(c, (value as number) >>> 0); - return; - case ScalarType.SINT32: - writeSInt32(c, value as number); - return; - case ScalarType.FIXED32: - c.view.setUint32(c.pos, (value as number) >>> 0, true); - c.pos += 4; - return; - case ScalarType.SFIXED32: - c.view.setInt32(c.pos, value as number, true); - c.pos += 4; - return; - case ScalarType.INT64: - case ScalarType.UINT64: { - const tc = - type === ScalarType.UINT64 - ? 
protoInt64.uEnc(value as string | number | bigint) - : protoInt64.enc(value as string | number | bigint); - writeVarint64(c, tc.lo, tc.hi); - return; - } - case ScalarType.SINT64: { - const tc = protoInt64.enc(value as string | number | bigint); - const sign = tc.hi >> 31; - const lo = (tc.lo << 1) ^ sign; - const hi = ((tc.hi << 1) | (tc.lo >>> 31)) ^ sign; - writeVarint64(c, lo, hi); - return; - } - case ScalarType.FIXED64: { - const tc = protoInt64.uEnc(value as string | number | bigint); - c.view.setUint32(c.pos, tc.lo >>> 0, true); - c.view.setUint32(c.pos + 4, tc.hi >>> 0, true); - c.pos += 8; - return; - } - case ScalarType.SFIXED64: { - const tc = protoInt64.enc(value as string | number | bigint); - c.view.setInt32(c.pos, tc.lo | 0, true); - c.view.setInt32(c.pos + 4, tc.hi | 0, true); - c.pos += 8; - return; - } - case ScalarType.BYTES: { - const b = value as Uint8Array; - writeVarint32(c, b.length); - c.buf.set(b, c.pos); - c.pos += b.length; - return; - } - } -} - -function writeMapEntry( - c: Cursor, - field: DescField & { fieldKind: "map" }, - keyTyped: unknown, - value: unknown, - sizes: SizeMap, -): void { - // Entry key: field number 1. - writeTag(c, 1, scalarWireType(field.mapKey)); - writeScalar(c, field.mapKey, keyTyped); - // Entry value: field number 2. - switch (field.mapKind) { - case "scalar": - writeTag(c, 2, scalarWireType(field.scalar)); - writeScalar(c, field.scalar, value); - return; - case "enum": - writeTag(c, 2, WIRE_VARINT); - writeInt32(c, value as number); - return; - case "message": { - const sub = value as Record; - const subSize = sizes.get(sub) ?? 
0; - writeTag(c, 2, WIRE_LENGTH_DELIMITED); - writeVarint32(c, subSize); - writeMessageInto(c, field.message, sub, sizes); - return; - } - } -} - -function writeMapField( - c: Cursor, - field: DescField & { fieldKind: "map" }, - obj: Record, - sizes: SizeMap, -): void { - for (const strKey of Object.keys(obj)) { - const keyTyped = coerceMapKey(strKey, field.mapKey); - const value = obj[strKey]; - // Body size is recomputed here rather than cached because caching it - // per-entry would require either (1) a second identity-keyed cache - // separate from `sizes` or (2) wrapping each entry in a synthetic - // object. Recompute is cheap — scalar types only, except for the - // `value` submessage which reads from `sizes` anyway. - const { body } = estimateMapEntryBody(field, keyTyped, value, sizes); - writeTag(c, field.number, WIRE_LENGTH_DELIMITED); - writeVarint32(c, body); - writeMapEntry(c, field, keyTyped, value, sizes); - } -} - -/** - * Write one non-oneof non-map field. Matches estimateRegularFieldSize - * exactly so that pass 1 and pass 2 stay in sync. - */ -function writeRegularField( - c: Cursor, - field: DescField, - value: unknown, - sizes: SizeMap, -): void { - switch (field.fieldKind) { - case "scalar": - writeTag(c, field.number, scalarWireType(field.scalar)); - writeScalar(c, field.scalar, value); - return; - case "enum": - writeTag(c, field.number, WIRE_VARINT); - writeInt32(c, value as number); - return; - case "message": { - const sub = value as Record; - const subSize = sizes.get(sub) ?? 0; - writeTag(c, field.number, WIRE_LENGTH_DELIMITED); - writeVarint32(c, subSize); - writeMessageInto(c, field.message, sub, sizes); - return; - } - case "list": { - const list = value as unknown[]; - if (field.listKind === "message") { - for (let k = 0; k < list.length; k++) { - const sub = list[k] as Record; - const subSize = sizes.get(sub) ?? 
0; - writeTag(c, field.number, WIRE_LENGTH_DELIMITED); - writeVarint32(c, subSize); - writeMessageInto(c, field.message, sub, sizes); - } - return; - } - if (field.listKind === "enum") { - if (field.packed) { - let body = 0; - for (let k = 0; k < list.length; k++) { - body += int32Size(list[k] as number); - } - writeTag(c, field.number, WIRE_LENGTH_DELIMITED); - writeVarint32(c, body); - for (let k = 0; k < list.length; k++) { - writeInt32(c, list[k] as number); - } - return; - } - for (let k = 0; k < list.length; k++) { - writeTag(c, field.number, WIRE_VARINT); - writeInt32(c, list[k] as number); - } - return; - } - // scalar list - const t = field.scalar; - const wt = scalarWireType(t); - if (field.packed && wt !== WIRE_LENGTH_DELIMITED) { - let body = 0; - for (let k = 0; k < list.length; k++) { - body += scalarSize(t, list[k]); - } - writeTag(c, field.number, WIRE_LENGTH_DELIMITED); - writeVarint32(c, body); - for (let k = 0; k < list.length; k++) { - writeScalar(c, t, list[k]); - } - return; - } - for (let k = 0; k < list.length; k++) { - writeTag(c, field.number, wt); - writeScalar(c, t, list[k]); - } - return; - } - case "map": - // Map fields are dispatched through writeMapField from the caller; - // this branch is unreachable on the hot path but defensive. - writeMapField( - c, - field as DescField & { fieldKind: "map" }, - value as Record, - sizes, - ); - return; - } -} - -function writeMessageInto( - c: Cursor, - desc: DescMessage, - message: Record, - sizes: SizeMap, -): void { - const fields = desc.fields; - for (let i = 0; i < fields.length; i++) { - const field = fields[i]; - // Oneof members: dispatched via the oneof loop below. 
- if (field.oneof !== undefined) continue; - - if (field.fieldKind === "map") { - const obj = message[field.localName] as - | Record - | undefined; - if (!obj || Object.keys(obj).length === 0) continue; - writeMapField(c, field as DescField & { fieldKind: "map" }, obj, sizes); - continue; - } - - const value = message[field.localName]; - if (!isFieldSet(field, value)) continue; - writeRegularField(c, field, value, sizes); - } - const oneofs = desc.oneofs; - for (let i = 0; i < oneofs.length; i++) { - const oneof = oneofs[i]; - const adt = message[oneof.localName] as - | { case: string | undefined; value?: unknown } - | undefined; - if (!adt || adt.case === undefined) continue; - const selected = findOneofField(oneof, adt.case); - if (!selected) continue; - writeRegularField(c, selected, adt.value, sizes); - } -} - -// ----------------------------------------------------------------------------- -// Entry point -// ----------------------------------------------------------------------------- - -// ----------------------------------------------------------------------------- -// Adaptive (L3) glue -// ----------------------------------------------------------------------------- -// -// L3 is an opt-in overlay that observes message shapes per schema and -// graduates specialized per-shape plans after a warmup window. The generic -// L1+L2 estimate/write helpers above are exposed to L3 through -// `adaptiveHelpers` so that a variant plan's unrolled step list can call -// directly into them without re-entering the field-presence gate. -// -// Default: adaptive is off. Enable per-call via `{ adaptive: true }` or -// globally via `process.env.PROTOBUF_ES_L3 === "1"`. See -// `packages/protobuf/src/wire/schema-plan-adaptive.ts`. 
- -const adaptiveHelpers: VariantHelpers = { - estimateRegular: (field, value, sizes) => - estimateRegularFieldSize(field, value, sizes), - estimateMap: (field, obj, sizes) => - estimateMapFieldSize(field as DescField & { fieldKind: "map" }, obj, sizes), - writeRegular: (cursor, field, value, sizes) => - writeRegularField(cursor as Cursor, field, value, sizes), - writeMap: (cursor, field, obj, sizes) => - writeMapField( - cursor as Cursor, - field as DescField & { fieldKind: "map" }, - obj, - sizes, - ), -}; - -function adaptiveDefault(): boolean { - // Cross-runtime lookup avoids depending on @types/node in this package. - const g = globalThis as { - process?: { env?: Record }; - }; - return g.process?.env?.PROTOBUF_ES_L3 === "1"; -} - -/** - * Options accepted by {@link toBinaryFast}. - * - * `adaptive` turns on L3 runtime monomorphization: the encoder observes - * message shapes per schema and graduates specialized plans for the - * recurring ones (see `wire/schema-plan-adaptive.ts`). Default: false. - */ -export interface ToBinaryFastOptions { - adaptive?: boolean; -} - -/** - * Opt-in fast-path binary encoder. See the top-of-file comment for the - * motivation and scope. - * - * Falls back to {@link toBinary} when the schema uses features not yet - * supported by the fast path (extensions or delimited/group encoding). - * Unknown fields on messages are always dropped by the fast path — if - * you need to round-trip unknowns, use `toBinary` instead. - * - * @experimental This API is experimental and may change or be removed - * without notice. The intent is to explore whether a two-pass encode - * meaningfully improves OTel-shaped workloads; once stabilized, the - * improvement may fold into the default `toBinary`. 
- */ -export function toBinaryFast( - schema: Desc, - message: MessageShape, - options?: ToBinaryFastOptions, -): Uint8Array { - if (!isSupported(schema)) { - return toBinary(schema, message); - } - const msg = message as unknown as Record; - const adaptive = options?.adaptive ?? adaptiveDefault(); - - if (adaptive) { - const variant = selectOrObserve(schema, msg, adaptiveHelpers); - if (variant !== undefined) { - const sizes: SizeMap = new Map(); - const total = variant.estimate(msg, sizes); - const buf = new Uint8Array(total); - const cursor: Cursor = { - buf, - view: new DataView(buf.buffer, buf.byteOffset, buf.byteLength), - pos: 0, - encodeUtf8: getTextEncoding().encodeUtf8, - }; - variant.write(cursor, msg, sizes); - if (cursor.pos !== total) { - throw new Error( - `toBinaryFast (L3): size/write mismatch (est=${total} wrote=${cursor.pos}) — please report this as a bug`, - ); - } - return buf; - } - // Observation miss — fall through to generic. - } - - const sizes: SizeMap = new Map(); - const total = estimateMessageSize(schema, msg, sizes); - const buf = new Uint8Array(total); - const cursor: Cursor = { - buf, - view: new DataView(buf.buffer, buf.byteOffset, buf.byteLength), - pos: 0, - encodeUtf8: getTextEncoding().encodeUtf8, - }; - writeMessageInto(cursor, schema, msg, sizes); - if (cursor.pos !== total) { - throw new Error( - `toBinaryFast: size/write mismatch (est=${total} wrote=${cursor.pos}) — please report this as a bug`, - ); - } - return buf; -} diff --git a/packages/protobuf/src/wire/schema-plan-adaptive.ts b/packages/protobuf/src/wire/schema-plan-adaptive.ts deleted file mode 100644 index ebe3ba378..000000000 --- a/packages/protobuf/src/wire/schema-plan-adaptive.ts +++ /dev/null @@ -1,603 +0,0 @@ -// Copyright 2021-2026 Buf Technologies, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// L3 Runtime Monomorphization — shape-observed variant plans layered -// atop the L1+L2 hot path in `to-binary-fast.ts`. -// -// The idea (per `analysis/p1-t6-l3-design-spec.md`): -// -// L1+L2 compiles one set of size/write routines per `DescMessage`. The -// inner property read `msg[field.localName]` therefore sees every hidden -// class that the schema is ever encoded against. On OTel-style workloads -// the same schema is hit with 3–6 distinct shapes (request / response / -// error / oneof-arm variations) and V8 turns that property access site -// megamorphic, costing 1.4–3.5× in the encode loop. -// -// L3 observes incoming messages over the first `N = 10` encode calls -// per schema, computes a compact "shape signature" (per-slot field- -// presence bitmap), and once any single shape repeats ≥ 5 times it -// graduates a specialized plan variant for that shape. Up to 4 variants -// per schema; the 5th unique shape seals the record and sends every -// subsequent call back through the generic plan. -// -// This module is a *pure additive overlay* — default behaviour of -// `toBinaryFast` does not change. L3 is opt-in via the `adaptive: true` -// option or `PROTOBUF_ES_L3=1`. -// -// ## Two execution modes (D10 + CSP clarification) -// -// Mode A — CSP-safe (default). -// A variant is a pre-computed `FieldPlan[]` (compact descriptor for -// each field known-present in the observed shape). The variant -// executor is a statically-imported function that walks this array, -// skipping the generic `isFieldSet` presence gate entirely. 
This path -// does not use `new Function()` and runs under strict CSP -// (`'unsafe-eval'` denied). -// -// Mode B — CSP-unsafe (opt-in). -// Enabled by setting `globalThis[Symbol.for('@bufbuild/protobuf.adaptive-codegen')] = true` -// *before* the first encode of a given schema. On graduation, the -// variant's executor source is template-generated and constructed via -// `new Function(...)`, giving each variant its own JIT-inlined loop -// with its own inline-cache scope. Template tokens draw only from the -// `Op` enum and descriptor metadata — no user data flows into the -// source. -// -// Shape-drift handling: after a variant graduates, any future novel -// shape falls through to the generic plan. Once the variant cap (4) is -// breached, the record seals and further graduation stops permanently; -// already-graduated variants keep serving their shapes. - -import { ScalarType } from "../descriptors.js"; -import type { DescField, DescMessage, DescOneof } from "../descriptors.js"; - -// ----------------------------------------------------------------------------- -// Tunables -// ----------------------------------------------------------------------------- - -/** - * Observation threshold (D1). A shape graduates to its own variant plan - * once it has been observed this many times. Configurable via - * `PROTOBUF_ES_L3_WARMUP` so benchmarks can sweep the knob. - */ -export const L3_WARMUP: number = (() => { - // Cross-runtime env lookup — avoids a hard dependency on Node's - // `process` global (the package is published without @types/node). - const g = globalThis as { - process?: { env?: Record }; - }; - const env = g.process?.env; - const raw = env ? env.PROTOBUF_ES_L3_WARMUP : undefined; - const parsed = raw !== undefined ? Number.parseInt(raw, 10) : Number.NaN; - return Number.isFinite(parsed) && parsed > 0 ? parsed : 10; -})(); - -/** - * Variant cap (D3). Matches V8's polymorphic IC 4-way threshold. The - * 5th unique shape seals the record. 
- */ -export const L3_VARIANT_CAP = 4; - -/** - * Max schema width where shape-hash compute still fits the 300 ns budget - * (D11). Wider schemas disable L3 at first-encode time. - */ -export const L3_MAX_FIELDS = 64; - -// Explicit bigint constants (ES2017 compile target disallows `0n` / `1n`). -const BIGINT_ZERO = /*@__PURE__*/ BigInt(0); -const BIGINT_ONE = /*@__PURE__*/ BigInt(1); - -// Feature flag for Mode B (CSP-unsafe codegen executor). -const L3_CODEGEN_FLAG: symbol = Symbol.for( - "@bufbuild/protobuf.adaptive-codegen", -); - -function codegenEnabled(): boolean { - const g = globalThis as Record; - return g[L3_CODEGEN_FLAG] === true; -} - -// ----------------------------------------------------------------------------- -// Per-field presence signature -// ----------------------------------------------------------------------------- -// -// Bit `i` of the signature is 1 iff the message populates slot `i` such -// that `toBinaryFast` would emit it — i.e. the field is either explicitly -// set with a non-zero/non-empty value (implicit-presence scalars), any -// defined value (explicit-presence), a non-empty list/map, or an active -// oneof arm. Slots map 1:1 to `desc.fields`; oneof slots encode the -// *specific arm* (slot ID = desc.fields.length + oneofIndex * maxArmCount -// + armIndex — see `buildSlotMap` below) so that a schema hit with -// `stringValue` vs `intValue` on the same oneof has two distinct shapes. - -/** One entry per field slot, pre-resolved for fast presence tests. */ -interface Slot { - /** 0 for regular fields, 1 for oneof arm slots. */ - readonly kind: 0 | 1; - /** The field descriptor. */ - readonly field: DescField; - /** For regular fields, the localName property on the message object. */ - readonly localName: string; - /** For oneof arms, the oneof this slot belongs to. */ - readonly oneof: DescOneof | undefined; - /** For oneof arms, the arm's `case` string (field.localName). 
*/ - readonly armCase: string | undefined; -} - -interface SlotMap { - readonly slots: readonly Slot[]; - /** Total slot count. ≤ 64 for L3-eligible schemas. */ - readonly width: number; - /** True if schema is wider than L3_MAX_FIELDS (D11). */ - readonly tooWide: boolean; -} - -const slotMapCache = new WeakMap(); - -function buildSlotMap(desc: DescMessage): SlotMap { - const cached = slotMapCache.get(desc); - if (cached !== undefined) return cached; - - const slots: Slot[] = []; - for (const f of desc.fields) { - if (f.oneof !== undefined) continue; - slots.push({ - kind: 0, - field: f, - localName: f.localName, - oneof: undefined, - armCase: undefined, - }); - } - for (const oneof of desc.oneofs) { - for (const arm of oneof.fields) { - slots.push({ - kind: 1, - field: arm, - localName: oneof.localName, // read the ADT object off this key - oneof, - armCase: arm.localName, - }); - } - } - const map: SlotMap = { - slots, - width: slots.length, - tooWide: slots.length > L3_MAX_FIELDS, - }; - slotMapCache.set(desc, map); - return map; -} - -/** - * Compute a `bigint` signature for a message according to the descriptor's - * slot map. Bit `i` reflects whether slot `i` would be emitted under the - * generic encoder's presence rules. Pure — no allocation beyond the - * returned bigint. 
- * - * @internal - */ -export function computeShapeHash( - desc: DescMessage, - msg: Record, -): bigint { - const map = buildSlotMap(desc); - if (map.tooWide) return BIGINT_ZERO; - const slots = map.slots; - let hash = BIGINT_ZERO; - for (let i = 0; i < slots.length; i++) { - const s = slots[i]; - if (s.kind === 0) { - if (slotPresentRegular(s.field, msg[s.localName])) { - hash |= BIGINT_ONE << BigInt(i); - } - } else { - const adt = msg[s.localName] as - | { case?: string; value?: unknown } - | undefined; - if (adt && adt.case === s.armCase && adt.case !== undefined) { - hash |= BIGINT_ONE << BigInt(i); - } - } - } - return hash; -} - -/** - * Whether a non-oneof field would be emitted by the generic encoder. - * Mirrors `isFieldSet` in `to-binary-fast.ts` but is duplicated here to - * keep the module self-contained (avoids a circular import). - */ -function slotPresentRegular(field: DescField, value: unknown): boolean { - if (value === undefined || value === null) return false; - switch (field.fieldKind) { - case "scalar": { - if (field.presence !== 2 /* IMPLICIT */) return true; - const t = field.scalar; - if (t === ScalarType.STRING) return (value as string).length > 0; - if (t === ScalarType.BYTES) return (value as Uint8Array).length > 0; - if (t === ScalarType.BOOL) return value === true; - if ( - t === ScalarType.INT64 || - t === ScalarType.UINT64 || - t === ScalarType.SINT64 || - t === ScalarType.FIXED64 || - t === ScalarType.SFIXED64 - ) { - return value !== 0 && value !== BIGINT_ZERO && value !== "0"; - } - return (value as number) !== 0; - } - case "enum": - if (field.presence !== 2) return true; - return (value as number) !== 0; - case "message": - return true; - case "list": - return (value as unknown[]).length > 0; - case "map": - return Object.keys(value as object).length > 0; - } - return true; -} - -// ----------------------------------------------------------------------------- -// Variant plan -// 
----------------------------------------------------------------------------- -// -// A variant plan is, in Mode A, just the ordered list of slots that were -// observed present in the graduating shape. The variant executor walks -// this list and delegates the actual encode to the schema-generic -// `estimate*/write*` helpers in `to-binary-fast.ts`, which are provided -// by the caller via `VariantHelpers`. Crucially, the variant skips the -// per-field `isFieldSet` presence branch entirely — every slot in the -// variant's list is known-present by construction. - -/** Opaque handle to helpers injected by `to-binary-fast.ts`. */ -export interface VariantHelpers { - /** Encode-size estimator for a non-oneof regular field. */ - estimateRegular: ( - field: DescField, - value: unknown, - sizes: Map, - ) => number; - /** Encode-size estimator for a map field. */ - estimateMap: ( - field: DescField, - obj: Record, - sizes: Map, - ) => number; - /** Write routine for a non-oneof regular field. */ - writeRegular: ( - cursor: unknown, - field: DescField, - value: unknown, - sizes: Map, - ) => void; - /** Write routine for a map field. */ - writeMap: ( - cursor: unknown, - field: DescField, - obj: Record, - sizes: Map, - ) => void; -} - -/** The per-slot work unit a variant replays. */ -interface VariantStep { - /** 0 = regular field, 1 = map field, 2 = oneof arm. */ - readonly kind: 0 | 1 | 2; - readonly field: DescField; - readonly localName: string; // for kind=2 this is the oneof localName - readonly armCase: string | undefined; // kind=2 only -} - -/** - * Estimator function for a single variant. Returns the total encoded - * size of `msg` under this variant's known-present slot list, populating - * `sizes` for any submessage it encounters. - */ -type VariantEstimator = ( - msg: Record, - sizes: Map, -) => number; - -/** - * Writer function for a single variant. Writes all known-present slots - * into `cursor`, consuming submessage sizes pre-computed in `sizes`. 
- */ -type VariantWriter = ( - cursor: unknown, - msg: Record, - sizes: Map, -) => void; - -export interface VariantPlan { - readonly signature: bigint; - readonly estimate: VariantEstimator; - readonly write: VariantWriter; - /** - * Whether this variant was built with Mode B codegen (new Function()) - * or Mode A (static interpreter). - */ - readonly codegen: boolean; -} - -// ----------------------------------------------------------------------------- -// Observer record -// ----------------------------------------------------------------------------- - -export interface SchemaPlanVariants { - /** Set once at construction: schema too wide for L3 (D11). */ - readonly disableL3: boolean; - /** Shape signature → graduated variant plan. */ - readonly variants: Map; - /** Shape signature → pre-graduation observation count. */ - readonly shapeCounter: Map; - /** Total encodes observed. Used for telemetry only. */ - observationCount: number; - /** True once variant cap (D3) is breached. */ - sealed: boolean; -} - -const variantsCache = new WeakMap(); - -export function getOrCreateVariants(desc: DescMessage): SchemaPlanVariants { - let rec = variantsCache.get(desc); - if (rec === undefined) { - const map = buildSlotMap(desc); - rec = { - disableL3: map.tooWide, - variants: new Map(), - shapeCounter: new Map(), - observationCount: 0, - sealed: false, - }; - variantsCache.set(desc, rec); - } - return rec; -} - -/** Test-only hook: reset all caches for a clean observation run. */ -export function __resetAdaptiveCaches(): void { - // WeakMaps lose all entries when the last strong ref to a schema is - // dropped; in tests we need explicit clear semantics. Implemented by - // swapping the module-local maps — keep the same `const` binding but - // mutate via internal API. - // - // Since WeakMap has no .clear(), we re-create a fresh cache via a - // private path. 
The exported `variantsCache` is intentionally not - // re-assigned; callers re-use `getOrCreateVariants` which seeds a - // new record on cache miss. To flush between test cases we overwrite - // any record we see with a disabled one by consulting a ref list — - // simpler approach: reset per-schema by passing a fresh schema. - // - // For the implemented tests we recreate schemas per-case rather than - // reaching into the cache; keep this export as a documented no-op so - // the test file's call is cheap. The comment above is load-bearing - // for reviewers. -} - -// ----------------------------------------------------------------------------- -// Variant graduation -// ----------------------------------------------------------------------------- - -/** - * Build the ordered list of steps that a variant must execute for its - * observed shape. The list is frozen once built. - */ -function buildSteps(desc: DescMessage, signature: bigint): VariantStep[] { - const map = buildSlotMap(desc); - const steps: VariantStep[] = []; - for (let i = 0; i < map.slots.length; i++) { - if ((signature & (BIGINT_ONE << BigInt(i))) === BIGINT_ZERO) continue; - const s = map.slots[i]; - if (s.kind === 0) { - steps.push({ - kind: s.field.fieldKind === "map" ? 1 : 0, - field: s.field, - localName: s.localName, - armCase: undefined, - }); - } else { - steps.push({ - kind: 2, - field: s.field, - localName: s.localName, - armCase: s.armCase, - }); - } - } - return steps; -} - -/** - * Compile a variant plan for `signature` using the generic estimate/write - * helpers. Honours Mode B when `codegen` is true — the generated function - * unrolls the `steps` array into a straight-line sequence of calls so V8 - * sees monomorphic receivers at every dispatch point. 
- */ -export function compileVariantPlan( - desc: DescMessage, - signature: bigint, - helpers: VariantHelpers, -): VariantPlan { - const steps = buildSteps(desc, signature); - const useCodegen = codegenEnabled(); - - if (!useCodegen) { - // Mode A — static interpreter. - const estimate: VariantEstimator = (msg, sizes) => { - let size = 0; - for (let i = 0; i < steps.length; i++) { - const st = steps[i]; - if (st.kind === 0) { - size += helpers.estimateRegular(st.field, msg[st.localName], sizes); - } else if (st.kind === 1) { - size += helpers.estimateMap( - st.field, - msg[st.localName] as Record, - sizes, - ); - } else { - const adt = msg[st.localName] as { value: unknown } | undefined; - size += helpers.estimateRegular( - st.field, - adt === undefined ? undefined : adt.value, - sizes, - ); - } - } - return size; - }; - const write: VariantWriter = (cursor, msg, sizes) => { - for (let i = 0; i < steps.length; i++) { - const st = steps[i]; - if (st.kind === 0) { - helpers.writeRegular(cursor, st.field, msg[st.localName], sizes); - } else if (st.kind === 1) { - helpers.writeMap( - cursor, - st.field, - msg[st.localName] as Record, - sizes, - ); - } else { - const adt = msg[st.localName] as { value: unknown } | undefined; - helpers.writeRegular( - cursor, - st.field, - adt === undefined ? undefined : adt.value, - sizes, - ); - } - } - }; - return Object.freeze({ signature, estimate, write, codegen: false }); - } - - // Mode B — generate dedicated executor closures via new Function(). - // Each variant gets its own per-function IC by running the unrolled - // step list inside a fresh function scope. Source is fully - // template-generated from descriptor metadata and the step kind — no - // user-controllable strings enter the source. 
- const stepIndices = steps.map((_, i) => i); - const estimateLines = stepIndices.map((i) => { - const st = steps[i]; - if (st.kind === 0) { - return `size += ER(F[${i}], msg[N[${i}]], sizes);`; - } - if (st.kind === 1) { - return `size += EM(F[${i}], msg[N[${i}]], sizes);`; - } - return `{ const adt = msg[N[${i}]]; size += ER(F[${i}], adt === undefined ? undefined : adt.value, sizes); }`; - }); - const writeLines = stepIndices.map((i) => { - const st = steps[i]; - if (st.kind === 0) { - return `WR(cursor, F[${i}], msg[N[${i}]], sizes);`; - } - if (st.kind === 1) { - return `WM(cursor, F[${i}], msg[N[${i}]], sizes);`; - } - return `{ const adt = msg[N[${i}]]; WR(cursor, F[${i}], adt === undefined ? undefined : adt.value, sizes); }`; - }); - - const estimateSrc = `return function variantEstimate(msg, sizes){let size=0;${estimateLines.join( - "", - )}return size;};`; - const writeSrc = `return function variantWrite(cursor, msg, sizes){${writeLines.join( - "", - )}};`; - - const F = steps.map((s) => s.field); - const N = steps.map((s) => s.localName); - - const estimateFactory = new Function("F", "N", "ER", "EM", estimateSrc) as ( - F: DescField[], - N: string[], - ER: VariantHelpers["estimateRegular"], - EM: VariantHelpers["estimateMap"], - ) => VariantEstimator; - const estimate = estimateFactory( - F, - N, - helpers.estimateRegular, - helpers.estimateMap, - ); - - const writeFactory = new Function("F", "N", "WR", "WM", writeSrc) as ( - F: DescField[], - N: string[], - WR: VariantHelpers["writeRegular"], - WM: VariantHelpers["writeMap"], - ) => VariantWriter; - const write = writeFactory(F, N, helpers.writeRegular, helpers.writeMap); - - return Object.freeze({ signature, estimate, write, codegen: true }); -} - -// ----------------------------------------------------------------------------- -// Hot-path entry -// ----------------------------------------------------------------------------- - -/** - * Return the variant plan to use for this encode call, graduating a 
new - * one if the observation window has closed and the cap allows. On - * sealed or disabled records returns `undefined` and the caller falls - * back to the generic encoder. - * - * Bookkeeping is lazy — the hot path pays one `Map.get` when a variant - * is hit and one bigint compute + two `Map.get/set` pairs otherwise. - * - * @internal - */ -export function selectOrObserve( - desc: DescMessage, - msg: Record, - helpers: VariantHelpers, -): VariantPlan | undefined { - const rec = getOrCreateVariants(desc); - if (rec.disableL3) return undefined; - - const sig = computeShapeHash(desc, msg); - - // Fast path: variant hit. - const hit = rec.variants.get(sig); - if (hit !== undefined) return hit; - - // Observation path. - rec.observationCount++; - if (rec.sealed) return undefined; - - const next = (rec.shapeCounter.get(sig) ?? 0) + 1; - if (next >= L3_WARMUP) { - if (rec.variants.size >= L3_VARIANT_CAP) { - // 5th unique graduation attempt — seal. - rec.sealed = true; - rec.shapeCounter.clear(); - return undefined; - } - const plan = compileVariantPlan(desc, sig, helpers); - rec.variants.set(sig, plan); - rec.shapeCounter.delete(sig); - return plan; - } - rec.shapeCounter.set(sig, next); - return undefined; -} - -// Exported for tests that need to inspect the cache. -export { variantsCache as __variantsCacheForTests };