diff --git a/benchmarks/proto/nested.proto b/benchmarks/proto/nested.proto index 22fce18bb..066a65e8d 100644 --- a/benchmarks/proto/nested.proto +++ b/benchmarks/proto/nested.proto @@ -17,18 +17,34 @@ package bench.v1; // nested.proto intentionally mirrors the shape of the OTLP trace export // request (opentelemetry.proto.collector.trace.v1.ExportTraceServiceRequest), -// simplified to the fields most traffic goes through. This is a realistic -// hotpath workload — see open-telemetry/opentelemetry-js#6221 for the -// production incident that motivates measuring this shape. +// now extended to exercise the full AnyValue oneof and a map +// so that the fast-path encoder benchmark matches a realistic OTel payload. +// See open-telemetry/opentelemetry-js#6221 for the production incident that +// motivates measuring this shape. + +// AnyValue is the oneof-typed value carried by every KeyValue attribute in +// OTLP. We include the common leaf types; array/kvlist are intentionally +// omitted — they nest through KeyValue again and would dominate the +// benchmark on a distribution we don't observe in practice. +message AnyValue { + oneof value { + string string_value = 1; + bool bool_value = 2; + int64 int_value = 3; + double double_value = 4; + bytes bytes_value = 7; + } +} message KeyValue { string key = 1; - string string_value = 2; + AnyValue value = 2; } message InstrumentationScope { string name = 1; string version = 2; + repeated KeyValue attributes = 3; } message Span { @@ -47,6 +63,11 @@ message ScopeSpans { message Resource { repeated KeyValue attributes = 1; + // `labels` exercises map encoding on the fast path. + // Real OTLP payloads don't carry a labels map, but maps show up in + // many other ConnectRPC schemas and we want the benchmark to reflect + // that fast-path cost too. + map labels = 2; } message ResourceSpans { diff --git a/benchmarks/src/bench-comparison-protobufjs.ts b/benchmarks/src/bench-comparison-protobufjs.ts index ca2768260..d609df6af 100644 --- a/benchmarks/src/bench-comparison-protobufjs.ts +++ b/benchmarks/src/bench-comparison-protobufjs.ts @@ -62,7 +62,20 @@ function buildOtelLikePayload(): Record { for (let i = 0; i < SPAN_COUNT; i++) { const attributes: unknown[] = []; for (let j = 0; j < 10; j++) { - attributes.push({ key: `k${j}`, stringValue: `v${i}-${j}` }); + // AnyValue oneof: mostly string, some int, some bool — matches the + // distribution the fast-path benchmark feeds into the reflective + // encoder via the same fixture. + let anyValue: Record; + if (j === 2 || j === 5) { + anyValue = { value: { case: "intValue", value: BigInt(200 + j) } }; + } else if (j === 8) { + anyValue = { value: { case: "boolValue", value: (i + j) % 7 === 0 } }; + } else { + anyValue = { + value: { case: "stringValue", value: `v${i}-${j}` }, + }; + } + attributes.push({ key: `k${j}`, value: anyValue }); } spans.push({ traceId: new Uint8Array(16), @@ -76,7 +89,14 @@ function buildOtelLikePayload(): Record { return { resourceSpans: [ { - resource: { attributes: [] }, + resource: { + attributes: [], + labels: { + env: "production", + region: "us-east-1", + cluster: "bench-cluster", + }, + }, scopeSpans: [ { scope: { name: "@example/tracer", version: "1.0.0" }, @@ -91,8 +111,11 @@ function buildOtelLikePayload(): Record { // protobufjs uses Long for 64-bit fields by default. We generate with // --force-long to get consistent typing; here we still pass BigInt-like // plain numbers via strings so both paths encode the same timestamp. -// Prepare a payload variant with string timestamps for pbjs, so the -// write paths are comparable (no JSON conversion cost inside the hot loop). +// Prepare a payload variant for pbjs with: +// - string timestamps (no JSON conversion cost inside the hot loop) +// - AnyValue oneof represented as a plain field rather than the +// `{ case, value }` ADT protobuf-es uses, because protobufjs stores +// oneof members directly on the parent message. function buildOtelLikePayloadForPbjs(): Record { const base = buildOtelLikePayload(); // biome-ignore lint/suspicious/noExplicitAny: in-place shape munging @@ -100,6 +123,27 @@ function buildOtelLikePayloadForPbjs(): Record { for (const span of resourceSpans.scopeSpans[0].spans) { span.startTimeUnixNano = "1700000000000000000"; span.endTimeUnixNano = "1700000000000001000"; + // biome-ignore lint/suspicious/noExplicitAny: in-place shape munging + for (const attr of span.attributes as any[]) { + const adt = attr.value?.value as + | { case: string; value: unknown } + | undefined; + if (adt) { + const pbjsKey = + adt.case === "intValue" + ? "intValue" + : adt.case === "boolValue" + ? "boolValue" + : "stringValue"; + // protobufjs also accepts int64 as string without Long. + attr.value = { + [pbjsKey]: + adt.case === "intValue" + ? (adt.value as bigint).toString() + : adt.value, + }; + } + } } return base; } diff --git a/benchmarks/src/bench-create-toBinary.ts b/benchmarks/src/bench-create-toBinary.ts index 9f5fee35c..9fd129d9e 100644 --- a/benchmarks/src/bench-create-toBinary.ts +++ b/benchmarks/src/bench-create-toBinary.ts @@ -21,6 +21,7 @@ import { Bench } from "tinybench"; import { create, toBinary, toBinaryFast } from "@bufbuild/protobuf"; import { SimpleMessageSchema } from "./gen/small_pb.js"; import { + AnyValueSchema, ExportTraceRequestSchema, ResourceSpansSchema, ScopeSpansSchema, @@ -62,7 +63,9 @@ export async function runCreateToBinaryBench() { attrs.push( create(KeyValueSchema, { key: `k${j}`, - stringValue: `v${i}-${j}`, + value: create(AnyValueSchema, { + value: { case: "stringValue", value: `v${i}-${j}` }, + }), }), ); } @@ -104,7 +107,9 @@ export async function runCreateToBinaryBench() { attrs.push( create(KeyValueSchema, { key: `k${j}`, - stringValue: `v${i}-${j}`, + value: create(AnyValueSchema, { + value: { case: "stringValue", value: `v${i}-${j}` }, + }), }), ); } diff --git a/benchmarks/src/bench-create.ts b/benchmarks/src/bench-create.ts index 96d3e8ada..9208ae800 100644 --- a/benchmarks/src/bench-create.ts +++ b/benchmarks/src/bench-create.ts @@ -20,6 +20,7 @@ import { Bench } from "tinybench"; import { create } from "@bufbuild/protobuf"; import { SimpleMessageSchema } from "./gen/small_pb.js"; import { + AnyValueSchema, ExportTraceRequestSchema, ResourceSpansSchema, ScopeSpansSchema, @@ -56,7 +57,9 @@ export async function runCreateBench() { attrs.push( create(KeyValueSchema, { key: `k${j}`, - stringValue: `v${i}-${j}`, + value: create(AnyValueSchema, { + value: { case: "stringValue", value: `v${i}-${j}` }, + }), }), ); } diff --git a/benchmarks/src/bench-memory.ts b/benchmarks/src/bench-memory.ts index 8a62488ba..7e82d8811 100644 --- a/benchmarks/src/bench-memory.ts +++ b/benchmarks/src/bench-memory.ts @@ -47,7 +47,19 @@ function buildInit(): Record { for (let i = 0; i < SPAN_COUNT; i++) { const attributes: unknown[] = []; for (let j = 0; j < 10; j++) { - attributes.push({ key: `k${j}`, stringValue: `v${i}-${j}` }); + // Mirror the distribution used in bench-comparison-protobufjs so the + // memory numbers reflect the same workload as the throughput runs. + let anyValue: Record; + if (j === 2 || j === 5) { + anyValue = { value: { case: "intValue", value: BigInt(200 + j) } }; + } else if (j === 8) { + anyValue = { value: { case: "boolValue", value: (i + j) % 7 === 0 } }; + } else { + anyValue = { + value: { case: "stringValue", value: `v${i}-${j}` }, + }; + } + attributes.push({ key: `k${j}`, value: anyValue }); } spans.push({ traceId: new Uint8Array(16), @@ -61,7 +73,14 @@ function buildInit(): Record { return { resourceSpans: [ { - resource: { attributes: [] }, + resource: { + attributes: [], + labels: { + env: "production", + region: "us-east-1", + cluster: "bench-cluster", + }, + }, scopeSpans: [ { scope: { name: "@example/tracer", version: "1.0.0" }, @@ -80,6 +99,26 @@ function buildInitForPbjs(): Record { for (const span of resourceSpans.scopeSpans[0].spans) { span.startTimeUnixNano = "1700000000000000000"; span.endTimeUnixNano = "1700000000000001000"; + // biome-ignore lint/suspicious/noExplicitAny: in-place shape munging + for (const attr of span.attributes as any[]) { + const adt = attr.value?.value as + | { case: string; value: unknown } + | undefined; + if (adt) { + const pbjsKey = + adt.case === "intValue" + ? "intValue" + : adt.case === "boolValue" + ? "boolValue" + : "stringValue"; + attr.value = { + [pbjsKey]: + adt.case === "intValue" + ? (adt.value as bigint).toString() + : adt.value, + }; + } + } } return base; } diff --git a/benchmarks/src/fixtures.ts b/benchmarks/src/fixtures.ts index 85f67b15c..01ed36c0c 100644 --- a/benchmarks/src/fixtures.ts +++ b/benchmarks/src/fixtures.ts @@ -15,6 +15,7 @@ import { create } from "@bufbuild/protobuf"; import { SimpleMessageSchema, type SimpleMessage } from "./gen/small_pb.js"; import { + AnyValueSchema, ExportTraceRequestSchema, type ExportTraceRequest, KeyValueSchema, @@ -50,26 +51,44 @@ function bytes(seed: number, length: number): Uint8Array { } // Mimics OTLP attribute cardinality in realistic trace exports: -// ~10 attributes per span, short ASCII keys and values. +// ~10 attributes per span, short ASCII keys, and a mix of AnyValue leaf +// types so that the oneof dispatch on the fast path gets exercised on +// every variant we encode in production (string dominates; bool, int, +// and double show up on status / error flags / durations). function buildAttributes(spanIdx: number) { const out = [] as ReturnType>[]; - const keys = [ - "http.method", - "http.url", - "http.status_code", - "http.user_agent", - "net.peer.name", - "net.peer.port", - "service.name", - "service.version", - "deployment.environment", - "rpc.system", + const descriptors = [ + { key: "http.method", kind: "string" as const }, + { key: "http.url", kind: "string" as const }, + { key: "http.status_code", kind: "int" as const }, + { key: "http.user_agent", kind: "string" as const }, + { key: "net.peer.name", kind: "string" as const }, + { key: "net.peer.port", kind: "int" as const }, + { key: "service.name", kind: "string" as const }, + { key: "service.version", kind: "string" as const }, + { key: "error", kind: "bool" as const }, + { key: "rpc.system", kind: "string" as const }, ]; - for (let i = 0; i < keys.length; i++) { + for (let i = 0; i < descriptors.length; i++) { + const d = descriptors[i]; + let anyInit: Parameters>[1]; + if (d.kind === "string") { + anyInit = { + value: { case: "stringValue", value: `value-${spanIdx}-${i}` }, + }; + } else if (d.kind === "int") { + anyInit = { + value: { case: "intValue", value: BigInt(200 + (i % 5)) }, + }; + } else { + anyInit = { + value: { case: "boolValue", value: (spanIdx + i) % 7 === 0 }, + }; + } out.push( create(KeyValueSchema, { - key: keys[i], - stringValue: `value-${spanIdx}-${i}`, + key: d.key, + value: create(AnyValueSchema, anyInit), }), ); } @@ -109,13 +128,26 @@ export function buildExportTraceRequest(): ExportTraceRequest { attributes: [ create(KeyValueSchema, { key: "service.name", - stringValue: "bench-service", + value: create(AnyValueSchema, { + value: { case: "stringValue", value: "bench-service" }, + }), }), create(KeyValueSchema, { key: "service.version", - stringValue: "1.0.0", + value: create(AnyValueSchema, { + value: { case: "stringValue", value: "1.0.0" }, + }), }), ], + // Exercise map encoding on the fast path. Realistic + // cardinality: a handful of per-process deployment labels. + labels: { + env: "production", + region: "us-east-1", + cluster: "bench-cluster", + az: "us-east-1a", + tenant: "bench-tenant", + }, }); const resourceSpans = create(ResourceSpansSchema, { resource, @@ -132,25 +164,31 @@ export function buildExportTraceRequest(): ExportTraceRequest { // re-parse via fromJsonString). export function buildExportTraceRequestJsonShape() { const spans = [] as unknown[]; - const attributeKeys = [ - "http.method", - "http.url", - "http.status_code", - "http.user_agent", - "net.peer.name", - "net.peer.port", - "service.name", - "service.version", - "deployment.environment", - "rpc.system", + const attributeDescriptors = [ + { key: "http.method", kind: "string" as const }, + { key: "http.url", kind: "string" as const }, + { key: "http.status_code", kind: "int" as const }, + { key: "http.user_agent", kind: "string" as const }, + { key: "net.peer.name", kind: "string" as const }, + { key: "net.peer.port", kind: "int" as const }, + { key: "service.name", kind: "string" as const }, + { key: "service.version", kind: "string" as const }, + { key: "error", kind: "bool" as const }, + { key: "rpc.system", kind: "string" as const }, ]; for (let i = 0; i < SPAN_COUNT; i++) { const attributes: unknown[] = []; - for (let j = 0; j < attributeKeys.length; j++) { - attributes.push({ - key: attributeKeys[j], - stringValue: `value-${i}-${j}`, - }); + for (let j = 0; j < attributeDescriptors.length; j++) { + const d = attributeDescriptors[j]; + let value: unknown; + if (d.kind === "string") { + value = { case: "stringValue", value: `value-${i}-${j}` }; + } else if (d.kind === "int") { + value = { case: "intValue", value: (200 + (j % 5)).toString() }; + } else { + value = { case: "boolValue", value: (i + j) % 7 === 0 }; + } + attributes.push({ key: d.key, value }); } spans.push({ traceId: bytes(i, 16), @@ -172,9 +210,22 @@ export function buildExportTraceRequestJsonShape() { { resource: { attributes: [ - { key: "service.name", stringValue: "bench-service" }, - { key: "service.version", stringValue: "1.0.0" }, + { + key: "service.name", + value: { case: "stringValue", value: "bench-service" }, + }, + { + key: "service.version", + value: { case: "stringValue", value: "1.0.0" }, + }, ], + labels: { + env: "production", + region: "us-east-1", + cluster: "bench-cluster", + az: "us-east-1a", + tenant: "bench-tenant", + }, }, scopeSpans: [ { diff --git a/packages/protobuf-test/src/to-binary-fast.test.ts b/packages/protobuf-test/src/to-binary-fast.test.ts new file mode 100644 index 000000000..4666f0377 --- /dev/null +++ b/packages/protobuf-test/src/to-binary-fast.test.ts @@ -0,0 +1,257 @@ +// Copyright 2021-2026 Buf Technologies, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Feature-coverage tests for the experimental `toBinaryFast` encoder. +// The load-bearing claim is byte-identical output against the reflective +// `toBinary` for the feature surfaces the fast path claims to handle — +// maps (every legal K, every legal V) and oneofs (scalar, message, enum). +// Semantic round-trip is also asserted as a defense-in-depth check. + +import { suite, test } from "node:test"; +import * as assert from "node:assert"; +import { + create, + toBinary, + toBinaryFast, + fromBinary, + protoInt64, +} from "@bufbuild/protobuf"; +import { + MapsMessageSchema, + MapsEnum, +} from "./gen/ts/extra/msg-maps_pb.js"; +import { + OneofMessageSchema, + OneofMessageFooSchema, + OneofMessageBarSchema, + OneofEnum, +} from "./gen/ts/extra/msg-oneof_pb.js"; +import { ScalarValuesMessageSchema } from "./gen/ts/extra/msg-scalar_pb.js"; + +void suite("toBinaryFast", () => { + void suite("map field parity", () => { + test("map with scalar/bytes values", () => { + const msg = create(MapsMessageSchema, { + strStrField: { a: "alpha", b: "beta", c: "gamma" }, + strInt32Field: { a: 1, b: -2, c: 0x7fff_ffff }, + strInt64Field: { + a: protoInt64.parse(1), + // Literal `n` requires ES2020; this package is compiled for ES2017. + b: protoInt64.parse(BigInt("-9007199254740993")), + }, + strBoolField: { true_key: true, false_key: false }, + strBytesField: { + a: new Uint8Array([0, 1, 2, 3]), + b: new Uint8Array([0xff, 0xfe]), + }, + }); + const slow = toBinary(MapsMessageSchema, msg); + const fast = toBinaryFast(MapsMessageSchema, msg); + assert.deepStrictEqual( + Array.from(fast), + Array.from(slow), + "byte-identical expected for string-keyed maps", + ); + assert.deepStrictEqual( + fromBinary(MapsMessageSchema, fast), + fromBinary(MapsMessageSchema, slow), + ); + }); + + test("map and map keys parse and encode", () => { + const msg = create(MapsMessageSchema, { + int32StrField: { 1: "one", [-2]: "neg-two", 100: "hundred" }, + int64StrField: { + "1": "one", + "-2": "neg-two", + "9007199254740993": "big", + }, + }); + const slow = toBinary(MapsMessageSchema, msg); + const fast = toBinaryFast(MapsMessageSchema, msg); + // Byte-identical requires same field ordering and same map iteration + // order. Both encoders iterate descriptor order + Object.keys order, + // so parity should hold. + assert.deepStrictEqual(Array.from(fast), Array.from(slow)); + assert.deepStrictEqual( + fromBinary(MapsMessageSchema, fast), + fromBinary(MapsMessageSchema, slow), + ); + }); + + test("map", () => { + const msg = create(MapsMessageSchema, { + boolStrField: { true: "yes", false: "no" }, + }); + const slow = toBinary(MapsMessageSchema, msg); + const fast = toBinaryFast(MapsMessageSchema, msg); + assert.deepStrictEqual(Array.from(fast), Array.from(slow)); + }); + + test("map<*,message> encodes the value submessage", () => { + const inner = create(MapsMessageSchema, { + strStrField: { nested: "ok" }, + }); + const msg = create(MapsMessageSchema, { + strMsgField: { first: inner, second: inner }, + int32MsgField: { 1: inner, 2: inner }, + }); + const slow = toBinary(MapsMessageSchema, msg); + const fast = toBinaryFast(MapsMessageSchema, msg); + assert.deepStrictEqual(Array.from(fast), Array.from(slow)); + assert.deepStrictEqual( + fromBinary(MapsMessageSchema, fast), + fromBinary(MapsMessageSchema, slow), + ); + }); + + test("map<*,enum>", () => { + const msg = create(MapsMessageSchema, { + strEnuField: { a: MapsEnum.YES, b: MapsEnum.NO }, + int32EnuField: { 1: MapsEnum.YES, 2: MapsEnum.NO }, + }); + const slow = toBinary(MapsMessageSchema, msg); + const fast = toBinaryFast(MapsMessageSchema, msg); + assert.deepStrictEqual(Array.from(fast), Array.from(slow)); + }); + + test("empty maps do not emit anything", () => { + const msg = create(MapsMessageSchema, {}); + const slow = toBinary(MapsMessageSchema, msg); + const fast = toBinaryFast(MapsMessageSchema, msg); + assert.strictEqual(fast.length, 0); + assert.deepStrictEqual(Array.from(fast), Array.from(slow)); + }); + }); + + void suite("oneof parity", () => { + test("scalar oneof — int value case", () => { + const msg = create(OneofMessageSchema, { + scalar: { case: "value", value: 42 }, + }); + const slow = toBinary(OneofMessageSchema, msg); + const fast = toBinaryFast(OneofMessageSchema, msg); + assert.deepStrictEqual(Array.from(fast), Array.from(slow)); + }); + + test("scalar oneof — zero value must still be emitted", () => { + // Oneof presence is carried by the discriminator, so a `value: 0` + // case is *still* considered set. This is the tricky corner that + // the fast-path oneof dispatch has to get right (a non-oneof + // IMPLICIT int with value 0 would be omitted). + const msg = create(OneofMessageSchema, { + scalar: { case: "value", value: 0 }, + }); + const slow = toBinary(OneofMessageSchema, msg); + const fast = toBinaryFast(OneofMessageSchema, msg); + assert.deepStrictEqual(Array.from(fast), Array.from(slow)); + assert.ok(fast.length > 0, "expected tag+value for zero-valued oneof"); + }); + + test("scalar oneof — string case with empty string", () => { + const msg = create(OneofMessageSchema, { + scalar: { case: "error", value: "" }, + }); + const slow = toBinary(OneofMessageSchema, msg); + const fast = toBinaryFast(OneofMessageSchema, msg); + assert.deepStrictEqual(Array.from(fast), Array.from(slow)); + }); + + test("scalar oneof — bytes case", () => { + const msg = create(OneofMessageSchema, { + scalar: { case: "bytes", value: new Uint8Array([1, 2, 3, 255]) }, + }); + const slow = toBinary(OneofMessageSchema, msg); + const fast = toBinaryFast(OneofMessageSchema, msg); + assert.deepStrictEqual(Array.from(fast), Array.from(slow)); + }); + + test("message oneof — foo case", () => { + const foo = create(OneofMessageFooSchema, { + name: "hello", + toggle: true, + }); + const msg = create(OneofMessageSchema, { + message: { case: "foo", value: foo }, + }); + const slow = toBinary(OneofMessageSchema, msg); + const fast = toBinaryFast(OneofMessageSchema, msg); + assert.deepStrictEqual(Array.from(fast), Array.from(slow)); + }); + + test("message oneof — bar case", () => { + const bar = create(OneofMessageBarSchema, { a: 3, b: 4 }); + const msg = create(OneofMessageSchema, { + message: { case: "bar", value: bar }, + }); + const slow = toBinary(OneofMessageSchema, msg); + const fast = toBinaryFast(OneofMessageSchema, msg); + assert.deepStrictEqual(Array.from(fast), Array.from(slow)); + }); + + test("enum oneof", () => { + const msg = create(OneofMessageSchema, { + enum: { case: "e", value: OneofEnum.A }, + }); + const slow = toBinary(OneofMessageSchema, msg); + const fast = toBinaryFast(OneofMessageSchema, msg); + assert.deepStrictEqual(Array.from(fast), Array.from(slow)); + }); + + test("multiple oneof groups each contribute their selected case", () => { + const foo = create(OneofMessageFooSchema, { name: "n", toggle: false }); + const msg = create(OneofMessageSchema, { + scalar: { case: "value", value: 7 }, + message: { case: "foo", value: foo }, + enum: { case: "e", value: OneofEnum.B }, + }); + const slow = toBinary(OneofMessageSchema, msg); + const fast = toBinaryFast(OneofMessageSchema, msg); + assert.deepStrictEqual(Array.from(fast), Array.from(slow)); + }); + + test("empty oneofs emit nothing", () => { + const msg = create(OneofMessageSchema, {}); + const slow = toBinary(OneofMessageSchema, msg); + const fast = toBinaryFast(OneofMessageSchema, msg); + assert.strictEqual(fast.length, 0); + assert.deepStrictEqual(Array.from(fast), Array.from(slow)); + }); + }); + + void suite("regression — scalars still match", () => { + test("ScalarValuesMessage parity", () => { + const msg = create(ScalarValuesMessageSchema, { + doubleField: 0.75, + floatField: -0.75, + int64Field: protoInt64.parse(-1), + uint64Field: protoInt64.uParse(1), + int32Field: -123, + fixed64Field: protoInt64.uParse(1), + fixed32Field: 123, + boolField: true, + stringField: "hello world", + bytesField: new Uint8Array([1, 2, 3]), + uint32Field: 42, + sfixed32Field: -42, + sfixed64Field: protoInt64.parse(-42), + sint32Field: -42, + sint64Field: protoInt64.parse(-42), + }); + const slow = toBinary(ScalarValuesMessageSchema, msg); + const fast = toBinaryFast(ScalarValuesMessageSchema, msg); + assert.deepStrictEqual(Array.from(fast), Array.from(slow)); + }); + }); +}); diff --git a/packages/protobuf/src/to-binary-fast.ts b/packages/protobuf/src/to-binary-fast.ts index 5f334e64a..4ea562247 100644 --- a/packages/protobuf/src/to-binary-fast.ts +++ b/packages/protobuf/src/to-binary-fast.ts @@ -37,11 +37,12 @@ // single tight loop with no intermediate `Uint8Array`/`number[]` objects // per field. // -// Scope (MVP): +// Scope: // - supported: scalar fields (all 15 types), enums, nested messages, -// repeated scalar (packed + unpacked), repeated message -// - unsupported: map fields, oneof groups, extensions, delimited/group -// encoding, unknown fields +// repeated scalar (packed + unpacked), repeated message, +// map for all legal K and any scalar/enum/message V, +// oneof groups +// - unsupported: extensions, delimited/group encoding, unknown fields // // For unsupported schemas `toBinaryFast` falls back to the existing // reflective `toBinary`. The decision is computed once per `DescMessage` @@ -55,7 +56,12 @@ // `toBinary`'s non-unknown path, but future tweaks may diverge). import type { MessageShape } from "./types.js"; -import { ScalarType, type DescField, type DescMessage } from "./descriptors.js"; +import { + ScalarType, + type DescField, + type DescMessage, + type DescOneof, +} from "./descriptors.js"; import { protoInt64 } from "./proto-int64.js"; import { toBinary } from "./to-binary.js"; import { getTextEncoding } from "./wire/text-encoding.js"; @@ -66,6 +72,13 @@ import { getTextEncoding } from "./wire/text-encoding.js"; const supportCache = new WeakMap(); +// `0n` requires target >= ES2020, but this package is compiled for ES2017. +// Materialize the bigint zero once at module load so closures can compare +// against it without the BigInt() call on the hot path. Marked PURE so +// unused-path eliminators (esbuild, Rollup, Terser) can drop this module +// when toBinaryFast is never referenced. +const BIGINT_ZERO = /*@__PURE__*/ BigInt(0); + /** * Walk the descriptor (including transitive message fields) and return * true iff every field in the subtree uses an MVP-supported shape. The @@ -87,7 +100,11 @@ function isSupported( let ok = true; for (const field of desc.fields) { - // Delimited (group) encoding is not handled in the MVP. + // Delimited (group) encoding is not handled — the legacy wire format + // requires paired start/end tags which don't fit the single-pass + // write model. Map fields and message-typed map values cannot use + // delimited encoding (enforced by the descriptor), so we only need + // to check singular messages and repeated messages. if ( (field.fieldKind === "message" || (field.fieldKind === "list" && field.listKind === "message")) && @@ -96,15 +113,6 @@ function isSupported( ok = false; break; } - // Maps and oneofs are not handled in the MVP. - if (field.fieldKind === "map") { - ok = false; - break; - } - if (field.oneof !== undefined) { - ok = false; - break; - } // Recurse into message fields. if (field.fieldKind === "message" && field.message) { if (!isSupported(field.message, visiting)) { @@ -122,6 +130,17 @@ function isSupported( break; } } + // Recurse into map value messages. + if ( + field.fieldKind === "map" && + field.mapKind === "message" && + field.message + ) { + if (!isSupported(field.message, visiting)) { + ok = false; + break; + } + } } visiting.delete(desc); supportCache.set(desc, ok); @@ -287,28 +306,22 @@ function scalarWireType(type: ScalarType): number { } } -/** Should this field be emitted for the given message? */ -function isFieldSet( - field: DescField, - value: unknown, - message: Record, -): boolean { - // Explicit presence (proto2 / proto3 optional / synthetic oneof): the - // generated setters only assign when the property was set. Missing ⇒ - // undefined in the message object. +/** + * Should this non-oneof field be emitted for the given message? + * Oneof members are dispatched separately and never flow through this + * predicate. + */ +function isFieldSet(field: DescField, value: unknown): boolean { + // Explicit presence (proto2 / proto3 optional): the generated setters + // only assign when the property was set. Missing ⇒ undefined. if (value === undefined || value === null) return false; // Implicit presence (proto3 singular scalar/enum): zero value means // "not set" and must not be emitted. Lists/maps handled separately - // (empty list means "not set" too). + // (empty list/map means "not set" too). switch (field.fieldKind) { case "scalar": { const t = field.scalar; - // Implicit presence is the default in proto3; we omit zero values. - // For proto2 explicit-presence scalars the codegen emits undefined - // when unset, so this branch wouldn't fire. Checking presence on - // the descriptor would be more precise but also more expensive on - // the hot path — mirror the existing reflect semantics here. if (field.presence !== 2 /* IMPLICIT */) { // Explicit / legacy required: any defined value counts as set. return true; @@ -324,8 +337,9 @@ function isFieldSet( t === ScalarType.SFIXED64 ) { // bigint zero, numeric zero, "0" string all represent unset. - // biome-ignore lint/suspicious/noDoubleEquals: 0n == 0 == "0" intentionally. - return value != 0; + // Compare via coercion so 0n / 0 / "0" all return false. + // Literal `0n` requires ES2020; see BIGINT_ZERO above. + return value !== 0 && value !== BIGINT_ZERO && value !== "0"; } return (value as number) !== 0; } @@ -337,19 +351,199 @@ function isFieldSet( case "list": return (value as unknown[]).length > 0; case "map": - // Map fields are a blocker in isSupported; we should never get - // here, but be defensive. + // Map fields carry their own "any entry" gate here — empty object + // ⇒ not set ⇒ omit. Same semantics as reflect.unsafeIsSet. return Object.keys(value as object).length > 0; } - // Unreachable — above switch is exhaustive for DescField.fieldKind. - // Return true so that an unexpected shape surfaces as an error during - // the size/write mismatch check rather than silent data loss. + // Exhaustive switch; unreachable. Return true so unexpected shapes + // surface as a size/write mismatch error rather than silent data loss. return true; - // Parameter `message` kept for future use (e.g. oneof case disambiguation); - // currently unused but left in the signature to avoid churn when we - // expand the supported surface. - // biome-ignore lint/correctness/noUnreachable: documentation. - void message; +} + +// ----------------------------------------------------------------------------- +// Map key helpers +// ----------------------------------------------------------------------------- +// +// protobuf-es stores map fields as plain JS objects keyed by the stringified +// map key (see reflectMap.mapKeyToLocal). On the fast path we iterate +// `Object.keys`, so every key we see is a string. For integer and boolean +// map keys we parse back to the typed value before computing the scalar +// size or writing the scalar bytes — matching what the reflective encoder +// does via ReflectMap's iterator. + +type MapKeyScalar = Exclude< + ScalarType, + ScalarType.FLOAT | ScalarType.DOUBLE | ScalarType.BYTES +>; + +function coerceMapKey(stringKey: string, keyType: MapKeyScalar): unknown { + switch (keyType) { + case ScalarType.STRING: + return stringKey; + case ScalarType.BOOL: + // Object keys for boolean maps are always "true" / "false" strings. + return stringKey === "true"; + case ScalarType.INT64: + case ScalarType.SINT64: + case ScalarType.SFIXED64: + return protoInt64.parse(stringKey); + case ScalarType.UINT64: + case ScalarType.FIXED64: + return protoInt64.uParse(stringKey); + default: + // INT32, SINT32, FIXED32, SFIXED32, UINT32 — parse back to number. + return Number.parseInt(stringKey, 10); + } +} + +/** + * Body-size of a single map entry message `{ key, value }`, excluding + * the outer field tag and length prefix. Returns both the body size and, + * for message-typed values, the submessage body size (so the writer + * doesn't recompute it). + */ +function estimateMapEntryBody( + field: DescField & { fieldKind: "map" }, + keyTyped: unknown, + value: unknown, + sizes: SizeMap, +): { body: number; valueSubSize: number } { + // Entry key is always field number 1. + const keySize = + tagSize(1, scalarWireType(field.mapKey)) + scalarSize(field.mapKey, keyTyped); + let valSize: number; + let valueSubSize = 0; + switch (field.mapKind) { + case "scalar": + valSize = + tagSize(2, scalarWireType(field.scalar)) + + scalarSize(field.scalar, value); + break; + case "enum": + valSize = tagSize(2, WIRE_VARINT) + int32Size(value as number); + break; + case "message": { + const sub = value as Record; + valueSubSize = estimateMessageSize(field.message, sub, sizes); + sizes.set(sub, valueSubSize); + valSize = + tagSize(2, WIRE_LENGTH_DELIMITED) + + varintSize32(valueSubSize) + + valueSubSize; + break; + } + } + return { body: keySize + valSize, valueSubSize }; +} + +/** + * Size contribution of a map field: for every entry, an outer tag + length + * prefix + entry body. Map entries are always length-delimited — map fields + * cannot use delimited (group) encoding. + */ +function estimateMapFieldSize( + field: DescField & { fieldKind: "map" }, + obj: Record, + sizes: SizeMap, +): number { + const tagBytes = tagSize(field.number, WIRE_LENGTH_DELIMITED); + let size = 0; + for (const strKey of Object.keys(obj)) { + const keyTyped = coerceMapKey(strKey, field.mapKey); + const { body } = estimateMapEntryBody(field, keyTyped, obj[strKey], sizes); + size += tagBytes + varintSize32(body) + body; + } + return size; +} + +/** + * Size contribution of a single non-oneof non-map "regular" field. Broken + * out so that the oneof dispatch can reuse the same switch. + */ +function estimateRegularFieldSize( + field: DescField, + value: unknown, + sizes: SizeMap, +): number { + switch (field.fieldKind) { + case "scalar": + return ( + tagSize(field.number, scalarWireType(field.scalar)) + + scalarSize(field.scalar, value) + ); + case "enum": + return tagSize(field.number, WIRE_VARINT) + int32Size(value as number); + case "message": { + const sub = value as Record; + const subSize = estimateMessageSize(field.message, sub, sizes); + sizes.set(sub, subSize); + return ( + tagSize(field.number, WIRE_LENGTH_DELIMITED) + + varintSize32(subSize) + + subSize + ); + } + case "list": { + const list = value as unknown[]; + let size = 0; + if (field.listKind === "message") { + const tagBytes = tagSize(field.number, WIRE_LENGTH_DELIMITED); + for (let k = 0; k < list.length; k++) { + const sub = list[k] as Record; + const subSize = estimateMessageSize(field.message, sub, sizes); + sizes.set(sub, subSize); + size += tagBytes + varintSize32(subSize) + subSize; + } + return size; + } + if (field.listKind === "enum") { + if (field.packed) { + let body = 0; + for (let k = 0; k < list.length; k++) { + body += int32Size(list[k] as number); + } + return ( + tagSize(field.number, WIRE_LENGTH_DELIMITED) + + varintSize32(body) + + body + ); + } + const tagBytes = tagSize(field.number, WIRE_VARINT); + for (let k = 0; k < list.length; k++) { + size += tagBytes + int32Size(list[k] as number); + } + return size; + } + // listKind === "scalar" + const t = field.scalar; + const wt = scalarWireType(t); + if (field.packed && wt !== WIRE_LENGTH_DELIMITED) { + let body = 0; + for (let k = 0; k < list.length; k++) { + body += scalarSize(t, list[k]); + } + return ( + tagSize(field.number, WIRE_LENGTH_DELIMITED) + + varintSize32(body) + + body + ); + } + const tagBytes = tagSize(field.number, wt); + for (let k = 0; k < list.length; k++) { + size += tagBytes + scalarSize(t, list[k]); + } + return size; + } + case "map": + // Map fields flow through estimateMapFieldSize; this branch is + // defensive and never taken on the estimation hot path. + return estimateMapFieldSize( + field as DescField & { fieldKind: "map" }, + value as Record, + sizes, + ); + } + return 0; } function estimateMessageSize( @@ -361,84 +555,55 @@ function estimateMessageSize( const fields = desc.fields; for (let i = 0; i < fields.length; i++) { const field = fields[i]; - const value = message[field.localName]; - if (!isFieldSet(field, value, message)) continue; + // Oneof members are dispatched via the `desc.oneofs` loop below. + if (field.oneof !== undefined) continue; - switch (field.fieldKind) { - case "scalar": { - size += tagSize(field.number, scalarWireType(field.scalar)); - size += scalarSize(field.scalar, value); - break; - } - case "enum": { - size += tagSize(field.number, WIRE_VARINT); - size += int32Size(value as number); - break; - } - case "message": { - const sub = value as Record; - const subSize = estimateMessageSize(field.message, sub, sizes); - sizes.set(sub, subSize); - size += - tagSize(field.number, WIRE_LENGTH_DELIMITED) + - varintSize32(subSize) + - subSize; - break; - } - case "list": { - const list = value as unknown[]; - if (field.listKind === "message") { - const tagBytes = tagSize(field.number, WIRE_LENGTH_DELIMITED); - for (let k = 0; k < list.length; k++) { - const sub = list[k] as Record; - const subSize = estimateMessageSize(field.message, sub, sizes); - sizes.set(sub, subSize); - size += tagBytes + varintSize32(subSize) + subSize; - } - } else if (field.listKind === "enum") { - if (field.packed) { - let body = 0; - for (let k = 0; k < list.length; k++) { - body += int32Size(list[k] as number); - } - size += - tagSize(field.number, WIRE_LENGTH_DELIMITED) + - varintSize32(body) + - body; - } else { - const tagBytes = tagSize(field.number, WIRE_VARINT); - for (let k = 0; k < list.length; k++) { - size += tagBytes + int32Size(list[k] as number); - } - } - } else { - // listKind === "scalar" - const t = field.scalar; - const wt = scalarWireType(t); - if (field.packed && wt !== WIRE_LENGTH_DELIMITED) { - let body = 0; - for (let k = 0; k < list.length; k++) { - body += scalarSize(t, list[k]); - } - size += - tagSize(field.number, WIRE_LENGTH_DELIMITED) + - varintSize32(body) + - body; - } else { - const tagBytes = tagSize(field.number, wt); - for (let k = 0; k < list.length; k++) { - size += tagBytes + scalarSize(t, list[k]); - } - } - } - break; - } - // map / oneof: filtered out by isSupported; unreachable. + if (field.fieldKind === "map") { + const obj = message[field.localName] as + | Record + | undefined; + if (!obj || Object.keys(obj).length === 0) continue; + size += estimateMapFieldSize( + field as DescField & { fieldKind: "map" }, + obj, + sizes, + ); + continue; } + + const value = message[field.localName]; + if (!isFieldSet(field, value)) continue; + size += estimateRegularFieldSize(field, value, sizes); + } + // Oneof dispatch: at most one field per oneof contributes, identified by + // the `case` discriminator on the oneof ADT object. Zero values are + // emitted when a oneof case is explicitly set — that's the whole point + // of the oneof: presence is carried by the discriminator, not by value. + const oneofs = desc.oneofs; + for (let i = 0; i < oneofs.length; i++) { + const oneof = oneofs[i]; + const adt = message[oneof.localName] as + | { case: string | undefined; value?: unknown } + | undefined; + if (!adt || adt.case === undefined) continue; + const selected = findOneofField(oneof, adt.case); + if (!selected) continue; + size += estimateRegularFieldSize(selected, adt.value, sizes); } return size; } +function findOneofField( + oneof: DescOneof, + caseName: string, +): DescField | undefined { + const fs = oneof.fields; + for (let i = 0; i < fs.length; i++) { + if (fs[i].localName === caseName) return fs[i]; + } + return undefined; +} + // ----------------------------------------------------------------------------- // Pass 2 — write into pre-allocated buffer // ----------------------------------------------------------------------------- @@ -587,88 +752,190 @@ function writeScalar(c: Cursor, type: ScalarType, value: unknown): void { } } -function writeMessageInto( +function writeMapEntry( c: Cursor, - desc: DescMessage, - message: Record, + field: DescField & { fieldKind: "map" }, + keyTyped: unknown, + value: unknown, sizes: SizeMap, ): void { - const fields = desc.fields; - for (let i = 0; i < fields.length; i++) { - const field = fields[i]; - const value = message[field.localName]; - if (!isFieldSet(field, value, message)) continue; + // Entry key: field number 1. + writeTag(c, 1, scalarWireType(field.mapKey)); + writeScalar(c, field.mapKey, keyTyped); + // Entry value: field number 2. + switch (field.mapKind) { + case "scalar": + writeTag(c, 2, scalarWireType(field.scalar)); + writeScalar(c, field.scalar, value); + return; + case "enum": + writeTag(c, 2, WIRE_VARINT); + writeInt32(c, value as number); + return; + case "message": { + const sub = value as Record; + const subSize = sizes.get(sub) ?? 0; + writeTag(c, 2, WIRE_LENGTH_DELIMITED); + writeVarint32(c, subSize); + writeMessageInto(c, field.message, sub, sizes); + return; + } + } +} - switch (field.fieldKind) { - case "scalar": { - writeTag(c, field.number, scalarWireType(field.scalar)); - writeScalar(c, field.scalar, value); - break; - } - case "enum": { - writeTag(c, field.number, WIRE_VARINT); - writeInt32(c, value as number); - break; - } - case "message": { - const sub = value as Record; - const subSize = sizes.get(sub) ?? 0; - writeTag(c, field.number, WIRE_LENGTH_DELIMITED); - writeVarint32(c, subSize); - writeMessageInto(c, field.message, sub, sizes); - break; +function writeMapField( + c: Cursor, + field: DescField & { fieldKind: "map" }, + obj: Record, + sizes: SizeMap, +): void { + for (const strKey of Object.keys(obj)) { + const keyTyped = coerceMapKey(strKey, field.mapKey); + const value = obj[strKey]; + // Body size is recomputed here rather than cached because caching it + // per-entry would require either (1) a second identity-keyed cache + // separate from `sizes` or (2) wrapping each entry in a synthetic + // object. Recompute is cheap — scalar types only, except for the + // `value` submessage which reads from `sizes` anyway. + const { body } = estimateMapEntryBody(field, keyTyped, value, sizes); + writeTag(c, field.number, WIRE_LENGTH_DELIMITED); + writeVarint32(c, body); + writeMapEntry(c, field, keyTyped, value, sizes); + } +} + +/** + * Write one non-oneof non-map field. Matches estimateRegularFieldSize + * exactly so that pass 1 and pass 2 stay in sync. + */ +function writeRegularField( + c: Cursor, + field: DescField, + value: unknown, + sizes: SizeMap, +): void { + switch (field.fieldKind) { + case "scalar": + writeTag(c, field.number, scalarWireType(field.scalar)); + writeScalar(c, field.scalar, value); + return; + case "enum": + writeTag(c, field.number, WIRE_VARINT); + writeInt32(c, value as number); + return; + case "message": { + const sub = value as Record; + const subSize = sizes.get(sub) ?? 0; + writeTag(c, field.number, WIRE_LENGTH_DELIMITED); + writeVarint32(c, subSize); + writeMessageInto(c, field.message, sub, sizes); + return; + } + case "list": { + const list = value as unknown[]; + if (field.listKind === "message") { + for (let k = 0; k < list.length; k++) { + const sub = list[k] as Record; + const subSize = sizes.get(sub) ?? 0; + writeTag(c, field.number, WIRE_LENGTH_DELIMITED); + writeVarint32(c, subSize); + writeMessageInto(c, field.message, sub, sizes); + } + return; } - case "list": { - const list = value as unknown[]; - if (field.listKind === "message") { + if (field.listKind === "enum") { + if (field.packed) { + let body = 0; for (let k = 0; k < list.length; k++) { - const sub = list[k] as Record; - const subSize = sizes.get(sub) ?? 0; - writeTag(c, field.number, WIRE_LENGTH_DELIMITED); - writeVarint32(c, subSize); - writeMessageInto(c, field.message, sub, sizes); - } - } else if (field.listKind === "enum") { - if (field.packed) { - let body = 0; - for (let k = 0; k < list.length; k++) { - body += int32Size(list[k] as number); - } - writeTag(c, field.number, WIRE_LENGTH_DELIMITED); - writeVarint32(c, body); - for (let k = 0; k < list.length; k++) { - writeInt32(c, list[k] as number); - } - } else { - for (let k = 0; k < list.length; k++) { - writeTag(c, field.number, WIRE_VARINT); - writeInt32(c, list[k] as number); - } + body += int32Size(list[k] as number); } - } else { - // scalar list - const t = field.scalar; - const wt = scalarWireType(t); - if (field.packed && wt !== WIRE_LENGTH_DELIMITED) { - let body = 0; - for (let k = 0; k < list.length; k++) { - body += scalarSize(t, list[k]); - } - writeTag(c, field.number, WIRE_LENGTH_DELIMITED); - writeVarint32(c, body); - for (let k = 0; k < list.length; k++) { - writeScalar(c, t, list[k]); - } - } else { - for (let k = 0; k < list.length; k++) { - writeTag(c, field.number, wt); - writeScalar(c, t, list[k]); - } + writeTag(c, field.number, WIRE_LENGTH_DELIMITED); + writeVarint32(c, body); + for (let k = 0; k < list.length; k++) { + writeInt32(c, list[k] as number); } + return; } - break; + for (let k = 0; k < list.length; k++) { + writeTag(c, field.number, WIRE_VARINT); + writeInt32(c, list[k] as number); + } + return; + } + // scalar list + const t = field.scalar; + const wt = scalarWireType(t); + if (field.packed && wt !== WIRE_LENGTH_DELIMITED) { + let body = 0; + for (let k = 0; k < list.length; k++) { + body += scalarSize(t, list[k]); + } + writeTag(c, field.number, WIRE_LENGTH_DELIMITED); + writeVarint32(c, body); + for (let k = 0; k < list.length; k++) { + writeScalar(c, t, list[k]); + } + return; } + for (let k = 0; k < list.length; k++) { + writeTag(c, field.number, wt); + writeScalar(c, t, list[k]); + } + return; + } + case "map": + // Map fields are dispatched through writeMapField from the caller; + // this branch is unreachable on the hot path but defensive. + writeMapField( + c, + field as DescField & { fieldKind: "map" }, + value as Record, + sizes, + ); + return; + } +} + +function writeMessageInto( + c: Cursor, + desc: DescMessage, + message: Record, + sizes: SizeMap, +): void { + const fields = desc.fields; + for (let i = 0; i < fields.length; i++) { + const field = fields[i]; + // Oneof members: dispatched via the oneof loop below. + if (field.oneof !== undefined) continue; + + if (field.fieldKind === "map") { + const obj = message[field.localName] as + | Record + | undefined; + if (!obj || Object.keys(obj).length === 0) continue; + writeMapField( + c, + field as DescField & { fieldKind: "map" }, + obj, + sizes, + ); + continue; } + + const value = message[field.localName]; + if (!isFieldSet(field, value)) continue; + writeRegularField(c, field, value, sizes); + } + const oneofs = desc.oneofs; + for (let i = 0; i < oneofs.length; i++) { + const oneof = oneofs[i]; + const adt = message[oneof.localName] as + | { case: string | undefined; value?: unknown } + | undefined; + if (!adt || adt.case === undefined) continue; + const selected = findOneofField(oneof, adt.case); + if (!selected) continue; + writeRegularField(c, selected, adt.value, sizes); } } @@ -681,9 +948,9 @@ function writeMessageInto( * motivation and scope. * * Falls back to {@link toBinary} when the schema uses features not yet - * supported by the fast path (maps, oneofs, extensions, or delimited - * encoding). Unknown fields on messages are always dropped by the fast - * path — if you need to round-trip unknowns, use `toBinary` instead. + * supported by the fast path (extensions or delimited/group encoding). + * Unknown fields on messages are always dropped by the fast path — if + * you need to round-trip unknowns, use `toBinary` instead. * * @experimental This API is experimental and may change or be removed * without notice. The intent is to explore whether a two-pass encode