diff --git a/packages/protobuf-test/src/byte-identity.test.ts b/packages/protobuf-test/src/byte-identity.test.ts new file mode 100644 index 000000000..08d52a8ec --- /dev/null +++ b/packages/protobuf-test/src/byte-identity.test.ts @@ -0,0 +1,343 @@ +// Copyright 2021-2026 Buf Technologies, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/** + * Byte-identity tests — strict assertions that encoders produce wire-format + * bytes matching a pre-computed canonical reference. + * + * Why this matters: + * Semantic round-trip checks (see round-trip-property.test.ts) catch most + * regressions, but they cannot detect encoder divergence in areas where + * the proto spec allows flexibility (e.g. proto3 default omission, + * canonical varint encoding, packed-vs-unpacked repeated fields). This + * file pins down a small set of canonical cases where any deviation from + * the reference bytes is almost certainly a bug. + * + * The reference bytes are captured from the stable `toBinary` output on + * main. When a new encoder is added to ENCODERS, it must match those + * bytes exactly. + * + * If a legitimate wire-format change lands (rare, and would require an + * ADR), update the reference constants below. 
+ */
+
+import { suite, test } from "node:test";
+import * as assert from "node:assert";
+import {
+  create,
+  toBinary,
+  fromBinary,
+  protoInt64,
+  type DescMessage,
+  type MessageShape,
+} from "@bufbuild/protobuf";
+import {
+  RepeatedScalarValuesMessageSchema,
+  ScalarValuesMessageSchema,
+} from "./gen/ts/extra/msg-scalar_pb.js";
+import { OneofMessageSchema } from "./gen/ts/extra/msg-oneof_pb.js";
+import { MapsMessageSchema } from "./gen/ts/extra/msg-maps_pb.js";
+
+interface EncoderEntry {
+  readonly name: string;
+  readonly encode: <Desc extends DescMessage>(
+    schema: Desc,
+    message: MessageShape<Desc>,
+  ) => Uint8Array;
+}
+
+const ENCODERS: readonly EncoderEntry[] = [
+  { name: "toBinary", encode: (schema, message) => toBinary(schema, message) },
+];
+
+/**
+ * Convert Uint8Array to hex string for readable diff output in failure
+ * messages. We do not compare hex directly because we want each byte to
+ * be individually assertable.
+ */
+function toHex(bytes: Uint8Array): string {
+  return Array.from(bytes)
+    .map((b) => b.toString(16).padStart(2, "0"))
+    .join(" ");
+}
+
+/**
+ * Assert that every encoder produces the expected byte sequence.
+ * Fails fast with a hex dump on mismatch.
+ */
+function assertBytes<Desc extends DescMessage>(
+  schema: Desc,
+  message: MessageShape<Desc>,
+  expected: readonly number[],
+  context: string,
+): void {
+  for (const enc of ENCODERS) {
+    const actual = enc.encode(schema, message);
+    assert.deepStrictEqual(
+      Array.from(actual),
+      expected,
+      `${context} / ${enc.name}: expected [${toHex(new Uint8Array(expected))}] got [${toHex(actual)}]`,
+    );
+    // Round-trip must also succeed.
+    const decoded = fromBinary(schema, actual);
+    assert.deepStrictEqual(
+      decoded,
+      message,
+      `${context} / ${enc.name} round-trip`,
+    );
+  }
+}
+
+void suite("byte-identity: canonical wire format assertions", () => {
+  void suite("proto3 default value omission", () => {
+    void test("empty message → 0 bytes", () => {
+      const msg = create(ScalarValuesMessageSchema);
+      assertBytes(ScalarValuesMessageSchema, msg, [], "empty scalar message");
+    });
+
+    void test("explicit zero scalars are omitted", () => {
+      const msg = create(ScalarValuesMessageSchema, {
+        doubleField: 0,
+        floatField: 0,
+        int32Field: 0,
+        uint32Field: 0,
+        int64Field: protoInt64.parse(0),
+        boolField: false,
+        stringField: "",
+        bytesField: new Uint8Array(),
+      });
+      // All defaults → no bytes emitted.
+      assertBytes(ScalarValuesMessageSchema, msg, [], "zero scalars");
+    });
+
+    void test("non-default scalar is emitted with correct tag", () => {
+      // int32Field value 5, field number 5 → tag = (5 << 3) | 0 = 0x28, value = 0x05
+      const msg = create(ScalarValuesMessageSchema, { int32Field: 5 });
+      assertBytes(
+        ScalarValuesMessageSchema,
+        msg,
+        [0x28, 0x05],
+        "single int32=5",
+      );
+    });
+  });
+
+  void suite("packed repeated scalar encoding (proto3 default)", () => {
+    void test("repeated int32 uses packed encoding", () => {
+      // int32 field = 5, packed → tag = (5 << 3) | 2 (LEN) = 0x2a
+      // payload: varint 1, varint 2, varint 3 → length 3
+      const msg = create(RepeatedScalarValuesMessageSchema, {
+        int32Field: [1, 2, 3],
+      });
+      assertBytes(
+        RepeatedScalarValuesMessageSchema,
+        msg,
+        [0x2a, 0x03, 0x01, 0x02, 0x03],
+        "packed int32 [1,2,3]",
+      );
+    });
+
+    void test("empty repeated is omitted entirely", () => {
+      const msg = create(RepeatedScalarValuesMessageSchema, { int32Field: [] });
+      assertBytes(
+        RepeatedScalarValuesMessageSchema,
+        msg,
+        [],
+        "empty repeated int32",
+      );
+    });
+
+    void test("repeated bool packed", () => {
+      // bool field = 8, tag = (8 << 3) | 2 = 0x42, payload 3 bytes [1,0,1]
+      const msg = create(RepeatedScalarValuesMessageSchema, {
+        boolField: [true, false, true],
+      });
+      assertBytes(
+        RepeatedScalarValuesMessageSchema,
+        msg,
+        [0x42, 0x03, 0x01, 0x00, 0x01],
+        "packed bool",
+      );
+    });
+
+    void test("repeated string is NOT packed (LEN wire type per element)", () => {
+      // string field = 9, each element gets its own tag.
+      // tag = (9 << 3) | 2 = 0x4a
+      // "a" → [0x4a, 0x01, 0x61], "b" → [0x4a, 0x01, 0x62]
+      const msg = create(RepeatedScalarValuesMessageSchema, {
+        stringField: ["a", "b"],
+      });
+      assertBytes(
+        RepeatedScalarValuesMessageSchema,
+        msg,
+        [0x4a, 0x01, 0x61, 0x4a, 0x01, 0x62],
+        "repeated string a,b",
+      );
+    });
+  });
+
+  void suite("oneof encoding", () => {
+    void test("empty oneof produces no bytes", () => {
+      const msg = create(OneofMessageSchema);
+      assertBytes(OneofMessageSchema, msg, [], "empty oneof");
+    });
+
+    void test("oneof scalar=value with zero still emits", () => {
+      // When a oneof case is set, the field IS emitted even if value is default.
+      // scalar.value field = 1, tag = (1 << 3) | 0 = 0x08, value = 0
+      const msg = create(OneofMessageSchema, {
+        scalar: { case: "value", value: 0 },
+      });
+      assertBytes(
+        OneofMessageSchema,
+        msg,
+        [0x08, 0x00],
+        "oneof value=0 (explicit)",
+      );
+    });
+
+    void test("oneof string empty still emits LEN=0", () => {
+      // scalar.error field = 2, tag = (2 << 3) | 2 = 0x12, len=0
+      const msg = create(OneofMessageSchema, {
+        scalar: { case: "error", value: "" },
+      });
+      assertBytes(
+        OneofMessageSchema,
+        msg,
+        [0x12, 0x00],
+        "oneof empty string (explicit)",
+      );
+    });
+  });
+
+  void suite("UTF-8 encoding", () => {
+    void test("ASCII string", () => {
+      // string field = 9, tag = (9 << 3) | 2 = 0x4a
+      const msg = create(ScalarValuesMessageSchema, { stringField: "abc" });
+      assertBytes(
+        ScalarValuesMessageSchema,
+        msg,
+        [0x4a, 0x03, 0x61, 0x62, 0x63],
+        "ASCII abc",
+      );
+    });
+
+    void test("multi-byte: Cyrillic (2 bytes each)", () => {
+      // "мир" = 3 codepoints, 6 UTF-8 bytes
+      const msg = create(ScalarValuesMessageSchema, { stringField: "мир" });
+      assertBytes(
+        ScalarValuesMessageSchema,
+        msg,
+        [0x4a, 0x06, 0xd0, 0xbc, 0xd0, 0xb8, 0xd1, 0x80],
+        "Cyrillic мир",
+      );
+    });
+
+    void test("4-byte codepoint: emoji", () => {
+      // "🎉" = U+1F389, UTF-8 = F0 9F 8E 89 (4 bytes)
+      const msg = create(ScalarValuesMessageSchema, { stringField: "🎉" });
+      assertBytes(
+        ScalarValuesMessageSchema,
+        msg,
+        [0x4a, 0x04, 0xf0, 0x9f, 0x8e, 0x89],
+        "emoji 🎉",
+      );
+    });
+  });
+
+  void suite("varint encoding boundaries", () => {
+    void test("single-byte varint (value < 128)", () => {
+      // int32 field=5, value=127 → tag=0x28, varint=0x7f
+      const msg = create(ScalarValuesMessageSchema, { int32Field: 127 });
+      assertBytes(
+        ScalarValuesMessageSchema,
+        msg,
+        [0x28, 0x7f],
+        "int32=127 (1-byte varint)",
+      );
+    });
+
+    void test("two-byte varint boundary (value = 128)", () => {
+      // varint(128) = [0x80, 0x01]
+      const msg = create(ScalarValuesMessageSchema, { int32Field: 128 });
+      assertBytes(
+        ScalarValuesMessageSchema,
+        msg,
+        [0x28, 0x80, 0x01],
+        "int32=128 (2-byte varint)",
+      );
+    });
+
+    void test("sint32 zigzag encoding (negative)", () => {
+      // Assumes sint32_field is field 16 → 2-byte tag 0x80 0x01 (TODO confirm
+      // in msg-scalar.proto; field 14 would be 0x70). zigzag(-1) = 1 → 0x01.
+      const msg = create(ScalarValuesMessageSchema, { sint32Field: -1 });
+      assertBytes(
+        ScalarValuesMessageSchema,
+        msg,
+        [0x80, 0x01, 0x01],
+        "sint32=-1 zigzag",
+      );
+    });
+  });
+
+  void suite("map field encoding (regression: ordering across runs)", () => {
+    void test("single-entry map is deterministic", () => {
+      // A single-entry map has no ordering ambiguity.
+      // MapsMessage.str_str_field = 1, each entry is a LEN-delimited KV sub-message.
+      const msg = create(MapsMessageSchema, {
+        strStrField: { k: "v" },
+      });
+      // Every registered encoder must be deterministic and round-trip cleanly.
+      assert.ok(ENCODERS.length > 0, "at least one encoder registered");
+      for (const enc of ENCODERS) {
+        const bytes1 = enc.encode(MapsMessageSchema, msg);
+        const bytes2 = enc.encode(MapsMessageSchema, msg);
+        assert.deepStrictEqual(
+          Array.from(bytes1),
+          Array.from(bytes2),
+          `${enc.name}: encoder must be deterministic for single-entry map`,
+        );
+        const decoded = fromBinary(MapsMessageSchema, bytes1);
+        assert.deepStrictEqual(decoded.strStrField, { k: "v" });
+      }
+    });
+  });
+
+  void suite("encoder determinism (same input → same bytes)", () => {
+    void test("repeated encoding produces identical bytes", () => {
+      const msg = create(ScalarValuesMessageSchema, {
+        doubleField: 3.14,
+        int32Field: 42,
+        stringField: "hello",
+        bytesField: new Uint8Array([1, 2, 3]),
+      });
+      for (const enc of ENCODERS) {
+        const b1 = enc.encode(ScalarValuesMessageSchema, msg);
+        const b2 = enc.encode(ScalarValuesMessageSchema, msg);
+        const b3 = enc.encode(ScalarValuesMessageSchema, msg);
+        assert.deepStrictEqual(
+          Array.from(b1),
+          Array.from(b2),
+          `${enc.name}: run 1 vs 2`,
+        );
+        assert.deepStrictEqual(
+          Array.from(b2),
+          Array.from(b3),
+          `${enc.name}: run 2 vs 3`,
+        );
+      }
+    });
+  });
+});
diff --git a/packages/protobuf-test/src/correctness-matrix.test.ts b/packages/protobuf-test/src/correctness-matrix.test.ts
new file mode 100644
index 000000000..22181502a
--- /dev/null
+++ b/packages/protobuf-test/src/correctness-matrix.test.ts
@@ -0,0 +1,292 @@
+// Copyright 2021-2026 Buf Technologies, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/**
+ * Correctness matrix test — verifies that every supported encoder produces
+ * wire-format-compatible output for a representative set of message fixtures.
+ * + * Why this exists: + * The Phase 1+ encode-path optimizations (contiguous-buffer writer, + * schema plan codegen, specialized writers) introduce new encoder entry + * points that must remain semantically equivalent to the reference + * `toBinary` implementation. Ad-hoc checks in benchmark scripts are not + * sufficient — any future encoder variant must be covered by a CI-run + * matrix so regressions are caught immediately. + * + * Matrix shape: + * for each fixture F: + * for each (encoderA, encoderB) in encoders × encoders: + * assert encoderA(F).length === encoderB(F).length + * assert fromBinary(encoderA(F)) deep-equals fromBinary(encoderB(F)) + * assert byte-identical if fixture marked canonical + * assert re-encode(decode(encoderA(F))) is stable + * + * Encoder registry: + * Add new encoders to the ENCODERS array below as they land on main. + * Currently only `toBinary` is available on main; toBinaryFast and + * schema-plan-specialized encoders will be added once those branches + * merge. + */ + +import { suite, test } from "node:test"; +import * as assert from "node:assert"; +import { + create, + toBinary, + fromBinary, + protoInt64, + type DescMessage, + type MessageShape, +} from "@bufbuild/protobuf"; +import { + RepeatedScalarValuesMessageSchema, + ScalarValuesMessageSchema, +} from "./gen/ts/extra/msg-scalar_pb.js"; +import { MapsMessageSchema } from "./gen/ts/extra/msg-maps_pb.js"; +import { MessageFieldMessageSchema } from "./gen/ts/extra/msg-message_pb.js"; +import { OneofMessageSchema } from "./gen/ts/extra/msg-oneof_pb.js"; +import { UserSchema } from "./gen/ts/extra/example_pb.js"; +import { StructSchema, ValueSchema } from "@bufbuild/protobuf/wkt"; + +/** + * An encoder entry under test. `name` is used in test titles; `encode` + * receives the schema + message and must return the binary wire-format. 
+ */
+interface EncoderEntry {
+  readonly name: string;
+  readonly encode: <Desc extends DescMessage>(
+    schema: Desc,
+    message: MessageShape<Desc>,
+  ) => Uint8Array;
+}
+
+/**
+ * A fixture to exercise. `build` returns a fully-populated message used in
+ * every combination test. `canonical` marks fixtures whose wire format is
+ * expected to be byte-identical across all encoders (e.g. no maps with
+ * non-deterministic key ordering). Map-containing fixtures are marked
+ * `canonical: false` because proto3 does not guarantee map key order.
+ */
+interface Fixture<Desc extends DescMessage = DescMessage> {
+  readonly name: string;
+  readonly schema: Desc;
+  readonly build: () => MessageShape<Desc>;
+  readonly canonical: boolean;
+}
+
+const ENCODERS: readonly EncoderEntry[] = [
+  { name: "toBinary", encode: (schema, message) => toBinary(schema, message) },
+  // Future additions (guarded until they land on main):
+  // { name: "toBinaryFast", encode: (schema, message) => toBinaryFast(schema, message) },
+  // { name: "toBinarySchemaPlan", encode: ... }
+];
+
+// Fixtures — small but representative. Each covers one proto feature category.
+const fixtures: readonly Fixture[] = [
+  {
+    name: "SimpleMessage / scalars",
+    schema: ScalarValuesMessageSchema as DescMessage,
+    canonical: true,
+    build: () =>
+      create(ScalarValuesMessageSchema, {
+        doubleField: 0.75,
+        floatField: -0.75,
+        int64Field: protoInt64.parse(-1),
+        uint64Field: protoInt64.uParse(1),
+        int32Field: -123,
+        fixed64Field: protoInt64.uParse(1),
+        fixed32Field: 123,
+        boolField: true,
+        stringField: "hello world",
+        bytesField: new Uint8Array([
+          104, 101, 108, 108, 111, 32, 119, 111, 114, 108, 100,
+        ]),
+        uint32Field: 123,
+        sfixed32Field: -123,
+        sfixed64Field: protoInt64.parse(-1),
+        sint32Field: -1,
+        sint64Field: protoInt64.parse(-1),
+      }) as unknown as MessageShape<DescMessage>,
+  },
+  {
+    name: "RepeatedPacked / scalars",
+    schema: RepeatedScalarValuesMessageSchema as DescMessage,
+    canonical: true,
+    build: () =>
+      create(RepeatedScalarValuesMessageSchema, {
+        doubleField: [0.75, 0, 1],
+        floatField: [0.75, -0.75],
+        int64Field: [protoInt64.parse(-1), protoInt64.parse(-2)],
+        uint64Field: [protoInt64.uParse(1), protoInt64.uParse(2)],
+        int32Field: [-123, 500],
+        boolField: [true, false, true],
+        stringField: ["hello", "world"],
+        uint32Field: [123, 123],
+        sint32Field: [-1, -2, 999],
+      }) as unknown as MessageShape<DescMessage>,
+  },
+  {
+    name: "Nested / message spans",
+    schema: MessageFieldMessageSchema as DescMessage,
+    canonical: true,
+    build: () =>
+      create(MessageFieldMessageSchema, {
+        messageField: { name: "outer" },
+        repeatedMessageField: [{ name: "a" }, { name: "b" }, { name: "c" }],
+      }) as unknown as MessageShape<DescMessage>,
+  },
+  {
+    name: "Nested / User with manager chain",
+    schema: UserSchema as DescMessage,
+    canonical: true,
+    build: () =>
+      create(UserSchema, {
+        firstName: "Alice",
+        active: true,
+        manager: {
+          firstName: "Bob",
+          active: true,
+          manager: { firstName: "Carol", active: false },
+        },
+        locations: ["berlin", "remote"],
+        // No projects map — keeps this fixture canonical.
+      }) as unknown as MessageShape<DescMessage>,
+  },
+  {
+    name: "Map-containing",
+    schema: MapsMessageSchema as DescMessage,
+    canonical: false, // proto3 map key order not deterministic across encoders
+    build: () =>
+      create(MapsMessageSchema, {
+        strStrField: { a: "str", b: "xx", c: "more" },
+        strInt32Field: { a: 123, b: 455 },
+        strBoolField: { a: true, b: false },
+        int32StrField: { 1: "one", 2: "two" },
+      }) as unknown as MessageShape<DescMessage>,
+  },
+  {
+    name: "Oneof / scalar variant",
+    schema: OneofMessageSchema as DescMessage,
+    canonical: true,
+    build: () =>
+      create(OneofMessageSchema, {
+        scalar: { case: "value", value: 42 },
+      }) as unknown as MessageShape<DescMessage>,
+  },
+  {
+    name: "Oneof / message variant",
+    schema: OneofMessageSchema as DescMessage,
+    canonical: true,
+    build: () =>
+      create(OneofMessageSchema, {
+        message: {
+          case: "foo",
+          value: { name: "alice", toggle: true },
+        },
+      }) as unknown as MessageShape<DescMessage>,
+  },
+  {
+    name: "Oneof / empty (default unset)",
+    schema: OneofMessageSchema as DescMessage,
+    canonical: true,
+    build: () =>
+      create(OneofMessageSchema) as unknown as MessageShape<DescMessage>,
+  },
+  {
+    name: "AnyValue / oneof variant",
+    schema: ValueSchema as DescMessage,
+    canonical: true,
+    build: () =>
+      create(ValueSchema, {
+        kind: { case: "stringValue", value: "hello" },
+      }) as unknown as MessageShape<DescMessage>,
+  },
+  {
+    name: "Struct / WKT with nested values",
+    schema: StructSchema as DescMessage,
+    canonical: false, // Struct is a map
+    build: () =>
+      create(StructSchema, {
+        fields: {
+          name: { kind: { case: "stringValue", value: "Alice" } },
+          age: { kind: { case: "numberValue", value: 30 } },
+          active: { kind: { case: "boolValue", value: true } },
+        },
+      }) as unknown as MessageShape<DescMessage>,
+  },
+];
+
+void suite("correctness matrix: encoders × fixtures", () => {
+  for (const fixture of fixtures) {
+    void suite(fixture.name, () => {
+      for (const encA of ENCODERS) {
+        for (const encB of ENCODERS) {
+          void test(`${encA.name} vs ${encB.name}`, () => {
+            const message = fixture.build();
+            const bytesA = encA.encode(fixture.schema, message);
+            const bytesB = encB.encode(fixture.schema, message);
+
+            // 1. Length equality — must always hold across encoders.
+            assert.strictEqual(
+              bytesA.length,
+              bytesB.length,
+              `${encA.name} produced ${bytesA.length} bytes but ${encB.name} produced ${bytesB.length}`,
+            );
+
+            // 2. Semantic equality via round-trip decode.
+            const roundA = fromBinary(fixture.schema, bytesA);
+            const roundB = fromBinary(fixture.schema, bytesB);
+            assert.deepStrictEqual(
+              roundA,
+              roundB,
+              `${encA.name} and ${encB.name} decode to different messages`,
+            );
+
+            // 3. Byte identity for canonical fixtures (no map ordering).
+            if (fixture.canonical && encA.name === encB.name) {
+              assert.deepStrictEqual(
+                Array.from(bytesA),
+                Array.from(bytesB),
+                `${encA.name} is not deterministic on canonical fixture`,
+              );
+            }
+            if (fixture.canonical && encA.name !== encB.name) {
+              assert.deepStrictEqual(
+                Array.from(bytesA),
+                Array.from(bytesB),
+                `${encA.name} and ${encB.name} produce different bytes on canonical fixture`,
+              );
+            }
+          });
+
+          void test(`${encA.name} → decode → ${encB.name} is stable`, () => {
+            const message = fixture.build();
+            const bytes1 = encA.encode(fixture.schema, message);
+            const decoded = fromBinary(fixture.schema, bytes1);
+            const bytes2 = encB.encode(fixture.schema, decoded);
+
+            // Decoded-then-reencoded must round-trip to the same semantic message.
+            const decoded2 = fromBinary(fixture.schema, bytes2);
+            assert.deepStrictEqual(decoded, decoded2);
+
+            // And for canonical fixtures, the bytes must match as well.
+            if (fixture.canonical) {
+              assert.deepStrictEqual(Array.from(bytes1), Array.from(bytes2));
+            }
+          });
+        }
+      }
+    });
+  }
+});
diff --git a/packages/protobuf-test/src/round-trip-property.test.ts b/packages/protobuf-test/src/round-trip-property.test.ts
new file mode 100644
index 000000000..7750153c5
--- /dev/null
+++ b/packages/protobuf-test/src/round-trip-property.test.ts
@@ -0,0 +1,447 @@
+// Copyright 2021-2026 Buf Technologies, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/**
+ * Property-based round-trip tests for encode/decode invariants.
+ *
+ * Core property under test:
+ *   for every valid message M and every registered encoder E:
+ *     fromBinary(schema, E(schema, M)) semantically equals M
+ *
+ * Additional properties:
+ *   - idempotence: encode(decode(encode(M))) === encode(M) in length
+ *   - encoder equivalence: every encoder produces the same semantic result
+ *   - regression: specific prior bugs are covered (map ordering, empty
+ *     oneof, bytes with control chars, unicode strings)
+ *
+ * Implementation notes:
+ *   - No external property-based library (fast-check not available in
+ *     this workspace). We use a deterministic PRNG with a fixed seed so
+ *     the suite remains reproducible across CI runs.
+ *   - Payloads are generated within schema constraints — we never attempt
+ *     to construct invalid messages (those are covered by other suites).
+ */
+
+import { suite, test } from "node:test";
+import * as assert from "node:assert";
+import {
+  create,
+  toBinary,
+  fromBinary,
+  protoInt64,
+  type DescMessage,
+  type MessageShape,
+} from "@bufbuild/protobuf";
+import {
+  RepeatedScalarValuesMessageSchema,
+  ScalarValuesMessageSchema,
+} from "./gen/ts/extra/msg-scalar_pb.js";
+import { MessageFieldMessageSchema } from "./gen/ts/extra/msg-message_pb.js";
+import { OneofMessageSchema } from "./gen/ts/extra/msg-oneof_pb.js";
+import { MapsMessageSchema } from "./gen/ts/extra/msg-maps_pb.js";
+import { UserSchema } from "./gen/ts/extra/example_pb.js";
+
+/**
+ * Encoder registry — mirrors correctness-matrix.test.ts. Kept local so
+ * each test file is self-contained and can be skipped independently.
+ */
+interface EncoderEntry {
+  readonly name: string;
+  readonly encode: <Desc extends DescMessage>(
+    schema: Desc,
+    message: MessageShape<Desc>,
+  ) => Uint8Array;
+}
+
+const ENCODERS: readonly EncoderEntry[] = [
+  { name: "toBinary", encode: (schema, message) => toBinary(schema, message) },
+];
+
+/**
+ * Deterministic PRNG (mulberry32). Seeded so property test output is
+ * identical across runs — debugging a failure reproduces the same inputs.
+ */
+function makeRng(seed: number): () => number {
+  let t = seed >>> 0;
+  return () => {
+    t = (t + 0x6d2b79f5) | 0;
+    let r = Math.imul(t ^ (t >>> 15), 1 | t);
+    r = (r + Math.imul(r ^ (r >>> 7), 61 | r)) ^ r;
+    return ((r ^ (r >>> 14)) >>> 0) / 4294967296;
+  };
+}
+
+function randInt(rng: () => number, min: number, max: number): number {
+  return Math.floor(rng() * (max - min + 1)) + min;
+}
+
+function randString(rng: () => number, maxLen = 16): string {
+  const len = randInt(rng, 0, maxLen);
+  let out = "";
+  for (let i = 0; i < len; i++) {
+    // Mix ASCII + a few multi-byte codepoints to exercise UTF-8.
+    const pick = rng();
+    if (pick < 0.8) {
+      out += String.fromCharCode(randInt(rng, 32, 126));
+    } else if (pick < 0.95) {
+      out += String.fromCharCode(randInt(rng, 0x00a0, 0x04ff)); // Latin-ext/Cyrillic
+    } else {
+      out += String.fromCodePoint(randInt(rng, 0x1f600, 0x1f64f)); // Emoji
+    }
+  }
+  return out;
+}
+
+function randBytes(rng: () => number, maxLen = 16): Uint8Array {
+  const len = randInt(rng, 0, maxLen);
+  const arr = new Uint8Array(len);
+  for (let i = 0; i < len; i++) arr[i] = randInt(rng, 0, 255);
+  return arr;
+}
+
+/**
+ * Build a randomized ScalarValuesMessage. Every scalar kind is exercised,
+ * including 64-bit variants which must go through protoInt64.
+ */
+function randomScalarMessage(rng: () => number) {
+  return create(ScalarValuesMessageSchema, {
+    doubleField: (rng() - 0.5) * 1e6,
+    floatField: Math.fround((rng() - 0.5) * 1e3),
+    int64Field: protoInt64.parse(randInt(rng, -1_000_000, 1_000_000)),
+    uint64Field: protoInt64.uParse(randInt(rng, 0, 1_000_000)),
+    int32Field: randInt(rng, -(1 << 30), 1 << 30),
+    fixed64Field: protoInt64.uParse(randInt(rng, 0, 1_000_000)),
+    fixed32Field: randInt(rng, 0, 1 << 30),
+    boolField: rng() > 0.5,
+    stringField: randString(rng),
+    bytesField: randBytes(rng),
+    uint32Field: randInt(rng, 0, 1 << 30),
+    sfixed32Field: randInt(rng, -(1 << 30), 1 << 30),
+    sfixed64Field: protoInt64.parse(randInt(rng, -1_000_000, 1_000_000)),
+    sint32Field: randInt(rng, -(1 << 30), 1 << 30),
+    sint64Field: protoInt64.parse(randInt(rng, -1_000_000, 1_000_000)),
+  });
+}
+
+/**
+ * Nested User chain with random depth. Exercises recursive encoding and
+ * LEN-delimited sub-message size calculation.
+ */
+function randomUser(
+  rng: () => number,
+  depth = 0,
+): MessageShape<typeof UserSchema> {
+  return create(UserSchema, {
+    firstName: randString(rng),
+    lastName: randString(rng),
+    active: rng() > 0.5,
+    locations: Array.from({ length: randInt(rng, 0, 3) }, () =>
+      randString(rng),
+    ),
+    manager: depth < 3 && rng() > 0.4 ? randomUser(rng, depth + 1) : undefined,
+    // No projects map — keeps the fixture encoder-deterministic.
+  });
+}
+
+/**
+ * Assertion helper: every encoder produces a byte-array that round-trips
+ * to the same semantic message, and all encoders agree on length.
+ */
+function assertRoundTrip<Desc extends DescMessage>(
+  schema: Desc,
+  message: MessageShape<Desc>,
+  context: string,
+): void {
+  const results = ENCODERS.map((enc) => ({
+    name: enc.name,
+    bytes: enc.encode(schema, message),
+  }));
+  // Round-trip each encoding.
+  for (const { name, bytes } of results) {
+    const decoded = fromBinary(schema, bytes);
+    assert.deepStrictEqual(
+      decoded,
+      message,
+      `${context}: ${name} round-trip mismatch`,
+    );
+  }
+  // Cross-encoder agreement on byte length.
+  const [first, ...rest] = results;
+  if (first === undefined) return;
+  for (const other of rest) {
+    assert.strictEqual(
+      first.bytes.length,
+      other.bytes.length,
+      `${context}: ${first.name} produced ${first.bytes.length} bytes but ${other.name} produced ${other.bytes.length}`,
+    );
+  }
+}
+
+void suite("round-trip property tests", () => {
+  void suite("ScalarValuesMessage (100 random cases)", () => {
+    void test("decode(encode(M)) === M for all encoders", () => {
+      const rng = makeRng(0xabcdef);
+      for (let i = 0; i < 100; i++) {
+        const msg = randomScalarMessage(rng);
+        assertRoundTrip(ScalarValuesMessageSchema, msg, `case #${i}`);
+      }
+    });
+  });
+
+  void suite("RepeatedScalarValuesMessage (50 random cases)", () => {
+    void test("packed-repeated round-trip", () => {
+      const rng = makeRng(0x12345);
+      for (let i = 0; i < 50; i++) {
+        const msg = create(RepeatedScalarValuesMessageSchema, {
+          int32Field: Array.from({ length: randInt(rng, 0, 20) }, () =>
+            randInt(rng, -(1 << 20), 1 << 20),
+          ),
+          int64Field: Array.from({ length: randInt(rng, 0, 20) }, () =>
+            protoInt64.parse(randInt(rng, -1000, 1000)),
+          ),
+          stringField: Array.from({ length: randInt(rng, 0, 10) }, () =>
+            randString(rng),
+          ),
+          boolField: Array.from(
+            { length: randInt(rng, 0, 30) },
+            () => rng() > 0.5,
+          ),
+          doubleField: Array.from(
+            { length: randInt(rng, 0, 10) },
+            () => (rng() - 0.5) * 1000,
+          ),
+          sint32Field: Array.from({ length: randInt(rng, 0, 10) }, () =>
+            randInt(rng, -(1 << 20), 1 << 20),
+          ),
+        });
+        assertRoundTrip(RepeatedScalarValuesMessageSchema, msg, `packed #${i}`);
+      }
+    });
+  });
+
+  void suite("Nested messages (50 random cases)", () => {
+    void test("recursive User manager chain", () => {
+      const rng = makeRng(0xdeadbeef);
+      for (let i = 0; i < 50; i++) {
+        const msg = randomUser(rng);
+        assertRoundTrip(UserSchema, msg, `user #${i}`);
+      }
+    });
+
+    void test("MessageFieldMessage with repeated sub-messages", () => {
+      const rng = makeRng(0xfeedface);
+      for (let i = 0; i < 30; i++) {
+        const msg = create(MessageFieldMessageSchema, {
+          messageField: { name: randString(rng) },
+          repeatedMessageField: Array.from(
+            { length: randInt(rng, 0, 8) },
+            () => ({ name: randString(rng) }),
+          ),
+        });
+        assertRoundTrip(MessageFieldMessageSchema, msg, `nested #${i}`);
+      }
+    });
+  });
+
+  void suite("Oneof variants (covers every case)", () => {
+    void test("scalar variants", () => {
+      const rng = makeRng(0xcafebabe);
+      for (let i = 0; i < 30; i++) {
+        // Cycle through each scalar oneof case to ensure full coverage.
+        const pick = i % 3;
+        const msg =
+          pick === 0
+            ? create(OneofMessageSchema, {
+                scalar: { case: "value", value: randInt(rng, -1000, 1000) },
+              })
+            : pick === 1
+              ? create(OneofMessageSchema, {
+                  scalar: { case: "error", value: randString(rng) },
+                })
+              : create(OneofMessageSchema, {
+                  scalar: { case: "bytes", value: randBytes(rng) },
+                });
+        assertRoundTrip(OneofMessageSchema, msg, `oneof-scalar #${i}`);
+      }
+    });
+
+    void test("empty oneof (regression: zero-value oneof must stay unset)", () => {
+      const msg = create(OneofMessageSchema);
+      assertRoundTrip(OneofMessageSchema, msg, "empty oneof");
+      // After round-trip, oneof must still be undefined — not a zero-value encoding.
+      const bytes = toBinary(OneofMessageSchema, msg);
+      assert.strictEqual(
+        bytes.length,
+        0,
+        "empty oneof must produce zero bytes",
+      );
+      const decoded = fromBinary(OneofMessageSchema, bytes);
+      assert.strictEqual(decoded.scalar.case, undefined);
+      assert.strictEqual(decoded.message.case, undefined);
+      assert.strictEqual(decoded.enum.case, undefined);
+    });
+
+    void test("message variants (foo/bar/baz)", () => {
+      const rng = makeRng(0xbaadf00d);
+      const cases = ["foo", "bar", "baz"] as const;
+      for (let i = 0; i < 30; i++) {
+        const caseKind = cases[i % 3];
+        const msg =
+          caseKind === "foo"
+            ? create(OneofMessageSchema, {
+                message: {
+                  case: "foo",
+                  value: { name: randString(rng), toggle: rng() > 0.5 },
+                },
+              })
+            : caseKind === "bar"
+              ? create(OneofMessageSchema, {
+                  message: {
+                    case: "bar",
+                    value: {
+                      a: randInt(rng, -100, 100),
+                      b: randInt(rng, 0, 100),
+                    },
+                  },
+                })
+              : create(OneofMessageSchema, {
+                  message: {
+                    case: "baz",
+                    value: {
+                      a: randInt(rng, -100, 100),
+                      b: randInt(rng, 0, 100),
+                    },
+                  },
+                });
+        assertRoundTrip(OneofMessageSchema, msg, `oneof-msg #${i}`);
+      }
+    });
+  });
+
+  void suite("Map fields (regression: key ordering)", () => {
+    void test("map encode-decode preserves all entries", () => {
+      const rng = makeRng(0x1337);
+      for (let i = 0; i < 20; i++) {
+        const entryCount = randInt(rng, 0, 8);
+        const strStr: Record<string, string> = {};
+        for (let j = 0; j < entryCount; j++) {
+          strStr[`k${j}_${randString(rng, 4)}`] = randString(rng);
+        }
+        const msg = create(MapsMessageSchema, {
+          strStrField: strStr,
+        });
+        // Note: bytes may differ per encoder due to map key order, so only
+        // the decoded map is compared. This runs for every registered
+        // encoder rather than calling toBinary directly.
+        for (const enc of ENCODERS) {
+          const bytes = enc.encode(MapsMessageSchema, msg);
+          const decoded = fromBinary(MapsMessageSchema, bytes).strStrField;
+          const context = `map round-trip #${i} / ${enc.name}`;
+          assert.deepStrictEqual(decoded, msg.strStrField, context);
+        }
+      }
+    });
+  });
+
+  void suite("Edge cases", () => {
+    void test("empty message — zero bytes", () => {
+      const msg = create(ScalarValuesMessageSchema);
+      // Proto3: default scalars must be omitted, empty message → 0 bytes.
+      const bytes = toBinary(ScalarValuesMessageSchema, msg);
+      assert.strictEqual(bytes.length, 0);
+      const decoded = fromBinary(ScalarValuesMessageSchema, bytes);
+      assert.deepStrictEqual(decoded, msg);
+    });
+
+    void test("max int32 / min int32 boundaries", () => {
+      for (const val of [
+        2147483647,
+        -2147483648,
+        2147483647 - 1,
+        -2147483648 + 1,
+        0,
+        1,
+        -1,
+      ]) {
+        const msg = create(ScalarValuesMessageSchema, {
+          int32Field: val,
+          sint32Field: val,
+          sfixed32Field: val,
+        });
+        assertRoundTrip(ScalarValuesMessageSchema, msg, `int32=${val}`);
+      }
+    });
+
+    void test("max int64 / min int64 boundaries", () => {
+      const values = [
+        protoInt64.parse("9223372036854775807"),
+        protoInt64.parse("-9223372036854775808"),
+        protoInt64.parse(0),
+        protoInt64.parse(1),
+        protoInt64.parse(-1),
+      ];
+      for (const val of values) {
+        const msg = create(ScalarValuesMessageSchema, {
+          int64Field: val,
+          sint64Field: val,
+          sfixed64Field: val,
+        });
+        assertRoundTrip(ScalarValuesMessageSchema, msg, `int64=${val}`);
+      }
+    });
+
+    void test("very large strings (10 KiB)", () => {
+      const msg = create(ScalarValuesMessageSchema, {
+        stringField: "x".repeat(10 * 1024),
+      });
+      assertRoundTrip(ScalarValuesMessageSchema, msg, "10KiB string");
+    });
+
+    void test("very large bytes (10 KiB)", () => {
+      const msg = create(ScalarValuesMessageSchema, {
+        bytesField: new Uint8Array(10 * 1024).fill(0xab),
+      });
+      assertRoundTrip(ScalarValuesMessageSchema, msg, "10KiB bytes");
+    });
+
+    void test("bytes with control characters (regression)", () => {
+      const msg = create(ScalarValuesMessageSchema, {
+        bytesField: new Uint8Array([0, 1, 2, 3, 8, 9, 10, 13, 27, 127, 255]),
+      });
+      assertRoundTrip(ScalarValuesMessageSchema, msg, "control-byte bytes");
+    });
+
+    void test("UTF-8 multi-byte sequences (regression)", () => {
+      const msg = create(ScalarValuesMessageSchema, {
+        // Mix ASCII, 2-byte, 3-byte, 4-byte (surrogate pair) codepoints.
+        stringField: "hello мир 你好 🎉 résumé naïve",
+      });
+      assertRoundTrip(ScalarValuesMessageSchema, msg, "utf8");
+    });
+
+    void test("deeply nested message (depth=20)", () => {
+      // Build User { manager: User { manager: ... } } 20 levels deep.
+      let user: MessageShape<typeof UserSchema> = create(UserSchema, {
+        firstName: "leaf",
+      });
+      for (let i = 0; i < 20; i++) {
+        user = create(UserSchema, {
+          firstName: `level-${i}`,
+          manager: user,
+        });
+      }
+      assertRoundTrip(UserSchema, user, "depth=20");
+    });
+  });
+});