diff --git a/source/bson-binary-vector/bson-binary-vector.md b/source/bson-binary-vector/bson-binary-vector.md index edbeb5944b..d46f6681a9 100644 --- a/source/bson-binary-vector/bson-binary-vector.md +++ b/source/bson-binary-vector/bson-binary-vector.md @@ -184,6 +184,8 @@ Drivers MUST validate vector metadata and raise an error if any invariant is vio - Padding MUST be 0 for all dtypes where padding doesn’t apply, and MUST be within \[0, 7\] for PACKED_BIT. - A PACKED_BIT vector MUST NOT be empty if padding is in the range \[1, 7\]. +- When unpacking binary data into a FLOAT32 Vector structure, the length of the binary data following the dtype and + padding MUST be a multiple of 4 bytes. Drivers MUST perform this validation when a numeric vector and padding are provided through the API, and when unpacking binary data (BSON or similar) into a Vector structure. @@ -242,3 +244,9 @@ See the [README](tests/README.md) for tests. you want to store or transmit binary data more efficiently by grouping 8 bits into a single byte (uint8). For an example in Python, see [numpy.unpackbits](https://numpy.org/doc/2.0/reference/generated/numpy.unpackbits.html#numpy.unpackbits). + +## Changelog + +- 2025-02-04: Require FLOAT32 binary data length to be a multiple of 4 bytes when decoding into a vector. + +- 2024-11-01: BSON Binary Subtype 9 accepted DRIVERS-2926 (#1708) diff --git a/source/bson-binary-vector/tests/README.md b/source/bson-binary-vector/tests/README.md index 3357ebb264..1a2cd87199 100644 --- a/source/bson-binary-vector/tests/README.md +++ b/source/bson-binary-vector/tests/README.md @@ -29,7 +29,7 @@ Each JSON file contains three top-level keys. - `description`: string describing the test. - `valid`: boolean indicating if the vector, dtype, and padding should be considered a valid input. -- `vector`: list of numbers +- `vector`: (required if `valid` is true) list of numbers - `dtype_hex`: string defining the data type in hex (e.g. 
"0x10", "0x27") - `dtype_alias`: (optional) string defining the data dtype, perhaps as Enum. - `padding`: (optional) integer for byte padding. Defaults to 0. @@ -50,7 +50,10 @@ MUST assert that the input float array is the same after encoding and decoding. #### To prove correct in an invalid case (`valid:false`), one MUST -- raise an exception when attempting to encode a document from the numeric values, dtype, and padding. +- if the vector field is present, raise an exception when attempting to encode a document from the numeric values, + dtype, and padding. +- if the canonical_bson field is present, raise an exception when attempting to deserialize it into the corresponding + numeric values, as the field contains corrupted data. ## FAQ diff --git a/source/bson-binary-vector/tests/float32.json b/source/bson-binary-vector/tests/float32.json index 872c435323..845f504ff3 100644 --- a/source/bson-binary-vector/tests/float32.json +++ b/source/bson-binary-vector/tests/float32.json @@ -44,8 +44,22 @@ "vector": [127.0, 7.0], "dtype_hex": "0x27", "dtype_alias": "FLOAT32", - "padding": 3 + "padding": 3, + "canonical_bson": "1C00000005766563746F72000A0000000927030000FE420000E04000" + }, + { + "description": "Insufficient vector data with 3 bytes FLOAT32", + "valid": false, + "dtype_hex": "0x27", + "dtype_alias": "FLOAT32", + "canonical_bson": "1700000005766563746F7200050000000927002A2A2A00" + }, + { + "description": "Insufficient vector data with 5 bytes FLOAT32", + "valid": false, + "dtype_hex": "0x27", + "dtype_alias": "FLOAT32", + "canonical_bson": "1900000005766563746F7200070000000927002A2A2A2A2A00" } ] } - diff --git a/source/bson-binary-vector/tests/int8.json b/source/bson-binary-vector/tests/int8.json index 7529721e5e..29524fb617 100644 --- a/source/bson-binary-vector/tests/int8.json +++ b/source/bson-binary-vector/tests/int8.json @@ -42,7 +42,8 @@ "vector": [127, 7], "dtype_hex": "0x03", "dtype_alias": "INT8", - "padding": 3 + "padding": 3, + "canonical_bson": 
"1600000005766563746F7200040000000903037F0700" }, { "description": "INT8 with float inputs", @@ -54,4 +55,3 @@ } ] } - diff --git a/source/bson-binary-vector/tests/packed_bit.json b/source/bson-binary-vector/tests/packed_bit.json index 035776e87f..a220e7e318 100644 --- a/source/bson-binary-vector/tests/packed_bit.json +++ b/source/bson-binary-vector/tests/packed_bit.json @@ -8,7 +8,8 @@ "vector": [], "dtype_hex": "0x10", "dtype_alias": "PACKED_BIT", - "padding": 1 + "padding": 1, + "canonical_bson": "1400000005766563746F72000200000009100100" }, { "description": "Simple Vector PACKED_BIT", @@ -61,21 +62,14 @@ "dtype_alias": "PACKED_BIT", "padding": 0 }, - { - "description": "Padding specified with no vector data PACKED_BIT", - "valid": false, - "vector": [], - "dtype_hex": "0x10", - "dtype_alias": "PACKED_BIT", - "padding": 1 - }, { "description": "Exceeding maximum padding PACKED_BIT", "valid": false, "vector": [1], "dtype_hex": "0x10", "dtype_alias": "PACKED_BIT", - "padding": 8 + "padding": 8, + "canonical_bson": "1500000005766563746F7200030000000910080100" }, { "description": "Negative padding PACKED_BIT", @@ -84,15 +78,6 @@ "dtype_hex": "0x10", "dtype_alias": "PACKED_BIT", "padding": -1 - }, - { - "description": "Vector with float values PACKED_BIT", - "valid": false, - "vector": [127.5], - "dtype_hex": "0x10", - "dtype_alias": "PACKED_BIT", - "padding": 0 } ] } -