Skip to content

Commit 5877708

Browse files
committed
More tests
1 parent 9b323da commit 5877708

File tree

2 files changed

+56
-3
lines changed

2 files changed

+56
-3
lines changed

test/bson_corpus/binary.json

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,36 @@
7474
"description": "$type query operator (conflicts with legacy $binary form with $type field)",
7575
"canonical_bson": "180000000378001000000010247479706500020000000000",
7676
"canonical_extjson": "{\"x\" : { \"$type\" : {\"$numberInt\": \"2\"}}}"
77+
},
78+
{
79+
"description": "subtype 0x09 Vector FLOAT32",
80+
"canonical_bson": "170000000578000A0000000927000000FE420000E04000",
81+
"canonical_extjson": "{\"x\": {\"$binary\": {\"base64\": \"JwAAAP5CAADgQA==\", \"subType\": \"09\"}}}"
82+
},
83+
{
84+
"description": "subtype 0x09 Vector INT8",
85+
"canonical_bson": "11000000057800040000000903007F0700",
86+
"canonical_extjson": "{\"x\": {\"$binary\": {\"base64\": \"AwB/Bw==\", \"subType\": \"09\"}}}"
87+
},
88+
{
89+
"description": "subtype 0x09 Vector PACKED_BIT",
90+
"canonical_bson": "11000000057800040000000910007F0700",
91+
"canonical_extjson": "{\"x\": {\"$binary\": {\"base64\": \"EAB/Bw==\", \"subType\": \"09\"}}}"
92+
},
93+
{
94+
"description": "subtype 0x09 Vector (Zero-length) FLOAT32",
95+
"canonical_bson": "0F0000000578000200000009270000",
96+
"canonical_extjson": "{\"x\": {\"$binary\": {\"base64\": \"JwA=\", \"subType\": \"09\"}}}"
97+
},
98+
{
99+
"description": "subtype 0x09 Vector (Zero-length) INT8",
100+
"canonical_bson": "0F0000000578000200000009030000",
101+
"canonical_extjson": "{\"x\": {\"$binary\": {\"base64\": \"AwA=\", \"subType\": \"09\"}}}"
102+
},
103+
{
104+
"description": "subtype 0x09 Vector (Zero-length) PACKED_BIT",
105+
"canonical_bson": "0F0000000578000200000009100000",
106+
"canonical_extjson": "{\"x\": {\"$binary\": {\"base64\": \"EAA=\", \"subType\": \"09\"}}}"
77107
}
78108
],
79109
"decodeErrors": [

test/test_bson.py

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -729,14 +729,17 @@ def test_uuid_legacy(self):
729729
self.assertEqual(id, transformed)
730730

731731
def test_vector(self):
732+
"""Tests of subtype 9"""
733+
# We start with valid cases, across the 3 dtypes implemented.
734+
# Work with a simple vector that can be interpreted as int8, float32, or ubyte
732735
list_vector = [127, 7]
733736
# As INT8, vector has length 2
734737
binary_vector = Binary.from_vector(list_vector, BinaryVectorDtype.INT8)
735738
vector = binary_vector.as_vector()
736739
assert vector.data == list_vector
737740
# test encoding roundtrip
738741
assert {"vector": binary_vector} == decode(encode({"vector": binary_vector}))
739-
# test json roundtrip # TODO - Is this the wrong place?
742+
# test json roundtrip
740743
assert binary_vector == json_util.loads(json_util.dumps(binary_vector))
741744

742745
# For vectors of bits, aka PACKED_BIT type, vector has length 8 * 2
@@ -758,13 +761,33 @@ def test_vector(self):
758761
len(padded_vec.as_vector(BinaryVectorDtype.INT8).data) == 8 * len(list_vector) - padding
759762
)
760763

764+
# It is worthwhile to show explicitly the values encoded to BSON
765+
padded_doc = {"padded_vec": padded_vec}
766+
assert (
767+
encode(padded_doc)
768+
== b"\x1a\x00\x00\x00\x05padded_vec\x00\x04\x00\x00\x00\t\x10\x03\x7f\x07\x00"
769+
)
770+
# and dumped to json
771+
assert (
772+
json_util.dumps(padded_doc)
773+
== '{"padded_vec": {"$binary": {"base64": "EAN/Bw==", "subType": "09"}}}'
774+
)
775+
761776
# FLOAT32 is also implemented
762777
float_binary = Binary.from_vector(list_vector, BinaryVectorDtype.FLOAT32)
763778
assert all(isinstance(d, float) for d in float_binary.as_vector().data)
764779

765-
# The C extension was segfaulting on unicode RegExs, so we have this test
766-
# that doesn't really test anything but the lack of a segfault.
780+
# Now some invalid cases
781+
for x in [-1, 257]:
782+
try:
783+
Binary.from_vector([x], BinaryVectorDtype.PACKED_BIT)
784+
except struct.error as e:
785+
assert str(e) == "ubyte format requires 0 <= number <= 255"
786+
767787
def test_unicode_regex(self):
788+
"""Tests we do not get a segfault for C extension on unicode RegExs.
789+
This used to happen, so the test only checks that encoding and decoding complete without a crash.
790+
"""
768791
regex = re.compile("revisi\xf3n")
769792
decode(encode({"regex": regex}))
770793

0 commit comments

Comments
 (0)