@@ -729,14 +729,17 @@ def test_uuid_legacy(self):
729729 self .assertEqual (id , transformed )
730730
731731 def test_vector (self ):
732+ """Tests of subtype 9"""
733+ # We start with valid cases, across the 3 dtypes implemented.
734+ # Work with a simple vector that can be interpreted as int8, float32, or ubyte
732735 list_vector = [127 , 7 ]
733736 # As INT8, vector has length 2
734737 binary_vector = Binary .from_vector (list_vector , BinaryVectorDtype .INT8 )
735738 vector = binary_vector .as_vector ()
736739 assert vector .data == list_vector
737740 # test encoding roundtrip
738741 assert {"vector" : binary_vector } == decode (encode ({"vector" : binary_vector }))
739- # test json roundtrip # TODO - Is this the wrong place?
742+ # test json roundtrip
740743 assert binary_vector == json_util .loads (json_util .dumps (binary_vector ))
741744
742745 # For vectors of bits, aka PACKED_BIT type, vector has length 8 * 2
@@ -758,13 +761,33 @@ def test_vector(self):
758761 len (padded_vec .as_vector (BinaryVectorDtype .INT8 ).data ) == 8 * len (list_vector ) - padding
759762 )
760763
764+ # It is worthwhile explicitly showing the values encoded to BSON
765+ padded_doc = {"padded_vec" : padded_vec }
766+ assert (
767+ encode (padded_doc )
768+ == b"\x1a \x00 \x00 \x00 \x05 padded_vec\x00 \x04 \x00 \x00 \x00 \t \x10 \x03 \x7f \x07 \x00 "
769+ )
770+ # and dumped to json
771+ assert (
772+ json_util .dumps (padded_doc )
773+ == '{"padded_vec": {"$binary": {"base64": "EAN/Bw==", "subType": "09"}}}'
774+ )
775+
761776 # FLOAT32 is also implemented
762777 float_binary = Binary .from_vector (list_vector , BinaryVectorDtype .FLOAT32 )
763778 assert all (isinstance (d , float ) for d in float_binary .as_vector ().data )
764779
765- # The C extension was segfaulting on unicode RegExs, so we have this test
766- # that doesn't really test anything but the lack of a segfault.
780+ # Now some invalid cases
781+ for x in [- 1 , 257 ]:
782+ try :
783+ Binary .from_vector ([x ], BinaryVectorDtype .PACKED_BIT )
784+ except struct .error as e :
785+ assert str (e ) == "ubyte format requires 0 <= number <= 255"
786+
def test_unicode_regex(self):
    """Round-trip a compiled regex with a non-ASCII pattern through BSON.

    Nothing is asserted on the result: the C extension used to segfault on
    unicode regular expressions, so completing without a crash is the check.
    """
    document = {"regex": re.compile("revisi\xf3 n")}
    encoded = encode(document)
    decode(encoded)
770793
0 commit comments