Skip to content

Commit

Permalink
Fix octet string parsing in yaml test parser
Browse files Browse the repository at this point in the history
The old parsing was broken because str.encode UTF-8-encodes every character,
so non-ASCII characters were converted improperly. For example `\xff` would
become `b'\xc3\xbf'` when we wanted it to be `b'\xff'`.

Co-authored-by: Tennessee Carmel-Veilleux <[email protected]>
  • Loading branch information
tehampson and tcarmelveilleux committed Nov 17, 2022
1 parent b87a86b commit f707749
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 7 deletions.
3 changes: 2 additions & 1 deletion src/app/tests/suites/TestCluster.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -749,7 +749,8 @@ tests:
command: "readAttribute"
attribute: "octet_string"
response:
value: "\r\n\xff\"\xa0"
# This is the proper 'hex:...' encoding of "\r\n\xff\"\xa0"
value: "hex:0d0aff22a0"

- label: "Write attribute OCTET_STRING"
command: "writeAttribute"
Expand Down
40 changes: 34 additions & 6 deletions src/controller/python/chip/yaml/format_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,39 @@
from chip.tlv import uint, float32
import enum
from chip.yaml.errors import ValidationError


_HEX_PREFIX = 'hex:'
from binascii import unhexlify
import re


def convert_yaml_octet_string_to_bytes(s: str) -> bytes:
    r"""Convert a YAML octet string body to bytes.

    Two input forms are accepted:

    * ``hex:<digits>`` -- the entire string is the literal prefix ``hex:``
      followed by zero or more hex digits (upper or lower case). The digits
      are decoded pairwise into bytes.
    * Anything else -- every character becomes one byte holding the low 8 bits
      of its code point, so a character written with a c-style hex escape
      (e.g. ``\x5a``) yields that single byte.

    Args:
        s: Octet string value to convert.

    Returns:
        The decoded bytes.

    Raises:
        ValueError: If a ``hex:`` literal has an odd number of digits, or if a
            non-hex string contains a character at or above U+0200.
    """
    # Step 1: handle explicit "hex:" prefix.
    # fullmatch is used instead of match with a trailing `$`, because `$` also
    # matches just before a final newline; that would decode "hex:aa\n" as hex
    # and silently drop the newline.
    hex_prefixed_match = re.fullmatch(r"hex:(?P<hex_content>[A-Fa-f0-9]*)", s)
    if hex_prefixed_match:
        hex_content = hex_prefixed_match.group("hex_content")
        if (len(hex_content) % 2) != 0:
            raise ValueError("Hex literal is not even length!")
        return unhexlify(hex_content)

    # Step 2: convert non-hex-prefixed to bytes
    # TODO(#23669): This does not properly support utf8 octet strings. We mimic
    # javascript codegen behavior. Behavior of javascript is:
    #   * Octet string character >= u+0200 errors out.
    #   * Any character greater than 0xFF has the upper bytes chopped off.
    known_javascript_max_char_value = 0x200
    if any(ord(char) >= known_javascript_max_char_value for char in s):
        # If you got here see TODO #23669 mentioned above.
        raise ValueError("Unsupport char in octet string")

    # Keep only the least significant byte of each code point.
    return bytes(ord(char) & 0xFF for char in s)


def convert_name_value_pair_to_dict(arg_values):
Expand Down Expand Up @@ -118,9 +148,7 @@ def convert_yaml_type(field_value, field_type, use_from_dict=False):
return field_type(field_value)
# YAML treats bytes as strings. Convert to a byte string.
elif (field_type == bytes and type(field_value) != bytes):
if isinstance(field_value, str) and field_value.startswith(_HEX_PREFIX):
return bytes.fromhex(field_value[len(_HEX_PREFIX):])
return str.encode(field_value)
return convert_yaml_octet_string_to_bytes(field_value)
# By default, just return the field_value casted to field_type.
else:
return field_type(field_value)
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#
# Copyright (c) 2022 Project CHIP Authors
# All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from chip.yaml.format_converter import convert_yaml_octet_string_to_bytes
from binascii import unhexlify
import unittest


class TestOctetStringYamlDecode(unittest.TestCase):
    """Unit tests for convert_yaml_octet_string_to_bytes."""

    def test_common_cases(self):
        """Check hex-prefixed, escaped and plain inputs, then the odd-length hex error."""
        expectations = (
            # Explicit "hex:" prefix: lower case, empty body, upper case.
            ("hex:aa55", unhexlify("aa55")),
            ("hex:", unhexlify("")),
            ("hex:AA55", unhexlify("aa55")),
            # Characters written with c-style hex escapes map to single bytes.
            ("0\xaa\x55", unhexlify("30aa55")),
            ("0\xAA\x55", unhexlify("30aa55")),
            ("0\xAa\x55", unhexlify("30aa55")),
            # "hex:" that is not at the very start is ordinary text.
            ("0hex:", b"0hex:"),
            ("0hex:A", b"0hex:A"),
            ("0hex:AA55", b"0hex:AA55"),
            # Plain text and control characters pass through unchanged.
            ("AA55", b"AA55"),
            ("AA\n\r\t55", unhexlify("41410a0d093535")),
            # TODO(#23669): After utf8 is properly supported expected result is unhexlify("c3a9c3a90a0a")
            ("\xC3\xA9é\n\n", unhexlify("c3a9e90a0a")),
        )
        for yaml_value, expected_bytes in expectations:
            self.assertEqual(convert_yaml_octet_string_to_bytes(yaml_value), expected_bytes)

        # Partial hex nibble
        self.assertRaises(ValueError, convert_yaml_octet_string_to_bytes, "hex:aa5")


def main():
    """Run this module's unit tests through unittest's command-line runner."""
    unittest.main()


# Only run the tests when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

0 comments on commit f707749

Please sign in to comment.