From 59eaa5f28eb2969e9d497669ef0436eb87b7525d Mon Sep 17 00:00:00 2001 From: Jan Kowalleck Date: Sun, 1 Oct 2023 11:36:52 +0200 Subject: [PATCH] fix: xml defaultNamespace serialization and detection (#20) * fixes: serialization with defaultNS fails [#12](https://github.com/madpah/serializable/issues/12) * fixes: defaultNamespace detection fails on XML-attributes when deserializing [#11](https://github.com/madpah/serializable/issues/11) --------- Signed-off-by: Jan Kowalleck --- serializable/__init__.py | 30 +++---- tests/base.py | 27 ++++++- ...the-phoenix-project-defaultNS-isset-v4.xml | 43 ++++++++++ ...oenix-project-defaultNS-isset.SNAPSHOT.xml | 1 + ...the-phoenix-project-defaultNS-mixed-v4.xml | 44 ++++++++++ ...the-phoenix-project-defaultNS-unset-v4.xml | 43 ++++++++++ ...oenix-project-defaultNS-unset.SNAPSHOT.xml | 1 + tests/model.py | 4 +- tests/test_xml.py | 80 ++++++++++++++++++- 9 files changed, 254 insertions(+), 19 deletions(-) create mode 100644 tests/fixtures/the-phoenix-project-defaultNS-isset-v4.xml create mode 100644 tests/fixtures/the-phoenix-project-defaultNS-isset.SNAPSHOT.xml create mode 100644 tests/fixtures/the-phoenix-project-defaultNS-mixed-v4.xml create mode 100644 tests/fixtures/the-phoenix-project-defaultNS-unset-v4.xml create mode 100644 tests/fixtures/the-phoenix-project-defaultNS-unset.SNAPSHOT.xml diff --git a/serializable/__init__.py b/serializable/__init__.py index b40c917..4cf4786 100644 --- a/serializable/__init__.py +++ b/serializable/__init__.py @@ -352,11 +352,11 @@ def _as_xml(self: _T, view_: Optional[Type[_T]] = None, as_string: bool = True, elif prop_info.is_enum: v = v.value - this_e_attributes.update({new_key: str(v)}) + this_e_attributes.update({_namespace_element_name(new_key, xmlns): str(v)}) - element_name = _namespace_element_name(tag_name=element_name, - xmlns=xmlns) if element_name else _namespace_element_name( - tag_name=CurrentFormatter.formatter.encode(self.__class__.__name__), xmlns=xmlns) + element_name = 
_namespace_element_name( + element_name if element_name else CurrentFormatter.formatter.encode(self.__class__.__name__), + xmlns) this_e = Element(element_name, this_e_attributes) # Handle remaining Properties that will be sub elements @@ -389,11 +389,11 @@ def _as_xml(self: _T, view_: Optional[Type[_T]] = None, as_string: bool = True, if CurrentFormatter.formatter: new_key = CurrentFormatter.formatter.encode(property_name=new_key) - new_key = _namespace_element_name(tag_name=new_key, xmlns=xmlns) + new_key = _namespace_element_name(new_key, xmlns) if prop_info.is_array and prop_info.xml_array_config: _array_type, nested_key = prop_info.xml_array_config - nested_key = _namespace_element_name(tag_name=nested_key, xmlns=xmlns) + nested_key = _namespace_element_name(nested_key, xmlns) if _array_type and _array_type == XmlArraySerializationType.NESTED: nested_e = SubElement(this_e, new_key) else: @@ -459,16 +459,20 @@ def _from_xml(cls: Type[_T], data: Union[TextIOWrapper, Element], _namespaces = dict([node for _, node in SafeElementTree.iterparse(StringIO(SafeElementTree.tostring(data, 'unicode')), events=['start-ns'])]) - if 'ns0' in _namespaces: - default_namespace = _namespaces['ns0'] - else: - default_namespace = '' + default_namespace = (re.compile(r'^\{(.*?)\}.').search(data.tag) or (None, _namespaces.get('')))[1] + + if default_namespace is None: + def strip_default_namespace(s: str) -> str: + return s + else: + def strip_default_namespace(s: str) -> str: + return s.replace(f'{{{default_namespace}}}', '') _data: Dict[str, Any] = {} # Handle attributes on the root element if there are any for k, v in data.attrib.items(): - decoded_k = CurrentFormatter.formatter.decode(property_name=k) + decoded_k = CurrentFormatter.formatter.decode(strip_default_namespace(k)) if decoded_k in klass.ignore_during_deserialization: logger.debug(f'Ignoring {decoded_k} when deserializing {cls.__module__}.{cls.__qualname__}') continue @@ -500,9 +504,7 @@ def _from_xml(cls: Type[_T], 
data: Union[TextIOWrapper, Element], # Handle Sub-Elements for child_e in data: - child_e_tag_name = str(child_e.tag).replace('{' + default_namespace + '}', '') - - decoded_k = CurrentFormatter.formatter.decode(property_name=child_e_tag_name) + decoded_k = CurrentFormatter.formatter.decode(strip_default_namespace(child_e.tag)) if decoded_k in klass.ignore_during_deserialization: logger.debug(f'Ignoring {decoded_k} when deserializing {cls.__module__}.{cls.__qualname__}') continue diff --git a/tests/base.py b/tests/base.py index 2268786..aefbb59 100644 --- a/tests/base.py +++ b/tests/base.py @@ -19,7 +19,7 @@ import json import os -from typing import Any +from typing import Any, Optional, Union from unittest import TestCase import lxml # type: ignore @@ -59,3 +59,28 @@ def assertEqualXml(self, a: str, b: str) -> None: diff_results = main.diff_texts(a, b, diff_options={'F': 0.5}) diff_results = list(filter(lambda o: not isinstance(o, MoveNode), diff_results)) self.assertEqual(len(diff_results), 0, f'There are XML differences: {diff_results}\n- {a}\n+ {b}') + + +class DeepCompareMixin(object): + def assertDeepEqual(self, first: Any, second: Any, msg: Optional[str] = None) -> None: + """costly compare, but very verbose""" + self: Union[TestCase, 'DeepCompareMixin'] + _omd = self.maxDiff + try: + self.maxDiff = None + dd1 = self.__deepDict(first) + dd2 = self.__deepDict(second) + self.assertDictEqual(dd1, dd2, msg) + finally: + self.maxDiff = _omd + + def __deepDict(self, o: Any) -> Any: + if isinstance(o, (list, tuple)): + return tuple(self.__deepDict(i) for i in o) + if isinstance(o, dict): + return {k: self.__deepDict(v) for k, v in o.items()} + if isinstance(o, set): + return tuple(sorted((self.__deepDict(i) for i in o), key=repr)) + if hasattr(o, '__dict__'): + return {k: self.__deepDict(v) for k, v in vars(o).items() if not (k.startswith('__') and k.endswith('__'))} + return o diff --git a/tests/fixtures/the-phoenix-project-defaultNS-isset-v4.xml 
b/tests/fixtures/the-phoenix-project-defaultNS-isset-v4.xml new file mode 100644 index 0000000..a02cb91 --- /dev/null +++ b/tests/fixtures/the-phoenix-project-defaultNS-isset-v4.xml @@ -0,0 +1,43 @@ + + + f3758bf0-0ff7-4366-a5e5-c209d4352b2d + The Phoenix Project + 5th Anniversary Limited Edition + 2018-04-16 + Gene Kim + George Spafford + Kevin Behr + fiction + +
10 Downing Street
+ IT Revolution Press LLC +
+ + + + + + + + + + + + + 1 + Tuesday, September 2 + + + 2 + Tuesday, September 2 + + + 3 + Tuesday, September 2 + + + 4 + Wednesday, September 3 + + +
\ No newline at end of file diff --git a/tests/fixtures/the-phoenix-project-defaultNS-isset.SNAPSHOT.xml b/tests/fixtures/the-phoenix-project-defaultNS-isset.SNAPSHOT.xml new file mode 100644 index 0000000..c2548ee --- /dev/null +++ b/tests/fixtures/the-phoenix-project-defaultNS-isset.SNAPSHOT.xml @@ -0,0 +1 @@ +f3758bf0-0ff7-4366-a5e5-c209d4352b2dThe Phoenix Project5th Anniversary Limited Edition2018-04-16Karl RanseierfictionIT Revolution Press LLC1Tuesday, September 22Tuesday, September 23Tuesday, September 24Wednesday, September 3 \ No newline at end of file diff --git a/tests/fixtures/the-phoenix-project-defaultNS-mixed-v4.xml b/tests/fixtures/the-phoenix-project-defaultNS-mixed-v4.xml new file mode 100644 index 0000000..80242e1 --- /dev/null +++ b/tests/fixtures/the-phoenix-project-defaultNS-mixed-v4.xml @@ -0,0 +1,44 @@ + + + + f3758bf0-0ff7-4366-a5e5-c209d4352b2d + The Phoenix Project + 5th Anniversary Limited Edition + 2018-04-16 + Gene Kim + George Spafford + Kevin Behr + fiction + + 10 Downing Street + IT Revolution Press LLC + + + + + + + + + + + + + + 1 + Tuesday, September 2 + + + 2 + Tuesday, September 2 + + + 3 + Tuesday, September 2 + + + 4 + Wednesday, September 3 + + + \ No newline at end of file diff --git a/tests/fixtures/the-phoenix-project-defaultNS-unset-v4.xml b/tests/fixtures/the-phoenix-project-defaultNS-unset-v4.xml new file mode 100644 index 0000000..4d497ce --- /dev/null +++ b/tests/fixtures/the-phoenix-project-defaultNS-unset-v4.xml @@ -0,0 +1,43 @@ + + + f3758bf0-0ff7-4366-a5e5-c209d4352b2d + The Phoenix Project + 5th Anniversary Limited Edition + 2018-04-16 + Gene Kim + George Spafford + Kevin Behr + fiction + + 10 Downing Street + IT Revolution Press LLC + + + + + + + + + + + + + + 1 + Tuesday, September 2 + + + 2 + Tuesday, September 2 + + + 3 + Tuesday, September 2 + + + 4 + Wednesday, September 3 + + + \ No newline at end of file diff --git a/tests/fixtures/the-phoenix-project-defaultNS-unset.SNAPSHOT.xml 
b/tests/fixtures/the-phoenix-project-defaultNS-unset.SNAPSHOT.xml new file mode 100644 index 0000000..8a9393e --- /dev/null +++ b/tests/fixtures/the-phoenix-project-defaultNS-unset.SNAPSHOT.xml @@ -0,0 +1 @@ +f3758bf0-0ff7-4366-a5e5-c209d4352b2dThe Phoenix Project5th Anniversary Limited Edition2018-04-16Karl RanseierfictionIT Revolution Press LLC1Tuesday, September 22Tuesday, September 23Tuesday, September 24Wednesday, September 3 \ No newline at end of file diff --git a/tests/model.py b/tests/model.py index b020b93..8025ed5 100644 --- a/tests/model.py +++ b/tests/model.py @@ -204,7 +204,7 @@ class Book: def __init__(self, title: str, isbn: str, publish_date: date, authors: Iterable[str], publisher: Optional[Publisher] = None, chapters: Optional[Iterable[Chapter]] = None, edition: Optional[BookEdition] = None, type: BookType = BookType.FICTION, - id: Optional[UUID] = None, references: Optional[List[BookReference]] = None) -> None: + id: Optional[UUID] = None, references: Optional[Iterable[BookReference]] = None) -> None: self._id = id or uuid4() self._title = title self._isbn = isbn @@ -214,7 +214,7 @@ def __init__(self, title: str, isbn: str, publish_date: date, authors: Iterable[ self._publisher = publisher self.chapters = list(chapters or []) self._type = type - self.references = set(references or {}) + self.references = set(references or []) @property # type: ignore[misc] @serializable.xml_sequence(1) diff --git a/tests/test_xml.py b/tests/test_xml.py index 085e48c..6af0172 100644 --- a/tests/test_xml.py +++ b/tests/test_xml.py @@ -19,6 +19,9 @@ import logging import os +from copy import deepcopy +from sys import version_info +from unittest import skipIf from defusedxml import ElementTree as SafeElementTree @@ -28,7 +31,7 @@ KebabCasePropertyNameFormatter, SnakeCasePropertyNameFormatter, ) -from tests.base import FIXTURES_DIRECTORY, BaseTestCase +from tests.base import FIXTURES_DIRECTORY, BaseTestCase, DeepCompareMixin from tests.model import Book, 
SchemaVersion2, SchemaVersion3, SchemaVersion4, ThePhoenixProject, ThePhoenixProject_v1 logger = logging.getLogger('serializable') @@ -36,7 +39,9 @@ formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') -class TestXml(BaseTestCase): +class TestXml(BaseTestCase, DeepCompareMixin): + + # region test_serialize def test_serialize_tfp_cc1(self) -> None: CurrentFormatter.formatter = CamelCasePropertyNameFormatter @@ -73,6 +78,47 @@ def test_serialize_tfp_sc1(self) -> None: with open(os.path.join(FIXTURES_DIRECTORY, 'the-phoenix-project-snake-case-1.xml')) as expected_xml: self.assertEqualXml(expected_xml.read(), ThePhoenixProject.as_xml()) + def test_serializable_no_defaultNS(self) -> None: + """regression test for https://github.com/madpah/serializable/issues/12""" + from xml.etree import ElementTree + xmlns = 'http://the.phoenix.project/testing/defaultNS' + with open(os.path.join(FIXTURES_DIRECTORY, 'the-phoenix-project-defaultNS-unset.SNAPSHOT.xml')) as expected_xml: + expected = expected_xml.read() + data = deepcopy(ThePhoenixProject_v1) + data._authors = {'Karl Ranseier', } # only one item, so order is no issue + actual = ElementTree.tostring( + data.as_xml(as_string=False, xmlns=xmlns), + method='xml', + encoding='unicode', + # default_namespace=None + ) + # byte-wise string compare is intentional! 
+ self.maxDiff = None + self.assertEqual(expected, actual) + + @skipIf(version_info < (3, 8), '`ElementTree.tostring(default_namespace=)` not available') + def test_serializable_with_defaultNS(self) -> None: + """regression test for https://github.com/madpah/serializable/issues/12""" + from xml.etree import ElementTree + xmlns = 'http://the.phoenix.project/testing/defaultNS' + with open(os.path.join(FIXTURES_DIRECTORY, 'the-phoenix-project-defaultNS-isset.SNAPSHOT.xml')) as expected_xml: + expected = expected_xml.read() + data = deepcopy(ThePhoenixProject_v1) + data._authors = {'Karl Ranseier', } # only one item, so order is no issue + actual = ElementTree.tostring( + data.as_xml(SchemaVersion4, as_string=False, xmlns=xmlns), + method='xml', + encoding='unicode', + default_namespace=xmlns, + ) + # byte-wise string compare is intentional! + self.maxDiff = None + self.assertEqual(expected, actual) + + # endregion test_serialize + + # region test_deserialize + def test_deserialize_tfp_cc1(self) -> None: CurrentFormatter.formatter = CamelCasePropertyNameFormatter with open(os.path.join(FIXTURES_DIRECTORY, 'the-phoenix-project-camel-case-1-v1.xml')) as input_xml: @@ -162,3 +208,33 @@ def test_deserialize_tfp_sc1(self) -> None: self.assertEqual(ThePhoenixProject_v1.publisher, book.publisher) self.assertEqual(ThePhoenixProject_v1.authors, book.authors) self.assertEqual(ThePhoenixProject_v1.chapters, book.chapters) + + def test_deserializable_with_defaultNS(self) -> None: + """regression test for https://github.com/madpah/serializable/issues/11""" + expected = ThePhoenixProject + with open(os.path.join(FIXTURES_DIRECTORY, 'the-phoenix-project-defaultNS-isset-v4.xml')) as fixture_xml: + actual = Book.from_xml(fixture_xml) + self.assertDeepEqual(expected, actual) + + def test_deserializable_no_defaultNS_explicit(self) -> None: + """regression test for https://github.com/madpah/serializable/issues/11""" + expected = ThePhoenixProject + with 
open(os.path.join(FIXTURES_DIRECTORY, 'the-phoenix-project-defaultNS-unset-v4.xml')) as fixture_xml: + actual = Book.from_xml(fixture_xml, 'http://the.phoenix.project/testing/defaultNS') + self.assertDeepEqual(expected, actual) + + def test_deserializable_no_defaultNS_autodetect(self) -> None: + """regression test for https://github.com/madpah/serializable/issues/11""" + expected = ThePhoenixProject + with open(os.path.join(FIXTURES_DIRECTORY, 'the-phoenix-project-defaultNS-unset-v4.xml')) as fixture_xml: + actual = Book.from_xml(fixture_xml) + self.assertDeepEqual(expected, actual) + + def test_deserializable_mixed_defaultNS_autodetect(self) -> None: + """regression test for https://github.com/madpah/serializable/issues/11""" + expected = ThePhoenixProject + with open(os.path.join(FIXTURES_DIRECTORY, 'the-phoenix-project-defaultNS-mixed-v4.xml')) as fixture_xml: + actual = Book.from_xml(fixture_xml) + self.assertDeepEqual(expected, actual) + + # endregion test_deserialize