Skip to content

Commit

Permalink
fix: xml defaultNamespace serialization and detection (#20)
Browse files Browse the repository at this point in the history
* fixes:  serialization with defaultNS fails [#12](#12)
* fixes: defaultNamespace detection fails on XML-attributes when deserializing [#11](#11)

---------

Signed-off-by: Jan Kowalleck <[email protected]>
  • Loading branch information
jkowalleck authored Oct 1, 2023
1 parent a2b5503 commit 59eaa5f
Show file tree
Hide file tree
Showing 9 changed files with 254 additions and 19 deletions.
30 changes: 16 additions & 14 deletions serializable/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,11 +352,11 @@ def _as_xml(self: _T, view_: Optional[Type[_T]] = None, as_string: bool = True,
elif prop_info.is_enum:
v = v.value

this_e_attributes.update({new_key: str(v)})
this_e_attributes.update({_namespace_element_name(new_key, xmlns): str(v)})

element_name = _namespace_element_name(tag_name=element_name,
xmlns=xmlns) if element_name else _namespace_element_name(
tag_name=CurrentFormatter.formatter.encode(self.__class__.__name__), xmlns=xmlns)
element_name = _namespace_element_name(
element_name if element_name else CurrentFormatter.formatter.encode(self.__class__.__name__),
xmlns)
this_e = Element(element_name, this_e_attributes)

# Handle remaining Properties that will be sub elements
Expand Down Expand Up @@ -389,11 +389,11 @@ def _as_xml(self: _T, view_: Optional[Type[_T]] = None, as_string: bool = True,

if CurrentFormatter.formatter:
new_key = CurrentFormatter.formatter.encode(property_name=new_key)
new_key = _namespace_element_name(tag_name=new_key, xmlns=xmlns)
new_key = _namespace_element_name(new_key, xmlns)

if prop_info.is_array and prop_info.xml_array_config:
_array_type, nested_key = prop_info.xml_array_config
nested_key = _namespace_element_name(tag_name=nested_key, xmlns=xmlns)
nested_key = _namespace_element_name(nested_key, xmlns)
if _array_type and _array_type == XmlArraySerializationType.NESTED:
nested_e = SubElement(this_e, new_key)
else:
Expand Down Expand Up @@ -459,16 +459,20 @@ def _from_xml(cls: Type[_T], data: Union[TextIOWrapper, Element],
_namespaces = dict([node for _, node in
SafeElementTree.iterparse(StringIO(SafeElementTree.tostring(data, 'unicode')),
events=['start-ns'])])
if 'ns0' in _namespaces:
default_namespace = _namespaces['ns0']
else:
default_namespace = ''
default_namespace = (re.compile(r'^\{(.*?)\}.').search(data.tag) or (None, _namespaces.get('')))[1]

if default_namespace is None:
def strip_default_namespace(s: str) -> str:
return s
else:
def strip_default_namespace(s: str) -> str:
return s.replace(f'{{{default_namespace}}}', '')

_data: Dict[str, Any] = {}

# Handle attributes on the root element if there are any
for k, v in data.attrib.items():
decoded_k = CurrentFormatter.formatter.decode(property_name=k)
decoded_k = CurrentFormatter.formatter.decode(strip_default_namespace(k))
if decoded_k in klass.ignore_during_deserialization:
logger.debug(f'Ignoring {decoded_k} when deserializing {cls.__module__}.{cls.__qualname__}')
continue
Expand Down Expand Up @@ -500,9 +504,7 @@ def _from_xml(cls: Type[_T], data: Union[TextIOWrapper, Element],

# Handle Sub-Elements
for child_e in data:
child_e_tag_name = str(child_e.tag).replace('{' + default_namespace + '}', '')

decoded_k = CurrentFormatter.formatter.decode(property_name=child_e_tag_name)
decoded_k = CurrentFormatter.formatter.decode(strip_default_namespace(child_e.tag))
if decoded_k in klass.ignore_during_deserialization:
logger.debug(f'Ignoring {decoded_k} when deserializing {cls.__module__}.{cls.__qualname__}')
continue
Expand Down
27 changes: 26 additions & 1 deletion tests/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

import json
import os
from typing import Any
from typing import Any, Optional, Union
from unittest import TestCase

import lxml # type: ignore
Expand Down Expand Up @@ -59,3 +59,28 @@ def assertEqualXml(self, a: str, b: str) -> None:
diff_results = main.diff_texts(a, b, diff_options={'F': 0.5})
diff_results = list(filter(lambda o: not isinstance(o, MoveNode), diff_results))
self.assertEqual(len(diff_results), 0, f'There are XML differences: {diff_results}\n- {a}\n+ {b}')


class DeepCompareMixin(object):
    """Mixin that adds a recursive, verbose equality assertion.

    Intended to be combined with :class:`unittest.TestCase`; it relies on the
    ``maxDiff`` attribute and ``assertDictEqual`` method provided by that class.
    """

    def assertDeepEqual(self, first: Any, second: Any, msg: Optional[str] = None) -> None:
        """Assert that two objects are equal after deep conversion to plain data.

        Costly compare, but very verbose: both arguments are converted
        recursively into plain dicts/tuples and compared with
        ``assertDictEqual`` while the diff-length limit is lifted.

        :param first: the expected object.
        :param second: the actual object.
        :param msg: optional message forwarded to ``assertDictEqual``.
        :raises AssertionError: when the converted structures differ.
        """
        self: Union[TestCase, 'DeepCompareMixin']
        _omd = self.maxDiff
        try:
            # Lift the diff-length limit so the whole difference is reported.
            self.maxDiff = None
            dd1 = self.__deepDict(first)
            dd2 = self.__deepDict(second)
            self.assertDictEqual(dd1, dd2, msg)
        finally:
            # Always restore the previous limit, even when the assertion fails.
            self.maxDiff = _omd

    def __deepDict(self, o: Any) -> Any:
        """Recursively convert ``o`` into comparable plain data.

        Lists and tuples become tuples, dicts keep their keys with converted
        values, sets become tuples sorted by ``repr`` (so ordering is stable),
        and objects exposing ``__dict__`` become dicts of their non-dunder
        attributes. Everything else is returned unchanged.
        """
        if isinstance(o, (list, tuple)):
            return tuple(self.__deepDict(i) for i in o)
        if isinstance(o, dict):
            # Iterate items(): iterating the dict itself yields only keys.
            return {k: self.__deepDict(v) for k, v in o.items()}
        if isinstance(o, set):
            return tuple(sorted((self.__deepDict(i) for i in o), key=repr))
        if hasattr(o, '__dict__'):
            return {k: self.__deepDict(v) for k, v in vars(o).items() if not (k.startswith('__') and k.endswith('__'))}
        return o
43 changes: 43 additions & 0 deletions tests/fixtures/the-phoenix-project-defaultNS-isset-v4.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
<?xml version='1.0' encoding='utf-8'?>
<book xmlns="http://the.phoenix.project/testing/defaultNS" isbn_number="978-1942788294">
<id>f3758bf0-0ff7-4366-a5e5-c209d4352b2d</id>
<title>The Phoenix Project</title>
<edition number="5">5th Anniversary Limited Edition</edition>
<publish_date>2018-04-16</publish_date>
<author>Gene Kim</author>
<author>George Spafford</author>
<author>Kevin Behr</author>
<type>fiction</type>
<publisher>
<address>10 Downing Street</address>
<name>IT Revolution Press LLC</name>
</publisher>
<references>
<reference ref="my-ref-1"/>
<reference ref="my-ref-3">
<reference ref="sub-ref-2"/>
</reference>
<reference ref="my-ref-2">
<reference ref="sub-ref-1"/>
<reference ref="sub-ref-3"/>
</reference>
</references>
<chapters>
<chapter>
<number>1</number>
<title>Tuesday, September 2</title>
</chapter>
<chapter>
<number>2</number>
<title>Tuesday, September 2</title>
</chapter>
<chapter>
<number>3</number>
<title>Tuesday, September 2</title>
</chapter>
<chapter>
<number>4</number>
<title>Wednesday, September 3</title>
</chapter>
</chapters>
</book>
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<book xmlns="http://the.phoenix.project/testing/defaultNS" isbn_number="978-1942788294"><id>f3758bf0-0ff7-4366-a5e5-c209d4352b2d</id><title>The Phoenix Project</title><edition number="5">5th Anniversary Limited Edition</edition><publish_date>2018-04-16</publish_date><author>Karl Ranseier</author><type>fiction</type><publisher><name>IT Revolution Press LLC</name></publisher><chapters><chapter><number>1</number><title>Tuesday, September 2</title></chapter><chapter><number>2</number><title>Tuesday, September 2</title></chapter><chapter><number>3</number><title>Tuesday, September 2</title></chapter><chapter><number>4</number><title>Wednesday, September 3</title></chapter></chapters></book>
44 changes: 44 additions & 0 deletions tests/fixtures/the-phoenix-project-defaultNS-mixed-v4.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
<?xml version='1.0' encoding='utf-8'?>
<!-- elements are in a prefixed NS; attributes are unprefixed and therefore in no namespace -->
<my:book xmlns:my="http://the.phoenix.project/testing/defaultNS" isbn_number="978-1942788294">
<my:id>f3758bf0-0ff7-4366-a5e5-c209d4352b2d</my:id>
<my:title>The Phoenix Project</my:title>
<my:edition number="5">5th Anniversary Limited Edition</my:edition>
<my:publish_date>2018-04-16</my:publish_date>
<my:author>Gene Kim</my:author>
<my:author>George Spafford</my:author>
<my:author>Kevin Behr</my:author>
<my:type>fiction</my:type>
<my:publisher>
<my:address>10 Downing Street</my:address>
<my:name>IT Revolution Press LLC</my:name>
</my:publisher>
<my:references>
<my:reference ref="my-ref-1"/>
<my:reference ref="my-ref-3">
<my:reference ref="sub-ref-2"/>
</my:reference>
<my:reference ref="my-ref-2">
<my:reference ref="sub-ref-1"/>
<my:reference ref="sub-ref-3"/>
</my:reference>
</my:references>
<my:chapters>
<my:chapter>
<my:number>1</my:number>
<my:title>Tuesday, September 2</my:title>
</my:chapter>
<my:chapter>
<my:number>2</my:number>
<my:title>Tuesday, September 2</my:title>
</my:chapter>
<my:chapter>
<my:number>3</my:number>
<my:title>Tuesday, September 2</my:title>
</my:chapter>
<my:chapter>
<my:number>4</my:number>
<my:title>Wednesday, September 3</my:title>
</my:chapter>
</my:chapters>
</my:book>
43 changes: 43 additions & 0 deletions tests/fixtures/the-phoenix-project-defaultNS-unset-v4.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
<?xml version='1.0' encoding='utf-8'?>
<my:book xmlns:my="http://the.phoenix.project/testing/defaultNS" my:isbn_number="978-1942788294">
<my:id>f3758bf0-0ff7-4366-a5e5-c209d4352b2d</my:id>
<my:title>The Phoenix Project</my:title>
<my:edition my:number="5">5th Anniversary Limited Edition</my:edition>
<my:publish_date>2018-04-16</my:publish_date>
<my:author>Gene Kim</my:author>
<my:author>George Spafford</my:author>
<my:author>Kevin Behr</my:author>
<my:type>fiction</my:type>
<my:publisher>
<my:address>10 Downing Street</my:address>
<my:name>IT Revolution Press LLC</my:name>
</my:publisher>
<my:references>
<my:reference my:ref="my-ref-1"/>
<my:reference my:ref="my-ref-3">
<my:reference my:ref="sub-ref-2"/>
</my:reference>
<my:reference my:ref="my-ref-2">
<my:reference my:ref="sub-ref-1"/>
<my:reference my:ref="sub-ref-3"/>
</my:reference>
</my:references>
<my:chapters>
<my:chapter>
<my:number>1</my:number>
<my:title>Tuesday, September 2</my:title>
</my:chapter>
<my:chapter>
<my:number>2</my:number>
<my:title>Tuesday, September 2</my:title>
</my:chapter>
<my:chapter>
<my:number>3</my:number>
<my:title>Tuesday, September 2</my:title>
</my:chapter>
<my:chapter>
<my:number>4</my:number>
<my:title>Wednesday, September 3</my:title>
</my:chapter>
</my:chapters>
</my:book>
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<ns0:book xmlns:ns0="http://the.phoenix.project/testing/defaultNS" ns0:isbn_number="978-1942788294"><ns0:id>f3758bf0-0ff7-4366-a5e5-c209d4352b2d</ns0:id><ns0:title>The Phoenix Project</ns0:title><ns0:edition ns0:number="5">5th Anniversary Limited Edition</ns0:edition><ns0:publish_date>2018-04-16</ns0:publish_date><ns0:author>Karl Ranseier</ns0:author><ns0:type>fiction</ns0:type><ns0:publisher><ns0:name>IT Revolution Press LLC</ns0:name></ns0:publisher><ns0:chapters><ns0:chapter><ns0:number>1</ns0:number><ns0:title>Tuesday, September 2</ns0:title></ns0:chapter><ns0:chapter><ns0:number>2</ns0:number><ns0:title>Tuesday, September 2</ns0:title></ns0:chapter><ns0:chapter><ns0:number>3</ns0:number><ns0:title>Tuesday, September 2</ns0:title></ns0:chapter><ns0:chapter><ns0:number>4</ns0:number><ns0:title>Wednesday, September 3</ns0:title></ns0:chapter></ns0:chapters></ns0:book>
4 changes: 2 additions & 2 deletions tests/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ class Book:
def __init__(self, title: str, isbn: str, publish_date: date, authors: Iterable[str],
publisher: Optional[Publisher] = None, chapters: Optional[Iterable[Chapter]] = None,
edition: Optional[BookEdition] = None, type: BookType = BookType.FICTION,
id: Optional[UUID] = None, references: Optional[List[BookReference]] = None) -> None:
id: Optional[UUID] = None, references: Optional[Iterable[BookReference]] = None) -> None:
self._id = id or uuid4()
self._title = title
self._isbn = isbn
Expand All @@ -214,7 +214,7 @@ def __init__(self, title: str, isbn: str, publish_date: date, authors: Iterable[
self._publisher = publisher
self.chapters = list(chapters or [])
self._type = type
self.references = set(references or {})
self.references = set(references or [])

@property # type: ignore[misc]
@serializable.xml_sequence(1)
Expand Down
80 changes: 78 additions & 2 deletions tests/test_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@

import logging
import os
from copy import deepcopy
from sys import version_info
from unittest import skipIf

from defusedxml import ElementTree as SafeElementTree

Expand All @@ -28,15 +31,17 @@
KebabCasePropertyNameFormatter,
SnakeCasePropertyNameFormatter,
)
from tests.base import FIXTURES_DIRECTORY, BaseTestCase
from tests.base import FIXTURES_DIRECTORY, BaseTestCase, DeepCompareMixin
from tests.model import Book, SchemaVersion2, SchemaVersion3, SchemaVersion4, ThePhoenixProject, ThePhoenixProject_v1

logger = logging.getLogger('serializable')
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')


class TestXml(BaseTestCase):
class TestXml(BaseTestCase, DeepCompareMixin):

# region test_serialize

def test_serialize_tfp_cc1(self) -> None:
CurrentFormatter.formatter = CamelCasePropertyNameFormatter
Expand Down Expand Up @@ -73,6 +78,47 @@ def test_serialize_tfp_sc1(self) -> None:
with open(os.path.join(FIXTURES_DIRECTORY, 'the-phoenix-project-snake-case-1.xml')) as expected_xml:
self.assertEqualXml(expected_xml.read(), ThePhoenixProject.as_xml())

def test_serializable_no_defaultNS(self) -> None:
    """Regression test for https://github.com/madpah/serializable/issues/12

    Serializes a book with an ``xmlns`` and renders it without passing
    ``default_namespace``, then compares the text against the
    ``defaultNS-unset`` snapshot fixture.
    """
    from xml.etree import ElementTree
    namespace_uri = 'http://the.phoenix.project/testing/defaultNS'
    snapshot_path = os.path.join(FIXTURES_DIRECTORY, 'the-phoenix-project-defaultNS-unset.SNAPSHOT.xml')
    with open(snapshot_path) as snapshot_file:
        expected = snapshot_file.read()
    book = deepcopy(ThePhoenixProject_v1)
    # A single author keeps the output independent of set ordering.
    book._authors = {'Karl Ranseier'}
    root = book.as_xml(as_string=False, xmlns=namespace_uri)
    # `default_namespace` is deliberately not passed to `tostring` here.
    actual = ElementTree.tostring(root, method='xml', encoding='unicode')
    # The exact string comparison is intentional; show the full diff on failure.
    self.maxDiff = None
    self.assertEqual(expected, actual)

@skipIf(version_info < (3, 8), '`ElementTree.tostring(default_namespace=)` not available')
def test_serializable_with_defaultNS(self) -> None:
    """Regression test for https://github.com/madpah/serializable/issues/12

    Serializes a book for ``SchemaVersion4`` with an ``xmlns`` and renders it
    with that same URI as ``default_namespace``, then compares the text
    against the ``defaultNS-isset`` snapshot fixture.
    """
    from xml.etree import ElementTree
    namespace_uri = 'http://the.phoenix.project/testing/defaultNS'
    snapshot_path = os.path.join(FIXTURES_DIRECTORY, 'the-phoenix-project-defaultNS-isset.SNAPSHOT.xml')
    with open(snapshot_path) as snapshot_file:
        expected = snapshot_file.read()
    book = deepcopy(ThePhoenixProject_v1)
    # A single author keeps the output independent of set ordering.
    book._authors = {'Karl Ranseier'}
    root = book.as_xml(SchemaVersion4, as_string=False, xmlns=namespace_uri)
    actual = ElementTree.tostring(root, method='xml', encoding='unicode', default_namespace=namespace_uri)
    # The exact string comparison is intentional; show the full diff on failure.
    self.maxDiff = None
    self.assertEqual(expected, actual)

# endregion test_serialize

# region test_deserialize

def test_deserialize_tfp_cc1(self) -> None:
CurrentFormatter.formatter = CamelCasePropertyNameFormatter
with open(os.path.join(FIXTURES_DIRECTORY, 'the-phoenix-project-camel-case-1-v1.xml')) as input_xml:
Expand Down Expand Up @@ -162,3 +208,33 @@ def test_deserialize_tfp_sc1(self) -> None:
self.assertEqual(ThePhoenixProject_v1.publisher, book.publisher)
self.assertEqual(ThePhoenixProject_v1.authors, book.authors)
self.assertEqual(ThePhoenixProject_v1.chapters, book.chapters)

def test_deserializable_with_defaultNS(self) -> None:
    """Regression test for https://github.com/madpah/serializable/issues/11

    Reads the ``defaultNS-isset`` fixture and expects the resulting book to
    deep-equal ``ThePhoenixProject``.
    """
    fixture_path = os.path.join(FIXTURES_DIRECTORY, 'the-phoenix-project-defaultNS-isset-v4.xml')
    with open(fixture_path) as xml_file:
        parsed_book = Book.from_xml(xml_file)
    self.assertDeepEqual(ThePhoenixProject, parsed_book)

def test_deserializable_no_defaultNS_explicit(self) -> None:
    """Regression test for https://github.com/madpah/serializable/issues/11

    Reads the ``defaultNS-unset`` fixture, passing the namespace URI
    explicitly as the second argument of ``Book.from_xml``, and expects the
    resulting book to deep-equal ``ThePhoenixProject``.
    """
    fixture_path = os.path.join(FIXTURES_DIRECTORY, 'the-phoenix-project-defaultNS-unset-v4.xml')
    with open(fixture_path) as xml_file:
        parsed_book = Book.from_xml(xml_file, 'http://the.phoenix.project/testing/defaultNS')
    self.assertDeepEqual(ThePhoenixProject, parsed_book)

def test_deserializable_no_defaultNS_autodetect(self) -> None:
    """Regression test for https://github.com/madpah/serializable/issues/11

    Reads the ``defaultNS-unset`` fixture without passing a namespace and
    expects the resulting book to deep-equal ``ThePhoenixProject``.
    """
    fixture_path = os.path.join(FIXTURES_DIRECTORY, 'the-phoenix-project-defaultNS-unset-v4.xml')
    with open(fixture_path) as xml_file:
        parsed_book = Book.from_xml(xml_file)
    self.assertDeepEqual(ThePhoenixProject, parsed_book)

def test_deserializable_mixed_defaultNS_autodetect(self) -> None:
    """Regression test for https://github.com/madpah/serializable/issues/11

    Reads the ``defaultNS-mixed`` fixture (prefixed elements, unprefixed
    attributes) without passing a namespace and expects the resulting book
    to deep-equal ``ThePhoenixProject``.
    """
    fixture_path = os.path.join(FIXTURES_DIRECTORY, 'the-phoenix-project-defaultNS-mixed-v4.xml')
    with open(fixture_path) as xml_file:
        parsed_book = Book.from_xml(xml_file)
    self.assertDeepEqual(ThePhoenixProject, parsed_book)

# endregion test_deserialize

0 comments on commit 59eaa5f

Please sign in to comment.