Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix pickling problem in LinkedDataMapping #655

Merged
merged 3 commits into from
Nov 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion src/fundus/parser/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import more_itertools
import xmltodict
from dict2xml import dict2xml
from lxml.etree import XPath, tostring
from lxml.etree import XPath, fromstring, tostring
from typing_extensions import Self, TypeAlias, deprecated

from fundus.utils.serialization import JSONVal, replace_keys_in_nested_dict
Expand Down Expand Up @@ -61,6 +61,17 @@ def __init__(self, lds: Iterable[Dict[str, Any]] = ()):
self.add_ld(ld)
self.__xml: Optional[lxml.etree._Element] = None

def __getstate__(self):
    """Build the state dictionary used when pickling this instance.

    Returns a shallow copy of the instance ``__dict__``. When the private
    ``__xml`` attribute is not ``None``, its entry in the copy is replaced by
    the result of ``tostring`` so that the copy does not carry the element
    object itself.
    """
    snapshot = dict(self.__dict__)
    cached_xml = self.__xml
    if cached_xml is None:
        # Nothing to convert; the copied attributes are returned unchanged.
        return snapshot
    # The key is the name-mangled form of the private ``__xml`` attribute.
    snapshot["_LinkedDataMapping__xml"] = tostring(cached_xml)
    return snapshot

def __setstate__(self, state):
    """Restore the instance from a state dictionary.

    If ``state`` holds a non-``None`` value under the name-mangled ``__xml``
    key, that value is passed through ``fromstring`` and the result is stored
    back under the same key. ``state`` is modified in place and then becomes
    the instance ``__dict__``.
    """
    xml_key = "_LinkedDataMapping__xml"
    serialized_xml = state.get(xml_key)
    if serialized_xml is not None:
        state[xml_key] = fromstring(serialized_xml)
    self.__dict__ = state

def serialize(self) -> Dict[str, Any]:
    """Return the instance attributes whose names contain no double underscore.

    Every entry of the instance ``__dict__`` is inspected; entries whose name
    contains ``"__"`` anywhere (which includes name-mangled private
    attributes) are left out of the returned dictionary.
    """
    visible: Dict[str, Any] = {}
    for name, content in self.__dict__.items():
        if "__" in name:
            continue
        visible[name] = content
    return visible

Expand Down
14 changes: 13 additions & 1 deletion tests/test_data.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from pickle import dumps, loads
from typing import Any, Dict, List

from lxml.etree import XPath
from lxml.etree import XPath, tostring

from fundus.parser.data import LinkedDataMapping

Expand All @@ -22,3 +23,14 @@ def test_xpath_search(self):
assert ld.xpath_search(XPath("//_U003AU002AU0040")) == ["Howdy"]
assert ld.xpath_search(XPath("//dict")) == ["True"]
assert ld.xpath_search(XPath("//Example2")) == [{"@type": "Example2", "value": "2", "_:*@": "Howdy"}]

def test_pickle(self):
    """Round-trip a LinkedDataMapping through pickle and compare attributes."""
    original = LinkedDataMapping(lds)
    # NOTE(review): presumably this populates the private XML attribute so the
    # round trip covers the non-None case -- confirm against __as_xml__.
    original.__as_xml__()
    restored = loads(dumps(original))

    for attribute in ("Example1", "Example2", "UNKNOWN_TYPE"):
        assert restored.__getattribute__(attribute) == original.__getattribute__(attribute)

    # The private attribute is looked up through its name-mangled string
    # because the test lives outside of LinkedDataMapping; the two values are
    # compared via their tostring() output.
    xml_attribute = "_LinkedDataMapping__xml"
    restored_xml = tostring(restored.__getattribute__(xml_attribute))
    original_xml = tostring(original.__getattribute__(xml_attribute))
    assert restored_xml == original_xml
4 changes: 4 additions & 0 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import datetime
import pickle
from typing import Any, Dict, List, Optional, Tuple, Union

import lxml.html
Expand Down Expand Up @@ -236,6 +237,9 @@ def test_parsing(self, publisher: Publisher) -> None:
for key, value in version_data.items():
assert value == extraction[key]

# check if extraction is picklable
pickle.dumps(extraction)

def test_reserved_attribute_names(self, publisher: Publisher):
parser = publisher.parser
for attr in attribute_annotations_mapping.keys():
Expand Down