diff --git a/rdflib/plugins/serializers/longturtle.py b/rdflib/plugins/serializers/longturtle.py
index 95b94efd7..2aaed36e6 100644
--- a/rdflib/plugins/serializers/longturtle.py
+++ b/rdflib/plugins/serializers/longturtle.py
@@ -39,23 +39,20 @@
 
 
 class LongTurtleSerializer(RecursiveSerializer):
+    """LongTurtle, a Turtle serialization format.
+
+    When the optional parameter ``canon`` is set to :py:obj:`True`, the graph is canonicalized
+    before serialization. This normalizes blank node identifiers and allows for
+    deterministic serialization of the graph. Useful when consistent outputs are required.
+    """
+
     short_name = "longturtle"
     indentString = " "
 
     def __init__(self, store):
         self._ns_rewrite = {}
-        namespace_manager = store.namespace_manager
-        store = to_canonical_graph(store)
-        content = store.serialize(format="application/n-triples")
-        lines = content.split("\n")
-        lines.sort()
-        graph = Graph()
-        graph.parse(
-            data="\n".join(lines), format="application/n-triples", skolemize=True
-        )
-        graph = graph.de_skolemize()
-        graph.namespace_manager = namespace_manager
-        super(LongTurtleSerializer, self).__init__(graph)
+        self._canon = False
+        super(LongTurtleSerializer, self).__init__(store)
         self.keywords = {RDF.type: "a"}
         self.reset()
         self.stream = None
@@ -85,11 +82,37 @@ def addNamespace(self, prefix, namespace):
         super(LongTurtleSerializer, self).addNamespace(prefix, namespace)
         return prefix
 
+    def canonize(self):
+        """Apply canonicalization to the store.
+
+        This normalizes blank node identifiers and allows for deterministic
+        serialization of the graph. Does nothing unless ``canon`` was requested;
+        the store's namespace manager and base are carried over.
+        """
+        if not self._canon:
+            return
+
+        namespace_manager = self.store.namespace_manager
+        store = to_canonical_graph(self.store)
+        content = store.serialize(format="application/n-triples")
+        lines = content.split("\n")
+        lines.sort()
+        graph = Graph()
+        graph.parse(
+            data="\n".join(lines), format="application/n-triples", skolemize=True
+        )
+        graph = graph.de_skolemize()
+        graph.namespace_manager = namespace_manager
+        # Carry the base over too: serialize() falls back to the graph's base.
+        graph.base = self.store.base
+        self.store = graph
+
     def reset(self):
         super(LongTurtleSerializer, self).reset()
         self._shortNames = {}
         self._started = False
         self._ns_rewrite = {}
+        self.canonize()
 
     def serialize(
         self,
@@ -99,6 +122,7 @@ def serialize(
         spacious: Optional[bool] = None,
         **kwargs: Any,
     ) -> None:
+        self._canon = kwargs.get("canon", False)
         self.reset()
         self.stream = stream
         # if base is given here, use, if not and a base is set for the graph use that
diff --git a/test/test_serializers/test_serializer_longturtle.py b/test/test_serializers/test_serializer_longturtle.py
index c1761b6da..65821784e 100644
--- a/test/test_serializers/test_serializer_longturtle.py
+++ b/test/test_serializers/test_serializer_longturtle.py
@@ -167,7 +167,7 @@ def test_longturtle():
     g.bind("sdo", SDO)
 
     # run the long turtle serializer
-    output = g.serialize(format="longturtle")
+    output = g.serialize(format="longturtle", canon=True)
 
     # fix the target
     current_dir = Path.cwd()  # Get the current directory
diff --git a/test/test_serializers/test_serializer_longturtle_sort.py b/test/test_serializers/test_serializer_longturtle_sort.py
index fa192253d..044660e3e 100644
--- a/test/test_serializers/test_serializer_longturtle_sort.py
+++ b/test/test_serializers/test_serializer_longturtle_sort.py
@@ -62,7 +62,7 @@ def test_sort_semiblank_graph() -> None:
         graph.add((outer_node, EX.has, inner_node))
         graph.add((inner_node, RDFS.seeAlso, nested))
 
-        graph_text = graph.serialize(format="longturtle", sort=True)
+        graph_text = graph.serialize(format="longturtle", canon=True)
         if first_graph_text == "":
             first_graph_text = graph_text
 