diff --git a/rdflib/plugins/serializers/longturtle.py b/rdflib/plugins/serializers/longturtle.py index 3800c40dd7..d35485c911 100644 --- a/rdflib/plugins/serializers/longturtle.py +++ b/rdflib/plugins/serializers/longturtle.py @@ -18,6 +18,7 @@ from __future__ import annotations +import warnings from typing import IO, Any, Optional from rdflib.compare import to_canonical_graph @@ -166,14 +167,14 @@ def preprocessTriple(self, triple: _TripleType) -> None: # predicate corresponds to base namespace continue # Don't use generated prefixes for subjects and objects - self.getQName(node, gen_prefix=(i == VERB)) + self.get_pname(node, gen_prefix=(i == VERB)) if isinstance(node, Literal) and node.datatype: - self.getQName(node.datatype, gen_prefix=_GEN_QNAME_FOR_DT) + self.get_pname(node.datatype, gen_prefix=_GEN_QNAME_FOR_DT) p = triple[1] if isinstance(p, BNode): # hmm - when is P ever a bnode? self._references[p] += 1 - def getQName(self, uri, gen_prefix=True): + def get_pname(self, uri, gen_prefix=True): if not isinstance(uri, URIRef): return None @@ -191,9 +192,15 @@ def getQName(self, uri, gen_prefix=True): prefix, namespace, local = parts + # To understand treatment of % character refer to Productions for terminal PLX at + # https://www.w3.org/TR/turtle/#grammar-production-PLX + # Only % NOT followed by two hex chars requires manual backslash escaping local = local.replace(r"(", r"\(").replace(r")", r"\)") + local = self.LOCALNAME_PECRENT_CHARACTER_REQUIRING_ESCAPE_REGEX.sub( + "\\%", local + ) - # QName cannot end with . + # PName cannot end with . 
if local.endswith("."): return None @@ -201,6 +208,14 @@ def getQName(self, uri, gen_prefix=True): return "%s:%s" % (prefix, local) + def getQName(self, uri, gen_prefix=True): + warnings.warn( + "LongTurtleSerializer.getQName is deprecated, use LongTurtleSerializer.get_pname instead.", + DeprecationWarning, + stacklevel=2, + ) + return self.get_pname(uri, gen_prefix) + def startDocument(self): self._started = True ns_list = sorted(self.namespaces.items()) @@ -256,12 +271,12 @@ def label(self, node, position): if isinstance(node, Literal): return node._literal_n3( use_plain=True, - qname_callback=lambda dt: self.getQName(dt, _GEN_QNAME_FOR_DT), + qname_callback=lambda dt: self.get_pname(dt, _GEN_QNAME_FOR_DT), ) else: node = self.relativize(node) - return self.getQName(node, position == VERB) or node.n3() + return self.get_pname(node, position == VERB) or node.n3() def p_squared( self, diff --git a/rdflib/plugins/serializers/n3.py b/rdflib/plugins/serializers/n3.py index 627bbe19ca..dc45869cc8 100644 --- a/rdflib/plugins/serializers/n3.py +++ b/rdflib/plugins/serializers/n3.py @@ -2,6 +2,8 @@ Notation 3 (N3) RDF graph serializer for RDFLib. 
""" +import warnings + from rdflib.graph import Graph from rdflib.namespace import OWL, Namespace from rdflib.plugins.serializers.turtle import OBJECT, SUBJECT, TurtleSerializer @@ -47,14 +49,22 @@ def preprocessTriple(self, triple): # noqa: N802 for t in triple[2]: self.preprocessTriple(t) - def getQName(self, uri, gen_prefix=True): # noqa: N802 + def get_pname(self, uri, gen_prefix=True): qname = None if self.parent is not None: - qname = self.parent.getQName(uri, gen_prefix) + qname = self.parent.get_pname(uri, gen_prefix) if qname is None: - qname = super(N3Serializer, self).getQName(uri, gen_prefix) + qname = super(N3Serializer, self).get_pname(uri, gen_prefix) return qname + def getQName(self, uri, gen_prefix=True): # noqa: N802 + warnings.warn( + "N3Serializer.getQName is deprecated, use N3Serializer.get_pname instead.", + DeprecationWarning, + stacklevel=2, + ) + return self.get_pname(uri, gen_prefix) + def statement(self, subject): self.subjectDone(subject) properties = self.buildPredicateHash(subject) diff --git a/rdflib/plugins/serializers/trig.py b/rdflib/plugins/serializers/trig.py index 25f8d2a128..2c3fe2be16 100644 --- a/rdflib/plugins/serializers/trig.py +++ b/rdflib/plugins/serializers/trig.py @@ -45,7 +45,7 @@ def preprocess(self) -> None: continue self.store = context # Don't generate a new prefix for a graph URI if one already exists - self.getQName(context.identifier, False) + self.get_pname(context.identifier, False) self._subjects = {} for triple in context: @@ -103,7 +103,7 @@ def serialize( iri = store.identifier.n3() else: # Show the full graph URI if a prefix for it doesn't already exist - iri = self.getQName(store.identifier, False) + iri = self.get_pname(store.identifier, False) if iri is None: iri = store.identifier.n3() self.write(self.indent() + "\n%s {" % iri) diff --git a/rdflib/plugins/serializers/turtle.py b/rdflib/plugins/serializers/turtle.py index 9a77debb9b..5a76992fb9 100644 --- a/rdflib/plugins/serializers/turtle.py +++ 
b/rdflib/plugins/serializers/turtle.py @@ -5,6 +5,8 @@ from __future__ import annotations +import re +import warnings from collections import defaultdict from typing import ( IO, @@ -43,6 +45,9 @@ class RecursiveSerializer(Serializer): maxDepth = 10 indentString = " " roundtrip_prefixes: Tuple[Any, ...] = () + LOCALNAME_PECRENT_CHARACTER_REQUIRING_ESCAPE_REGEX = re.compile( + r"%(?![0-9A-Fa-f]{2})" + ) def __init__(self, store: Graph): super(RecursiveSerializer, self).__init__(store) @@ -204,6 +209,9 @@ class TurtleSerializer(RecursiveSerializer): short_name = "turtle" indentString = " " + LOCALNAME_PECRENT_CHARACTER_REQUIRING_ESCAPE_REGEX = re.compile( + r"%(?![0-9A-Fa-f]{2})" + ) def __init__(self, store: Graph): self._ns_rewrite: Dict[str, str] = {} @@ -300,15 +308,15 @@ def preprocessTriple(self, triple: _TripleType) -> None: # predicate corresponds to base namespace continue # Don't use generated prefixes for subjects and objects - self.getQName(node, gen_prefix=(i == VERB)) + self.get_pname(node, gen_prefix=(i == VERB)) if isinstance(node, Literal) and node.datatype: - self.getQName(node.datatype, gen_prefix=_GEN_QNAME_FOR_DT) + self.get_pname(node.datatype, gen_prefix=_GEN_QNAME_FOR_DT) p = triple[1] if isinstance(p, BNode): # hmm - when is P ever a bnode? 
self._references[p] += 1 - # TODO: Rename to get_pname - def getQName(self, uri: Node, gen_prefix: bool = True) -> Optional[str]: + # Refer to Productions for terminals PNAME_NS and PNAME_LN https://www.w3.org/TR/turtle/#sec-grammar-grammar + def get_pname(self, uri: Node, gen_prefix: bool = True) -> Optional[str]: if not isinstance(uri, URIRef): return None @@ -328,9 +336,15 @@ def getQName(self, uri: Node, gen_prefix: bool = True) -> Optional[str]: prefix, namespace, local = parts + # To understand treatment of % character refer to Productions for terminal PLX at + # https://www.w3.org/TR/turtle/#grammar-production-PLX + # Only % NOT followed by two hex chars requires manual backslash escaping local = local.replace(r"(", r"\(").replace(r")", r"\)") + local = self.LOCALNAME_PECRENT_CHARACTER_REQUIRING_ESCAPE_REGEX.sub( + "\\%", local + ) - # QName cannot end with . + # PName cannot end with . if local.endswith("."): return None @@ -338,6 +352,14 @@ def getQName(self, uri: Node, gen_prefix: bool = True) -> Optional[str]: return "%s:%s" % (prefix, local) + def getQName(self, uri: Node, gen_prefix: bool = True) -> Optional[str]: + warnings.warn( + "TurtleSerializer.getQName is deprecated, use TurtleSerializer.get_pname instead.", + DeprecationWarning, + stacklevel=2, + ) + return self.get_pname(uri, gen_prefix) + def startDocument(self) -> None: self._started = True ns_list = sorted(self.namespaces.items()) @@ -393,12 +415,12 @@ def label(self, node: Node, position: int) -> str: if isinstance(node, Literal): return node._literal_n3( use_plain=True, - qname_callback=lambda dt: self.getQName(dt, _GEN_QNAME_FOR_DT), + qname_callback=lambda dt: self.get_pname(dt, _GEN_QNAME_FOR_DT), ) else: node = self.relativize(node) # type: ignore[type-var] - return self.getQName(node, position == VERB) or node.n3() + return self.get_pname(node, position == VERB) or node.n3() def p_squared(self, node: Node, position: int, newline: bool = False) -> bool: if ( diff --git 
a/test/data/roundtrip/iri_with_escaped_percent.ttl b/test/data/roundtrip/iri_with_escaped_percent.ttl new file mode 100644 index 0000000000..7fb84ecec3 --- /dev/null +++ b/test/data/roundtrip/iri_with_escaped_percent.ttl @@ -0,0 +1,4 @@ +@prefix : <#> . +:zzz_\%_zzz :prop "test iri including backslash-escaped percent char \\%" . +:zzz%20zzz :prop "test iri including percent-encoded space char %20" . +:zzz\%azzz :prop "test iri including backslash-escaped percent char followed by single hex character" . diff --git a/test/test_dataset/test_dataset_deprec_notice.py b/test/test_dataset/test_dataset_deprec_notice.py deleted file mode 100644 index 068ddd3c25..0000000000 --- a/test/test_dataset/test_dataset_deprec_notice.py +++ /dev/null @@ -1,37 +0,0 @@ -import pytest - -from rdflib import Dataset - - -def test_dataset_contexts_method(): - ds = Dataset() - with pytest.warns( - DeprecationWarning, - match="Dataset.contexts is deprecated, use Dataset.graphs instead.", - ): - # Call list() to consume the generator to emit the warning. 
- list(ds.contexts()) - - -def test_dataset_default_context_property(): - ds = Dataset() - with pytest.warns( - DeprecationWarning, - match="Dataset.default_context is deprecated, use Dataset.default_graph instead.", - ): - ds.default_context - - with pytest.warns( - DeprecationWarning, - match="Dataset.default_context is deprecated, use Dataset.default_graph instead.", - ): - ds.default_context = ds.graph() - - -def test_dataset_identifier_property(): - ds = Dataset() - with pytest.warns( - DeprecationWarning, - match="Dataset.identifier is deprecated and will be removed in future versions.", - ): - ds.identifier diff --git a/test/test_deprecation_notice.py b/test/test_deprecation_notice.py new file mode 100644 index 0000000000..19190635ef --- /dev/null +++ b/test/test_deprecation_notice.py @@ -0,0 +1,72 @@ +import pytest + +from rdflib import Dataset, Graph, Namespace, URIRef +from rdflib.plugins.serializers.longturtle import LongTurtleSerializer +from rdflib.plugins.serializers.n3 import N3Serializer +from rdflib.plugins.serializers.turtle import TurtleSerializer + + +def test_dataset_contexts_method(): + ds = Dataset() + with pytest.warns( + DeprecationWarning, + match="Dataset.contexts is deprecated, use Dataset.graphs instead.", + ): + # Call list() to consume the generator to emit the warning. 
+ list(ds.contexts()) + + +def test_dataset_default_context_property(): + ds = Dataset() + with pytest.warns( + DeprecationWarning, + match="Dataset.default_context is deprecated, use Dataset.default_graph instead.", + ): + ds.default_context + + with pytest.warns( + DeprecationWarning, + match="Dataset.default_context is deprecated, use Dataset.default_graph instead.", + ): + ds.default_context = ds.graph() + + +def test_dataset_identifier_property(): + ds = Dataset() + with pytest.warns( + DeprecationWarning, + match="Dataset.identifier is deprecated and will be removed in future versions.", + ): + ds.identifier + + +@pytest.mark.parametrize( + ("serializer_cls", "warning_message"), + [ + ( + TurtleSerializer, + "TurtleSerializer.getQName is deprecated, use TurtleSerializer.get_pname instead.", + ), + ( + LongTurtleSerializer, + "LongTurtleSerializer.getQName is deprecated, use LongTurtleSerializer.get_pname instead.", + ), + ( + N3Serializer, + "N3Serializer.getQName is deprecated, use N3Serializer.get_pname instead.", + ), + ], +) +def test_serializer_getqname_method( + serializer_cls, + warning_message: str, +): + graph = Graph() + ex = Namespace("http://example.org/") + graph.bind("ex", ex) + serializer = serializer_cls(graph) + + with pytest.warns(DeprecationWarning, match=warning_message): + qname = serializer.getQName(URIRef("http://example.org/value")) + + assert qname == "ex:value" diff --git a/test/test_roundtrip.py b/test/test_roundtrip.py index bff01b2850..8dc951b6cd 100644 --- a/test/test_roundtrip.py +++ b/test/test_roundtrip.py @@ -526,6 +526,7 @@ def test_n3_suite( (TEST_DATA_DIR / "variants" / "diverse_quads.nq", "nquads"), (TEST_DATA_DIR / "variants" / "diverse_quads.trig", "trig"), (TEST_DATA_DIR / "roundtrip" / "bnode_refs.trig", "trig"), + (TEST_DATA_DIR / "roundtrip" / "iri_with_escaped_percent.ttl", "ttl"), (TEST_DATA_DIR / "example-lots_of_graphs.n3", "n3"), (TEST_DATA_DIR / "issue156.n3", "n3"), ] diff --git a/test/test_trig.py 
b/test/test_trig.py index afcf2c4cff..1c24a18d86 100644 --- a/test/test_trig.py +++ b/test/test_trig.py @@ -73,7 +73,7 @@ def test_graph_qname_syntax(): def test_graph_uri_syntax(): g = rdflib.Dataset() - # getQName will not abbreviate this, so it should serialize as + # get_pname will not abbreviate this, so it should serialize as # a '<...>' term. g.add(TRIPLE + (rdflib.URIRef("http://example.com/foo."),)) out = g.serialize(format="trig", encoding="latin-1")