From 8cf158b092774d1795133aff73e74d790ac8933f Mon Sep 17 00:00:00 2001 From: Graham Higgins Date: Sat, 9 Apr 2022 16:27:04 +0100 Subject: [PATCH 1/6] Fix issue with namespace rebinding Also add additional tests for namespace rebinding. --- rdflib/plugins/stores/memory.py | 18 +- test/test_namespace_rebinding.py | 488 +++++++++++++++++++++++++++++++ 2 files changed, 501 insertions(+), 5 deletions(-) create mode 100644 test/test_namespace_rebinding.py diff --git a/rdflib/plugins/stores/memory.py b/rdflib/plugins/stores/memory.py index edb255778..b95dc8b64 100644 --- a/rdflib/plugins/stores/memory.py +++ b/rdflib/plugins/stores/memory.py @@ -403,11 +403,19 @@ def triples(self, triple_pattern, context=None): yield triple, self.__contexts(triple) def bind(self, prefix, namespace, override=True): - bound_prefix = self.__prefix.get(namespace) - if override and bound_prefix: - del self.__namespace[bound_prefix] - self.__prefix[namespace] = prefix - self.__namespace[prefix] = namespace + bound_namespace = self.__namespace.get(prefix) + bound_prefix = self.__prefix.get(namespace) or self.__prefix.get(bound_namespace) + if override: + if bound_prefix: + del self.__namespace[bound_prefix] + if bound_namespace: + del self.__prefix[bound_namespace] + + self.__prefix[namespace] = prefix + self.__namespace[prefix] = namespace + else: + self.__prefix[bound_namespace or namespace] = bound_prefix or prefix + self.__namespace[bound_prefix or prefix] = bound_namespace or namespace def namespace(self, prefix): return self.__namespace.get(prefix, None) diff --git a/test/test_namespace_rebinding.py b/test/test_namespace_rebinding.py new file mode 100644 index 000000000..ba6b692ce --- /dev/null +++ b/test/test_namespace_rebinding.py @@ -0,0 +1,488 @@ +import pytest + +from rdflib.term import URIRef +from rdflib import ConjunctiveGraph, Graph, Literal +from rdflib.plugins.stores.memory import Memory +from rdflib.namespace import Namespace +from rdflib.namespace import OWL +from test.data import tarek, context1, context2 + +foaf1_uri = URIRef("http://xmlns.com/foaf/0.1/") +foaf2_uri = URIRef("http://xmlns.com/foaf/2.0/") + +FOAF1 = Namespace(foaf1_uri) +FOAF2 = Namespace(foaf2_uri) + +# @pytest.mark.xfail(reason="'foaf=>FOAF2' binding improperly removed from namespaces") +def test_binding_replace(): + + # The confusion here is in the two arguments “override” and “replace” and + # how they serve two different purposes --- changing a prefix already bound + # to a namespace versus changing a namespace already bound to a prefix. + + g = Graph(bind_namespaces="none") + assert len(list(g.namespaces())) == 0 + + # 1. Changing the namespace of an existing namespace=>prefix binding: + + # Say they release FOAF 2.0 and you want to change the binding of + # `http://xmlns.com/foaf/2.0/` to `foaf`. + + # Up to now you had `http://xmlns.com/foaf/0.1/=>foaf` and `http://xmlns.com/foaf/2.0/=>foaf2` + + # We'll just set up those two bindings ... + g.bind("foaf", FOAF1) + g.bind("foaf2", FOAF2) + assert len(list(g.namespaces())) == 2 + assert list(g.namespaces()) == [ + ('foaf', foaf1_uri), + ("foaf2", foaf2_uri) + ] + + # Now you want to "upgrade" to FOAF2=>foaf and try the obvious: + g.bind("foaf", FOAF2) + + # But that doesn't happen, instead a different prefix, `foaf1` is bound: + assert list(g.namespaces()) == [ + ("foaf", foaf1_uri), + ("foaf1", foaf2_uri) + ] + + # The rationale behind this behaviour is that the prefix "foaf" was already + # bound to the FOAF1 namespace, so a different prefix for the FOAF2 namespace + # was automatically created. + + # You can achieve the intended result by setting `replace` to `True`: + g.bind("foaf", FOAF2, replace=True) + + # Because the FOAF2 namespace was rebound to a different prefix, the old + # "foaf2" prefix has gone and because the "foaf" prefix was rebound to a + # different namespace, the old FOAF1 namespace has gone, leaving just: + + assert list(g.namespaces()) == [ + ("foaf", foaf2_uri) + ] + + # Now, if you wish, you can re-bind the FOAF1.0 namespace to a prefix + # of your choice + g.bind("oldfoaf", FOAF1) + + assert list(g.namespaces()) == [ + ("foaf", foaf2_uri), # Should be present but has been removed. + ('oldfoaf', foaf1_uri), + ] + + # 2. Changing the prefix of an existing namespace=>prefix binding: + + # The namespace manager preserves the existing bindings from any + # subsequent unwanted programmatic rebindings such as: + g.bind("foaf", FOAF1) + + # Which, as things stand, results in: + + assert list(g.namespaces()) == [ + ("foaf", foaf2_uri), + ('foaf1', foaf1_uri) + ] + + # In which the attempted change from `oldfoaf` to (the already + # bound-to-a different-namespace `foaf`) was intercepted and + # changed to a non-clashing prefix of `foaf1` + + # So, after undoing the above unwanted rebinding .. + g.bind("oldfoaf", FOAF1, replace=True) + + # The bindings are again as desired + assert list(g.namespaces()) == [ + ("foaf", foaf2_uri), + ('oldfoaf', foaf1_uri) + ] + + # Next time through, set override to False + g.bind("foaf", FOAF1, override=False) + + # And the bindings will remain as desired + assert list(g.namespaces()) == [ + ("foaf", foaf2_uri), + ('oldfoaf', foaf1_uri) + ] + + # 3. Parsing data with prefix=>namespace bindings + # Let's see the situation regarding namespace bindings + # in parsed data - it can be a bit confusing ... + + # Starting with a very likely example where `foaf` is a + # prefix for `FOAF1` + + data = """\ + @prefix foaf: . + + foaf:name "Bob" .""" + + g.parse(data=data, format="n3") + + # The FOAF2 namespace is already bound to `foaf` so a + # non-clashing prefix of `foaf1` is rebound to FOAF1 in + # place of the existing `oldfoaf` prefix + + assert list(g.namespaces()) == [ + ("foaf", foaf2_uri), + ('foaf1', foaf1_uri) + ] + + # Yes, namespace bindings in parsed data replace existing + # bindings (i.e. `oldfoaf` was replaced by `foaf1`), just + # live with it ... + + # A different example of the same principle where `foaf2` + # replaces the exsting `foaf` + + data = """\ + @prefix foaf2: . + + foaf2:name "Alice" .""" + + g.parse(data=data, format="n3") + + # The already-bound namespace of `foaf=>FOAF2` is replaced + # by the `foaf2=>FOAF2` binding specified in the N3 source + + assert list(g.namespaces()) == [ + ('foaf1', foaf1_uri), + ("foaf2", foaf2_uri), + ] + + # Where a prefix-namespace binding in the data does + # not clash with the existing binding ... + + data = """\ + @prefix foaf1: . + + foaf1:name "Bob" .""" + + g.parse(data=data, format="n3") + + # The prefix `foaf1`, already bound to FOAF1 is + # used + + assert list(g.namespaces()) == [ + ('foaf1', foaf1_uri), + ("foaf2", foaf2_uri), + ] + + # Otherwise, the existing prefix binding is replaced + + data = """\ + @prefix foaf: . + + foaf:name "Alice" .""" + + g.parse(data=data, format="n3") + + assert list(g.namespaces()) == [ + ("foaf2", foaf2_uri), + ('foaf', foaf1_uri), + ] + + # Prefixes are bound to namespaces which in turn have a URIRef + # representation - so a different prefix=>namespace binding + # means a different predicate and thus a different statement: + + expected = """ + @prefix foaf: . + @prefix foaf2: . + + foaf:name "Bob" . + + foaf:name "Alice" ; + foaf2:name "Alice" . + + """ + + s = g.serialize(format="n3") + + for l in expected.split(): + assert l in s + + +def test_prefix_alias_disallowed(): + + # CANNOT BIND A PREFIX ALIASED TO AN EXISTING BOUND NAMESPACE + + g = Graph(bind_namespaces="none") + g.bind("owl", OWL) + assert ('owl', URIRef('http://www.w3.org/2002/07/owl#')) in list(g.namespaces()) + + g.bind("wol", OWL, override=False) + assert ('owl', URIRef('http://www.w3.org/2002/07/owl#')) in list(g.namespaces()) + assert ('wol', URIRef('http://www.w3.org/2002/07/owl#')) not in list(g.namespaces()) + + +def test_rebind_prefix_succeeds(): + + # CAN REPLACE A PREFIX-NAMESPACE BINDING + + g = Graph(bind_namespaces="none") + g.bind("owl", OWL) + assert ('owl', URIRef('http://www.w3.org/2002/07/owl#')) in list(g.namespaces()) + + g.bind("wol", OWL) + assert ('wol', URIRef('http://www.w3.org/2002/07/owl#')) in list(g.namespaces()) + assert ('owl', URIRef('http://www.w3.org/2002/07/owl#')) not in list(g.namespaces()) + + +def test_parse_rebinds_prefix(): + + # PARSED PREFIX-NAMESPACE BINDINGS REPLACE EXISTING BINDINGS + + data = """@prefix friend-of-a-friend: . + + friend-of-a-friend:name "Bob" . + + """ + + # Use full set of namespace bindings, including foaf + g = Graph(bind_namespaces="none") + g.bind('foaf', FOAF1) + assert ('foaf', foaf1_uri) in list(g.namespaces()) + + g.parse(data=data, format="n3") + + # foaf no longer in set of namespace bindings + assert ('foaf', foaf1_uri) not in list(g.namespaces()) + assert ('friend-of-a-friend', foaf1_uri) in list(g.namespaces()) + + +@pytest.mark.xfail(reason="Automatic handling of unknown predicates not automatically registered with namespace manager") +def test_automatic_handling_of_unknown_predicates(): + + # AUTOMATIC HANDLING OF UNKNOWN PREDICATES + + """ + Automatic handling of unknown predicates + ----------------------------------------- + + As a programming convenience, a namespace binding is automatically + created when :class:`rdflib.term.URIRef` predicates are added to the graph. + """ + + g = Graph(bind_namespaces="none") + + g.add((tarek, URIRef('http://xmlns.com/foaf/0.1/name'), Literal("Tarek"))) + + assert len(list(g.namespaces())) > 0 + + +def test_automatic_handling_of_unknown_predicates_only_effected_after_serialization(): + + g = Graph(bind_namespaces="none") + + g.add((tarek, URIRef('http://xmlns.com/foaf/0.1/name'), Literal("Tarek"))) + + assert "@prefix ns1: ." in g.serialize(format="n3") + + assert len(list(g.namespaces())) > 0 + assert ("ns1", URIRef("http://xmlns.com/foaf/0.1/")) in list(g.namespaces()) + + +def test_multigraph_bindings(): + data = """@prefix friend-of-a-friend: . + + friend-of-a-friend:name "Bob" . + + """ + + store = Memory() + + g1 = Graph(store, identifier=context1, bind_namespaces="none") + + g1.bind('foaf', FOAF1) + assert list(g1.namespaces()) == [ + ('foaf', foaf1_uri) + ] + assert list(store.namespaces()) == [ + ('foaf', foaf1_uri) + ] + + g1.add((tarek, FOAF1.name, Literal("tarek"))) + + assert list(store.namespaces()) == [ + ('foaf', foaf1_uri) + ] + + g2 = Graph(store, identifier=context2, bind_namespaces="none") + g2.parse(data=data, format="n3") + + # The parser-caused rebind is in the underlying store and all objects + # that use the store see the changed binding: + assert list(store.namespaces()) == [ + ('friend-of-a-friend', foaf1_uri) + ] + assert list(g1.namespaces()) == [ + ('friend-of-a-friend', foaf1_uri) + ] + + # Including newly-created objects that use the store + cg = ConjunctiveGraph(store=store) + + assert ('foaf', foaf1_uri) not in list(cg.namespaces()) + assert ('friend-of-a-friend', foaf1_uri) in list(cg.namespaces()) + + assert len(list(g1.namespaces())) == 6 + assert len(list(g2.namespaces())) == 6 + assert len(list(cg.namespaces())) == 6 + assert len(list(store.namespaces())) == 6 + + cg.store.add_graph(g1) + cg.store.add_graph(g2) + + assert "@prefix friend-of-a-friend: " in cg.serialize(format="n3") + + # In the notation3 format, the statement asserting tarek's name + # now references the changed prefix: + assert ' friend-of-a-friend:name "tarek" .' in cg.serialize(format="n3") + + # Add foaf2 binding if not already bound + cg.bind("foaf2", FOAF2, override=False) + assert ('foaf2', foaf2_uri) in list(cg.namespaces()) + + # Impose foaf binding ... if not already bound + cg.bind("foaf", FOAF1, override=False) + assert ('foaf', foaf1_uri) not in list(cg.namespaces()) + + +def test_new_namespace_new_prefix(): + g = Graph(bind_namespaces="none") + g.bind("foaf", FOAF1) + assert list(g.namespaces()) == [ + ('foaf', foaf1_uri) + ] + + +def test_change_prefix_override_true(): + g = Graph(bind_namespaces="none") + + g.bind("foaf", FOAF1) + assert list(g.namespaces()) == [ + ('foaf', foaf1_uri) + ] + + g.bind("oldfoaf", FOAF1) + # Changed + assert list(g.namespaces()) == [ + ("oldfoaf", foaf1_uri) + ] + + +def test_change_prefix_override_false(): + g = Graph(bind_namespaces="none") + + g.bind("foaf", FOAF1) + assert list(g.namespaces()) == [ + ('foaf', foaf1_uri) + ] + + g.bind("oldfoaf", FOAF1, override=False) + # No change + assert list(g.namespaces()) == [ + ("foaf", foaf1_uri) + ] + + +def test_change_namespace_override_true(): + g = Graph(bind_namespaces="none") + + g.bind("foaf", FOAF1) + assert list(g.namespaces()) == [ + ('foaf', foaf1_uri) + ] + + g.bind("foaf", FOAF2) + # Different prefix used + assert list(g.namespaces()) == [ + ("foaf", foaf1_uri), + ("foaf1", foaf2_uri) + ] + + +def test_change_namespace_override_false(): + g = Graph(bind_namespaces="none") + + g.bind("foaf", FOAF1) + assert list(g.namespaces()) == [ + ('foaf', foaf1_uri) + ] + + # Different namespace so override is irrelevant in this case + g.bind("foaf", FOAF2, override=False) + # Different prefix used + assert list(g.namespaces()) == [ + ("foaf", foaf1_uri), + ("foaf1", foaf2_uri) + ] + + +def test_new_namespace_override_false(): + g = Graph(bind_namespaces="none") + + g.bind("foaf", FOAF2) + assert list(g.namespaces()) == [ + ('foaf', foaf2_uri) + ] + + # Namespace not already bound so override is irrelevant in this case + g.bind("owl", OWL, override=False) + # Given prefix used + assert list(g.namespaces()) == [ + ("foaf", foaf2_uri), + ('owl', URIRef('http://www.w3.org/2002/07/owl#')), + ] + +# @pytest.mark.xfail() +def test_change_namespace_and_prefix(): + + # A more extensive illustration of attempted rebinds of + # `foaf` being intercepted and changed to a non-clashing + # prefix of `foafN` (where N is an incrementing integer) ... + + g = Graph(bind_namespaces="none") + + g.bind("foaf", FOAF1) + assert list(g.namespaces()) == [ + ('foaf', foaf1_uri) + ] + + g.bind("foaf", FOAF2, replace=True) + assert list(g.namespaces()) == [ + ("foaf", foaf2_uri) + ] + + g.bind("foaf1", FOAF1) + + assert list(g.namespaces()) == [ + ("foaf", foaf2_uri), + ("foaf1", foaf1_uri) + ] + + foaf3_uri = URIRef("http://xmlns.com/foaf/3.0/") + FOAF3 = Namespace("http://xmlns.com/foaf/3.0/") + + g.bind("foaf", FOAF3) + + assert list(g.namespaces()) == [ + ("foaf", foaf2_uri), + ("foaf1", foaf1_uri), + ("foaf2", foaf3_uri), + ] + + foaf4_uri = URIRef("http://xmlns.com/foaf/4.0/") + FOAF4 = Namespace("http://xmlns.com/foaf/4.0/") + + g.bind("foaf", FOAF4) + + assert list(g.namespaces()) == [ + ("foaf", foaf2_uri), + ("foaf1", foaf1_uri), + ("foaf2", foaf3_uri), + ("foaf3", foaf4_uri), + ] From b54ac411894f7b06a87929b9eff50b7286f22ef0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 9 Apr 2022 17:44:46 +0000 Subject: [PATCH 2/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- rdflib/plugins/stores/memory.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rdflib/plugins/stores/memory.py b/rdflib/plugins/stores/memory.py index b95dc8b64..aa422ed89 100644 --- a/rdflib/plugins/stores/memory.py +++ b/rdflib/plugins/stores/memory.py @@ -404,7 +404,9 @@ def triples(self, triple_pattern, context=None): def bind(self, prefix, namespace, override=True): bound_namespace = self.__namespace.get(prefix) - bound_prefix = self.__prefix.get(namespace) or self.__prefix.get(bound_namespace) + bound_prefix = self.__prefix.get(namespace) or self.__prefix.get( + bound_namespace + ) if override: if bound_prefix: del self.__namespace[bound_prefix] From 0a9c94abdcceb5136c6bbbd9cd58dd6672a9c01f Mon Sep 17 00:00:00 2001 From: Graham Higgins Date: Sat, 9 Apr 2022 22:49:21 +0200 Subject: [PATCH 3/6] here's a patch for `berkleydb.py` --- rdflib/plugins/stores/berkeleydb.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/rdflib/plugins/stores/berkeleydb.py b/rdflib/plugins/stores/berkeleydb.py index 331157adc..b01f74778 100644 --- a/rdflib/plugins/stores/berkeleydb.py +++ b/rdflib/plugins/stores/berkeleydb.py @@ -471,10 +471,17 @@ def bind(self, prefix, namespace, override=True): prefix = prefix.encode("utf-8") namespace = namespace.encode("utf-8") bound_prefix = self.__prefix.get(namespace) - if override and bound_prefix: - self.__namespace.delete(bound_prefix) - self.__prefix[namespace] = prefix - self.__namespace[prefix] = namespace + bound_namespace = self.__namespace.get(prefix) + if override: + if bound_prefix: + self.__namespace.delete(bound_prefix) + if bound_namespace: + self.__prefix.delete(bound_namespace) + self.__prefix[namespace] = prefix + self.__namespace[prefix] = namespace + else: + self.__prefix[bound_namespace or namespace] = bound_prefix or prefix + self.__namespace[bound_prefix or prefix] = bound_namespace or namespace def namespace(self, prefix): prefix = prefix.encode("utf-8") From e7faf157ea31ef2ea718d07ad51b7f93c0894141 Mon Sep 17 00:00:00 2001 From: Iwan Aucamp Date: Sat, 16 Apr 2022 23:21:59 +0200 Subject: [PATCH 4/6] refactor: move graph namespace rebinding test to graph directory --- test/{ => test_graph}/test_namespace_rebinding.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename test/{ => test_graph}/test_namespace_rebinding.py (100%) diff --git a/test/test_namespace_rebinding.py b/test/test_graph/test_namespace_rebinding.py similarity index 100% rename from test/test_namespace_rebinding.py rename to test/test_graph/test_namespace_rebinding.py From b58c5b673727745008823e7427d4525d563d8969 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 16 Apr 2022 22:46:15 +0000 Subject: [PATCH 5/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- test/test_graph/test_namespace_rebinding.py | 130 ++++++-------------- 1 file changed, 39 insertions(+), 91 deletions(-) diff --git a/test/test_graph/test_namespace_rebinding.py b/test/test_graph/test_namespace_rebinding.py index ba6b692ce..cd3eb414e 100644 --- a/test/test_graph/test_namespace_rebinding.py +++ b/test/test_graph/test_namespace_rebinding.py @@ -1,11 +1,11 @@ +from test.data import context1, context2, tarek + import pytest -from rdflib.term import URIRef from rdflib import ConjunctiveGraph, Graph, Literal +from rdflib.namespace import OWL, Namespace from rdflib.plugins.stores.memory import Memory -from rdflib.namespace import Namespace -from rdflib.namespace import OWL -from test.data import tarek, context1, context2 +from rdflib.term import URIRef foaf1_uri = URIRef("http://xmlns.com/foaf/0.1/") foaf2_uri = URIRef("http://xmlns.com/foaf/2.0/") @@ -34,19 +34,13 @@ def test_binding_replace(): g.bind("foaf", FOAF1) g.bind("foaf2", FOAF2) assert len(list(g.namespaces())) == 2 - assert list(g.namespaces()) == [ - ('foaf', foaf1_uri), - ("foaf2", foaf2_uri) - ] + assert list(g.namespaces()) == [('foaf', foaf1_uri), ("foaf2", foaf2_uri)] # Now you want to "upgrade" to FOAF2=>foaf and try the obvious: g.bind("foaf", FOAF2) # But that doesn't happen, instead a different prefix, `foaf1` is bound: - assert list(g.namespaces()) == [ - ("foaf", foaf1_uri), - ("foaf1", foaf2_uri) - ] + assert list(g.namespaces()) == [("foaf", foaf1_uri), ("foaf1", foaf2_uri)] # The rationale behind this behaviour is that the prefix "foaf" was already # bound to the FOAF1 namespace, so a different prefix for the FOAF2 namespace @@ -59,9 +53,7 @@ def test_binding_replace(): # "foaf2" prefix has gone and because the "foaf" prefix was rebound to a # different namespace, the old FOAF1 namespace has gone, leaving just: - assert list(g.namespaces()) == [ - ("foaf", foaf2_uri) - ] + assert list(g.namespaces()) == [("foaf", foaf2_uri)] # Now, if you wish, you can re-bind the FOAF1.0 namespace to a prefix # of your choice @@ -80,10 +72,7 @@ def test_binding_replace(): # Which, as things stand, results in: - assert list(g.namespaces()) == [ - ("foaf", foaf2_uri), - ('foaf1', foaf1_uri) - ] + assert list(g.namespaces()) == [("foaf", foaf2_uri), ('foaf1', foaf1_uri)] # In which the attempted change from `oldfoaf` to (the already # bound-to-a different-namespace `foaf`) was intercepted and @@ -93,19 +82,13 @@ def test_binding_replace(): g.bind("oldfoaf", FOAF1, replace=True) # The bindings are again as desired - assert list(g.namespaces()) == [ - ("foaf", foaf2_uri), - ('oldfoaf', foaf1_uri) - ] + assert list(g.namespaces()) == [("foaf", foaf2_uri), ('oldfoaf', foaf1_uri)] # Next time through, set override to False g.bind("foaf", FOAF1, override=False) # And the bindings will remain as desired - assert list(g.namespaces()) == [ - ("foaf", foaf2_uri), - ('oldfoaf', foaf1_uri) - ] + assert list(g.namespaces()) == [("foaf", foaf2_uri), ('oldfoaf', foaf1_uri)] # 3. Parsing data with prefix=>namespace bindings # Let's see the situation regarding namespace bindings @@ -125,10 +108,7 @@ def test_binding_replace(): # non-clashing prefix of `foaf1` is rebound to FOAF1 in # place of the existing `oldfoaf` prefix - assert list(g.namespaces()) == [ - ("foaf", foaf2_uri), - ('foaf1', foaf1_uri) - ] + assert list(g.namespaces()) == [("foaf", foaf2_uri), ('foaf1', foaf1_uri)] # Yes, namespace bindings in parsed data replace existing # bindings (i.e. `oldfoaf` was replaced by `foaf1`), just @@ -253,7 +233,9 @@ def test_parse_rebinds_prefix(): assert ('friend-of-a-friend', foaf1_uri) in list(g.namespaces()) -@pytest.mark.xfail(reason="Automatic handling of unknown predicates not automatically registered with namespace manager") +@pytest.mark.xfail( + reason="Automatic handling of unknown predicates not automatically registered with namespace manager" +) def test_automatic_handling_of_unknown_predicates(): # AUTOMATIC HANDLING OF UNKNOWN PREDICATES @@ -297,30 +279,20 @@ def test_multigraph_bindings(): g1 = Graph(store, identifier=context1, bind_namespaces="none") g1.bind('foaf', FOAF1) - assert list(g1.namespaces()) == [ - ('foaf', foaf1_uri) - ] - assert list(store.namespaces()) == [ - ('foaf', foaf1_uri) - ] + assert list(g1.namespaces()) == [('foaf', foaf1_uri)] + assert list(store.namespaces()) == [('foaf', foaf1_uri)] g1.add((tarek, FOAF1.name, Literal("tarek"))) - assert list(store.namespaces()) == [ - ('foaf', foaf1_uri) - ] + assert list(store.namespaces()) == [('foaf', foaf1_uri)] g2 = Graph(store, identifier=context2, bind_namespaces="none") g2.parse(data=data, format="n3") # The parser-caused rebind is in the underlying store and all objects # that use the store see the changed binding: - assert list(store.namespaces()) == [ - ('friend-of-a-friend', foaf1_uri) - ] - assert list(g1.namespaces()) == [ - ('friend-of-a-friend', foaf1_uri) - ] + assert list(store.namespaces()) == [('friend-of-a-friend', foaf1_uri)] + assert list(g1.namespaces()) == [('friend-of-a-friend', foaf1_uri)] # Including newly-created objects that use the store cg = ConjunctiveGraph(store=store) @@ -336,11 +308,15 @@ def test_multigraph_bindings(): cg.store.add_graph(g1) cg.store.add_graph(g2) - assert "@prefix friend-of-a-friend: " in cg.serialize(format="n3") + assert "@prefix friend-of-a-friend: " in cg.serialize( + format="n3" + ) # In the notation3 format, the statement asserting tarek's name # now references the changed prefix: - assert ' friend-of-a-friend:name "tarek" .' in cg.serialize(format="n3") + assert ' friend-of-a-friend:name "tarek" .' in cg.serialize( + format="n3" + ) # Add foaf2 binding if not already bound cg.bind("foaf2", FOAF2, override=False) @@ -354,81 +330,59 @@ def test_multigraph_bindings(): def test_new_namespace_new_prefix(): g = Graph(bind_namespaces="none") g.bind("foaf", FOAF1) - assert list(g.namespaces()) == [ - ('foaf', foaf1_uri) - ] + assert list(g.namespaces()) == [('foaf', foaf1_uri)] def test_change_prefix_override_true(): g = Graph(bind_namespaces="none") g.bind("foaf", FOAF1) - assert list(g.namespaces()) == [ - ('foaf', foaf1_uri) - ] + assert list(g.namespaces()) == [('foaf', foaf1_uri)] g.bind("oldfoaf", FOAF1) # Changed - assert list(g.namespaces()) == [ - ("oldfoaf", foaf1_uri) - ] + assert list(g.namespaces()) == [("oldfoaf", foaf1_uri)] def test_change_prefix_override_false(): g = Graph(bind_namespaces="none") g.bind("foaf", FOAF1) - assert list(g.namespaces()) == [ - ('foaf', foaf1_uri) - ] + assert list(g.namespaces()) == [('foaf', foaf1_uri)] g.bind("oldfoaf", FOAF1, override=False) # No change - assert list(g.namespaces()) == [ - ("foaf", foaf1_uri) - ] + assert list(g.namespaces()) == [("foaf", foaf1_uri)] def test_change_namespace_override_true(): g = Graph(bind_namespaces="none") g.bind("foaf", FOAF1) - assert list(g.namespaces()) == [ - ('foaf', foaf1_uri) - ] + assert list(g.namespaces()) == [('foaf', foaf1_uri)] g.bind("foaf", FOAF2) # Different prefix used - assert list(g.namespaces()) == [ - ("foaf", foaf1_uri), - ("foaf1", foaf2_uri) - ] + assert list(g.namespaces()) == [("foaf", foaf1_uri), ("foaf1", foaf2_uri)] def test_change_namespace_override_false(): g = Graph(bind_namespaces="none") g.bind("foaf", FOAF1) - assert list(g.namespaces()) == [ - ('foaf', foaf1_uri) - ] + assert list(g.namespaces()) == [('foaf', foaf1_uri)] # Different namespace so override is irrelevant in this case g.bind("foaf", FOAF2, override=False) # Different prefix used - assert list(g.namespaces()) == [ - ("foaf", foaf1_uri), - ("foaf1", foaf2_uri) - ] + assert list(g.namespaces()) == [("foaf", foaf1_uri), ("foaf1", foaf2_uri)] def test_new_namespace_override_false(): g = Graph(bind_namespaces="none") g.bind("foaf", FOAF2) - assert list(g.namespaces()) == [ - ('foaf', foaf2_uri) - ] + assert list(g.namespaces()) == [('foaf', foaf2_uri)] # Namespace not already bound so override is irrelevant in this case g.bind("owl", OWL, override=False) @@ -438,6 +392,7 @@ def test_new_namespace_override_false(): ('owl', URIRef('http://www.w3.org/2002/07/owl#')), ] + # @pytest.mark.xfail() def test_change_namespace_and_prefix(): @@ -448,21 +403,14 @@ def test_change_namespace_and_prefix(): g = Graph(bind_namespaces="none") g.bind("foaf", FOAF1) - assert list(g.namespaces()) == [ - ('foaf', foaf1_uri) - ] + assert list(g.namespaces()) == [('foaf', foaf1_uri)] g.bind("foaf", FOAF2, replace=True) - assert list(g.namespaces()) == [ - ("foaf", foaf2_uri) - ] + assert list(g.namespaces()) == [("foaf", foaf2_uri)] g.bind("foaf1", FOAF1) - assert list(g.namespaces()) == [ - ("foaf", foaf2_uri), - ("foaf1", foaf1_uri) - ] + assert list(g.namespaces()) == [("foaf", foaf2_uri), ("foaf1", foaf1_uri)] foaf3_uri = URIRef("http://xmlns.com/foaf/3.0/") FOAF3 = Namespace("http://xmlns.com/foaf/3.0/") From 1f132264005f2f2b9168aa3db27d991d3ce6095c Mon Sep 17 00:00:00 2001 From: Iwan Aucamp Date: Mon, 18 Apr 2022 00:16:11 +0200 Subject: [PATCH 6/6] Fixes, improvements and test for namespace (re)-binding on stores. - Added store specific tests for namespace binding and rebinding. - Copied @gjhiggins fix for `rdflib.plugins.stores.memory.Memory.bind` to `rdflib.plugins.stores.memory.SimpleMemory.bind`. - Changed `bind` on `Memory`, `SimpleMemory`, `BerkleyDB` stores to explicitly check for `is not None`/`is None` instead of falsey/truthy. - Added `pytest.util._coalesce` to do null coalescing, for more info see deferred PEP-505 https://peps.python.org/pep-0505/. --- rdflib/graph.py | 7 + rdflib/plugins/stores/memory.py | 45 +++-- rdflib/util.py | 26 ++- test/test_graph/test_namespace_rebinding.py | 9 +- test/test_store/test_namespace_binding.py | 210 ++++++++++++++++++++ test/test_util.py | 18 ++ test/testutils.py | 31 +++ 7 files changed, 330 insertions(+), 16 deletions(-) create mode 100644 test/test_store/test_namespace_binding.py diff --git a/rdflib/graph.py b/rdflib/graph.py index 32c135838..3633b2084 100644 --- a/rdflib/graph.py +++ b/rdflib/graph.py @@ -1018,6 +1018,13 @@ def bind(self, prefix, namespace, override=True, replace=False) -> None: for example: graph.bind("foaf", "http://xmlns.com/foaf/0.1/") """ + # TODO FIXME: This method's behaviour should be simplified and made + # more robust. If the method cannot do what it is asked it should raise + # an exception, it is also unclear why this method has all the + # different modes. It seems to just make it more complex to use, maybe + # it should be clarified when someone will need to use override=False + # and replace=False. And also why silent failure here is preferred over + # raising an excpetion. return self.namespace_manager.bind( prefix, namespace, override=override, replace=replace ) diff --git a/rdflib/plugins/stores/memory.py b/rdflib/plugins/stores/memory.py index aa422ed89..99dbf39c1 100644 --- a/rdflib/plugins/stores/memory.py +++ b/rdflib/plugins/stores/memory.py @@ -1,6 +1,7 @@ # # from rdflib.store import Store +from rdflib.util import _coalesce __all__ = ["SimpleMemory", "Memory"] @@ -149,11 +150,26 @@ def __len__(self, context=None): return i def bind(self, prefix, namespace, override=True): - bound_prefix = self.__prefix.get(namespace) - if override and bound_prefix: - del self.__namespace[bound_prefix] - self.__prefix[namespace] = prefix - self.__namespace[prefix] = namespace + # should be identical to `Memory.bind` + bound_namespace = self.__namespace.get(prefix) + bound_prefix = _coalesce( + self.__prefix.get(namespace), + self.__prefix.get(bound_namespace), + ) + if override: + if bound_prefix is not None: + del self.__namespace[bound_prefix] + if bound_namespace is not None: + del self.__prefix[bound_namespace] + self.__prefix[namespace] = prefix + self.__namespace[prefix] = namespace + else: + self.__prefix[_coalesce(bound_namespace, namespace)] = _coalesce( + bound_prefix, prefix + ) + self.__namespace[_coalesce(bound_prefix, prefix)] = _coalesce( + bound_namespace, namespace + ) def namespace(self, prefix): return self.__namespace.get(prefix, None) @@ -403,21 +419,26 @@ def triples(self, triple_pattern, context=None): yield triple, self.__contexts(triple) def bind(self, prefix, namespace, override=True): + # should be identical to `SimpleMemory.bind` bound_namespace = self.__namespace.get(prefix) - bound_prefix = self.__prefix.get(namespace) or self.__prefix.get( - bound_namespace + bound_prefix = _coalesce( + self.__prefix.get(namespace), + self.__prefix.get(bound_namespace), ) if override: - if bound_prefix: + if bound_prefix is not None: del self.__namespace[bound_prefix] - if bound_namespace: + if bound_namespace is not None: del self.__prefix[bound_namespace] - self.__prefix[namespace] = prefix self.__namespace[prefix] = namespace else: - self.__prefix[bound_namespace or namespace] = bound_prefix or prefix - self.__namespace[bound_prefix or prefix] = bound_namespace or namespace + self.__prefix[_coalesce(bound_namespace, namespace)] = _coalesce( + bound_prefix, prefix + ) + self.__namespace[_coalesce(bound_prefix, prefix)] = _coalesce( + bound_namespace, namespace + ) def namespace(self, prefix): return self.__namespace.get(prefix, None) diff --git a/rdflib/util.py b/rdflib/util.py index 1df63f06c..f05b2b736 100644 --- a/rdflib/util.py +++ b/rdflib/util.py @@ -25,7 +25,7 @@ # from time import daylight from time import altzone, gmtime, localtime, time, timezone -from typing import Optional +from typing import Optional, TypeVar import rdflib.graph # avoid circular dependency from rdflib.compat import sign @@ -44,6 +44,7 @@ "guess_format", "find_roots", "get_tree", + "_coalesce", ] @@ -430,6 +431,29 @@ def get_tree( return (mapper(root), sorted(tree, key=sortkey)) +_AnyT = TypeVar("_AnyT") + + +def _coalesce(*args: Optional[_AnyT]) -> Optional[_AnyT]: + """ + This is a null coalescing function, it will return the first non-`None` + argument passed to it, otherwise it will return `None`. + + For more info regarding the rationale of this function see deferred `PEP + 505 `_. + + :param args: Values to consider as candidates to return, the first arg that + is not `None` will be returned. If no argument is passed this function + will return None. + :return: The first ``arg`` that is not `None`, otherwise `None` if there + are no args or if all args are `None`. + """ + for arg in args: + if arg is not None: + return arg + return None + + def test(): import doctest diff --git a/test/test_graph/test_namespace_rebinding.py b/test/test_graph/test_namespace_rebinding.py index cd3eb414e..f76216e5a 100644 --- a/test/test_graph/test_namespace_rebinding.py +++ b/test/test_graph/test_namespace_rebinding.py @@ -13,7 +13,7 @@ FOAF1 = Namespace(foaf1_uri) FOAF2 = Namespace(foaf2_uri) -# @pytest.mark.xfail(reason="'foaf=>FOAF2' binding improperly removed from namespaces") + def test_binding_replace(): # The confusion here is in the two arguments “override” and “replace” and @@ -234,7 +234,11 @@ def test_parse_rebinds_prefix(): @pytest.mark.xfail( - reason="Automatic handling of unknown predicates not automatically registered with namespace manager" + reason=""" + Automatic handling of unknown predicates not automatically registered with namespace manager + + NOTE: This is not a bug, but more of a feature request. + """ ) def test_automatic_handling_of_unknown_predicates(): @@ -393,7 +397,6 @@ def test_new_namespace_override_false(): ] -# @pytest.mark.xfail() def test_change_namespace_and_prefix(): # A more extensive illustration of attempted rebinds of diff --git a/test/test_store/test_namespace_binding.py b/test/test_store/test_namespace_binding.py new file mode 100644 index 000000000..0856a7eda --- /dev/null +++ b/test/test_store/test_namespace_binding.py @@ -0,0 +1,210 @@ +import enum +import itertools +import logging +from dataclasses import dataclass, field +from pathlib import Path +from test.testutils import pytest_mark_filter +from typing import Any, Callable, Dict, Set, Union + +import pytest + +from rdflib import Graph +from rdflib.namespace import Namespace +from rdflib.plugins.stores.berkeleydb import has_bsddb +from rdflib.store import Store +from rdflib.term import IdentifiedNode, URIRef + + +class StoreTrait(enum.Enum): + WRAPPER = enum.auto() + DISK_BACKED = enum.auto() + + +@dataclass +class StoreInfo: + name: str + traits: Set[StoreTrait] = field(default_factory=set) + + def make_graph( + self, tmp_path: Path, identifier: Union[IdentifiedNode, str, None] = None + ) -> Graph: + graph = Graph(store=self.name, bind_namespaces="none", identifier=identifier) + if StoreTrait.DISK_BACKED in self.traits: + use_path = tmp_path / f"{self.name}.storedata" + use_path.mkdir(exist_ok=True, parents=True) + logging.debug("use_path = %s", use_path) + graph.open(f"{use_path}", create=True) + return graph + + +def make_store_info_dict(*result_format: StoreInfo) -> Dict[str, StoreInfo]: + result = {} + for item in result_format: + result[item.name] = item + return result + + +store_info_dict = make_store_info_dict( + StoreInfo("Memory"), + StoreInfo("SimpleMemory"), + StoreInfo("SPARQLStore"), + StoreInfo("SPARQLUpdateStore"), + *((StoreInfo("BerkeleyDB", {StoreTrait.DISK_BACKED}),) if has_bsddb else ()), +) + + +@pytest.fixture( + scope="module", + params=store_info_dict.keys(), +) +def store_name(request) -> str: + assert isinstance(request.param, str) + return request.param + + +GraphMutator = Callable[[Graph], Any] + +EGNSSUB = Namespace("http://example.com/sub/") +EGNSSUB_V0 = EGNSSUB["v0"] +EGNSSUB_V1 = EGNSSUB["v1"] +EGNSSUB_V2 = EGNSSUB["v2"] + +NamespaceBindings = Dict[str, URIRef] + + +def make_graph(tmp_path: Path, store_name: str) -> Graph: + logging.info("store_info_dict.keys() = %s", store_info_dict.keys()) + store_info = store_info_dict[store_name] + return store_info.make_graph(tmp_path) + + +def check_ns(graph: Graph, expected_bindings: NamespaceBindings) -> None: + actual_graph_bindings = list(graph.namespaces()) + logging.info("actual_bindings = %s", actual_graph_bindings) + assert list(expected_bindings.items()) == actual_graph_bindings + store: Store = graph.store + actual_store_bindings = list(store.namespaces()) + assert list(expected_bindings.items()) == actual_store_bindings + for prefix, uri in expected_bindings.items(): + assert store.prefix(uri) == prefix + assert store.namespace(prefix) == uri + + +@pytest.mark.parametrize( + ["override", "replace"], itertools.product([True, False], [True, False]) +) +def test_simple_bind( + tmp_path: Path, store_name: str, override: bool, replace: bool +) -> None: + """ + URI binds to a prefix regardless of override and replace values. + """ + graph = make_graph(tmp_path, store_name) + graph.bind("egsub", EGNSSUB_V0, override=override, replace=replace) + check_ns(graph, {"egsub": EGNSSUB_V0}) + + +@pytest.mark.parametrize( + ["override", "replace"], itertools.product([True, False], [True, False]) +) +def test_bind_two_bind( + tmp_path: Path, store_name: str, override: bool, replace: bool +) -> None: + """ + A second prefix with a different URI binds correctly regardless of override + and replace values. + """ + graph = make_graph(tmp_path, store_name) + graph.bind("egsub", EGNSSUB_V0) + graph.bind("egsubv1", EGNSSUB_V1, override=override, replace=replace) + check_ns(graph, {"egsub": EGNSSUB_V0, "egsubv1": EGNSSUB_V1}) + + +@pytest.mark.parametrize("replace", [True, False]) +def test_rebind_uri_override(tmp_path: Path, store_name: str, replace: bool) -> None: + """ + URI binds to new prefix if override is `True` regardless of the value of `replace`. + """ + graph = make_graph(tmp_path, store_name) + graph.bind("egsub", EGNSSUB_V0) + graph.bind("egsubv0", EGNSSUB_V0, override=True, replace=replace) + check_ns(graph, {"egsubv0": EGNSSUB_V0}) + + +@pytest.mark.parametrize("replace", [True, False]) +def test_rebind_uri_no_override(tmp_path: Path, store_name: str, replace: bool) -> None: + """ + URI does not bind to new prefix if override is `False` regardless of the value of `replace`. + """ + graph = make_graph(tmp_path, store_name) + graph.bind("egsub", EGNSSUB_V0) + graph.bind("egsubv0", EGNSSUB_V0, override=False, replace=replace) + check_ns(graph, {"egsub": EGNSSUB_V0}) + + +@pytest.mark.parametrize( + ["store_name", "override"], + pytest_mark_filter( + itertools.product(store_info_dict.keys(), [True, False]), + { + ("SPARQLStore", False): ( + pytest.mark.xfail( + raises=AssertionError, + reason=""" + SPARQLStore's namespace bindings work on a fundementally different way than + the other stores, which is both simpler, but requires some additional work + to make it behave like the other stores. + """, + ), + ), + ("SPARQLUpdateStore", False): ( + pytest.mark.xfail( + raises=AssertionError, + reason=""" + SPARQLStore's namespace bindings work on a fundementally different way than + the other stores, which is both simpler, but requires some additional work + to make it behave like the other stores. + """, + ), + ), + }, + ), +) +def test_rebind_prefix_replace(tmp_path: Path, store_name: str, override: bool) -> None: + """ + If `replace` is `True`, + Prefix binds to a new URI and `override` is `True`, + but does not bind to a new URI if `override` is `False`. + + NOTE re logic, this is mainly just taking what most stores does and saying + that is the right thing, not sure it really makes sense. This method is + quite complicated and it is unlcear which of replace and override should + take precedence in this case, once we sorted it out we should clarify in + the documentation. + """ + graph = make_graph(tmp_path, store_name) + graph.bind("egsub", EGNSSUB_V0) + if override: + graph.bind("egsub", EGNSSUB_V1, override=override, replace=True) + check_ns(graph, {"egsub": EGNSSUB_V1}) + else: + graph.bind("egsub", EGNSSUB_V1, override=override, replace=True) + check_ns(graph, {"egsub": EGNSSUB_V0}) + + +@pytest.mark.parametrize( + ["reuse_override", "reuse_replace"], itertools.product([True, False], [True, False]) +) +def test_rebind_prefix_reuse_uri_replace( + tmp_path: Path, store_name: str, reuse_override: bool, reuse_replace: bool +) -> None: + """ + Prefix binds to a new URI and the old URI can be reused with a new prefix + regardless of the value of override or replace used when reusing the old + URI. + """ + graph = make_graph(tmp_path, store_name) + graph.bind("egsub", EGNSSUB_V0) + graph.bind("egsub", EGNSSUB_V1, override=True, replace=True) + graph.bind("egsubv0", EGNSSUB_V0, override=reuse_override, replace=reuse_replace) + check_ns(graph, {"egsub": EGNSSUB_V1, "egsubv0": EGNSSUB_V0}) diff --git a/test/test_util.py b/test/test_util.py index 3782ffefb..40ebbb674 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import time +from typing import Any, Collection, Tuple from unittest.case import expectedFailure import pytest @@ -8,6 +9,7 @@ from rdflib import XSD, util from rdflib.graph import ConjunctiveGraph, Graph, QuotedGraph from rdflib.term import BNode, Literal, URIRef +from rdflib.util import _coalesce n3source = """\ @prefix : . @@ -339,3 +341,19 @@ def test_util_from_n3_not_escapes(self, string: str) -> None: def test_util_from_n3_not_escapes_xf(self, string: str) -> None: literal_str = str(util.from_n3(f'"{string}"')) assert literal_str == f"{string}" + + +@pytest.mark.parametrize( + ["params", "expected_result"], + [ + ([], None), + (["something"], "something"), + ([False, "something"], False), + (["", "something"], ""), + ([0, "something"], 0), + ([None, "something", 1], "something"), + (["something", None, 1], "something"), + ], +) +def test__coalesce(params: Collection[Any], expected_result: Any) -> None: + assert expected_result == _coalesce(*params) diff --git a/test/testutils.py b/test/testutils.py index c192b5c45..812ca4b74 100644 --- a/test/testutils.py +++ b/test/testutils.py @@ -16,7 +16,9 @@ TYPE_CHECKING, Any, Callable, + Collection, Dict, + Generator, Iterable, Iterator, List, @@ -35,6 +37,8 @@ from urllib.request import urlopen import isodate +import pytest +from _pytest.mark.structures import Mark, MarkDecorator, ParameterSet from nturl2path import url2pathname as nt_url2pathname import rdflib.compare @@ -580,3 +584,30 @@ def file_uri_to_path( pathname = url2pathname(file_uri_parsed.path) result = path_class(pathname) return result + + +ParamsT = TypeVar("ParamsT", bound=tuple) +Marks = Collection[Union[Mark, MarkDecorator]] + + +def pytest_mark_filter( + param_sets: Iterable[Union[ParamsT, ParameterSet]], mark_dict: Dict[ParamsT, Marks] +) -> Generator[ParameterSet, None, None]: + """ + Adds marks to test parameters. Useful for adding xfails to test parameters. + """ + for param_set in param_sets: + if isinstance(param_set, ParameterSet): + # param_set.marks = [*param_set.marks, *marks.get(param_set.values, ())] + yield pytest.param( + *param_set.values, + id=param_set.id, + marks=[ + *param_set.marks, + *mark_dict.get(cast(ParamsT, param_set.values), cast(Marks, ())), + ], + ) + else: + yield pytest.param( + *param_set, marks=mark_dict.get(param_set, cast(Marks, ())) + )