diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index f5becb937..f737b9b00 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -1,7 +1,7 @@
---
# https://docs.readthedocs.io/en/stable/config-file/v2.html
version: 2
-# NOTE: not builing epub because epub does not know how to handle .ico files
+# NOTE: not building epub because epub does not know how to handle .ico files
# which results in a warning which causes the build to fail due to
# `sphinx.fail_on_warning`
# https://github.com/sphinx-doc/sphinx/issues/10350
diff --git a/examples/datasets.py b/examples/datasets.py
index d550775a1..7dfc4e48b 100644
--- a/examples/datasets.py
+++ b/examples/datasets.py
@@ -1,13 +1,23 @@
"""
-An RDFLib Dataset is a slight extension to ConjunctiveGraph: it uses simpler terminology
-and has a few additional convenience methods, for example add() can be used to
-add quads directly to a specific Graph within the Dataset.
+This file contains a number of common tasks using the RDFLib Dataset class.
-This example file shows how to declare a Dataset, add content to it, serialise it, query it
-and remove things from it.
+An RDFLib Dataset is an object that stores multiple Named Graphs - instances of RDFLib
+Graph identified by IRI - within it and allows whole-of-dataset or single Graph use.
+
+Dataset extends Graph's Subject, Predicate, Object structure to include Graph -
+archaically called Context - producing quads of s, p, o, g.
+
+There is an older implementation of a Dataset-like class in RDFLib < 7.x called
+ConjunctiveGraph that is now deprecated.
+
+Sections in this file:
+
+1. Creating & Adding
+2. Looping & Counting
+3. Manipulating Graphs
"""
-from rdflib import Dataset, Literal, Namespace, URIRef
+from rdflib import Dataset, Graph, Literal, URIRef
# Note regarding `mypy: ignore_errors=true`:
#
@@ -19,41 +29,48 @@
# mypy: ignore_errors=true
-#
-# Create & Add
-#
+#######################################################################################
+# 1. Creating & Adding
+#######################################################################################
# Create an empty Dataset
d = Dataset()
+
# Add a namespace prefix to it, just like for Graph
-d.bind("ex", Namespace("http://example.com/"))
+d.bind("ex", "http://example.com/")
-# Declare a Graph URI to be used to identify a Graph
-graph_1 = URIRef("http://example.com/graph-1")
+# Declare a Graph identifier to be used to identify a Graph
+# A string or a URIRef may be used, but safer to always use a URIRef for usage consistency
+graph_1_id = URIRef("http://example.com/graph-1")
-# Add an empty Graph, identified by graph_1, to the Dataset
-d.graph(identifier=graph_1)
+# Add an empty Graph, identified by graph_1_id, to the Dataset
+d.graph(identifier=graph_1_id)
-# Add two quads to Graph graph_1 in the Dataset
+# Add two quads to the Dataset which are triples + graph ID
+# These insert the triple into the Graph specified by the ID
d.add(
(
URIRef("http://example.com/subject-x"),
URIRef("http://example.com/predicate-x"),
Literal("Triple X"),
- graph_1,
+ graph_1_id,
)
)
+
d.add(
(
URIRef("http://example.com/subject-z"),
URIRef("http://example.com/predicate-z"),
Literal("Triple Z"),
- graph_1,
+ graph_1_id,
)
)
-# Add another quad to the Dataset to a non-existent Graph:
-# the Graph is created automatically
+# We now have 2 distinct quads in the Dataset so the Dataset has a length of 2
+assert len(d) == 2
+
+# Add another quad to the Dataset specifying a non-existent Graph.
+# The Graph is created automatically
d.add(
(
URIRef("http://example.com/subject-y"),
@@ -63,8 +80,15 @@
)
)
-# printing the Dataset like this: print(d.serialize(format="trig"))
-# produces a result like this:
+assert len(d) == 3
+
+
+# You can print the Dataset like you do a Graph but you must specify a quads format like
+# 'trig' or 'trix', not 'turtle', unless the default_union parameter is set to True, and
+# then you can print the entire Dataset in triples.
+# print(d.serialize(format="trig").strip())
+
+# you should see something like this:
"""
@prefix ex: <http://example.com/> .
@@ -78,85 +102,278 @@
ex:subject-y ex:predicate-y "Triple Y" .
}
"""
-print("Printing Serialised Dataset:")
-print("---")
-print(d.serialize(format="trig"))
-print("---")
-print()
-print()
-#
-# Use & Query
-#
-# print the length of the Dataset, i.e. the count of all triples in all Graphs
-# we should get
+# Print out one graph in the Dataset, using a standard Graph serialization format - longturtle
+print(d.get_graph(URIRef("http://example.com/graph-2")).serialize(format="longturtle"))
+
+# you should see something like this:
"""
-3
+PREFIX ex: <http://example.com/>
+
+ex:subject-y
+ ex:predicate-y "Triple Y" ;
+.
"""
-print("Printing Dataset Length:")
-print("---")
-print(len(d))
-print("---")
-print()
-print()
-# Query one graph in the Dataset for all its triples
-# we should get
+
+#######################################################################################
+# 2. Looping & Counting
+#######################################################################################
+
+# Loop through all quads in the dataset
+for s, p, o, g in d.quads((None, None, None, None)): # type: ignore[arg-type]
+ print(f"{s}, {p}, {o}, {g}")
+
+# you should see something like this:
"""
-(rdflib.term.URIRef('http://example.com/subject-z'), rdflib.term.URIRef('http://example.com/predicate-z'), rdflib.term.Literal('Triple Z'))
-(rdflib.term.URIRef('http://example.com/subject-x'), rdflib.term.URIRef('http://example.com/predicate-x'), rdflib.term.Literal('Triple X'))
+http://example.com/subject-z, http://example.com/predicate-z, Triple Z, http://example.com/graph-1
+http://example.com/subject-x, http://example.com/predicate-x, Triple X, http://example.com/graph-1
+http://example.com/subject-y, http://example.com/predicate-y, Triple Y, http://example.com/graph-2
"""
-print("Printing all triple from one Graph in the Dataset:")
-print("---")
-for triple in d.triples((None, None, None, graph_1)): # type: ignore[arg-type]
- print(triple)
-print("---")
-print()
-print()
-# Query the union of all graphs in the dataset for all triples
-# we should get nothing:
+# Loop through all the quads in one Graph - just constrain the Graph field
+for s, p, o, g in d.quads((None, None, None, graph_1_id)): # type: ignore[arg-type]
+ print(f"{s}, {p}, {o}, {g}")
+
+# you should see something like this:
"""
+http://example.com/subject-x, http://example.com/predicate-x, Triple X, http://example.com/graph-1
+http://example.com/subject-z, http://example.com/predicate-z, Triple Z, http://example.com/graph-1
"""
-# A Dataset's default union graph does not exist by default (default_union property is False)
-print("Attempt #1 to print all triples in the Dataset:")
-print("---")
-for triple in d.triples((None, None, None, None)):
- print(triple)
-print("---")
-print()
-print()
-# Set the Dataset's default_union property to True and re-query
+# Looping through triples in one Graph still works too
+for s, p, o in d.triples((None, None, None, graph_1_id)): # type: ignore[arg-type]
+ print(f"{s}, {p}, {o}")
+
+# you should see something like this:
+"""
+http://example.com/subject-x, http://example.com/predicate-x, Triple X
+http://example.com/subject-z, http://example.com/predicate-z, Triple Z
+"""
+
+# Looping through triples across the whole Dataset will produce nothing
+# unless we set the default_union parameter to True, since each triple is in a Named Graph
+
+# Setting the default_union parameter to True essentially presents all triples in all
+# Graphs as a single Graph
d.default_union = True
-print("Attempt #2 to print all triples in the Dataset:")
-print("---")
-for triple in d.triples((None, None, None, None)):
- print(triple)
-print("---")
-print()
-print()
+for s, p, o in d.triples((None, None, None)):
+ print(f"{s}, {p}, {o}")
+# you should see something like this:
+"""
+http://example.com/subject-x, http://example.com/predicate-x, Triple X
+http://example.com/subject-z, http://example.com/predicate-z, Triple Z
+http://example.com/subject-y, http://example.com/predicate-y, Triple Y
+"""
-#
-# Remove
-#
+# You can still loop through all quads now with the default_union parameter set to True
+for s, p, o, g in d.quads((None, None, None)):
+ print(f"{s}, {p}, {o}, {g}")
+
+# you should see something like this:
+"""
+http://example.com/subject-z, http://example.com/predicate-z, Triple Z, http://example.com/graph-1
+http://example.com/subject-x, http://example.com/predicate-x, Triple X, http://example.com/graph-1
+http://example.com/subject-y, http://example.com/predicate-y, Triple Y, http://example.com/graph-2
+"""
+
+# Adding a triple in graph-1 to graph-2 increases the number of distinct quads in
+# the Dataset
+d.add(
+ (
+ URIRef("http://example.com/subject-z"),
+ URIRef("http://example.com/predicate-z"),
+ Literal("Triple Z"),
+ URIRef("http://example.com/graph-2"),
+ )
+)
+
+for s, p, o, g in d.quads((None, None, None, None)):
+ print(f"{s}, {p}, {o}, {g}")
+
+# you should see something like this, with the 'Z' triple in graph-1 and graph-2:
+"""
+http://example.com/subject-x, http://example.com/predicate-x, Triple X, http://example.com/graph-1
+http://example.com/subject-y, http://example.com/predicate-y, Triple Y, http://example.com/graph-2
+http://example.com/subject-z, http://example.com/predicate-z, Triple Z, http://example.com/graph-1
+http://example.com/subject-z, http://example.com/predicate-z, Triple Z, http://example.com/graph-2
+"""
+
+# but the 'length' of the Dataset is still only 3 as only distinct triples are counted
+assert len(d) == 3
+
+
+# Looping through triples sees the 'Z' triple only once
+for s, p, o in d.triples((None, None, None)):
+ print(f"{s}, {p}, {o}")
+
+# you should see something like this:
+"""
+http://example.com/subject-x, http://example.com/predicate-x, Triple X
+http://example.com/subject-z, http://example.com/predicate-z, Triple Z
+http://example.com/subject-y, http://example.com/predicate-y, Triple Y
+"""
+
+#######################################################################################
+# 3. Manipulating Graphs
+#######################################################################################
+
+# List all the Graphs in the Dataset
+for x in d.graphs():
+ print(x)
+
+# this returns the graphs, something like:
+"""
+<urn:x-rdflib:default> a rdfg:Graph;rdflib:storage [a rdflib:Store;rdfs:label 'Memory'].
+<http://example.com/graph-2> a rdfg:Graph;rdflib:storage [a rdflib:Store;rdfs:label 'Memory'].
+<http://example.com/graph-1> a rdfg:Graph;rdflib:storage [a rdflib:Store;rdfs:label 'Memory'].
+"""
+
+# So try this
+for x in d.graphs():
+ print(x.identifier)
+
+# you should see something like this, noting the default, currently empty, graph:
+"""
+urn:x-rdflib:default
+http://example.com/graph-2
+http://example.com/graph-1
+"""
-# Remove Graph graph_1 from the Dataset
-d.remove_graph(graph_1)
+# To add to the default Graph, just add a triple, not a quad, to the Dataset directly
+d.add(
+ (
+ URIRef("http://example.com/subject-n"),
+ URIRef("http://example.com/predicate-n"),
+ Literal("Triple N"),
+ )
+)
+for s, p, o, g in d.quads((None, None, None, None)):
+ print(f"{s}, {p}, {o}, {g}")
+
+# you should see something like this, noting the triple in the default Graph:
+"""
+http://example.com/subject-z, http://example.com/predicate-z, Triple Z, http://example.com/graph-1
+http://example.com/subject-z, http://example.com/predicate-z, Triple Z, http://example.com/graph-2
+http://example.com/subject-x, http://example.com/predicate-x, Triple X, http://example.com/graph-1
+http://example.com/subject-y, http://example.com/predicate-y, Triple Y, http://example.com/graph-2
+http://example.com/subject-n, http://example.com/predicate-n, Triple N, urn:x-rdflib:default
+"""
+
+# Loop through triples per graph
+for x in d.graphs():
+ print(x.identifier)
+ for s, p, o in x.triples((None, None, None)):
+ print(f"\t{s}, {p}, {o}")
-# printing the Dataset like this: print(d.serialize(format="trig"))
-# now produces a result like this:
+# you should see something like this:
+"""
+urn:x-rdflib:default
+ http://example.com/subject-n, http://example.com/predicate-n, Triple N
+http://example.com/graph-1
+ http://example.com/subject-x, http://example.com/predicate-x, Triple X
+ http://example.com/subject-z, http://example.com/predicate-z, Triple Z
+http://example.com/graph-2
+ http://example.com/subject-y, http://example.com/predicate-y, Triple Y
+ http://example.com/subject-z, http://example.com/predicate-z, Triple Z
+"""
+# The default_union parameter includes all triples in the Named Graphs and the Default Graph
+for s, p, o in d.triples((None, None, None)):
+ print(f"{s}, {p}, {o}")
+
+# you should see something like this:
+"""
+http://example.com/subject-x, http://example.com/predicate-x, Triple X
+http://example.com/subject-n, http://example.com/predicate-n, Triple N
+http://example.com/subject-z, http://example.com/predicate-z, Triple Z
+http://example.com/subject-y, http://example.com/predicate-y, Triple Y
"""
+
+# To remove a graph
+d.remove_graph(graph_1_id)
+
+# To remove the default graph
+d.remove_graph(URIRef("urn:x-rdflib:default"))
+
+# print what's left - one graph, graph-2
+print(d.serialize(format="trig"))
+
+# you should see something like this:
+"""
+@prefix ex: <http://example.com/> .
+
ex:graph-2 {
ex:subject-y ex:predicate-y "Triple Y" .
+
+ ex:subject-z ex:predicate-z "Triple Z" .
+}
+"""
+
+# To add a Graph that already exists, you must give it an Identifier or else it will be assigned a Blank Node ID
+g_with_id = Graph(identifier=URIRef("http://example.com/graph-3"))
+g_with_id.bind("ex", "http://example.com/")
+
+# Add a distinct triple to the existing Graph, using Namespace IRI shortcuts
+# g_with_id.bind("ex", "http://example.com/")
+g_with_id.add(
+ (
+ URIRef("http://example.com/subject-k"),
+ URIRef("http://example.com/predicate-k"),
+ Literal("Triple K"),
+ )
+)
+d.add_graph(g_with_id)
+print(d.serialize(format="trig"))
+
+# you should see something like this:
+"""
+@prefix ex: <http://example.com/> .
+
+ex:graph-3 {
+ ex:subject-k ex:predicate-k "Triple K" .
+}
+
+ex:graph-2 {
+ ex:subject-y ex:predicate-y "Triple Y" .
+
+ ex:subject-z ex:predicate-z "Triple Z" .
+}
+"""
+
+# If you add a Graph with no specified identifier...
+g_no_id = Graph()
+g_no_id.bind("ex", "http://example.com/")
+
+g_no_id.add(
+ (
+ URIRef("http://example.com/subject-l"),
+ URIRef("http://example.com/predicate-l"),
+ Literal("Triple L"),
+ )
+)
+d.add_graph(g_no_id)
+
+# now when we print it, we will see a Graph with a Blank Node id:
+print(d.serialize(format="trig"))
+
+# you should see something like this, but with a different Blank Node ID, as this is regenerated on each code execution
+"""
+@prefix ex: <http://example.com/> .
+
+ex:graph-3 {
+ ex:subject-k ex:predicate-k "Triple K" .
+}
+
+ex:graph-2 {
+ ex:subject-y ex:predicate-y "Triple Y" .
+
+ ex:subject-z ex:predicate-z "Triple Z" .
+}
+
+_:N9cc8b54c91724e31896da5ce41e0c937 {
+ ex:subject-l ex:predicate-l "Triple L" .
}
"""
-print("Printing Serialised Dataset after graph_1 removal:")
-print("---")
-print(d.serialize(format="trig").strip())
-print("---")
-print()
-print()
diff --git a/rdflib/graph.py b/rdflib/graph.py
index fcad4ae70..b43bafba2 100644
--- a/rdflib/graph.py
+++ b/rdflib/graph.py
@@ -423,11 +423,50 @@
# Graph is a node because technically a formula-aware graph
# take a Graph as subject or object, but we usually use QuotedGraph for that.
class Graph(Node):
- """An RDF Graph
+ """An RDF Graph: a Python object containing nodes and relations between them as
+ RDF 'triples'.
- The constructor accepts one argument, the "store"
- that will be used to store the graph data (see the "store"
- package for stores currently shipped with rdflib).
+ This is the central RDFLib object class and Graph objects are almost always present
+ in all uses of RDFLib.
+
+ The basic use is to create a Graph and iterate through or query its content, e.g.:
+
+ >>> from rdflib import Graph, URIRef
+ >>> g = Graph()
+
+ >>> g.add((
+ ... URIRef("http://example.com/s1"), # subject
+ ... URIRef("http://example.com/p1"), # predicate
+ ... URIRef("http://example.com/o1"), # object
+ ... )) # doctest: +ELLIPSIS
+ <Graph identifier=... (<class 'rdflib.graph.Graph'>)>
+
+ >>> g.add((
+ ... URIRef("http://example.com/s2"), # subject
+ ... URIRef("http://example.com/p2"), # predicate
+ ... URIRef("http://example.com/o2"), # object
+ ... )) # doctest: +ELLIPSIS
+ <Graph identifier=... (<class 'rdflib.graph.Graph'>)>
+
+ >>> for triple in sorted(g): # simple looping
+ ... print(triple)
+ (rdflib.term.URIRef('http://example.com/s1'), rdflib.term.URIRef('http://example.com/p1'), rdflib.term.URIRef('http://example.com/o1'))
+ (rdflib.term.URIRef('http://example.com/s2'), rdflib.term.URIRef('http://example.com/p2'), rdflib.term.URIRef('http://example.com/o2'))
+
+ >>> # get the object of the triple with subject s1 and predicate p1
+ >>> o = g.value(
+ ... subject=URIRef("http://example.com/s1"),
+ ... predicate=URIRef("http://example.com/p1")
+ ... )
+
+
+ The constructor accepts one argument, the "store" that will be used to store the
+ graph data with the default being the :class:`~rdflib.plugins.stores.memory.Memory`
+ (in memory) Store. Other Stores that persist content to disk using various file
+ databases or Stores that use remote servers (SPARQL systems) are supported. See
+ the :doc:`rdflib.plugins.stores` package for Stores currently shipped with RDFLib.
+ Other Stores not shipped with RDFLib can be added, such as
+ `HDT <https://github.com/RDFLib/rdflib-hdt>`_.
Stores can be context-aware or unaware. Unaware stores take up
(some) less space but cannot support features that require
@@ -435,14 +474,15 @@ class Graph(Node):
provenance.
Even if used with a context-aware store, Graph will only expose the quads which
- belong to the default graph. To access the rest of the data, `ConjunctiveGraph` or
- `Dataset` classes can be used instead.
+ belong to the default graph. To access the rest of the data the
+ `Dataset` class can be used instead.
The Graph constructor can take an identifier which identifies the Graph
by name. If none is given, the graph is assigned a BNode for its
identifier.
- For more on named graphs, see: http://www.w3.org/2004/03/trix/
+ For more on Named Graphs, see the RDFLib `Dataset` class and the TriG Specification,
+ https://www.w3.org/TR/trig/.
"""
context_aware: bool
@@ -1153,10 +1193,10 @@ def transitiveClosure( # noqa: N802
function against the graph
>>> from rdflib.collection import Collection
- >>> g=Graph()
- >>> a=BNode("foo")
- >>> b=BNode("bar")
- >>> c=BNode("baz")
+ >>> g = Graph()
+ >>> a = BNode("foo")
+ >>> b = BNode("bar")
+ >>> c = BNode("baz")
>>> g.add((a,RDF.first,RDF.type)) # doctest: +ELLIPSIS
<Graph identifier=... (<class 'rdflib.graph.Graph'>)>
>>> g.add((a,RDF.rest,b)) # doctest: +ELLIPSIS
@@ -2370,21 +2410,49 @@ def __reduce__(self) -> tuple[type[Graph], tuple[Store, _ContextIdentifierType]]
class Dataset(ConjunctiveGraph):
"""
- RDF 1.1 Dataset. Small extension to the Conjunctive Graph:
- - the primary term is graphs in the datasets and not contexts with quads,
- so there is a separate method to set/retrieve a graph in a dataset and
- operate with graphs
- - graphs cannot be identified with blank nodes
- - added a method to directly add a single quad
+ An RDFLib Dataset is an object that stores multiple Named Graphs - instances of
+ RDFLib Graph identified by IRI - within it and allows whole-of-dataset or single
+ Graph use.
+
+ RDFLib's Dataset class is based on the `RDF 1.2 'Dataset' definition
+ <https://www.w3.org/TR/rdf12-concepts/#section-dataset>`_:
+
+ ..
+
+ An RDF dataset is a collection of RDF graphs, and comprises:
+
+ - Exactly one default graph, being an RDF graph. The default graph does not
+ have a name and MAY be empty.
+ - Zero or more named graphs. Each named graph is a pair consisting of an IRI or
+ a blank node (the graph name), and an RDF graph. Graph names are unique
+ within an RDF dataset.
- Examples of usage:
+ Accordingly, a Dataset allows for `Graph` objects to be added to it with
+ :class:`rdflib.term.URIRef` or :class:`rdflib.term.BNode` identifiers and always
+ creates a default graph with the :class:`rdflib.term.URIRef` identifier
+ :code:`urn:x-rdflib:default`.
+
+ Dataset extends Graph's Subject, Predicate, Object (s, p, o) 'triple'
+ structure to include a graph identifier - archaically called Context - producing
+ 'quads' of s, p, o, g.
+
+ Triples, or quads, can be added to a Dataset. Triples, or quads with the graph
+ identifier :code:`urn:x-rdflib:default` go into the default graph.
+
+ .. note:: Dataset builds on the `ConjunctiveGraph` class but that class's direct
+ use is now deprecated (since RDFLib 7.x) and it should not be used.
+ `ConjunctiveGraph` will be removed from future RDFLib versions.
+
+ Examples of usage (see also the examples/datasets.py file):
>>> # Create a new Dataset
>>> ds = Dataset()
>>> # simple triples goes to default graph
- >>> ds.add((URIRef("http://example.org/a"),
- ... URIRef("http://www.example.org/b"),
- ... Literal("foo"))) # doctest: +ELLIPSIS
+ >>> ds.add((
+ ... URIRef("http://example.org/a"),
+ ... URIRef("http://www.example.org/b"),
+ ... Literal("foo")
+ ... )) # doctest: +ELLIPSIS
<Graph identifier=... (<class 'rdflib.graph.Dataset'>)>
>>>
>>> # Create a graph in the dataset, if the graph name has already been
@@ -2393,16 +2461,19 @@ class Dataset(ConjunctiveGraph):
>>> g = ds.graph(URIRef("http://www.example.com/gr"))
>>>
>>> # add triples to the new graph as usual
- >>> g.add(
- ... (URIRef("http://example.org/x"),
+ >>> g.add((
+ ... URIRef("http://example.org/x"),
... URIRef("http://example.org/y"),
- ... Literal("bar")) ) # doctest: +ELLIPSIS
+ ... Literal("bar")
+ ... )) # doctest: +ELLIPSIS
<Graph identifier=... (<class 'rdflib.graph.Graph'>)>
>>> # alternatively: add a quad to the dataset -> goes to the graph
- >>> ds.add(
- ... (URIRef("http://example.org/x"),
+ >>> ds.add((
+ ... URIRef("http://example.org/x"),
... URIRef("http://example.org/z"),
- ... Literal("foo-bar"),g) ) # doctest: +ELLIPSIS
+ ... Literal("foo-bar"),
+ ... g
+ ... )) # doctest: +ELLIPSIS
<Graph identifier=... (<class 'rdflib.graph.Dataset'>)>
>>>
>>> # querying triples return them all regardless of the graph
@@ -2468,8 +2539,8 @@ class Dataset(ConjunctiveGraph):
>>>
>>> # graph names in the dataset can be queried:
>>> for c in ds.graphs(): # doctest: +SKIP
- ... print(c) # doctest:
- DEFAULT
+ ... print(c.identifier) # doctest:
+ urn:x-rdflib:default
http://www.example.com/gr
>>> # A graph can be created without specifying a name; a skolemized genid
>>> # is created on the fly
@@ -2488,7 +2559,7 @@ class Dataset(ConjunctiveGraph):
>>>
>>> # a graph can also be removed from a dataset via ds.remove_graph(g)
- .. versionadded:: 4.0
+ .. versionadded:: 4.0
"""
def __init__(
diff --git a/rdflib/plugins/stores/auditable.py b/rdflib/plugins/stores/auditable.py
index a5e51087a..253f59530 100644
--- a/rdflib/plugins/stores/auditable.py
+++ b/rdflib/plugins/stores/auditable.py
@@ -10,7 +10,7 @@
Calls to commit or rollback, flush the list of reverse operations This
provides thread-safe atomicity and isolation (assuming concurrent operations
occur with different store instances), but no durability (transactions are
-persisted in memory and wont be available to reverse operations after the
+persisted in memory and won't be available to reverse operations after the
system fails): A and I out of ACID.
"""
diff --git a/test/test_dataset/test_dataset.py b/test/test_dataset/test_dataset.py
index 19b9fe830..9f9bc9c26 100644
--- a/test/test_dataset/test_dataset.py
+++ b/test/test_dataset/test_dataset.py
@@ -5,11 +5,10 @@
import pytest
-from rdflib import URIRef, plugin
+from rdflib import BNode, Namespace, URIRef, plugin
from rdflib.graph import DATASET_DEFAULT_GRAPH_ID, Dataset, Graph
from rdflib.store import Store
from test.data import CONTEXT1, LIKES, PIZZA, TAREK
-from test.utils.namespace import EGSCHEME
# Will also run SPARQLUpdateStore tests against local SPARQL1.1 endpoint if
# available. This assumes SPARQL1.1 query/update endpoints running locally at
@@ -58,9 +57,9 @@ def get_dataset(request):
except ImportError:
pytest.skip("Dependencies for store '%s' not available!" % store)
- graph = Dataset(store=store)
+ d = Dataset(store=store)
- if not graph.store.graph_aware:
+ if not d.store.graph_aware:
return
if store in ["SQLiteLSM", "LevelDB"]:
@@ -75,31 +74,39 @@ def get_dataset(request):
else:
path = tempfile.mkdtemp()
- graph.open(path, create=True if store != "SPARQLUpdateStore" else False)
+ d.open(path, create=True if store != "SPARQLUpdateStore" else False)
if store == "SPARQLUpdateStore":
try:
- graph.store.update("CLEAR ALL")
+ d.graph()
+ d.add(
+ (
+ URIRef("http://example.com/s"),
+ URIRef("http://example.com/p"),
+ URIRef("http://example.com/o"),
+ )
+ )
+ d.store.update("CLEAR ALL")
except Exception as e:
if "SPARQLStore does not support BNodes! " in str(e):
pass
else:
raise Exception(e)
- yield store, graph
+ yield store, d
if store == "SPARQLUpdateStore":
try:
- graph.store.update("CLEAR ALL")
+ d.update("CLEAR ALL")
except Exception as e:
if "SPARQLStore does not support BNodes! " in str(e):
pass
else:
raise Exception(e)
- graph.close()
+ d.close()
else:
- graph.close()
- graph.destroy(path)
+ d.close()
+ d.destroy(path)
if os.path.isdir(path):
shutil.rmtree(path)
else:
@@ -121,7 +128,7 @@ def test_graph_aware(get_dataset):
# empty named graphs
if store != "SPARQLUpdateStore":
# added graph exists
- assert set(x.identifier for x in dataset.contexts()) == set(
+ assert set(x.identifier for x in dataset.graphs()) == set(
[CONTEXT1, DATASET_DEFAULT_GRAPH_ID]
)
@@ -131,7 +138,7 @@ def test_graph_aware(get_dataset):
g1.add((TAREK, LIKES, PIZZA))
# added graph still exists
- assert set(x.identifier for x in dataset.contexts()) == set(
+ assert set(x.identifier for x in dataset.graphs()) == set(
[CONTEXT1, DATASET_DEFAULT_GRAPH_ID]
)
@@ -147,14 +154,14 @@ def test_graph_aware(get_dataset):
# empty named graphs
if store != "SPARQLUpdateStore":
# graph still exists, although empty
- assert set(x.identifier for x in dataset.contexts()) == set(
+ assert set(x.identifier for x in dataset.graphs()) == set(
[CONTEXT1, DATASET_DEFAULT_GRAPH_ID]
)
dataset.remove_graph(CONTEXT1)
# graph is gone
- assert set(x.identifier for x in dataset.contexts()) == set(
+ assert set(x.identifier for x in dataset.graphs()) == set(
[DATASET_DEFAULT_GRAPH_ID]
)
@@ -173,7 +180,7 @@ def test_default_graph(get_dataset):
dataset.add((TAREK, LIKES, PIZZA))
assert len(dataset) == 1
# only default exists
- assert list(dataset.contexts()) == [dataset.default_context]
+ assert list(dataset.graphs()) == [dataset.default_context]
# removing default graph removes triples but not actual graph
dataset.remove_graph(DATASET_DEFAULT_GRAPH_ID)
@@ -181,7 +188,7 @@ def test_default_graph(get_dataset):
assert len(dataset) == 0
# default still exists
- assert set(dataset.contexts()) == set([dataset.default_context])
+ assert set(dataset.graphs()) == set([dataset.default_context])
def test_not_union(get_dataset):
@@ -193,11 +200,11 @@ def test_not_union(get_dataset):
"its default graph as the union of the named graphs"
)
- subgraph1 = dataset.graph(CONTEXT1)
- subgraph1.add((TAREK, LIKES, PIZZA))
+ g1 = dataset.graph(CONTEXT1)
+ g1.add((TAREK, LIKES, PIZZA))
assert list(dataset.objects(TAREK, None)) == []
- assert list(subgraph1.objects(TAREK, None)) == [PIZZA]
+ assert list(g1.objects(TAREK, None)) == [PIZZA]
def test_iter(get_dataset):
@@ -208,16 +215,16 @@ def test_iter(get_dataset):
uri_c = URIRef("https://example.com/c")
uri_d = URIRef("https://example.com/d")
- d.graph(URIRef("https://example.com/subgraph1"))
- d.add((uri_a, uri_b, uri_c, URIRef("https://example.com/subgraph1")))
+ d.graph(URIRef("https://example.com/g1"))
+ d.add((uri_a, uri_b, uri_c, URIRef("https://example.com/g1")))
d.add(
- (uri_a, uri_b, uri_c, URIRef("https://example.com/subgraph1"))
+ (uri_a, uri_b, uri_c, URIRef("https://example.com/g1"))
) # pointless addition: duplicates above
d.graph(URIRef("https://example.com/g2"))
d.add((uri_a, uri_b, uri_c, URIRef("https://example.com/g2")))
- d.add((uri_a, uri_b, uri_d, URIRef("https://example.com/subgraph1")))
+ d.add((uri_a, uri_b, uri_d, URIRef("https://example.com/g1")))
# traditional iterator
i_trad = 0
@@ -232,7 +239,7 @@ def test_iter(get_dataset):
assert i_new == i_trad # both should be 3
-def test_subgraph_without_identifier() -> None:
+def test_graph_without_identifier() -> None:
"""
Graphs with no identifies assigned are identified by Skolem IRIs with a
prefix that is bound to `genid`.
@@ -241,9 +248,9 @@ def test_subgraph_without_identifier() -> None:
reviewed at some point.
"""
- dataset = Dataset()
+ d = Dataset()
- nman = dataset.namespace_manager
+ nman = d.namespace_manager
genid_prefix = URIRef("https://rdflib.github.io/.well-known/genid/rdflib/")
@@ -253,15 +260,36 @@ def test_subgraph_without_identifier() -> None:
is None
)
- subgraph: Graph = dataset.graph()
- subgraph.add((EGSCHEME["subject"], EGSCHEME["predicate"], EGSCHEME["object"]))
+ ex = Namespace("http://example.com/")
+ g1: Graph = d.graph()
+ g1.add((ex.subject, ex.predicate, ex.object))
namespaces = set(nman.namespaces())
assert next(
(namespace for namespace in namespaces if namespace[0] == "genid"), None
) == ("genid", genid_prefix)
- assert f"{subgraph.identifier}".startswith(genid_prefix)
+ assert f"{g1.identifier}".startswith(genid_prefix)
+
+ # now add a preexisting graph with no identifier
+ # i.e. not one created within this Dataset object
+ g2 = Graph()
+ g2.add((ex.subject, ex.predicate, ex.object))
+ d.add_graph(g2)
+
+ iris = 0
+ bns = 0
+ others = 0
+ for g in d.graphs():
+ if type(g.identifier) is URIRef:
+ iris += 1
+ elif type(g.identifier) is BNode:
+ bns += 1
+ else:
+ others += 1
+ assert iris == 2
+ assert bns == 1
+ assert others == 0
def test_not_deprecated():