From 97ff66884760d5dea21cb2a1957c46bb89defa12 Mon Sep 17 00:00:00 2001 From: Nicholas Car Date: Thu, 9 Jan 2025 15:17:01 +1000 Subject: [PATCH] Dataset documentation improvements (#3012) * example printout improvements * added BN graph creation * updated tests var names & added one subtest * typos & improved formatting * updated Graph & Dataset docco * typo fix * fix code-in-comment syntax * fix code-in-comment syntax 2 * fix code-in-comment syntax - ellipses * fix code-in-comment syntax - sort print loop output * blacked * ruff fixes * Poetry 2.0.0 pyproject.toml file * move to PEP621 (Poetry 2.0.0) pyproject.toml * require poetry 2.0.0 * require poetry 2.0.0 * add in requirement for poetry-plugin-export * change from --sync to sync command * further pyproject.toml format updates * add poetry plugin to requirements-poetry.in * fix pre-commit poetry version to 2.0.0 * remove testing artifact * update license to 2025 * add me to contributors * remove outdated --check arg * typo * test add back in precommit args * test remove precommit args * match ruff version to pre-commit autoupdate PR #3026; add back in --check * re-remove --check * add David to CONTRIBUTORS * ruff in pyproject.toml to match pre-commit * updates for David's comments * fix Dataset docc ReST formatting --- .readthedocs.yaml | 2 +- examples/datasets.py | 379 +++++++++++++++++++++++------ rdflib/graph.py | 131 +++++++--- rdflib/plugins/stores/auditable.py | 2 +- test/test_dataset/test_dataset.py | 88 ++++--- 5 files changed, 459 insertions(+), 143 deletions(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index f5becb937..f737b9b00 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -1,7 +1,7 @@ --- # https://docs.readthedocs.io/en/stable/config-file/v2.html version: 2 -# NOTE: not builing epub because epub does not know how to handle .ico files +# NOTE: not building epub because epub does not know how to handle .ico files # which results in a warning which causes the build to fail due to # `sphinx.fail_on_warning` # https://github.com/sphinx-doc/sphinx/issues/10350 diff --git a/examples/datasets.py b/examples/datasets.py index d550775a1..7dfc4e48b 100644 --- a/examples/datasets.py +++ b/examples/datasets.py @@ -1,13 +1,23 @@ """ -An RDFLib Dataset is a slight extension to ConjunctiveGraph: it uses simpler terminology -and has a few additional convenience methods, for example add() can be used to -add quads directly to a specific Graph within the Dataset. +This file contains a number of common tasks using the RDFLib Dataset class. -This example file shows how to declare a Dataset, add content to it, serialise it, query it -and remove things from it. +An RDFLib Dataset is an object that stores multiple Named Graphs - instances of RDFLib +Graph identified by IRI - within it and allows whole-of-dataset or single Graph use. + +Dataset extends Graph's Subject, Predicate, Object structure to include Graph - +archaically called Context - producing quads of s, p, o, g. + +There is an older implementation of a Dataset-like class in RDFLib < 7.x called +ConjunctiveGraph that is now deprecated. + +Sections in this file: + +1. Creating & Adding +2. Looping & Counting +3. Manipulating Graphs """ -from rdflib import Dataset, Literal, Namespace, URIRef +from rdflib import Dataset, Graph, Literal, URIRef # Note regarding `mypy: ignore_errors=true`: # @@ -19,41 +29,48 @@ # mypy: ignore_errors=true -# -# Create & Add -# +####################################################################################### +# 1. 
Creating & Adding +####################################################################################### # Create an empty Dataset d = Dataset() + # Add a namespace prefix to it, just like for Graph -d.bind("ex", Namespace("http://example.com/")) +d.bind("ex", "http://example.com/") -# Declare a Graph URI to be used to identify a Graph -graph_1 = URIRef("http://example.com/graph-1") +# Declare a Graph identifier to be used to identify a Graph +# A string or a URIRef may be used, but safer to always use a URIRef for usage consistency +graph_1_id = URIRef("http://example.com/graph-1") -# Add an empty Graph, identified by graph_1, to the Dataset -d.graph(identifier=graph_1) +# Add an empty Graph, identified by graph_1_id, to the Dataset +d.graph(identifier=graph_1_id) -# Add two quads to Graph graph_1 in the Dataset +# Add two quads to the Dataset which are triples + graph ID +# These insert the triple into the GRaph specified by the ID d.add( ( URIRef("http://example.com/subject-x"), URIRef("http://example.com/predicate-x"), Literal("Triple X"), - graph_1, + graph_1_id, ) ) + d.add( ( URIRef("http://example.com/subject-z"), URIRef("http://example.com/predicate-z"), Literal("Triple Z"), - graph_1, + graph_1_id, ) ) -# Add another quad to the Dataset to a non-existent Graph: -# the Graph is created automatically +# We now have 2 distinct quads in the Dataset to the Dataset has a length of 2 +assert len(d) == 2 + +# Add another quad to the Dataset specifying a non-existent Graph. +# The Graph is created automatically d.add( ( URIRef("http://example.com/subject-y"), @@ -63,8 +80,15 @@ ) ) -# printing the Dataset like this: print(d.serialize(format="trig")) -# produces a result like this: +assert len(d) == 3 + + +# You can print the Dataset like you do a Graph but you must specify a quads format like +# 'trig' or 'trix', not 'turtle', unless the default_union parameter is set to True, and +# then you can print the entire Dataset in triples. +# print(d.serialize(format="trig").strip()) + +# you should see something like this: """ @prefix ex: . @@ -78,85 +102,278 @@ ex:subject-y ex:predicate-y "Triple Y" . } """ -print("Printing Serialised Dataset:") -print("---") -print(d.serialize(format="trig")) -print("---") -print() -print() -# -# Use & Query -# -# print the length of the Dataset, i.e. the count of all triples in all Graphs -# we should get +# Print out one graph in the Dataset, using a standard Graph serialization format - longturtle +print(d.get_graph(URIRef("http://example.com/graph-2")).serialize(format="longturtle")) + +# you should see something like this: """ -3 +PREFIX ex: + +ex:subject-y + ex:predicate-y "Triple Y" ; +. """ -print("Printing Dataset Length:") -print("---") -print(len(d)) -print("---") -print() -print() -# Query one graph in the Dataset for all its triples -# we should get + +####################################################################################### +# 2. 
Looping & Counting +####################################################################################### + +# Loop through all quads in the dataset +for s, p, o, g in d.quads((None, None, None, None)): # type: ignore[arg-type] + print(f"{s}, {p}, {o}, {g}") + +# you should see something like this: """ -(rdflib.term.URIRef('http://example.com/subject-z'), rdflib.term.URIRef('http://example.com/predicate-z'), rdflib.term.Literal('Triple Z')) -(rdflib.term.URIRef('http://example.com/subject-x'), rdflib.term.URIRef('http://example.com/predicate-x'), rdflib.term.Literal('Triple X')) +http://example.com/subject-z, http://example.com/predicate-z, Triple Z, http://example.com/graph-1 +http://example.com/subject-x, http://example.com/predicate-x, Triple X, http://example.com/graph-1 +http://example.com/subject-y, http://example.com/predicate-y, Triple Y, http://example.com/graph-2 """ -print("Printing all triple from one Graph in the Dataset:") -print("---") -for triple in d.triples((None, None, None, graph_1)): # type: ignore[arg-type] - print(triple) -print("---") -print() -print() -# Query the union of all graphs in the dataset for all triples -# we should get nothing: +# Loop through all the quads in one Graph - just constrain the Graph field +for s, p, o, g in d.quads((None, None, None, graph_1_id)): # type: ignore[arg-type] + print(f"{s}, {p}, {o}, {g}") + +# you should see something like this: """ +http://example.com/subject-x, http://example.com/predicate-x, Triple X, http://example.com/graph-1 +http://example.com/subject-z, http://example.com/predicate-z, Triple Z, http://example.com/graph-1 """ -# A Dataset's default union graph does not exist by default (default_union property is False) -print("Attempt #1 to print all triples in the Dataset:") -print("---") -for triple in d.triples((None, None, None, None)): - print(triple) -print("---") -print() -print() -# Set the Dataset's default_union property to True and re-query +# Looping through triples in one Graph still works too +for s, p, o in d.triples((None, None, None, graph_1_id)): # type: ignore[arg-type] + print(f"{s}, {p}, {o}") + +# you should see something like this: +""" +http://example.com/subject-x, http://example.com/predicate-x, Triple X +http://example.com/subject-z, http://example.com/predicate-z, Triple Z +""" + +# Looping through triples across the whole Dataset will produce nothing +# unless we set the default_union parameter to True, since each triple is in a Named Graph + +# Setting the default_union parameter to True essentially presents all triples in all +# Graphs as a single Graph d.default_union = True -print("Attempt #2 to print all triples in the Dataset:") -print("---") -for triple in d.triples((None, None, None, None)): - print(triple) -print("---") -print() -print() +for s, p, o in d.triples((None, None, None)): + print(f"{s}, {p}, {o}") +# you should see something like this: +""" +http://example.com/subject-x, http://example.com/predicate-x, Triple X +http://example.com/subject-z, http://example.com/predicate-z, Triple Z +http://example.com/subject-y, http://example.com/predicate-y, Triple Y +""" -# -# Remove -# +# You can still loop through all quads now with the default_union parameter to True +for s, p, o, g in d.quads((None, None, None)): + print(f"{s}, {p}, {o}, {g}") + +# you should see something like this: +""" +http://example.com/subject-z, http://example.com/predicate-z, Triple Z, http://example.com/graph-1 +http://example.com/subject-x, http://example.com/predicate-x, Triple X, 
http://example.com/graph-1 +http://example.com/subject-y, http://example.com/predicate-y, Triple Y, http://example.com/graph-2 +""" + +# Adding a triple in graph-1 to graph-2 increases the number of distinct of quads in +# the Dataset +d.add( + ( + URIRef("http://example.com/subject-z"), + URIRef("http://example.com/predicate-z"), + Literal("Triple Z"), + URIRef("http://example.com/graph-2"), + ) +) + +for s, p, o, g in d.quads((None, None, None, None)): + print(f"{s}, {p}, {o}, {g}") + +# you should see something like this, with the 'Z' triple in graph-1 and graph-2: +""" +http://example.com/subject-x, http://example.com/predicate-x, Triple X, http://example.com/graph-1 +http://example.com/subject-y, http://example.com/predicate-y, Triple Y, http://example.com/graph-2 +http://example.com/subject-z, http://example.com/predicate-z, Triple Z, http://example.com/graph-1 +http://example.com/subject-z, http://example.com/predicate-z, Triple Z, http://example.com/graph-2 +""" + +# but the 'length' of the Dataset is still only 3 as only distinct triples are counted +assert len(d) == 3 + + +# Looping through triples sees the 'Z' triple only once +for s, p, o in d.triples((None, None, None)): + print(f"{s}, {p}, {o}") + +# you should see something like this: +""" +http://example.com/subject-x, http://example.com/predicate-x, Triple X +http://example.com/subject-z, http://example.com/predicate-z, Triple Z +http://example.com/subject-y, http://example.com/predicate-y, Triple Y +""" + +####################################################################################### +# 3. Manipulating Graphs +####################################################################################### + +# List all the Graphs in the Dataset +for x in d.graphs(): + print(x) + +# this returns the graphs, something like: +""" + a rdfg:Graph;rdflib:storage [a rdflib:Store;rdfs:label 'Memory']. + a rdfg:Graph;rdflib:storage [a rdflib:Store;rdfs:label 'Memory']. + a rdfg:Graph;rdflib:storage [a rdflib:Store;rdfs:label 'Memory']. 
+""" + +# So try this +for x in d.graphs(): + print(x.identifier) + +# you should see something like this, noting the default, currently empty, graph: +""" +urn:x-rdflib:default +http://example.com/graph-2 +http://example.com/graph-1 +""" -# Remove Graph graph_1 from the Dataset -d.remove_graph(graph_1) +# To add to the default Graph, just add a triple, not a quad, to the Dataset directly +d.add( + ( + URIRef("http://example.com/subject-n"), + URIRef("http://example.com/predicate-n"), + Literal("Triple N"), + ) +) +for s, p, o, g in d.quads((None, None, None, None)): + print(f"{s}, {p}, {o}, {g}") + +# you should see something like this, noting the triple in the default Graph: +""" +http://example.com/subject-z, http://example.com/predicate-z, Triple Z, http://example.com/graph-1 +http://example.com/subject-z, http://example.com/predicate-z, Triple Z, http://example.com/graph-2 +http://example.com/subject-x, http://example.com/predicate-x, Triple X, http://example.com/graph-1 +http://example.com/subject-y, http://example.com/predicate-y, Triple Y, http://example.com/graph-2 +http://example.com/subject-n, http://example.com/predicate-n, Triple N, urn:x-rdflib:default +""" + +# Loop through triples per graph +for x in d.graphs(): + print(x.identifier) + for s, p, o in x.triples((None, None, None)): + print(f"\t{s}, {p}, {o}") -# printing the Dataset like this: print(d.serialize(format="trig")) -# now produces a result like this: +# you should see something like this: +""" +urn:x-rdflib:default + http://example.com/subject-n, http://example.com/predicate-n, Triple N +http://example.com/graph-1 + http://example.com/subject-x, http://example.com/predicate-x, Triple X + http://example.com/subject-z, http://example.com/predicate-z, Triple Z +http://example.com/graph-2 + http://example.com/subject-y, http://example.com/predicate-y, Triple Y + http://example.com/subject-z, http://example.com/predicate-z, Triple Z +""" +# The default_union parameter includes all triples in the Named Graphs and the Default Graph +for s, p, o in d.triples((None, None, None)): + print(f"{s}, {p}, {o}") + +# you should see something like this: +""" +http://example.com/subject-x, http://example.com/predicate-x, Triple X +http://example.com/subject-n, http://example.com/predicate-n, Triple N +http://example.com/subject-z, http://example.com/predicate-z, Triple Z +http://example.com/subject-y, http://example.com/predicate-y, Triple Y """ + +# To remove a graph +d.remove_graph(graph_1_id) + +# To remove the default graph +d.remove_graph(URIRef("urn:x-rdflib:default")) + +# print what's left - one graph, graph-2 +print(d.serialize(format="trig")) + +# you should see something like this: +""" +@prefix ex: . + ex:graph-2 { ex:subject-y ex:predicate-y "Triple Y" . + + ex:subject-z ex:predicate-z "Triple Z" . +} +""" + +# To add a Graph that already exists, you must give it an Identifier or else it will be assigned a Blank Node ID +g_with_id = Graph(identifier=URIRef("http://example.com/graph-3")) +g_with_id.bind("ex", "http://example.com/") + +# Add a distinct triple to the exiting Graph, using Namepspace IRI shortcuts +# g_with_id.bind("ex", "http://example.com/") +g_with_id.add( + ( + URIRef("http://example.com/subject-k"), + URIRef("http://example.com/predicate-k"), + Literal("Triple K"), + ) +) +d.add_graph(g_with_id) +print(d.serialize(format="trig")) + +# you should see something like this: +""" +@prefix ex: . + +ex:graph-3 { + ex:subject_k ex:predicate_k "Triple K" . 
+}
+
+ex:graph-2 {
+    ex:subject-y ex:predicate-y "Triple Y" .
+
+    ex:subject-z ex:predicate-z "Triple Z" .
+}
+"""
+
+# If you add a Graph with no specified identifier...
+g_no_id = Graph()
+g_no_id.bind("ex", "http://example.com/")
+
+g_no_id.add(
+    (
+        URIRef("http://example.com/subject-l"),
+        URIRef("http://example.com/predicate-l"),
+        Literal("Triple L"),
+    )
+)
+d.add_graph(g_no_id)
+
+# now when we print it, we will see a Graph with a Blank Node id:
+print(d.serialize(format="trig"))
+
+# you should see something like this, but with a different Blank Node ID, as this is rebuilt on each code execution
+"""
+@prefix ex: <http://example.com/> .
+
+ex:graph-3 {
+    ex:subject-k ex:predicate-k "Triple K" .
+}
+
+ex:graph-2 {
+    ex:subject-y ex:predicate-y "Triple Y" .
+
+    ex:subject-z ex:predicate-z "Triple Z" .
+}
+
+_:N9cc8b54c91724e31896da5ce41e0c937 {
+    ex:subject-l ex:predicate-l "Triple L" .
 }
 """
-print("Printing Serialised Dataset after graph_1 removal:")
-print("---")
-print(d.serialize(format="trig").strip())
-print("---")
-print()
-print()
diff --git a/rdflib/graph.py b/rdflib/graph.py
index fcad4ae70..b43bafba2 100644
--- a/rdflib/graph.py
+++ b/rdflib/graph.py
@@ -423,11 +423,50 @@
 # Graph is a node because technically a formula-aware graph
 # take a Graph as subject or object, but we usually use QuotedGraph for that.
 class Graph(Node):
-    """An RDF Graph
+    """An RDF Graph: a Python object containing nodes and relations between them as
+    RDF 'triples'.
 
-    The constructor accepts one argument, the "store"
-    that will be used to store the graph data (see the "store"
-    package for stores currently shipped with rdflib).
+    This is the central RDFLib object class and Graph objects are almost always present
+    in all uses of RDFLib.
+
+    The basic use is to create a Graph and iterate through or query its content, e.g.:
+
+    >>> from rdflib import Graph, URIRef
+    >>> g = Graph()
+
+    >>> g.add((
+    ...     URIRef("http://example.com/s1"),  # subject
+    ...     URIRef("http://example.com/p1"),  # predicate
+    ...     URIRef("http://example.com/o1"),  # object
+    ... ))  # doctest: +ELLIPSIS
+    <Graph identifier=... (<class 'rdflib.graph.Graph'>)>
+
+    >>> g.add((
+    ...     URIRef("http://example.com/s2"),  # subject
+    ...     URIRef("http://example.com/p2"),  # predicate
+    ...     URIRef("http://example.com/o2"),  # object
+    ... ))  # doctest: +ELLIPSIS
+    <Graph identifier=... (<class 'rdflib.graph.Graph'>)>
+
+    >>> for triple in sorted(g):  # simple looping
+    ...     print(triple)
+    (rdflib.term.URIRef('http://example.com/s1'), rdflib.term.URIRef('http://example.com/p1'), rdflib.term.URIRef('http://example.com/o1'))
+    (rdflib.term.URIRef('http://example.com/s2'), rdflib.term.URIRef('http://example.com/p2'), rdflib.term.URIRef('http://example.com/o2'))
+
+    >>> # get the object of the triple with subject s1 and predicate p1
+    >>> o = g.value(
+    ...     subject=URIRef("http://example.com/s1"),
+    ...     predicate=URIRef("http://example.com/p1")
+    ... )
+
+
+    The constructor accepts one argument, the "store" that will be used to store the
+    graph data with the default being the `Memory `
+    (in memory) Store. Other Stores that persist content to disk using various file
+    databases or Stores that use remote servers (SPARQL systems) are supported. See
+    the :doc:`rdflib.plugins.stores` package for Stores currently shipped with RDFLib.
+    Other Stores not shipped with RDFLib can be added, such as
+    `HDT `_.
 
     Stores can be context-aware or unaware.  Unaware stores take up
     (some) less space but cannot support features that require
@@ -435,14 +474,15 @@ class Graph(Node):
     provenance.
     Even if used with a context-aware store, Graph will only expose the quads which
-    belong to the default graph. To access the rest of the data, `ConjunctiveGraph` or
-    `Dataset` classes can be used instead.
+    belong to the default graph. To access the rest of the data, the
+    `Dataset` class can be used instead.
 
     The Graph constructor can take an identifier which identifies the Graph
     by name. If none is given, the graph is assigned a BNode for its identifier.
 
-    For more on named graphs, see: http://www.w3.org/2004/03/trix/
+    For more on Named Graphs, see the RDFLib `Dataset` class and the TriG Specification,
+    https://www.w3.org/TR/trig/.
     """
 
     context_aware: bool
@@ -1153,10 +1193,10 @@ def transitiveClosure(  # noqa: N802
         function against the graph
 
         >>> from rdflib.collection import Collection
-        >>> g=Graph()
-        >>> a=BNode("foo")
-        >>> b=BNode("bar")
-        >>> c=BNode("baz")
+        >>> g = Graph()
+        >>> a = BNode("foo")
+        >>> b = BNode("bar")
+        >>> c = BNode("baz")
         >>> g.add((a,RDF.first,RDF.type)) # doctest: +ELLIPSIS
         <Graph identifier=... (<class 'rdflib.graph.Graph'>)>
         >>> g.add((a,RDF.rest,b)) # doctest: +ELLIPSIS
@@ -2370,21 +2410,49 @@ def __reduce__(self) -> tuple[type[Graph], tuple[Store, _ContextIdentifierType]]
 class Dataset(ConjunctiveGraph):
     """
-    RDF 1.1 Dataset. Small extension to the Conjunctive Graph:
-    - the primary term is graphs in the datasets and not contexts with quads,
-    so there is a separate method to set/retrieve a graph in a dataset and
-    operate with graphs
-    - graphs cannot be identified with blank nodes
-    - added a method to directly add a single quad
+    An RDFLib Dataset is an object that stores multiple Named Graphs - instances of
+    RDFLib Graph identified by IRI - within it and allows whole-of-dataset or single
+    Graph use.
+
+    RDFLib's Dataset class is based on the `RDF 1.2 'Dataset' definition
+    `_:
+
+    ..
+
+        An RDF dataset is a collection of RDF graphs, and comprises:
+
+        - Exactly one default graph, being an RDF graph. The default graph does not
+          have a name and MAY be empty.
+        - Zero or more named graphs. Each named graph is a pair consisting of an IRI or
+          a blank node (the graph name), and an RDF graph. Graph names are unique
+          within an RDF dataset.
 
-    Examples of usage:
+    Accordingly, a Dataset allows for `Graph` objects to be added to it with
+    :class:`rdflib.term.URIRef` or :class:`rdflib.term.BNode` identifiers and always
+    creates a default graph with the :class:`rdflib.term.URIRef` identifier
+    :code:`urn:x-rdflib:default`.
+
+    Dataset extends Graph's Subject, Predicate, Object (s, p, o) 'triple'
+    structure to include a graph identifier - archaically called Context - producing
+    'quads' of s, p, o, g.
+
+    Triples, or quads, can be added to a Dataset. Triples, or quads with the graph
+    identifier :code:`urn:x-rdflib:default`, go into the default graph.
+
+    .. note:: Dataset builds on the `ConjunctiveGraph` class but that class's direct
+        use is now deprecated (since RDFLib 7.x) and it should not be used.
+        `ConjunctiveGraph` will be removed from future RDFLib versions.
+
+    Examples of usage (see also the examples/datasets.py file):
 
     >>> # Create a new Dataset
     >>> ds = Dataset()
     >>> # simple triples goes to default graph
-    >>> ds.add((URIRef("http://example.org/a"),
-    ...         URIRef("http://www.example.org/b"),
-    ...         Literal("foo")))  # doctest: +ELLIPSIS
+    >>> ds.add((
+    ...     URIRef("http://example.org/a"),
+    ...     URIRef("http://www.example.org/b"),
+    ...     Literal("foo")
+    ...
)) # doctest: +ELLIPSIS )> >>> >>> # Create a graph in the dataset, if the graph name has already been @@ -2393,16 +2461,19 @@ class Dataset(ConjunctiveGraph): >>> g = ds.graph(URIRef("http://www.example.com/gr")) >>> >>> # add triples to the new graph as usual - >>> g.add( - ... (URIRef("http://example.org/x"), + >>> g.add(( + ... URIRef("http://example.org/x"), ... URIRef("http://example.org/y"), - ... Literal("bar")) ) # doctest: +ELLIPSIS + ... Literal("bar") + ... )) # doctest: +ELLIPSIS )> >>> # alternatively: add a quad to the dataset -> goes to the graph - >>> ds.add( - ... (URIRef("http://example.org/x"), + >>> ds.add(( + ... URIRef("http://example.org/x"), ... URIRef("http://example.org/z"), - ... Literal("foo-bar"),g) ) # doctest: +ELLIPSIS + ... Literal("foo-bar"), + ... g + ... )) # doctest: +ELLIPSIS )> >>> >>> # querying triples return them all regardless of the graph @@ -2468,8 +2539,8 @@ class Dataset(ConjunctiveGraph): >>> >>> # graph names in the dataset can be queried: >>> for c in ds.graphs(): # doctest: +SKIP - ... print(c) # doctest: - DEFAULT + ... print(c.identifier) # doctest: + urn:x-rdflib:default http://www.example.com/gr >>> # A graph can be created without specifying a name; a skolemized genid >>> # is created on the fly @@ -2488,7 +2559,7 @@ class Dataset(ConjunctiveGraph): >>> >>> # a graph can also be removed from a dataset via ds.remove_graph(g) - .. versionadded:: 4.0 + ... versionadded:: 4.0 """ def __init__( diff --git a/rdflib/plugins/stores/auditable.py b/rdflib/plugins/stores/auditable.py index a5e51087a..253f59530 100644 --- a/rdflib/plugins/stores/auditable.py +++ b/rdflib/plugins/stores/auditable.py @@ -10,7 +10,7 @@ Calls to commit or rollback, flush the list of reverse operations This provides thread-safe atomicity and isolation (assuming concurrent operations occur with different store instances), but no durability (transactions are -persisted in memory and wont be available to reverse operations after the +persisted in memory and won't be available to reverse operations after the system fails): A and I out of ACID. """ diff --git a/test/test_dataset/test_dataset.py b/test/test_dataset/test_dataset.py index 19b9fe830..9f9bc9c26 100644 --- a/test/test_dataset/test_dataset.py +++ b/test/test_dataset/test_dataset.py @@ -5,11 +5,10 @@ import pytest -from rdflib import URIRef, plugin +from rdflib import BNode, Namespace, URIRef, plugin from rdflib.graph import DATASET_DEFAULT_GRAPH_ID, Dataset, Graph from rdflib.store import Store from test.data import CONTEXT1, LIKES, PIZZA, TAREK -from test.utils.namespace import EGSCHEME # Will also run SPARQLUpdateStore tests against local SPARQL1.1 endpoint if # available. This assumes SPARQL1.1 query/update endpoints running locally at @@ -58,9 +57,9 @@ def get_dataset(request): except ImportError: pytest.skip("Dependencies for store '%s' not available!" 
% store) - graph = Dataset(store=store) + d = Dataset(store=store) - if not graph.store.graph_aware: + if not d.store.graph_aware: return if store in ["SQLiteLSM", "LevelDB"]: @@ -75,31 +74,39 @@ def get_dataset(request): else: path = tempfile.mkdtemp() - graph.open(path, create=True if store != "SPARQLUpdateStore" else False) + d.open(path, create=True if store != "SPARQLUpdateStore" else False) if store == "SPARQLUpdateStore": try: - graph.store.update("CLEAR ALL") + d.graph() + d.add( + ( + URIRef("http://example.com/s"), + URIRef("http://example.com/p"), + URIRef("http://example.com/o"), + ) + ) + d.store.update("CLEAR ALL") except Exception as e: if "SPARQLStore does not support BNodes! " in str(e): pass else: raise Exception(e) - yield store, graph + yield store, d if store == "SPARQLUpdateStore": try: - graph.store.update("CLEAR ALL") + d.update("CLEAR ALL") except Exception as e: if "SPARQLStore does not support BNodes! " in str(e): pass else: raise Exception(e) - graph.close() + d.close() else: - graph.close() - graph.destroy(path) + d.close() + d.destroy(path) if os.path.isdir(path): shutil.rmtree(path) else: @@ -121,7 +128,7 @@ def test_graph_aware(get_dataset): # empty named graphs if store != "SPARQLUpdateStore": # added graph exists - assert set(x.identifier for x in dataset.contexts()) == set( + assert set(x.identifier for x in dataset.graphs()) == set( [CONTEXT1, DATASET_DEFAULT_GRAPH_ID] ) @@ -131,7 +138,7 @@ def test_graph_aware(get_dataset): g1.add((TAREK, LIKES, PIZZA)) # added graph still exists - assert set(x.identifier for x in dataset.contexts()) == set( + assert set(x.identifier for x in dataset.graphs()) == set( [CONTEXT1, DATASET_DEFAULT_GRAPH_ID] ) @@ -147,14 +154,14 @@ def test_graph_aware(get_dataset): # empty named graphs if store != "SPARQLUpdateStore": # graph still exists, although empty - assert set(x.identifier for x in dataset.contexts()) == set( + assert set(x.identifier for x in dataset.graphs()) == set( [CONTEXT1, DATASET_DEFAULT_GRAPH_ID] ) dataset.remove_graph(CONTEXT1) # graph is gone - assert set(x.identifier for x in dataset.contexts()) == set( + assert set(x.identifier for x in dataset.graphs()) == set( [DATASET_DEFAULT_GRAPH_ID] ) @@ -173,7 +180,7 @@ def test_default_graph(get_dataset): dataset.add((TAREK, LIKES, PIZZA)) assert len(dataset) == 1 # only default exists - assert list(dataset.contexts()) == [dataset.default_context] + assert list(dataset.graphs()) == [dataset.default_context] # removing default graph removes triples but not actual graph dataset.remove_graph(DATASET_DEFAULT_GRAPH_ID) @@ -181,7 +188,7 @@ def test_default_graph(get_dataset): assert len(dataset) == 0 # default still exists - assert set(dataset.contexts()) == set([dataset.default_context]) + assert set(dataset.graphs()) == set([dataset.default_context]) def test_not_union(get_dataset): @@ -193,11 +200,11 @@ def test_not_union(get_dataset): "its default graph as the union of the named graphs" ) - subgraph1 = dataset.graph(CONTEXT1) - subgraph1.add((TAREK, LIKES, PIZZA)) + g1 = dataset.graph(CONTEXT1) + g1.add((TAREK, LIKES, PIZZA)) assert list(dataset.objects(TAREK, None)) == [] - assert list(subgraph1.objects(TAREK, None)) == [PIZZA] + assert list(g1.objects(TAREK, None)) == [PIZZA] def test_iter(get_dataset): @@ -208,16 +215,16 @@ def test_iter(get_dataset): uri_c = URIRef("https://example.com/c") uri_d = URIRef("https://example.com/d") - d.graph(URIRef("https://example.com/subgraph1")) - d.add((uri_a, uri_b, uri_c, URIRef("https://example.com/subgraph1"))) + 
d.graph(URIRef("https://example.com/g1")) + d.add((uri_a, uri_b, uri_c, URIRef("https://example.com/g1"))) d.add( - (uri_a, uri_b, uri_c, URIRef("https://example.com/subgraph1")) + (uri_a, uri_b, uri_c, URIRef("https://example.com/g1")) ) # pointless addition: duplicates above d.graph(URIRef("https://example.com/g2")) d.add((uri_a, uri_b, uri_c, URIRef("https://example.com/g2"))) - d.add((uri_a, uri_b, uri_d, URIRef("https://example.com/subgraph1"))) + d.add((uri_a, uri_b, uri_d, URIRef("https://example.com/g1"))) # traditional iterator i_trad = 0 @@ -232,7 +239,7 @@ def test_iter(get_dataset): assert i_new == i_trad # both should be 3 -def test_subgraph_without_identifier() -> None: +def test_graph_without_identifier() -> None: """ Graphs with no identifies assigned are identified by Skolem IRIs with a prefix that is bound to `genid`. @@ -241,9 +248,9 @@ def test_subgraph_without_identifier() -> None: reviewed at some point. """ - dataset = Dataset() + d = Dataset() - nman = dataset.namespace_manager + nman = d.namespace_manager genid_prefix = URIRef("https://rdflib.github.io/.well-known/genid/rdflib/") @@ -253,15 +260,36 @@ def test_subgraph_without_identifier() -> None: is None ) - subgraph: Graph = dataset.graph() - subgraph.add((EGSCHEME["subject"], EGSCHEME["predicate"], EGSCHEME["object"])) + ex = Namespace("http://example.com/") + g1: Graph = d.graph() + g1.add((ex.subject, ex.predicate, ex.object)) namespaces = set(nman.namespaces()) assert next( (namespace for namespace in namespaces if namespace[0] == "genid"), None ) == ("genid", genid_prefix) - assert f"{subgraph.identifier}".startswith(genid_prefix) + assert f"{g1.identifier}".startswith(genid_prefix) + + # now add a preexisting graph with no identifier + # i.e. not one created within this Dataset object + g2 = Graph() + g2.add((ex.subject, ex.predicate, ex.object)) + d.add_graph(g2) + + iris = 0 + bns = 0 + others = 0 + for g in d.graphs(): + if type(g.identifier) is URIRef: + iris += 1 + elif type(g.identifier) is BNode: + bns += 1 + else: + others += 1 + assert iris == 2 + assert bns == 1 + assert others == 0 def test_not_deprecated():
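For quick reference, the graph-identifier behaviour that the new example file and tests above describe can be reduced to a short, self-contained sketch: a Graph added to a Dataset with an explicit IRI keeps that identifier, while a Graph added without one is assigned a Blank Node identifier. This is an illustrative sketch only, assuming RDFLib 7.x and the public `Dataset`, `Graph`, `URIRef` and `Literal` classes used throughout the patch; the variable names and IRIs here are hypothetical and not taken verbatim from the patch.

```python
# Illustrative sketch of Dataset graph-identifier behaviour (assumes RDFLib 7.x).
from rdflib import Dataset, Graph, Literal, URIRef

d = Dataset()

# A Graph added with an explicit IRI keeps that IRI as its identifier.
g_named = Graph(identifier=URIRef("http://example.com/graph-3"))
g_named.add(
    (
        URIRef("http://example.com/subject-k"),
        URIRef("http://example.com/predicate-k"),
        Literal("Triple K"),
    )
)
d.add_graph(g_named)

# A Graph added without an identifier is assigned a Blank Node identifier.
g_anon = Graph()
g_anon.add(
    (
        URIRef("http://example.com/subject-l"),
        URIRef("http://example.com/predicate-l"),
        Literal("Triple L"),
    )
)
d.add_graph(g_anon)

# Inspect the identifier type of every graph in the Dataset, including the default graph.
for g in d.graphs():
    print(type(g.identifier).__name__, g.identifier)
```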