From 97ff66884760d5dea21cb2a1957c46bb89defa12 Mon Sep 17 00:00:00 2001 From: Nicholas Car Date: Thu, 9 Jan 2025 15:17:01 +1000 Subject: [PATCH] Dataset documentation improvements (#3012) * example printout improvements * added BN graph creation * updated tests var names & added one subtest * typos & improved formatting * updated Graph & Dataset docco * typo fix * fix code-in-comment syntax * fix code-in-comment syntax 2 * fix code-in-comment syntax - ellipses * fix code-in-comment syntax - sort print loop output * blacked * ruff fixes * Poetry 2.0.0 pyproject.toml file * move to PEP621 (Poetry 2.0.0) pyproject.toml * require poetry 2.0.0 * require poetry 2.0.0 * add in requirement for poetry-plugin-export * change from --sync to sync command * further pyproject.toml format updates * add poetry plugin to requirements-poetry.in * fix pre-commit poetry version to 2.0.0 * remove testing artifact * update license to 2025 * add me to contributors * remove outdated --check arg * typo * test add back in precommit args * test remove precommit args * match ruff version to pre-commit autoupdate PR #3026; add back in --check * re-remove --check * add David to CONTRIBUTORS * ruff in pyproject.toml to match pre-commit * updates for David's comments * fix Dataset docc ReST formatting --- .readthedocs.yaml | 2 +- examples/datasets.py | 379 +++++++++++++++++++++++------ rdflib/graph.py | 131 +++++++--- rdflib/plugins/stores/auditable.py | 2 +- test/test_dataset/test_dataset.py | 88 ++++--- 5 files changed, 459 insertions(+), 143 deletions(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index f5becb937..f737b9b00 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -1,7 +1,7 @@ --- # https://docs.readthedocs.io/en/stable/config-file/v2.html version: 2 -# NOTE: not builing epub because epub does not know how to handle .ico files +# NOTE: not building epub because epub does not know how to handle .ico files # which results in a warning which causes the build to fail due to # `sphinx.fail_on_warning` # https://github.com/sphinx-doc/sphinx/issues/10350 diff --git a/examples/datasets.py b/examples/datasets.py index d550775a1..7dfc4e48b 100644 --- a/examples/datasets.py +++ b/examples/datasets.py @@ -1,13 +1,23 @@ """ -An RDFLib Dataset is a slight extension to ConjunctiveGraph: it uses simpler terminology -and has a few additional convenience methods, for example add() can be used to -add quads directly to a specific Graph within the Dataset. +This file contains a number of common tasks using the RDFLib Dataset class. -This example file shows how to declare a Dataset, add content to it, serialise it, query it -and remove things from it. +An RDFLib Dataset is an object that stores multiple Named Graphs - instances of RDFLib +Graph identified by IRI - within it and allows whole-of-dataset or single Graph use. + +Dataset extends Graph's Subject, Predicate, Object structure to include Graph - +archaically called Context - producing quads of s, p, o, g. + +There is an older implementation of a Dataset-like class in RDFLib < 7.x called +ConjunctiveGraph that is now deprecated. + +Sections in this file: + +1. Creating & Adding +2. Looping & Counting +3. Manipulating Graphs """ -from rdflib import Dataset, Literal, Namespace, URIRef +from rdflib import Dataset, Graph, Literal, URIRef # Note regarding `mypy: ignore_errors=true`: # @@ -19,41 +29,48 @@ # mypy: ignore_errors=true -# -# Create & Add -# +####################################################################################### +# 1. 
Creating & Adding +####################################################################################### # Create an empty Dataset d = Dataset() + # Add a namespace prefix to it, just like for Graph -d.bind("ex", Namespace("http://example.com/")) +d.bind("ex", "http://example.com/") -# Declare a Graph URI to be used to identify a Graph -graph_1 = URIRef("http://example.com/graph-1") +# Declare a Graph identifier to be used to identify a Graph +# A string or a URIRef may be used, but safer to always use a URIRef for usage consistency +graph_1_id = URIRef("http://example.com/graph-1") -# Add an empty Graph, identified by graph_1, to the Dataset -d.graph(identifier=graph_1) +# Add an empty Graph, identified by graph_1_id, to the Dataset +d.graph(identifier=graph_1_id) -# Add two quads to Graph graph_1 in the Dataset +# Add two quads to the Dataset which are triples + graph ID +# These insert the triple into the GRaph specified by the ID d.add( ( URIRef("http://example.com/subject-x"), URIRef("http://example.com/predicate-x"), Literal("Triple X"), - graph_1, + graph_1_id, ) ) + d.add( ( URIRef("http://example.com/subject-z"), URIRef("http://example.com/predicate-z"), Literal("Triple Z"), - graph_1, + graph_1_id, ) ) -# Add another quad to the Dataset to a non-existent Graph: -# the Graph is created automatically +# We now have 2 distinct quads in the Dataset to the Dataset has a length of 2 +assert len(d) == 2 + +# Add another quad to the Dataset specifying a non-existent Graph. +# The Graph is created automatically d.add( ( URIRef("http://example.com/subject-y"), @@ -63,8 +80,15 @@ ) ) -# printing the Dataset like this: print(d.serialize(format="trig")) -# produces a result like this: +assert len(d) == 3 + + +# You can print the Dataset like you do a Graph but you must specify a quads format like +# 'trig' or 'trix', not 'turtle', unless the default_union parameter is set to True, and +# then you can print the entire Dataset in triples. +# print(d.serialize(format="trig").strip()) + +# you should see something like this: """ @prefix ex: . @@ -78,85 +102,278 @@ ex:subject-y ex:predicate-y "Triple Y" . } """ -print("Printing Serialised Dataset:") -print("---") -print(d.serialize(format="trig")) -print("---") -print() -print() -# -# Use & Query -# -# print the length of the Dataset, i.e. the count of all triples in all Graphs -# we should get +# Print out one graph in the Dataset, using a standard Graph serialization format - longturtle +print(d.get_graph(URIRef("http://example.com/graph-2")).serialize(format="longturtle")) + +# you should see something like this: """ -3 +PREFIX ex: + +ex:subject-y + ex:predicate-y "Triple Y" ; +. """ -print("Printing Dataset Length:") -print("---") -print(len(d)) -print("---") -print() -print() -# Query one graph in the Dataset for all its triples -# we should get + +####################################################################################### +# 2. 
Looping & Counting +####################################################################################### + +# Loop through all quads in the dataset +for s, p, o, g in d.quads((None, None, None, None)): # type: ignore[arg-type] + print(f"{s}, {p}, {o}, {g}") + +# you should see something like this: """ -(rdflib.term.URIRef('http://example.com/subject-z'), rdflib.term.URIRef('http://example.com/predicate-z'), rdflib.term.Literal('Triple Z')) -(rdflib.term.URIRef('http://example.com/subject-x'), rdflib.term.URIRef('http://example.com/predicate-x'), rdflib.term.Literal('Triple X')) +http://example.com/subject-z, http://example.com/predicate-z, Triple Z, http://example.com/graph-1 +http://example.com/subject-x, http://example.com/predicate-x, Triple X, http://example.com/graph-1 +http://example.com/subject-y, http://example.com/predicate-y, Triple Y, http://example.com/graph-2 """ -print("Printing all triple from one Graph in the Dataset:") -print("---") -for triple in d.triples((None, None, None, graph_1)): # type: ignore[arg-type] - print(triple) -print("---") -print() -print() -# Query the union of all graphs in the dataset for all triples -# we should get nothing: +# Loop through all the quads in one Graph - just constrain the Graph field +for s, p, o, g in d.quads((None, None, None, graph_1_id)): # type: ignore[arg-type] + print(f"{s}, {p}, {o}, {g}") + +# you should see something like this: """ +http://example.com/subject-x, http://example.com/predicate-x, Triple X, http://example.com/graph-1 +http://example.com/subject-z, http://example.com/predicate-z, Triple Z, http://example.com/graph-1 """ -# A Dataset's default union graph does not exist by default (default_union property is False) -print("Attempt #1 to print all triples in the Dataset:") -print("---") -for triple in d.triples((None, None, None, None)): - print(triple) -print("---") -print() -print() -# Set the Dataset's default_union property to True and re-query +# Looping through triples in one Graph still works too +for s, p, o in d.triples((None, None, None, graph_1_id)): # type: ignore[arg-type] + print(f"{s}, {p}, {o}") + +# you should see something like this: +""" +http://example.com/subject-x, http://example.com/predicate-x, Triple X +http://example.com/subject-z, http://example.com/predicate-z, Triple Z +""" + +# Looping through triples across the whole Dataset will produce nothing +# unless we set the default_union parameter to True, since each triple is in a Named Graph + +# Setting the default_union parameter to True essentially presents all triples in all +# Graphs as a single Graph d.default_union = True -print("Attempt #2 to print all triples in the Dataset:") -print("---") -for triple in d.triples((None, None, None, None)): - print(triple) -print("---") -print() -print() +for s, p, o in d.triples((None, None, None)): + print(f"{s}, {p}, {o}") +# you should see something like this: +""" +http://example.com/subject-x, http://example.com/predicate-x, Triple X +http://example.com/subject-z, http://example.com/predicate-z, Triple Z +http://example.com/subject-y, http://example.com/predicate-y, Triple Y +""" -# -# Remove -# +# You can still loop through all quads now with the default_union parameter to True +for s, p, o, g in d.quads((None, None, None)): + print(f"{s}, {p}, {o}, {g}") + +# you should see something like this: +""" +http://example.com/subject-z, http://example.com/predicate-z, Triple Z, http://example.com/graph-1 +http://example.com/subject-x, http://example.com/predicate-x, Triple X, 
http://example.com/graph-1 +http://example.com/subject-y, http://example.com/predicate-y, Triple Y, http://example.com/graph-2 +""" + +# Adding a triple in graph-1 to graph-2 increases the number of distinct of quads in +# the Dataset +d.add( + ( + URIRef("http://example.com/subject-z"), + URIRef("http://example.com/predicate-z"), + Literal("Triple Z"), + URIRef("http://example.com/graph-2"), + ) +) + +for s, p, o, g in d.quads((None, None, None, None)): + print(f"{s}, {p}, {o}, {g}") + +# you should see something like this, with the 'Z' triple in graph-1 and graph-2: +""" +http://example.com/subject-x, http://example.com/predicate-x, Triple X, http://example.com/graph-1 +http://example.com/subject-y, http://example.com/predicate-y, Triple Y, http://example.com/graph-2 +http://example.com/subject-z, http://example.com/predicate-z, Triple Z, http://example.com/graph-1 +http://example.com/subject-z, http://example.com/predicate-z, Triple Z, http://example.com/graph-2 +""" + +# but the 'length' of the Dataset is still only 3 as only distinct triples are counted +assert len(d) == 3 + + +# Looping through triples sees the 'Z' triple only once +for s, p, o in d.triples((None, None, None)): + print(f"{s}, {p}, {o}") + +# you should see something like this: +""" +http://example.com/subject-x, http://example.com/predicate-x, Triple X +http://example.com/subject-z, http://example.com/predicate-z, Triple Z +http://example.com/subject-y, http://example.com/predicate-y, Triple Y +""" + +####################################################################################### +# 3. Manipulating Graphs +####################################################################################### + +# List all the Graphs in the Dataset +for x in d.graphs(): + print(x) + +# this returns the graphs, something like: +""" + a rdfg:Graph;rdflib:storage [a rdflib:Store;rdfs:label 'Memory']. + a rdfg:Graph;rdflib:storage [a rdflib:Store;rdfs:label 'Memory']. + a rdfg:Graph;rdflib:storage [a rdflib:Store;rdfs:label 'Memory']. 
+""" + +# So try this +for x in d.graphs(): + print(x.identifier) + +# you should see something like this, noting the default, currently empty, graph: +""" +urn:x-rdflib:default +http://example.com/graph-2 +http://example.com/graph-1 +""" -# Remove Graph graph_1 from the Dataset -d.remove_graph(graph_1) +# To add to the default Graph, just add a triple, not a quad, to the Dataset directly +d.add( + ( + URIRef("http://example.com/subject-n"), + URIRef("http://example.com/predicate-n"), + Literal("Triple N"), + ) +) +for s, p, o, g in d.quads((None, None, None, None)): + print(f"{s}, {p}, {o}, {g}") + +# you should see something like this, noting the triple in the default Graph: +""" +http://example.com/subject-z, http://example.com/predicate-z, Triple Z, http://example.com/graph-1 +http://example.com/subject-z, http://example.com/predicate-z, Triple Z, http://example.com/graph-2 +http://example.com/subject-x, http://example.com/predicate-x, Triple X, http://example.com/graph-1 +http://example.com/subject-y, http://example.com/predicate-y, Triple Y, http://example.com/graph-2 +http://example.com/subject-n, http://example.com/predicate-n, Triple N, urn:x-rdflib:default +""" + +# Loop through triples per graph +for x in d.graphs(): + print(x.identifier) + for s, p, o in x.triples((None, None, None)): + print(f"\t{s}, {p}, {o}") -# printing the Dataset like this: print(d.serialize(format="trig")) -# now produces a result like this: +# you should see something like this: +""" +urn:x-rdflib:default + http://example.com/subject-n, http://example.com/predicate-n, Triple N +http://example.com/graph-1 + http://example.com/subject-x, http://example.com/predicate-x, Triple X + http://example.com/subject-z, http://example.com/predicate-z, Triple Z +http://example.com/graph-2 + http://example.com/subject-y, http://example.com/predicate-y, Triple Y + http://example.com/subject-z, http://example.com/predicate-z, Triple Z +""" +# The default_union parameter includes all triples in the Named Graphs and the Default Graph +for s, p, o in d.triples((None, None, None)): + print(f"{s}, {p}, {o}") + +# you should see something like this: +""" +http://example.com/subject-x, http://example.com/predicate-x, Triple X +http://example.com/subject-n, http://example.com/predicate-n, Triple N +http://example.com/subject-z, http://example.com/predicate-z, Triple Z +http://example.com/subject-y, http://example.com/predicate-y, Triple Y """ + +# To remove a graph +d.remove_graph(graph_1_id) + +# To remove the default graph +d.remove_graph(URIRef("urn:x-rdflib:default")) + +# print what's left - one graph, graph-2 +print(d.serialize(format="trig")) + +# you should see something like this: +""" +@prefix ex: . + ex:graph-2 { ex:subject-y ex:predicate-y "Triple Y" . + + ex:subject-z ex:predicate-z "Triple Z" . +} +""" + +# To add a Graph that already exists, you must give it an Identifier or else it will be assigned a Blank Node ID +g_with_id = Graph(identifier=URIRef("http://example.com/graph-3")) +g_with_id.bind("ex", "http://example.com/") + +# Add a distinct triple to the exiting Graph, using Namepspace IRI shortcuts +# g_with_id.bind("ex", "http://example.com/") +g_with_id.add( + ( + URIRef("http://example.com/subject-k"), + URIRef("http://example.com/predicate-k"), + Literal("Triple K"), + ) +) +d.add_graph(g_with_id) +print(d.serialize(format="trig")) + +# you should see something like this: +""" +@prefix ex: . + +ex:graph-3 { + ex:subject_k ex:predicate_k "Triple K" . 
+}
+
+ex:graph-2 {
+    ex:subject-y ex:predicate-y "Triple Y" .
+
+    ex:subject-z ex:predicate-z "Triple Z" .
+}
+"""
+
+# If you add a Graph with no specified identifier...
+g_no_id = Graph()
+g_no_id.bind("ex", "http://example.com/")
+
+g_no_id.add(
+    (
+        URIRef("http://example.com/subject-l"),
+        URIRef("http://example.com/predicate-l"),
+        Literal("Triple L"),
+    )
+)
+d.add_graph(g_no_id)
+
+# now when we print it, we will see a Graph with a Blank Node id:
+print(d.serialize(format="trig"))
+
+# you should see something like this, but with a different Blank Node ID, as this is rebuilt on each code execution
+"""
+@prefix ex: <http://example.com/> .
+
+ex:graph-3 {
+    ex:subject-k ex:predicate-k "Triple K" .
+}
+
+ex:graph-2 {
+    ex:subject-y ex:predicate-y "Triple Y" .
+
+    ex:subject-z ex:predicate-z "Triple Z" .
+}
+
+_:N9cc8b54c91724e31896da5ce41e0c937 {
+    ex:subject-l ex:predicate-l "Triple L" .
 }
 """
-print("Printing Serialised Dataset after graph_1 removal:")
-print("---")
-print(d.serialize(format="trig").strip())
-print("---")
-print()
-print()
diff --git a/rdflib/graph.py b/rdflib/graph.py
index fcad4ae70..b43bafba2 100644
--- a/rdflib/graph.py
+++ b/rdflib/graph.py
@@ -423,11 +423,50 @@
 # Graph is a node because technically a formula-aware graph
 # take a Graph as subject or object, but we usually use QuotedGraph for that.
 class Graph(Node):
-    """An RDF Graph
+    """An RDF Graph: a Python object containing nodes and relations between them as
+    RDF 'triples'.
 
-    The constructor accepts one argument, the "store"
-    that will be used to store the graph data (see the "store"
-    package for stores currently shipped with rdflib).
+    This is the central RDFLib object class and Graph objects are almost always present
+    in all uses of RDFLib.
+
+    The basic use is to create a Graph and iterate through or query its content, e.g.:
+
+    >>> from rdflib import Graph, URIRef
+    >>> g = Graph()
+
+    >>> g.add((
+    ...     URIRef("http://example.com/s1"),  # subject
+    ...     URIRef("http://example.com/p1"),  # predicate
+    ...     URIRef("http://example.com/o1"),  # object
+    ... ))  # doctest: +ELLIPSIS
+    <Graph identifier=... (<class 'rdflib.graph.Graph'>)>
+
+    >>> g.add((
+    ...     URIRef("http://example.com/s2"),  # subject
+    ...     URIRef("http://example.com/p2"),  # predicate
+    ...     URIRef("http://example.com/o2"),  # object
+    ... ))  # doctest: +ELLIPSIS
+    <Graph identifier=... (<class 'rdflib.graph.Graph'>)>
+
+    >>> for triple in sorted(g):  # simple looping
+    ...     print(triple)
+    (rdflib.term.URIRef('http://example.com/s1'), rdflib.term.URIRef('http://example.com/p1'), rdflib.term.URIRef('http://example.com/o1'))
+    (rdflib.term.URIRef('http://example.com/s2'), rdflib.term.URIRef('http://example.com/p2'), rdflib.term.URIRef('http://example.com/o2'))
+
+    >>> # get the object of the triple with subject s1 and predicate p1
+    >>> o = g.value(
+    ...     subject=URIRef("http://example.com/s1"),
+    ...     predicate=URIRef("http://example.com/p1")
+    ... )
+
+
+    The constructor accepts one argument, the "store" that will be used to store the
+    graph data with the default being the `Memory `
+    (in memory) Store. Other Stores that persist content to disk using various file
+    databases or Stores that use remote servers (SPARQL systems) are supported. See
+    the :doc:`rdflib.plugins.stores` package for Stores currently shipped with RDFLib.
+    Other Stores not shipped with RDFLib can be added, such as
+    `HDT `_.
 
     Stores can be context-aware or unaware.  Unaware stores take up
     (some) less space but cannot support features that require
@@ -435,14 +474,15 @@ class Graph(Node):
     provenance.
     Even if used with a context-aware store, Graph will only expose the quads which
-    belong to the default graph. To access the rest of the data, `ConjunctiveGraph` or
-    `Dataset` classes can be used instead.
+    belong to the default graph. To access the rest of the data, the
+    `Dataset` class can be used instead.
 
     The Graph constructor can take an identifier which identifies the Graph
     by name. If none is given, the graph is assigned a BNode for its identifier.
 
-    For more on named graphs, see: http://www.w3.org/2004/03/trix/
+    For more on Named Graphs, see the RDFLib `Dataset` class and the TriG Specification,
+    https://www.w3.org/TR/trig/.
     """
 
     context_aware: bool
@@ -1153,10 +1193,10 @@ def transitiveClosure(  # noqa: N802
         function against the graph
 
         >>> from rdflib.collection import Collection
-        >>> g=Graph()
-        >>> a=BNode("foo")
-        >>> b=BNode("bar")
-        >>> c=BNode("baz")
+        >>> g = Graph()
+        >>> a = BNode("foo")
+        >>> b = BNode("bar")
+        >>> c = BNode("baz")
         >>> g.add((a,RDF.first,RDF.type)) # doctest: +ELLIPSIS
         <Graph identifier=... (<class 'rdflib.graph.Graph'>)>
         >>> g.add((a,RDF.rest,b)) # doctest: +ELLIPSIS
@@ -2370,21 +2410,49 @@ def __reduce__(self) -> tuple[type[Graph], tuple[Store, _ContextIdentifierType]]
 class Dataset(ConjunctiveGraph):
     """
-    RDF 1.1 Dataset. Small extension to the Conjunctive Graph:
-    - the primary term is graphs in the datasets and not contexts with quads,
-    so there is a separate method to set/retrieve a graph in a dataset and
-    operate with graphs
-    - graphs cannot be identified with blank nodes
-    - added a method to directly add a single quad
+    An RDFLib Dataset is an object that stores multiple Named Graphs - instances of
+    RDFLib Graph identified by IRI - within it and allows whole-of-dataset or single
+    Graph use.
+
+    RDFLib's Dataset class is based on the `RDF 1.2 'Dataset' definition
+    `_:
+
+    ..
+
+        An RDF dataset is a collection of RDF graphs, and comprises:
+
+        - Exactly one default graph, being an RDF graph. The default graph does not
+          have a name and MAY be empty.
+        - Zero or more named graphs. Each named graph is a pair consisting of an IRI or
+          a blank node (the graph name), and an RDF graph. Graph names are unique
+          within an RDF dataset.
 
-    Examples of usage:
+    Accordingly, a Dataset allows for `Graph` objects to be added to it with
+    :class:`rdflib.term.URIRef` or :class:`rdflib.term.BNode` identifiers and always
+    creates a default graph with the :class:`rdflib.term.URIRef` identifier
+    :code:`urn:x-rdflib:default`.
+
+    Dataset extends Graph's Subject, Predicate, Object (s, p, o) 'triple'
+    structure to include a graph identifier - archaically called Context - producing
+    'quads' of s, p, o, g.
+
+    Triples, or quads, can be added to a Dataset. Triples, or quads with the graph
+    identifier :code:`urn:x-rdflib:default`, go into the default graph.
+
+    .. note:: Dataset builds on the `ConjunctiveGraph` class but that class's direct
+        use is now deprecated (since RDFLib 7.x) and it should not be used.
+        `ConjunctiveGraph` will be removed from future RDFLib versions.
+
+    Examples of usage (see also the examples/datasets.py file):
 
     >>> # Create a new Dataset
     >>> ds = Dataset()
     >>> # simple triples goes to default graph
-    >>> ds.add((URIRef("http://example.org/a"),
-    ...         URIRef("http://www.example.org/b"),
-    ...         Literal("foo")))  # doctest: +ELLIPSIS
+    >>> ds.add((
+    ...     URIRef("http://example.org/a"),
+    ...     URIRef("http://www.example.org/b"),
+    ...     Literal("foo")
+    ...
)) # doctest: +ELLIPSIS )> >>> >>> # Create a graph in the dataset, if the graph name has already been @@ -2393,16 +2461,19 @@ class Dataset(ConjunctiveGraph): >>> g = ds.graph(URIRef("http://www.example.com/gr")) >>> >>> # add triples to the new graph as usual - >>> g.add( - ... (URIRef("http://example.org/x"), + >>> g.add(( + ... URIRef("http://example.org/x"), ... URIRef("http://example.org/y"), - ... Literal("bar")) ) # doctest: +ELLIPSIS + ... Literal("bar") + ... )) # doctest: +ELLIPSIS )> >>> # alternatively: add a quad to the dataset -> goes to the graph - >>> ds.add( - ... (URIRef("http://example.org/x"), + >>> ds.add(( + ... URIRef("http://example.org/x"), ... URIRef("http://example.org/z"), - ... Literal("foo-bar"),g) ) # doctest: +ELLIPSIS + ... Literal("foo-bar"), + ... g + ... )) # doctest: +ELLIPSIS )> >>> >>> # querying triples return them all regardless of the graph @@ -2468,8 +2539,8 @@ class Dataset(ConjunctiveGraph): >>> >>> # graph names in the dataset can be queried: >>> for c in ds.graphs(): # doctest: +SKIP - ... print(c) # doctest: - DEFAULT + ... print(c.identifier) # doctest: + urn:x-rdflib:default http://www.example.com/gr >>> # A graph can be created without specifying a name; a skolemized genid >>> # is created on the fly @@ -2488,7 +2559,7 @@ class Dataset(ConjunctiveGraph): >>> >>> # a graph can also be removed from a dataset via ds.remove_graph(g) - .. versionadded:: 4.0 + ... versionadded:: 4.0 """ def __init__( diff --git a/rdflib/plugins/stores/auditable.py b/rdflib/plugins/stores/auditable.py index a5e51087a..253f59530 100644 --- a/rdflib/plugins/stores/auditable.py +++ b/rdflib/plugins/stores/auditable.py @@ -10,7 +10,7 @@ Calls to commit or rollback, flush the list of reverse operations This provides thread-safe atomicity and isolation (assuming concurrent operations occur with different store instances), but no durability (transactions are -persisted in memory and wont be available to reverse operations after the +persisted in memory and won't be available to reverse operations after the system fails): A and I out of ACID. """ diff --git a/test/test_dataset/test_dataset.py b/test/test_dataset/test_dataset.py index 19b9fe830..9f9bc9c26 100644 --- a/test/test_dataset/test_dataset.py +++ b/test/test_dataset/test_dataset.py @@ -5,11 +5,10 @@ import pytest -from rdflib import URIRef, plugin +from rdflib import BNode, Namespace, URIRef, plugin from rdflib.graph import DATASET_DEFAULT_GRAPH_ID, Dataset, Graph from rdflib.store import Store from test.data import CONTEXT1, LIKES, PIZZA, TAREK -from test.utils.namespace import EGSCHEME # Will also run SPARQLUpdateStore tests against local SPARQL1.1 endpoint if # available. This assumes SPARQL1.1 query/update endpoints running locally at @@ -58,9 +57,9 @@ def get_dataset(request): except ImportError: pytest.skip("Dependencies for store '%s' not available!" 
% store) - graph = Dataset(store=store) + d = Dataset(store=store) - if not graph.store.graph_aware: + if not d.store.graph_aware: return if store in ["SQLiteLSM", "LevelDB"]: @@ -75,31 +74,39 @@ def get_dataset(request): else: path = tempfile.mkdtemp() - graph.open(path, create=True if store != "SPARQLUpdateStore" else False) + d.open(path, create=True if store != "SPARQLUpdateStore" else False) if store == "SPARQLUpdateStore": try: - graph.store.update("CLEAR ALL") + d.graph() + d.add( + ( + URIRef("http://example.com/s"), + URIRef("http://example.com/p"), + URIRef("http://example.com/o"), + ) + ) + d.store.update("CLEAR ALL") except Exception as e: if "SPARQLStore does not support BNodes! " in str(e): pass else: raise Exception(e) - yield store, graph + yield store, d if store == "SPARQLUpdateStore": try: - graph.store.update("CLEAR ALL") + d.update("CLEAR ALL") except Exception as e: if "SPARQLStore does not support BNodes! " in str(e): pass else: raise Exception(e) - graph.close() + d.close() else: - graph.close() - graph.destroy(path) + d.close() + d.destroy(path) if os.path.isdir(path): shutil.rmtree(path) else: @@ -121,7 +128,7 @@ def test_graph_aware(get_dataset): # empty named graphs if store != "SPARQLUpdateStore": # added graph exists - assert set(x.identifier for x in dataset.contexts()) == set( + assert set(x.identifier for x in dataset.graphs()) == set( [CONTEXT1, DATASET_DEFAULT_GRAPH_ID] ) @@ -131,7 +138,7 @@ def test_graph_aware(get_dataset): g1.add((TAREK, LIKES, PIZZA)) # added graph still exists - assert set(x.identifier for x in dataset.contexts()) == set( + assert set(x.identifier for x in dataset.graphs()) == set( [CONTEXT1, DATASET_DEFAULT_GRAPH_ID] ) @@ -147,14 +154,14 @@ def test_graph_aware(get_dataset): # empty named graphs if store != "SPARQLUpdateStore": # graph still exists, although empty - assert set(x.identifier for x in dataset.contexts()) == set( + assert set(x.identifier for x in dataset.graphs()) == set( [CONTEXT1, DATASET_DEFAULT_GRAPH_ID] ) dataset.remove_graph(CONTEXT1) # graph is gone - assert set(x.identifier for x in dataset.contexts()) == set( + assert set(x.identifier for x in dataset.graphs()) == set( [DATASET_DEFAULT_GRAPH_ID] ) @@ -173,7 +180,7 @@ def test_default_graph(get_dataset): dataset.add((TAREK, LIKES, PIZZA)) assert len(dataset) == 1 # only default exists - assert list(dataset.contexts()) == [dataset.default_context] + assert list(dataset.graphs()) == [dataset.default_context] # removing default graph removes triples but not actual graph dataset.remove_graph(DATASET_DEFAULT_GRAPH_ID) @@ -181,7 +188,7 @@ def test_default_graph(get_dataset): assert len(dataset) == 0 # default still exists - assert set(dataset.contexts()) == set([dataset.default_context]) + assert set(dataset.graphs()) == set([dataset.default_context]) def test_not_union(get_dataset): @@ -193,11 +200,11 @@ def test_not_union(get_dataset): "its default graph as the union of the named graphs" ) - subgraph1 = dataset.graph(CONTEXT1) - subgraph1.add((TAREK, LIKES, PIZZA)) + g1 = dataset.graph(CONTEXT1) + g1.add((TAREK, LIKES, PIZZA)) assert list(dataset.objects(TAREK, None)) == [] - assert list(subgraph1.objects(TAREK, None)) == [PIZZA] + assert list(g1.objects(TAREK, None)) == [PIZZA] def test_iter(get_dataset): @@ -208,16 +215,16 @@ def test_iter(get_dataset): uri_c = URIRef("https://example.com/c") uri_d = URIRef("https://example.com/d") - d.graph(URIRef("https://example.com/subgraph1")) - d.add((uri_a, uri_b, uri_c, URIRef("https://example.com/subgraph1"))) + 
d.graph(URIRef("https://example.com/g1")) + d.add((uri_a, uri_b, uri_c, URIRef("https://example.com/g1"))) d.add( - (uri_a, uri_b, uri_c, URIRef("https://example.com/subgraph1")) + (uri_a, uri_b, uri_c, URIRef("https://example.com/g1")) ) # pointless addition: duplicates above d.graph(URIRef("https://example.com/g2")) d.add((uri_a, uri_b, uri_c, URIRef("https://example.com/g2"))) - d.add((uri_a, uri_b, uri_d, URIRef("https://example.com/subgraph1"))) + d.add((uri_a, uri_b, uri_d, URIRef("https://example.com/g1"))) # traditional iterator i_trad = 0 @@ -232,7 +239,7 @@ def test_iter(get_dataset): assert i_new == i_trad # both should be 3 -def test_subgraph_without_identifier() -> None: +def test_graph_without_identifier() -> None: """ Graphs with no identifies assigned are identified by Skolem IRIs with a prefix that is bound to `genid`. @@ -241,9 +248,9 @@ def test_subgraph_without_identifier() -> None: reviewed at some point. """ - dataset = Dataset() + d = Dataset() - nman = dataset.namespace_manager + nman = d.namespace_manager genid_prefix = URIRef("https://rdflib.github.io/.well-known/genid/rdflib/") @@ -253,15 +260,36 @@ def test_subgraph_without_identifier() -> None: is None ) - subgraph: Graph = dataset.graph() - subgraph.add((EGSCHEME["subject"], EGSCHEME["predicate"], EGSCHEME["object"])) + ex = Namespace("http://example.com/") + g1: Graph = d.graph() + g1.add((ex.subject, ex.predicate, ex.object)) namespaces = set(nman.namespaces()) assert next( (namespace for namespace in namespaces if namespace[0] == "genid"), None ) == ("genid", genid_prefix) - assert f"{subgraph.identifier}".startswith(genid_prefix) + assert f"{g1.identifier}".startswith(genid_prefix) + + # now add a preexisting graph with no identifier + # i.e. not one created within this Dataset object + g2 = Graph() + g2.add((ex.subject, ex.predicate, ex.object)) + d.add_graph(g2) + + iris = 0 + bns = 0 + others = 0 + for g in d.graphs(): + if type(g.identifier) is URIRef: + iris += 1 + elif type(g.identifier) is BNode: + bns += 1 + else: + others += 1 + assert iris == 2 + assert bns == 1 + assert others == 0 def test_not_deprecated():
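For quick reference, the graph-identifier behaviour that the new example file and tests above describe can be reduced to a short, self-contained sketch: a Graph added to a Dataset with an explicit IRI keeps that identifier, while a Graph added without one is assigned a Blank Node identifier. This is an illustrative sketch only, assuming RDFLib 7.x and the public `Dataset`, `Graph`, `URIRef` and `Literal` classes used throughout the patch; the variable names and IRIs here are hypothetical and not taken verbatim from the patch.

```python
# Illustrative sketch of Dataset graph-identifier behaviour (assumes RDFLib 7.x).
from rdflib import Dataset, Graph, Literal, URIRef

d = Dataset()

# A Graph added with an explicit IRI keeps that IRI as its identifier.
g_named = Graph(identifier=URIRef("http://example.com/graph-3"))
g_named.add(
    (
        URIRef("http://example.com/subject-k"),
        URIRef("http://example.com/predicate-k"),
        Literal("Triple K"),
    )
)
d.add_graph(g_named)

# A Graph added without an identifier is assigned a Blank Node identifier.
g_anon = Graph()
g_anon.add(
    (
        URIRef("http://example.com/subject-l"),
        URIRef("http://example.com/predicate-l"),
        Literal("Triple L"),
    )
)
d.add_graph(g_anon)

# Inspect the identifier type of every graph in the Dataset, including the default graph.
for g in d.graphs():
    print(type(g.identifier).__name__, g.identifier)
```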