diff --git a/rdflib/graph.py b/rdflib/graph.py
index 857491a2e..f9e00f2f9 100644
--- a/rdflib/graph.py
+++ b/rdflib/graph.py
@@ -355,6 +355,11 @@
_TripleOrQuadSelectorType = Union["_TripleSelectorType", "_QuadSelectorType"]
_TriplePathType = Tuple["_SubjectType", Path, "_ObjectType"]
_TripleOrTriplePathType = Union["_TripleType", "_TriplePathType"]
+# Shared annotation for the ``triple`` argument of ``triples_choices``.
+# It is a union of three 3-tuple shapes; in each shape exactly one position
+# (subject, predicate or object) is typed as a list of terms, and the other
+# two positions are typed as a single term or ``None``.
+# NOTE(review): the inline annotations this alias replaces did not allow
+# ``None`` in the non-list positions -- confirm the widening is intended.
+_TripleChoiceType = Union[
+ Tuple[List[_SubjectType], Optional[_PredicateType], Optional[_ObjectType]],
+ Tuple[Optional[_SubjectType], List[_PredicateType], Optional[_ObjectType]],
+ Tuple[Optional[_SubjectType], Optional[_PredicateType], List[_ObjectType]],
+]
_GraphT = TypeVar("_GraphT", bound="Graph")
_ConjunctiveGraphT = TypeVar("_ConjunctiveGraphT", bound="ConjunctiveGraph")
@@ -994,11 +999,7 @@ def predicate_objects(
def triples_choices(
self,
- triple: Union[
- Tuple[List[_SubjectType], _PredicateType, _ObjectType],
- Tuple[_SubjectType, List[_PredicateType], _ObjectType],
- Tuple[_SubjectType, _PredicateType, List[_ObjectType]],
- ],
+ triple: _TripleChoiceType,
context: Optional[_ContextType] = None,
) -> Generator[_TripleType, None, None]:
subject, predicate, object_ = triple
@@ -2196,11 +2197,7 @@ def quads(
def triples_choices(
self,
- triple: Union[
- Tuple[List[_SubjectType], _PredicateType, _ObjectType],
- Tuple[_SubjectType, List[_PredicateType], _ObjectType],
- Tuple[_SubjectType, _PredicateType, List[_ObjectType]],
- ],
+ triple: _TripleChoiceType,
context: Optional[_ContextType] = None,
) -> Generator[_TripleType, None, None]:
"""Iterate over all the triples in the entire conjunctive graph"""
@@ -2946,11 +2943,7 @@ def __isub__(self: _GraphT, other: Iterable[_TripleType]) -> NoReturn:
def triples_choices(
self,
- triple: Union[
- Tuple[List[_SubjectType], _PredicateType, _ObjectType],
- Tuple[_SubjectType, List[_PredicateType], _ObjectType],
- Tuple[_SubjectType, _PredicateType, List[_ObjectType]],
- ],
+ triple: _TripleChoiceType,
context: Optional[_ContextType] = None,
) -> Generator[_TripleType, None, None]:
subject, predicate, object_ = triple
diff --git a/rdflib/plugins/parsers/rdfxml.py b/rdflib/plugins/parsers/rdfxml.py
index e0f6e05fa..54fc69567 100644
--- a/rdflib/plugins/parsers/rdfxml.py
+++ b/rdflib/plugins/parsers/rdfxml.py
@@ -298,7 +298,8 @@ def document_element_start(
self, name: Tuple[str, str], qname, attrs: AttributesImpl
) -> None:
if name[0] and URIRef("".join(name)) == RDFVOC.RDF:
- next = self.next
+ # Cheap hack so 2to3 doesn't turn it into __next__
+ next = getattr(self, "next")
next.start = self.node_element_start
next.end = self.node_element_end
else:
@@ -315,7 +316,8 @@ def node_element_start(
current = self.current
absolutize = self.absolutize
- next = self.next
+ # Cheap hack so 2to3 doesn't turn it into __next__
+ next = getattr(self, "next")
next.start = self.property_element_start
next.end = self.property_element_end
@@ -408,7 +410,8 @@ def property_element_start(
current = self.current
absolutize = self.absolutize
- next = self.next
+ # Cheap hack so 2to3 doesn't turn it into __next__
+ next = getattr(self, "next")
object: Optional[_ObjectType] = None
current.data = None
current.list = None
diff --git a/rdflib/plugins/serializers/longturtle.py b/rdflib/plugins/serializers/longturtle.py
index 8de1e52a2..2aaed36e6 100644
--- a/rdflib/plugins/serializers/longturtle.py
+++ b/rdflib/plugins/serializers/longturtle.py
@@ -39,21 +39,20 @@
class LongTurtleSerializer(RecursiveSerializer):
+ """LongTurtle, a Turtle serialization format.
+
+ When the optional parameter ``canon`` is set to :py:obj:`True`, the graph is canonicalized
+ before serialization. This normalizes blank node identifiers and allows for
+ deterministic serialization of the graph. Useful when consistent outputs are required.
+ """
+
short_name = "longturtle"
indentString = " "
def __init__(self, store):
self._ns_rewrite = {}
- store = to_canonical_graph(store)
- content = store.serialize(format="application/n-triples")
- lines = content.split("\n")
- lines.sort()
- graph = Graph()
- graph.parse(
- data="\n".join(lines), format="application/n-triples", skolemize=True
- )
- graph = graph.de_skolemize()
- super(LongTurtleSerializer, self).__init__(graph)
+ self._canon = False
+ super(LongTurtleSerializer, self).__init__(store)
self.keywords = {RDF.type: "a"}
self.reset()
self.stream = None
@@ -83,11 +82,34 @@ def addNamespace(self, prefix, namespace):
super(LongTurtleSerializer, self).addNamespace(prefix, namespace)
return prefix
+    def canonize(self):
+        """Replace ``self.store`` with a canonicalized copy of itself.
+
+        This normalizes blank node identifiers and allows for deterministic
+        serialization of the graph. Nothing happens unless ``self._canon`` is
+        true.
+
+        The canonical graph is rebuilt as a new ``Graph`` from sorted
+        N-Triples lines, so graph-level settings of the original store
+        (namespace manager and base) are copied over explicitly.
+        """
+        if not self._canon:
+            return
+
+        # Capture graph-level settings before ``self.store`` is replaced.
+        namespace_manager = self.store.namespace_manager
+        base = getattr(self.store, "base", None)
+
+        store = to_canonical_graph(self.store)
+        content = store.serialize(format="application/n-triples")
+        # One triple per line in N-Triples; sorting the lines gives a stable
+        # insertion order for the rebuilt graph.
+        lines = content.split("\n")
+        lines.sort()
+        graph = Graph()
+        graph.parse(
+            data="\n".join(lines), format="application/n-triples", skolemize=True
+        )
+        graph = graph.de_skolemize()
+        graph.namespace_manager = namespace_manager
+        # ``serialize`` is documented to fall back to the graph's base when no
+        # explicit base is given; without this the rebuilt graph would have
+        # dropped it.
+        graph.base = base
+        self.store = graph
+
def reset(self):
+ """Reset per-run serializer state and canonicalize the store if requested."""
super(LongTurtleSerializer, self).reset()
self._shortNames = {}
self._started = False
self._ns_rewrite = {}
+ # ``canonize`` returns immediately unless ``self._canon`` is true, so the
+ # ``reset`` call made from ``__init__`` (where ``_canon`` is False) leaves
+ # the store untouched; only ``serialize(..., canon=True)`` triggers it.
+ self.canonize()
def serialize(
self,
@@ -97,6 +119,7 @@ def serialize(
spacious: Optional[bool] = None,
**kwargs: Any,
) -> None:
+ self._canon = kwargs.get("canon", False)
self.reset()
self.stream = stream
# if base is given here, use, if not and a base is set for the graph use that
diff --git a/rdflib/plugins/shared/jsonld/context.py b/rdflib/plugins/shared/jsonld/context.py
index e6b668878..325ee38ce 100644
--- a/rdflib/plugins/shared/jsonld/context.py
+++ b/rdflib/plugins/shared/jsonld/context.py
@@ -8,14 +8,13 @@
# https://github.com/RDFLib/rdflib-jsonld/blob/feature/json-ld-1.1/rdflib_jsonld/context.py
from __future__ import annotations
-from collections import namedtuple
+from collections.abc import Collection, Generator
from typing import (
TYPE_CHECKING,
Any,
- Collection,
Dict,
- Generator,
List,
+ NamedTuple,
Optional,
Set,
Tuple,
@@ -74,27 +73,47 @@ class Defined(int):
class Context:
+ """
+ A JSON-LD context, which contains term definitions
+ """
+
+ _base: str | None
+ #: _alias maps NODE_KEY to list of aliases
+ _alias: dict[str, list[str]]
+ _lookup: dict[tuple[str, Any, Defined | str, bool], Term]
+ _prefixes: dict[str, Any]
+ _context_cache: dict[str, Any]
+
+ version: float
+ language: str | None
+ doc_base: str | None
+ vocab: str | None
+ active: bool
+ propagate: bool
+ terms: dict[str, Any]
+ parent: Context | None
+
def __init__(
self,
source: _ContextSourceType = None,
base: Optional[str] = None,
version: Optional[float] = 1.1,
):
- self.version: float = version or 1.1
+ self._alias = {}
+ self._lookup = {}
+ self._prefixes = {}
+ self._context_cache = {}
+
+ self.version = version or 1.1
self.language = None
- self.vocab: Optional[str] = None
- self._base: Optional[str]
- self.base = base
self.doc_base = base
- self.terms: Dict[str, Any] = {}
- # _alias maps NODE_KEY to list of aliases
- self._alias: Dict[str, List[str]] = {}
- self._lookup: Dict[Tuple[str, Any, Union[Defined, str], bool], Term] = {}
- self._prefixes: Dict[str, Any] = {}
self.active = False
- self.parent: Optional[Context] = None
self.propagate = True
- self._context_cache: Dict[str, Any] = {}
+ self.vocab = None
+ self.base = base
+ self.terms = {}
+ self.parent = None
+
if source:
self.load(source)
@@ -668,9 +687,39 @@ def to_dict(self) -> Dict[str, Any]:
return r
-Term = namedtuple(
- "Term",
- "id, name, type, container, index, language, reverse, context," "prefix, protected",
-)
-
-Term.__new__.__defaults__ = (UNDEF, UNDEF, UNDEF, UNDEF, False, UNDEF, False, False)
+class Term(NamedTuple):
+ """
+ Describes how a JSON key should be interpreted when parsed as RDF.
+
+ ``id`` and ``name`` are required; every other field has a default.
+ """
+
+ #: The IRI or CURIE of the term.
+ id: str
+ #: The name of the term, i.e. an alias for the id.
+ name: str
+ #: The type of the term, such as @id, @json, @none or @vocab
+ type: Defined | str = UNDEF
+ #: The container type, such as @graph, @id, @index, @language, @list, @set or @type.
+ container: Collection[Any] | str | Defined = UNDEF
+ #: A predicate IRI that should be used to interpret keys of this object,
+ #: when used alongside `@container: @index`.
+ #: See https://www.w3.org/TR/json-ld11/#property-based-data-indexing
+ #: Ideally this wouldn't be called 'index' as it overrides the tuple's builtin index() method
+ #: Hence the pyright ignore comment
+ # NOTE(review): the namedtuple-based ``Term`` this class replaces defaulted
+ # ``index`` to UNDEF; confirm that defaulting to None here is intended.
+ index: str | Defined | None = (
+ None # pyright: ignore[reportIncompatibleMethodOverride]
+ )
+ #: The language to be used for values of this term
+ language: str | Defined | None = UNDEF
+ #: Indicates that this term is a reverse property, so subject and object are swapped.
+ #: https://www.w3.org/TR/json-ld11/#reverse-properties
+ reverse: bool = False
+ #: A scoped context used inside values that use this term.
+ #: See https://www.w3.org/TR/json-ld11/#scoped-contexts
+ context: Any = UNDEF
+ #: If true, indicates that this should be used during compaction.
+ #: If false, indicates that this term cannot be used in compaction.
+ #: See https://www.w3.org/TR/json-ld11/#compact-iris
+ # NOTE(review): the namedtuple-based ``Term`` this class replaces defaulted
+ # ``prefix`` to False; confirm that defaulting to None here is intended.
+ prefix: bool | None = None
+ #: If true, marks the term as protected, meaning it cannot be overridden by a subcontext.
+ #: See https://www.w3.org/TR/json-ld11/#protected-term-definitions
+ protected: bool = False
diff --git a/rdflib/plugins/stores/berkeleydb.py b/rdflib/plugins/stores/berkeleydb.py
index 872dc368e..12009787c 100644
--- a/rdflib/plugins/stores/berkeleydb.py
+++ b/rdflib/plugins/stores/berkeleydb.py
@@ -428,7 +428,8 @@ def remove( # type: ignore[override]
cursor = index.cursor(txn=txn)
try:
cursor.set_range(key)
- current = cursor.next
+ # Hack to stop 2to3 converting this to next(cursor)
+ current = getattr(cursor, "next")()
except db.DBNotFoundError:
current = None
cursor.close()
@@ -505,7 +506,8 @@ def triples(
cursor = index.cursor(txn=txn)
try:
cursor.set_range(key)
- current = cursor.next
+ # Cheap hack so 2to3 doesn't convert to next(cursor)
+ current = getattr(cursor, "next")()
except db.DBNotFoundError:
current = None
cursor.close()
@@ -537,7 +539,8 @@ def __len__(self, context: Optional[_ContextType] = None) -> int:
key, value = current
if key.startswith(prefix):
count += 1
- current = cursor.next
+ # Hack to stop 2to3 converting this to next(cursor)
+ current = getattr(cursor, "next")()
else:
break
cursor.close()
@@ -590,7 +593,8 @@ def namespaces(self) -> Generator[Tuple[str, URIRef], None, None]:
while current:
prefix, namespace = current
results.append((prefix.decode("utf-8"), namespace.decode("utf-8")))
- current = cursor.next
+ # Hack to stop 2to3 converting this to next(cursor)
+ current = getattr(cursor, "next")()
cursor.close()
for prefix, namespace in results:
yield prefix, URIRef(namespace)
@@ -633,7 +637,8 @@ def contexts(
cursor = index.cursor()
try:
cursor.set_range(key)
- current = cursor.next
+ # Hack to stop 2to3 converting this to next(cursor)
+ current = getattr(cursor, "next")()
except db.DBNotFoundError:
current = None
cursor.close()
diff --git a/rdflib/plugins/stores/sparqlstore.py b/rdflib/plugins/stores/sparqlstore.py
index f9827cf94..e7a9723e8 100644
--- a/rdflib/plugins/stores/sparqlstore.py
+++ b/rdflib/plugins/stores/sparqlstore.py
@@ -35,6 +35,7 @@
_TripleType,
_ContextType,
_QuadType,
+ _TripleChoiceType,
_TriplePatternType,
_SubjectType,
_PredicateType,
@@ -367,11 +368,7 @@ def triples( # type: ignore[override]
def triples_choices(
self,
- _: Tuple[
- Union[_SubjectType, List[_SubjectType]],
- Union[_PredicateType, List[_PredicateType]],
- Union[_ObjectType, List[_ObjectType]],
- ],
+ _: _TripleChoiceType,
context: Optional[_ContextType] = None,
) -> Generator[
Tuple[
diff --git a/rdflib/store.py b/rdflib/store.py
index 2ca03529a..9cada631d 100644
--- a/rdflib/store.py
+++ b/rdflib/store.py
@@ -36,7 +36,6 @@
Generator,
Iterable,
Iterator,
- List,
Mapping,
Optional,
Tuple,
@@ -49,10 +48,8 @@
from rdflib.graph import (
Graph,
_ContextType,
- _ObjectType,
- _PredicateType,
_QuadType,
- _SubjectType,
+ _TripleChoiceType,
_TriplePatternType,
_TripleType,
)
@@ -281,11 +278,7 @@ def remove(
def triples_choices(
self,
- triple: Union[
- Tuple[List[_SubjectType], _PredicateType, _ObjectType],
- Tuple[_SubjectType, List[_PredicateType], _ObjectType],
- Tuple[_SubjectType, _PredicateType, List[_ObjectType]],
- ],
+ triple: _TripleChoiceType,
context: Optional[_ContextType] = None,
) -> Generator[
Tuple[
diff --git a/test/data/longturtle/longturtle-target.ttl b/test/data/longturtle/longturtle-target.ttl
index 54cf23e9f..b9df06e75 100644
--- a/test/data/longturtle/longturtle-target.ttl
+++ b/test/data/longturtle/longturtle-target.ttl
@@ -1,72 +1,74 @@
+PREFIX cn:
+PREFIX ex:
PREFIX geo:
PREFIX rdf:
-PREFIX schema:
+PREFIX sdo:
PREFIX xsd:
-
- a schema:Person ;
- schema:age 41 ;
- schema:alternateName
+ex:nicholas
+ a sdo:Person ;
+ sdo:age 41 ;
+ sdo:alternateName
[
- schema:name "Dr N.J. Car" ;
+ sdo:name "Dr N.J. Car" ;
] ,
"N.J. Car" ,
"Nick Car" ;
- schema:name
+ sdo:name
[
- a ;
- schema:hasPart
+ a cn:CompoundName ;
+ sdo:hasPart
[
- a ;
- schema:hasPart
+ a cn:CompoundName ;
+ sdo:hasPart
[
- a ;
+ a cn:CompoundName ;
rdf:value "Car" ;
] ,
[
- a ;
+ a cn:CompoundName ;
rdf:value "Maxov" ;
] ;
] ,
[
- a ;
+ a cn:CompoundName ;
rdf:value "Nicholas" ;
] ,
[
- a ;
+ a cn:CompoundName ;
rdf:value "John" ;
] ;
] ;
- schema:worksFor ;
+ sdo:worksFor ;
.
- a schema:Organization ;
- schema:location ;
+ a sdo:Organization ;
+ sdo:location ;
.
- a schema:Place ;
- schema:address
+ a sdo:Place ;
+ sdo:address
[
- a schema:PostalAddress ;
- schema:addressCountry
+ a sdo:PostalAddress ;
+ sdo:addressCountry
[
- schema:identifier "au" ;
- schema:name "Australia" ;
+ sdo:identifier "au" ;
+ sdo:name "Australia" ;
] ;
- schema:addressLocality "Shorncliffe" ;
- schema:addressRegion "QLD" ;
- schema:postalCode 4017 ;
- schema:streetAddress (
+ sdo:addressLocality "Shorncliffe" ;
+ sdo:addressRegion "QLD" ;
+ sdo:postalCode 4017 ;
+ sdo:streetAddress (
72
"Yundah"
"Street"
) ;
] ;
- schema:geo
+ sdo:geo
[
- schema:polygon "POLYGON((153.082403 -27.325801, 153.08241 -27.32582, 153.082943 -27.325612, 153.083010 -27.325742, 153.083543 -27.325521, 153.083456 -27.325365, 153.082403 -27.325801))"^^geo:wktLiteral ;
+ sdo:polygon "POLYGON((153.082403 -27.325801, 153.08241 -27.32582, 153.082943 -27.325612, 153.083010 -27.325742, 153.083543 -27.325521, 153.083456 -27.325365, 153.082403 -27.325801))"^^geo:wktLiteral ;
] ;
- schema:name "KurrawongAI HQ" ;
+ sdo:name "KurrawongAI HQ" ;
.
diff --git a/test/test_graph/test_graph.py b/test/test_graph/test_graph.py
index 639aa710c..0e8227042 100644
--- a/test/test_graph/test_graph.py
+++ b/test/test_graph/test_graph.py
@@ -399,7 +399,7 @@ def test_guess_format_for_parse_http_text_plain():
assert len(graph) > 0
# A url that returns content-type text/html.
- url = "https://github.com/RDFLib/rdflib/issues/2734"
+ url = "https://www.w3.org/TR/REC-rdf-syntax/"
with pytest.raises(PluginException):
graph = Graph().parse(url)
diff --git a/test/test_serializers/test_serializer_longturtle.py b/test/test_serializers/test_serializer_longturtle.py
index c1761b6da..65821784e 100644
--- a/test/test_serializers/test_serializer_longturtle.py
+++ b/test/test_serializers/test_serializer_longturtle.py
@@ -167,7 +167,7 @@ def test_longturtle():
g.bind("sdo", SDO)
# run the long turtle serializer
- output = g.serialize(format="longturtle")
+ output = g.serialize(format="longturtle", canon=True)
# fix the target
current_dir = Path.cwd() # Get the current directory
diff --git a/test/test_serializers/test_serializer_longturtle_sort.py b/test/test_serializers/test_serializer_longturtle_sort.py
index 0e397afaf..044660e3e 100644
--- a/test/test_serializers/test_serializer_longturtle_sort.py
+++ b/test/test_serializers/test_serializer_longturtle_sort.py
@@ -62,55 +62,55 @@ def test_sort_semiblank_graph() -> None:
graph.add((outer_node, EX.has, inner_node))
graph.add((inner_node, RDFS.seeAlso, nested))
- graph_text = graph.serialize(format="longturtle", sort=True)
+ graph_text = graph.serialize(format="longturtle", canon=True)
if first_graph_text == "":
first_graph_text = graph_text
serialization_counter[graph_text] += 1
expected_serialization = """\
-PREFIX ns1:
+PREFIX ex:
PREFIX rdfs:
-ns1:A
+ex:A
rdfs:comment "Thing A" ;
.
-ns1:C
+ex:C
rdfs:comment "Thing C" ;
.
-ns1:B
+ex:B
rdfs:comment "Thing B" ;
.
-[] ns1:has
+[] ex:has
[
- rdfs:seeAlso ns1:A ;
+ rdfs:seeAlso ex:A ;
] ;
.
-[] rdfs:seeAlso ns1:B ;
+[] rdfs:seeAlso ex:B ;
.
-[] ns1:has
+[] ex:has
[
- rdfs:seeAlso ns1:C ;
+ rdfs:seeAlso ex:C ;
] ;
.
-[] rdfs:seeAlso ns1:A ;
+[] rdfs:seeAlso ex:A ;
.
-[] rdfs:seeAlso ns1:C ;
+[] rdfs:seeAlso ex:C ;
.
-[] rdfs:seeAlso ns1:B ;
+[] rdfs:seeAlso ex:B ;
.
-[] ns1:has
+[] ex:has
[
- rdfs:seeAlso ns1:B ;
+ rdfs:seeAlso ex:B ;
] ;
.