diff --git a/rdflib/graph.py b/rdflib/graph.py index 857491a2e..f9e00f2f9 100644 --- a/rdflib/graph.py +++ b/rdflib/graph.py @@ -355,6 +355,11 @@ _TripleOrQuadSelectorType = Union["_TripleSelectorType", "_QuadSelectorType"] _TriplePathType = Tuple["_SubjectType", Path, "_ObjectType"] _TripleOrTriplePathType = Union["_TripleType", "_TriplePathType"] +_TripleChoiceType = Union[ + Tuple[List[_SubjectType], Optional[_PredicateType], Optional[_ObjectType]], + Tuple[Optional[_SubjectType], List[_PredicateType], Optional[_ObjectType]], + Tuple[Optional[_SubjectType], Optional[_PredicateType], List[_ObjectType]], +] _GraphT = TypeVar("_GraphT", bound="Graph") _ConjunctiveGraphT = TypeVar("_ConjunctiveGraphT", bound="ConjunctiveGraph") @@ -994,11 +999,7 @@ def predicate_objects( def triples_choices( self, - triple: Union[ - Tuple[List[_SubjectType], _PredicateType, _ObjectType], - Tuple[_SubjectType, List[_PredicateType], _ObjectType], - Tuple[_SubjectType, _PredicateType, List[_ObjectType]], - ], + triple: _TripleChoiceType, context: Optional[_ContextType] = None, ) -> Generator[_TripleType, None, None]: subject, predicate, object_ = triple @@ -2196,11 +2197,7 @@ def quads( def triples_choices( self, - triple: Union[ - Tuple[List[_SubjectType], _PredicateType, _ObjectType], - Tuple[_SubjectType, List[_PredicateType], _ObjectType], - Tuple[_SubjectType, _PredicateType, List[_ObjectType]], - ], + triple: _TripleChoiceType, context: Optional[_ContextType] = None, ) -> Generator[_TripleType, None, None]: """Iterate over all the triples in the entire conjunctive graph""" @@ -2946,11 +2943,7 @@ def __isub__(self: _GraphT, other: Iterable[_TripleType]) -> NoReturn: def triples_choices( self, - triple: Union[ - Tuple[List[_SubjectType], _PredicateType, _ObjectType], - Tuple[_SubjectType, List[_PredicateType], _ObjectType], - Tuple[_SubjectType, _PredicateType, List[_ObjectType]], - ], + triple: _TripleChoiceType, context: Optional[_ContextType] = None, ) -> Generator[_TripleType, None, None]: subject, predicate, object_ = triple diff --git a/rdflib/plugins/parsers/rdfxml.py b/rdflib/plugins/parsers/rdfxml.py index e0f6e05fa..54fc69567 100644 --- a/rdflib/plugins/parsers/rdfxml.py +++ b/rdflib/plugins/parsers/rdfxml.py @@ -298,7 +298,8 @@ def document_element_start( self, name: Tuple[str, str], qname, attrs: AttributesImpl ) -> None: if name[0] and URIRef("".join(name)) == RDFVOC.RDF: - next = self.next + # Cheap hack so 2to3 doesn't turn it into __next__ + next = getattr(self, "next") next.start = self.node_element_start next.end = self.node_element_end else: @@ -315,7 +316,8 @@ def node_element_start( current = self.current absolutize = self.absolutize - next = self.next + # Cheap hack so 2to3 doesn't turn it into __next__ + next = getattr(self, "next") next.start = self.property_element_start next.end = self.property_element_end @@ -408,7 +410,8 @@ def property_element_start( current = self.current absolutize = self.absolutize - next = self.next + # Cheap hack so 2to3 doesn't turn it into __next__ + next = getattr(self, "next") object: Optional[_ObjectType] = None current.data = None current.list = None diff --git a/rdflib/plugins/serializers/longturtle.py b/rdflib/plugins/serializers/longturtle.py index 8de1e52a2..2aaed36e6 100644 --- a/rdflib/plugins/serializers/longturtle.py +++ b/rdflib/plugins/serializers/longturtle.py @@ -39,21 +39,20 @@ class LongTurtleSerializer(RecursiveSerializer): + """LongTurtle, a Turtle serialization format. + + When the optional parameter ``canon`` is set to :py:obj:`True`, the graph is canonicalized + before serialization. This normalizes blank node identifiers and allows for + deterministic serialization of the graph. Useful when consistent outputs are required. + """ + short_name = "longturtle" indentString = " " def __init__(self, store): self._ns_rewrite = {} - store = to_canonical_graph(store) - content = store.serialize(format="application/n-triples") - lines = content.split("\n") - lines.sort() - graph = Graph() - graph.parse( - data="\n".join(lines), format="application/n-triples", skolemize=True - ) - graph = graph.de_skolemize() - super(LongTurtleSerializer, self).__init__(graph) + self._canon = False + super(LongTurtleSerializer, self).__init__(store) self.keywords = {RDF.type: "a"} self.reset() self.stream = None @@ -83,11 +82,34 @@ def addNamespace(self, prefix, namespace): super(LongTurtleSerializer, self).addNamespace(prefix, namespace) return prefix + def canonize(self): + """Apply canonicalization to the store. + + This normalizes blank node identifiers and allows for deterministic + serialization of the graph. + """ + if not self._canon: + return + + namespace_manager = self.store.namespace_manager + store = to_canonical_graph(self.store) + content = store.serialize(format="application/n-triples") + lines = content.split("\n") + lines.sort() + graph = Graph() + graph.parse( + data="\n".join(lines), format="application/n-triples", skolemize=True + ) + graph = graph.de_skolemize() + graph.namespace_manager = namespace_manager + self.store = graph + def reset(self): super(LongTurtleSerializer, self).reset() self._shortNames = {} self._started = False self._ns_rewrite = {} + self.canonize() def serialize( self, @@ -97,6 +119,7 @@ def serialize( spacious: Optional[bool] = None, **kwargs: Any, ) -> None: + self._canon = kwargs.get("canon", False) self.reset() self.stream = stream # if base is given here, use, if not and a base is set for the graph use that diff --git a/rdflib/plugins/shared/jsonld/context.py b/rdflib/plugins/shared/jsonld/context.py index e6b668878..325ee38ce 100644 --- a/rdflib/plugins/shared/jsonld/context.py +++ b/rdflib/plugins/shared/jsonld/context.py @@ -8,14 +8,13 @@ # https://github.com/RDFLib/rdflib-jsonld/blob/feature/json-ld-1.1/rdflib_jsonld/context.py from __future__ import annotations -from collections import namedtuple +from collections.abc import Collection, Generator from typing import ( TYPE_CHECKING, Any, - Collection, Dict, - Generator, List, + NamedTuple, Optional, Set, Tuple, @@ -74,27 +73,47 @@ class Defined(int): class Context: + """ + A JSON-LD context, which contains term definitions + """ + + _base: str | None + #: _alias maps NODE_KEY to list of aliases + _alias: dict[str, list[str]] + _lookup: dict[tuple[str, Any, Defined | str, bool], Term] + _prefixes: dict[str, Any] + _context_cache: dict[str, Any] + + version: float + language: str | None + doc_base: str | None + vocab: str | None + active: bool + propagate: bool + terms: dict[str, Any] + parent: Context | None + def __init__( self, source: _ContextSourceType = None, base: Optional[str] = None, version: Optional[float] = 1.1, ): - self.version: float = version or 1.1 + self._alias = {} + self._lookup = {} + self._prefixes = {} + self._context_cache = {} + + self.version = version or 1.1 self.language = None - self.vocab: Optional[str] = None - self._base: Optional[str] - self.base = base self.doc_base = base - self.terms: Dict[str, Any] = {} - # _alias maps NODE_KEY to list of aliases - self._alias: Dict[str, List[str]] = {} - self._lookup: Dict[Tuple[str, Any, Union[Defined, str], bool], Term] = {} - self._prefixes: Dict[str, Any] = {} self.active = False - self.parent: Optional[Context] = None self.propagate = True - self._context_cache: Dict[str, Any] = {} + self.vocab = None + self.base = base + self.terms = {} + self.parent = None + if source: self.load(source) @@ -668,9 +687,39 @@ def to_dict(self) -> Dict[str, Any]: return r -Term = namedtuple( - "Term", - "id, name, type, container, index, language, reverse, context," "prefix, protected", -) - -Term.__new__.__defaults__ = (UNDEF, UNDEF, UNDEF, UNDEF, False, UNDEF, False, False) +class Term(NamedTuple): + """ + Describes how a JSON key should be interpreted when parsed as RDF + """ + + #: The IRI or CURIE of the term. + id: str + #: The name of the term, ie an alias for the id. + name: str + #: The type of the term, such as @id, @json, @none or @vocab + type: Defined | str = UNDEF + #: The container type, such as @graph, @id, @index, @language, @list, @set or @type, + container: Collection[Any] | str | Defined = UNDEF + #: A predicate IRI that should be used to interpret keys of this object, + #: when used alongside `@container: @index`. + #: See https://www.w3.org/TR/json-ld11/#property-based-data-indexing + #: Ideally this wouldn't be called 'index' as it overrides the tuple's builtin index() method + #: Hence the pyright ignore comment + index: str | Defined | None = ( + None # pyright: ignore[reportIncompatibleMethodOverride] + ) + #: The language to be used for values of this term + language: str | Defined | None = UNDEF + #: Indicates that this term is a reverse property, so subject and object are swapped. + #: https://www.w3.org/TR/json-ld11/#reverse-properties + reverse: bool = False + #: A scoped context used inside values that use this term. + #: See https://www.w3.org/TR/json-ld11/#scoped-contexts + context: Any = UNDEF + #: If true, indicates that this should be used during compaction. + #: If false, indicates that this term cannot be used in compaction. + #: See https://www.w3.org/TR/json-ld11/#compact-iris + prefix: bool | None = None + #: If true, marks the term as protected, meaning it cannot be overridden by a subcontext. + #: See https://www.w3.org/TR/json-ld11/#protected-term-definitions + protected: bool = False diff --git a/rdflib/plugins/stores/berkeleydb.py b/rdflib/plugins/stores/berkeleydb.py index 872dc368e..12009787c 100644 --- a/rdflib/plugins/stores/berkeleydb.py +++ b/rdflib/plugins/stores/berkeleydb.py @@ -428,7 +428,8 @@ def remove( # type: ignore[override] cursor = index.cursor(txn=txn) try: cursor.set_range(key) - current = cursor.next + # Hack to stop 2to3 converting this to next(cursor) + current = getattr(cursor, "next")() except db.DBNotFoundError: current = None cursor.close() @@ -505,7 +506,8 @@ def triples( cursor = index.cursor(txn=txn) try: cursor.set_range(key) - current = cursor.next + # Cheap hack so 2to3 doesn't convert to next(cursor) + current = getattr(cursor, "next")() except db.DBNotFoundError: current = None cursor.close() @@ -537,7 +539,8 @@ def __len__(self, context: Optional[_ContextType] = None) -> int: key, value = current if key.startswith(prefix): count += 1 - current = cursor.next + # Hack to stop 2to3 converting this to next(cursor) + current = getattr(cursor, "next")() else: break cursor.close() @@ -590,7 +593,8 @@ def namespaces(self) -> Generator[Tuple[str, URIRef], None, None]: while current: prefix, namespace = current results.append((prefix.decode("utf-8"), namespace.decode("utf-8"))) - current = cursor.next + # Hack to stop 2to3 converting this to next(cursor) + current = getattr(cursor, "next")() cursor.close() for prefix, namespace in results: yield prefix, URIRef(namespace) @@ -633,7 +637,8 @@ def contexts( cursor = index.cursor() try: cursor.set_range(key) - current = cursor.next + # Hack to stop 2to3 converting this to next(cursor) + current = getattr(cursor, "next")() except db.DBNotFoundError: current = None cursor.close() diff --git a/rdflib/plugins/stores/sparqlstore.py b/rdflib/plugins/stores/sparqlstore.py index f9827cf94..e7a9723e8 100644 --- a/rdflib/plugins/stores/sparqlstore.py +++ b/rdflib/plugins/stores/sparqlstore.py @@ -35,6 +35,7 @@ _TripleType, _ContextType, _QuadType, + _TripleChoiceType, _TriplePatternType, _SubjectType, _PredicateType, @@ -367,11 +368,7 @@ def triples( # type: ignore[override] def triples_choices( self, - _: Tuple[ - Union[_SubjectType, List[_SubjectType]], - Union[_PredicateType, List[_PredicateType]], - Union[_ObjectType, List[_ObjectType]], - ], + _: _TripleChoiceType, context: Optional[_ContextType] = None, ) -> Generator[ Tuple[ diff --git a/rdflib/store.py b/rdflib/store.py index 2ca03529a..9cada631d 100644 --- a/rdflib/store.py +++ b/rdflib/store.py @@ -36,7 +36,6 @@ Generator, Iterable, Iterator, - List, Mapping, Optional, Tuple, @@ -49,10 +48,8 @@ from rdflib.graph import ( Graph, _ContextType, - _ObjectType, - _PredicateType, _QuadType, - _SubjectType, + _TripleChoiceType, _TriplePatternType, _TripleType, ) @@ -281,11 +278,7 @@ def remove( def triples_choices( self, - triple: Union[ - Tuple[List[_SubjectType], _PredicateType, _ObjectType], - Tuple[_SubjectType, List[_PredicateType], _ObjectType], - Tuple[_SubjectType, _PredicateType, List[_ObjectType]], - ], + triple: _TripleChoiceType, context: Optional[_ContextType] = None, ) -> Generator[ Tuple[ diff --git a/test/data/longturtle/longturtle-target.ttl b/test/data/longturtle/longturtle-target.ttl index 54cf23e9f..b9df06e75 100644 --- a/test/data/longturtle/longturtle-target.ttl +++ b/test/data/longturtle/longturtle-target.ttl @@ -1,72 +1,74 @@ +PREFIX cn: +PREFIX ex: PREFIX geo: PREFIX rdf: -PREFIX schema: +PREFIX sdo: PREFIX xsd: - - a schema:Person ; - schema:age 41 ; - schema:alternateName +ex:nicholas + a sdo:Person ; + sdo:age 41 ; + sdo:alternateName [ - schema:name "Dr N.J. Car" ; + sdo:name "Dr N.J. Car" ; ] , "N.J. Car" , "Nick Car" ; - schema:name + sdo:name [ - a ; - schema:hasPart + a cn:CompoundName ; + sdo:hasPart [ - a ; - schema:hasPart + a cn:CompoundName ; + sdo:hasPart [ - a ; + a cn:CompoundName ; rdf:value "Car" ; ] , [ - a ; + a cn:CompoundName ; rdf:value "Maxov" ; ] ; ] , [ - a ; + a cn:CompoundName ; rdf:value "Nicholas" ; ] , [ - a ; + a cn:CompoundName ; rdf:value "John" ; ] ; ] ; - schema:worksFor ; + sdo:worksFor ; . - a schema:Organization ; - schema:location ; + a sdo:Organization ; + sdo:location ; . - a schema:Place ; - schema:address + a sdo:Place ; + sdo:address [ - a schema:PostalAddress ; - schema:addressCountry + a sdo:PostalAddress ; + sdo:addressCountry [ - schema:identifier "au" ; - schema:name "Australia" ; + sdo:identifier "au" ; + sdo:name "Australia" ; ] ; - schema:addressLocality "Shorncliffe" ; - schema:addressRegion "QLD" ; - schema:postalCode 4017 ; - schema:streetAddress ( + sdo:addressLocality "Shorncliffe" ; + sdo:addressRegion "QLD" ; + sdo:postalCode 4017 ; + sdo:streetAddress ( 72 "Yundah" "Street" ) ; ] ; - schema:geo + sdo:geo [ - schema:polygon "POLYGON((153.082403 -27.325801, 153.08241 -27.32582, 153.082943 -27.325612, 153.083010 -27.325742, 153.083543 -27.325521, 153.083456 -27.325365, 153.082403 -27.325801))"^^geo:wktLiteral ; + sdo:polygon "POLYGON((153.082403 -27.325801, 153.08241 -27.32582, 153.082943 -27.325612, 153.083010 -27.325742, 153.083543 -27.325521, 153.083456 -27.325365, 153.082403 -27.325801))"^^geo:wktLiteral ; ] ; - schema:name "KurrawongAI HQ" ; + sdo:name "KurrawongAI HQ" ; . diff --git a/test/test_graph/test_graph.py b/test/test_graph/test_graph.py index 639aa710c..0e8227042 100644 --- a/test/test_graph/test_graph.py +++ b/test/test_graph/test_graph.py @@ -399,7 +399,7 @@ def test_guess_format_for_parse_http_text_plain(): assert len(graph) > 0 # A url that returns content-type text/html. - url = "https://github.com/RDFLib/rdflib/issues/2734" + url = "https://www.w3.org/TR/REC-rdf-syntax/" with pytest.raises(PluginException): graph = Graph().parse(url) diff --git a/test/test_serializers/test_serializer_longturtle.py b/test/test_serializers/test_serializer_longturtle.py index c1761b6da..65821784e 100644 --- a/test/test_serializers/test_serializer_longturtle.py +++ b/test/test_serializers/test_serializer_longturtle.py @@ -167,7 +167,7 @@ def test_longturtle(): g.bind("sdo", SDO) # run the long turtle serializer - output = g.serialize(format="longturtle") + output = g.serialize(format="longturtle", canon=True) # fix the target current_dir = Path.cwd() # Get the current directory diff --git a/test/test_serializers/test_serializer_longturtle_sort.py b/test/test_serializers/test_serializer_longturtle_sort.py index 0e397afaf..044660e3e 100644 --- a/test/test_serializers/test_serializer_longturtle_sort.py +++ b/test/test_serializers/test_serializer_longturtle_sort.py @@ -62,55 +62,55 @@ def test_sort_semiblank_graph() -> None: graph.add((outer_node, EX.has, inner_node)) graph.add((inner_node, RDFS.seeAlso, nested)) - graph_text = graph.serialize(format="longturtle", sort=True) + graph_text = graph.serialize(format="longturtle", canon=True) if first_graph_text == "": first_graph_text = graph_text serialization_counter[graph_text] += 1 expected_serialization = """\ -PREFIX ns1: +PREFIX ex: PREFIX rdfs: -ns1:A +ex:A rdfs:comment "Thing A" ; . -ns1:C +ex:C rdfs:comment "Thing C" ; . -ns1:B +ex:B rdfs:comment "Thing B" ; . -[] ns1:has +[] ex:has [ - rdfs:seeAlso ns1:A ; + rdfs:seeAlso ex:A ; ] ; . -[] rdfs:seeAlso ns1:B ; +[] rdfs:seeAlso ex:B ; . -[] ns1:has +[] ex:has [ - rdfs:seeAlso ns1:C ; + rdfs:seeAlso ex:C ; ] ; . -[] rdfs:seeAlso ns1:A ; +[] rdfs:seeAlso ex:A ; . -[] rdfs:seeAlso ns1:C ; +[] rdfs:seeAlso ex:C ; . -[] rdfs:seeAlso ns1:B ; +[] rdfs:seeAlso ex:B ; . -[] ns1:has +[] ex:has [ - rdfs:seeAlso ns1:B ; + rdfs:seeAlso ex:B ; ] ; .