basic hextuples serializer

author: nicholascar <nicholas.car@surroundaustralia.com> 2021-12-03 17:54:42 +1000
committer: nicholascar <nicholas.car@surroundaustralia.com> 2021-12-03 17:54:42 +1000
commit: 7c3353d5678cff6beab20bb7cbfb3a48af8e107a (patch)
tree: 18d90c4d36f8c3c2ea329714a63bd49c17b95bd8
parent: 3a783a5886b7d699f13c5b9762b1bfc46e2a97b5 (diff)
download: rdflib-7c3353d5678cff6beab20bb7cbfb3a48af8e107a.tar.gz
3 files changed, 114 insertions, 0 deletions
diff --git a/rdflib/plugin.py b/rdflib/plugin.py
index 63c3ead7..96a2b610 100644
--- a/rdflib/plugin.py
+++ b/rdflib/plugin.py
@@ -348,6 +348,12 @@ register(
     "rdflib.plugins.serializers.trig",
     "TrigSerializer",
 )
+register(
+    "hext",  # format name, as used by serialize(format="hext")
+    Serializer,
+    "rdflib.plugins.serializers.hext",
+    "HextuplesSerializer",
+)
 
 # Register Triple Parsers
 register(
diff --git a/rdflib/plugins/serializers/hext.py b/rdflib/plugins/serializers/hext.py
new file mode 100644
index 00000000..85a3b151
--- /dev/null
+++ b/rdflib/plugins/serializers/hext.py
@@ -0,0 +1,54 @@
+"""
+HextuplesSerializer RDF graph serializer for RDFLib.
+See <https://github.com/ontola/hextuples> for details about the format.
+"""
+from typing import IO, Optional
+
+from rdflib.graph import Graph
+from rdflib.term import Literal, URIRef, BNode
+from rdflib.serializer import Serializer
+
+__all__ = ["HextuplesSerializer"]
+
+
+class HextuplesSerializer(Serializer):
+    """Serializes the contexts of an RDF store as Hextuples, one per line."""
+
+    def __init__(self, store: Graph):
+        super().__init__(store)
+        self.encoding = "utf-8"  # kept when serialize() is given encoding=None
+
+    def serialize(
+        self,
+        stream: IO[bytes],
+        base: Optional[str] = None,
+        encoding: Optional[str] = None,
+        **args
+    ):
+        """Write each triple of each store context to ``stream`` as a hextuple.
+
+        ``base`` and ``args`` are unused; ``encoding=None`` keeps the current one.
+        """
+        self.encoding = encoding or self.encoding  # None must not reach encode()
+        for context in self.store.contexts():
+            for triple in context:
+                stream.write(_hex_line(triple, context.identifier).encode(self.encoding))
+        stream.write("\n".encode(self.encoding))  # output ends with an extra newline
+
+
+def _hex_line(triple, context):
+    """Format one triple and its graph identifier as a bracketed hextuple line."""
+    s, p, o = triple
+    if isinstance(o, Literal):  # literal value is emitted as-is, without quotes
+        value, datatype, lang = o, o.datatype or "", o.language or ""
+    else:
+        value, datatype, lang = _iri_or_bn(o), "", ""
+    cells = (_iri_or_bn(s), _iri_or_bn(p), value, f'"{datatype}"', f'"{lang}"', _iri_or_bn(context))
+    return "[%s, %s, %s, %s, %s, %s]\n" % cells
+
+
+def _iri_or_bn(i_):
+    """Return ``i_`` in double quotes: a URIRef as its plain IRI, else its N3 form."""
+    # isinstance (not an exact type match) so URIRef subclasses also avoid n3().
+    text = i_ if isinstance(i_, URIRef) else i_.n3()
+    return f'"{text}"'
diff --git a/test/test_serialize_hext.py b/test/test_serialize_hext.py
new file mode 100644
index 00000000..100d2aa9
--- /dev/null
+++ b/test/test_serialize_hext.py
@@ -0,0 +1,54 @@
+from rdflib import Dataset, URIRef, Namespace, Literal, BNode
+from test import TEST_DIR
+
+
+def test_hext_01():
+    """Serialize a small TriG dataset as "hext" and check for known hextuples."""
+    d = Dataset()
+    trig = """
+            PREFIX ex: <http://example.com/>
+            PREFIX owl: <http://www.w3.org/2002/07/owl#>
+            PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+            PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
+
+            ex:g1 {
+                ex:s1
+                    ex:p1 ex:o1 , ex:o2 ;
+                    ex:p2 [
+                        a owl:Thing ;
+                        rdf:value "thingy" ;
+                    ] ;
+                    ex:p3 "Object 3" , "Object 4 - English"@en ;
+                    ex:p4 "2021-12-03"^^xsd:date ;
+                    ex:p5 42 ;
+                    ex:p6 "42" ;
+                .
+            }
+
+            ex:g2 {
+                ex:s1
+                    ex:p1 ex:o1 , ex:o2 ;
+                .
+                ex:s11 ex:p11 ex:o11 , ex:o12 .
+            }
+
+            # default graph triples
+            ex:s1 ex:p1 ex:o1 , ex:o2 .
+            ex:s21 ex:p21 ex:o21 , ex:o22 .
+
+           """
+    d.parse(data=trig, format="trig")
+    out = d.serialize(format="hext")
+    # Substrings that must each occur in some line of the serialized output.
+    expected = [
+        '["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "", "", "http://example.com/g2"]',
+        '["http://example.com/s1", "http://example.com/p3", Object 3, "", "", "http://example.com/g1"]',
+        '["http://example.com/s1", "http://example.com/p5", 42, "http://www.w3.org/2001/XMLSchema#integer", "", "http://example.com/g1"]',
+        # subject is the anonymous [ ... ] node, so only the tail of the line is matched
+        '"http://www.w3.org/1999/02/22-rdf-syntax-ns#value", thingy, "", "", "http://example.com/g1"]',
+        '["http://example.com/s1", "http://example.com/p4", 2021-12-03, "http://www.w3.org/2001/XMLSchema#date", "", "http://example.com/g1"]',
+    ]
+    lines = out.splitlines()
+    missing = [e for e in expected if not any(e in line for line in lines)]
+    assert not missing, f"expected hextuples not found in output: {missing}"
author	nicholascar <nicholas.car@surroundaustralia.com>	2021-12-03 17:54:42 +1000
committer	nicholascar <nicholas.car@surroundaustralia.com>	2021-12-03 17:54:42 +1000
commit	7c3353d5678cff6beab20bb7cbfb3a48af8e107a (patch)
tree	18d90c4d36f8c3c2ea329714a63bd49c17b95bd8
parent	3a783a5886b7d699f13c5b9762b1bfc46e2a97b5 (diff)
download	rdflib-7c3353d5678cff6beab20bb7cbfb3a48af8e107a.tar.gz