summary refs log tree commit diff
diff options
context:
space:
mode:
author Mark Watts <watts.mark2015@gmail.com> 2020-05-31 21:48:27 -0500
committer Mark Watts <watts.mark2015@gmail.com> 2020-05-31 22:14:36 -0500
commit 1a5df0084c6f361c9059ac19e64d632ae503b9b3 (patch)
tree 63d95b5b789776442a98d5aa0e9d99a17d5a5e2a
parent 6b5bd37ccc67bdec62d2e36d174eb7933b5020b2 (diff)
download rdflib-1a5df0084c6f361c9059ac19e64d632ae503b9b3.tar.gz
Adding bnode_context option to NT and NQuads parsers (#980)
- Also, updating the shared context so it works properly with Graph.parse
-rw-r--r-- rdflib/plugins/parsers/nquads.py 12
-rw-r--r-- rdflib/plugins/parsers/nt.py 13
-rw-r--r-- rdflib/plugins/parsers/ntriples.py 31
-rw-r--r-- test/nquads.rdflib/bnode_context.nquads 3
-rw-r--r-- test/nquads.rdflib/bnode_context_obj_bnodes.nquads 3
-rw-r--r-- test/test_nquads.py 64
-rw-r--r-- test/test_nt_misc.py 16
7 files changed, 116 insertions, 26 deletions
diff --git a/rdflib/plugins/parsers/nquads.py b/rdflib/plugins/parsers/nquads.py
index 0c29fc4c..0053ccab 100644
--- a/rdflib/plugins/parsers/nquads.py
+++ b/rdflib/plugins/parsers/nquads.py
@@ -40,7 +40,7 @@ __all__ = ["NQuadsParser"]
class NQuadsParser(NTriplesParser):
- def parse(self, inputsource, sink, **kwargs):
+ def parse(self, inputsource, sink, bnode_context=None, **kwargs):
"""Parse f as an N-Triples file."""
assert sink.store.context_aware, (
"NQuadsParser must be given" " a context aware store."
@@ -61,27 +61,27 @@ class NQuadsParser(NTriplesParser):
if self.line is None:
break
try:
- self.parseline()
+ self.parseline(bnode_context)
except ParseError as msg:
raise ParseError("Invalid line (%s):\n%r" % (msg, __line))
return self.sink
- def parseline(self):
+ def parseline(self, bnode_context=None):
self.eat(r_wspace)
if (not self.line) or self.line.startswith(("#")):
return # The line is empty or a comment
- subject = self.subject()
+ subject = self.subject(bnode_context)
self.eat(r_wspace)
predicate = self.predicate()
self.eat(r_wspace)
- obj = self.object()
+ obj = self.object(bnode_context)
self.eat(r_wspace)
- context = self.uriref() or self.nodeid() or self.sink.identifier
+ context = self.uriref() or self.nodeid(bnode_context) or self.sink.identifier
self.eat(r_tail)
if self.line:
diff --git a/rdflib/plugins/parsers/nt.py b/rdflib/plugins/parsers/nt.py
index d7d3b336..b2ec445d 100644
--- a/rdflib/plugins/parsers/nt.py
+++ b/rdflib/plugins/parsers/nt.py
@@ -20,8 +20,17 @@ class NTParser(Parser):
def __init__(self):
super(NTParser, self).__init__()
- def parse(self, source, sink, baseURI=None):
+ def parse(self, source, sink, **kwargs):
+ '''
+ Parse the NT format
+
+ :type source: `rdflib.parser.InputSource`
+ :param source: the source of NT-formatted data
+ :type sink: `rdflib.graph.Graph`
+ :param sink: where to send parsed triples
+ :param kwargs: Additional arguments to pass to `.NTriplesParser.parse`
+ '''
f = source.getByteStream() # TODO getCharacterStream?
parser = NTriplesParser(NTSink(sink))
- parser.parse(f)
+ parser.parse(f, **kwargs)
f.close()
diff --git a/rdflib/plugins/parsers/ntriples.py b/rdflib/plugins/parsers/ntriples.py
index 21c931c6..2e83204c 100644
--- a/rdflib/plugins/parsers/ntriples.py
+++ b/rdflib/plugins/parsers/ntriples.py
@@ -133,8 +133,6 @@ class NTriplesParser(object):
else:
self._bnode_ids = {}
- self._parse_bnode_ids = None
-
if sink is not None:
self.sink = sink
else:
@@ -156,13 +154,12 @@ class NTriplesParser(object):
self.file = f
self.buffer = ""
- self._parse_bnode_ids = bnode_context
while True:
self.line = self.readline()
if self.line is None:
break
try:
- self.parseline()
+ self.parseline(bnode_context=bnode_context)
except ParseError:
raise ParseError("Invalid line: %r" % self.line)
return self.sink
@@ -200,18 +197,18 @@ class NTriplesParser(object):
return None
self.buffer += buffer
- def parseline(self):
+ def parseline(self, bnode_context=None):
self.eat(r_wspace)
if (not self.line) or self.line.startswith("#"):
return # The line is empty or a comment
- subject = self.subject()
+ subject = self.subject(bnode_context)
self.eat(r_wspaces)
predicate = self.predicate()
self.eat(r_wspaces)
- object = self.object()
+ object = self.object(bnode_context)
self.eat(r_tail)
if self.line:
@@ -230,9 +227,9 @@ class NTriplesParser(object):
self.line = self.line[m.end():]
return m
- def subject(self):
+ def subject(self, bnode_context=None):
# @@ Consider using dictionary cases
- subj = self.uriref() or self.nodeid()
+ subj = self.uriref() or self.nodeid(bnode_context)
if not subj:
raise ParseError("Subject must be uriref or nodeID")
return subj
@@ -243,8 +240,8 @@ class NTriplesParser(object):
raise ParseError("Predicate must be uriref")
return pred
- def object(self):
- objt = self.uriref() or self.nodeid() or self.literal()
+ def object(self, bnode_context=None):
+ objt = self.uriref() or self.nodeid(bnode_context) or self.literal()
if objt is False:
raise ParseError("Unrecognised object type")
return objt
@@ -257,15 +254,13 @@ class NTriplesParser(object):
return URI(uri)
return False
- def nodeid(self):
+ def nodeid(self, bnode_context=None):
if self.peek("_"):
# Fix for https://github.com/RDFLib/rdflib/issues/204
- if self._parse_bnode_ids is not None:
- bnode_ids = self._parse_bnode_ids
- else:
- bnode_ids = self._bnode_ids
+ if bnode_context is None:
+ bnode_context = self._bnode_ids
bnode_id = self.eat(r_nodeid).group(1)
- new_id = bnode_ids.get(bnode_id, None)
+ new_id = bnode_context.get(bnode_id, None)
if new_id is not None:
# Re-map to id specfic to this doc
return bNode(new_id)
@@ -273,7 +268,7 @@ class NTriplesParser(object):
# Replace with freshly-generated document-specific BNode id
bnode = bNode()
# Store the mapping
- self._bnode_ids[bnode_id] = bnode
+ bnode_context[bnode_id] = bnode
return bnode
return False
diff --git a/test/nquads.rdflib/bnode_context.nquads b/test/nquads.rdflib/bnode_context.nquads
new file mode 100644
index 00000000..59e9350f
--- /dev/null
+++ b/test/nquads.rdflib/bnode_context.nquads
@@ -0,0 +1,3 @@
+_:bnode1 <http://xmlns.com/foaf/0.1/Friend> "Michele" _:blah .
+_:bnode2 <http://xmlns.com/foaf/0.1/Friend> "Kevin" _:bluh .
+
diff --git a/test/nquads.rdflib/bnode_context_obj_bnodes.nquads b/test/nquads.rdflib/bnode_context_obj_bnodes.nquads
new file mode 100644
index 00000000..5b90fb00
--- /dev/null
+++ b/test/nquads.rdflib/bnode_context_obj_bnodes.nquads
@@ -0,0 +1,3 @@
+_:bnode1 <http://xmlns.com/foaf/0.1/Friend> "Michele" <http://example.org/alice/foaf2.rdf> .
+<http://example.org/Kevin> <http://xmlns.com/foaf/0.1/Friend> _:bnode2 <http://example.org/alice/foaf3.rdf> .
+
diff --git a/test/test_nquads.py b/test/test_nquads.py
index c25bc7ed..da9e8e2d 100644
--- a/test/test_nquads.py
+++ b/test/test_nquads.py
@@ -67,5 +67,69 @@ class NQuadsParserTest(unittest.TestCase):
)
+class BnodeContextTest(unittest.TestCase):
+ def setUp(self):
+ self.data = open("test/nquads.rdflib/bnode_context.nquads", "rb")
+ self.data_obnodes = open("test/nquads.rdflib/bnode_context_obj_bnodes.nquads", "rb")
+
+ def tearDown(self):
+ self.data.close()
+
+ def test_parse_shared_bnode_context(self):
+ bnode_ctx = dict()
+ g = ConjunctiveGraph()
+ h = ConjunctiveGraph()
+ g.parse(self.data, format="nquads", bnode_context=bnode_ctx)
+ self.data.seek(0)
+ h.parse(self.data, format="nquads", bnode_context=bnode_ctx)
+ self.assertEqual(set(h.subjects()), set(g.subjects()))
+
+ def test_parse_shared_bnode_context_same_graph(self):
+ bnode_ctx = dict()
+ g = ConjunctiveGraph()
+ g.parse(self.data_obnodes, format="nquads", bnode_context=bnode_ctx)
+ o1 = set(g.objects())
+ self.data_obnodes.seek(0)
+ g.parse(self.data_obnodes, format="nquads", bnode_context=bnode_ctx)
+ o2 = set(g.objects())
+ self.assertEqual(o1, o2)
+
+ def test_parse_distinct_bnode_context(self):
+ g = ConjunctiveGraph()
+ g.parse(self.data, format="nquads", bnode_context=dict())
+ s1 = set(g.subjects())
+ self.data.seek(0)
+ g.parse(self.data, format="nquads", bnode_context=dict())
+ s2 = set(g.subjects())
+ self.assertNotEqual(set(), s2 - s1)
+
+ def test_parse_distinct_bnode_contexts_between_graphs(self):
+ g = ConjunctiveGraph()
+ h = ConjunctiveGraph()
+ g.parse(self.data, format="nquads")
+ s1 = set(g.subjects())
+ self.data.seek(0)
+ h.parse(self.data, format="nquads")
+ s2 = set(h.subjects())
+ self.assertNotEqual(s1, s2)
+
+ def test_parse_distinct_bnode_contexts_named_graphs(self):
+ g = ConjunctiveGraph()
+ h = ConjunctiveGraph()
+ g.parse(self.data, format="nquads")
+ self.data.seek(0)
+ h.parse(self.data, format="nquads")
+ self.assertNotEqual(set(h.contexts()), set(g.contexts()))
+
+ def test_parse_shared_bnode_contexts_named_graphs(self):
+ bnode_ctx = dict()
+ g = ConjunctiveGraph()
+ h = ConjunctiveGraph()
+ g.parse(self.data, format="nquads", bnode_context=bnode_ctx)
+ self.data.seek(0)
+ h.parse(self.data, format="nquads", bnode_context=bnode_ctx)
+ self.assertEqual(set(h.contexts()), set(g.contexts()))
+
+
if __name__ == "__main__":
unittest.main()
diff --git a/test/test_nt_misc.py b/test/test_nt_misc.py
index 4a21fed9..af7049d8 100644
--- a/test/test_nt_misc.py
+++ b/test/test_nt_misc.py
@@ -188,6 +188,22 @@ class BNodeContextTestCase(unittest.TestCase):
self.assertEqual(len(my_sink.subs), 1)
+ def test_bnode_shared_across_instances_with_parse_option(self):
+ my_sink = FakeSink()
+ bnode_ctx = dict()
+
+ p = ntriples.NTriplesParser(my_sink)
+ p.parsestring('''
+ _:0 <http://purl.obolibrary.org/obo/RO_0002350> <http://www.gbif.org/species/0000001> .
+ ''', bnode_context=bnode_ctx)
+
+ q = ntriples.NTriplesParser(my_sink)
+ q.parsestring('''
+ _:0 <http://purl.obolibrary.org/obo/RO_0002350> <http://www.gbif.org/species/0000002> .
+ ''', bnode_context=bnode_ctx)
+
+ self.assertEqual(len(my_sink.subs), 1)
+
class FakeSink(object):
def __init__(self):