diff options
author | Mark Watts <watts.mark2015@gmail.com> | 2020-05-31 21:48:27 -0500 |
---|---|---|
committer | Mark Watts <watts.mark2015@gmail.com> | 2020-05-31 22:14:36 -0500 |
commit | 1a5df0084c6f361c9059ac19e64d632ae503b9b3 (patch) | |
tree | 63d95b5b789776442a98d5aa0e9d99a17d5a5e2a | |
parent | 6b5bd37ccc67bdec62d2e36d174eb7933b5020b2 (diff) | |
download | rdflib-1a5df0084c6f361c9059ac19e64d632ae503b9b3.tar.gz |
Adding bnode_context option to NT and NQuads parsers (#980)
- Also, updating the shared context so it works properly with
Graph.parse
-rw-r--r-- | rdflib/plugins/parsers/nquads.py | 12 | ||||
-rw-r--r-- | rdflib/plugins/parsers/nt.py | 13 | ||||
-rw-r--r-- | rdflib/plugins/parsers/ntriples.py | 31 | ||||
-rw-r--r-- | test/nquads.rdflib/bnode_context.nquads | 3 | ||||
-rw-r--r-- | test/nquads.rdflib/bnode_context_obj_bnodes.nquads | 3 | ||||
-rw-r--r-- | test/test_nquads.py | 64 | ||||
-rw-r--r-- | test/test_nt_misc.py | 16 |
7 files changed, 116 insertions, 26 deletions
diff --git a/rdflib/plugins/parsers/nquads.py b/rdflib/plugins/parsers/nquads.py index 0c29fc4c..0053ccab 100644 --- a/rdflib/plugins/parsers/nquads.py +++ b/rdflib/plugins/parsers/nquads.py @@ -40,7 +40,7 @@ __all__ = ["NQuadsParser"] class NQuadsParser(NTriplesParser): - def parse(self, inputsource, sink, **kwargs): + def parse(self, inputsource, sink, bnode_context=None, **kwargs): """Parse f as an N-Triples file.""" assert sink.store.context_aware, ( "NQuadsParser must be given" " a context aware store." @@ -61,27 +61,27 @@ class NQuadsParser(NTriplesParser): if self.line is None: break try: - self.parseline() + self.parseline(bnode_context) except ParseError as msg: raise ParseError("Invalid line (%s):\n%r" % (msg, __line)) return self.sink - def parseline(self): + def parseline(self, bnode_context=None): self.eat(r_wspace) if (not self.line) or self.line.startswith(("#")): return # The line is empty or a comment - subject = self.subject() + subject = self.subject(bnode_context) self.eat(r_wspace) predicate = self.predicate() self.eat(r_wspace) - obj = self.object() + obj = self.object(bnode_context) self.eat(r_wspace) - context = self.uriref() or self.nodeid() or self.sink.identifier + context = self.uriref() or self.nodeid(bnode_context) or self.sink.identifier self.eat(r_tail) if self.line: diff --git a/rdflib/plugins/parsers/nt.py b/rdflib/plugins/parsers/nt.py index d7d3b336..b2ec445d 100644 --- a/rdflib/plugins/parsers/nt.py +++ b/rdflib/plugins/parsers/nt.py @@ -20,8 +20,17 @@ class NTParser(Parser): def __init__(self): super(NTParser, self).__init__() - def parse(self, source, sink, baseURI=None): + def parse(self, source, sink, **kwargs): + ''' + Parse the NT format + + :type source: `rdflib.parser.InputSource` + :param source: the source of NT-formatted data + :type sink: `rdflib.graph.Graph` + :param sink: where to send parsed triples + :param kwargs: Additional arguments to pass to `.NTriplesParser.parse` + ''' f = source.getByteStream() # TODO getCharacterStream? parser = NTriplesParser(NTSink(sink)) - parser.parse(f) + parser.parse(f, **kwargs) f.close() diff --git a/rdflib/plugins/parsers/ntriples.py b/rdflib/plugins/parsers/ntriples.py index 21c931c6..2e83204c 100644 --- a/rdflib/plugins/parsers/ntriples.py +++ b/rdflib/plugins/parsers/ntriples.py @@ -133,8 +133,6 @@ class NTriplesParser(object): else: self._bnode_ids = {} - self._parse_bnode_ids = None - if sink is not None: self.sink = sink else: @@ -156,13 +154,12 @@ class NTriplesParser(object): self.file = f self.buffer = "" - self._parse_bnode_ids = bnode_context while True: self.line = self.readline() if self.line is None: break try: - self.parseline() + self.parseline(bnode_context=bnode_context) except ParseError: raise ParseError("Invalid line: %r" % self.line) return self.sink @@ -200,18 +197,18 @@ class NTriplesParser(object): return None self.buffer += buffer - def parseline(self): + def parseline(self, bnode_context=None): self.eat(r_wspace) if (not self.line) or self.line.startswith("#"): return # The line is empty or a comment - subject = self.subject() + subject = self.subject(bnode_context) self.eat(r_wspaces) predicate = self.predicate() self.eat(r_wspaces) - object = self.object() + object = self.object(bnode_context) self.eat(r_tail) if self.line: @@ -230,9 +227,9 @@ class NTriplesParser(object): self.line = self.line[m.end():] return m - def subject(self): + def subject(self, bnode_context=None): # @@ Consider using dictionary cases - subj = self.uriref() or self.nodeid() + subj = self.uriref() or self.nodeid(bnode_context) if not subj: raise ParseError("Subject must be uriref or nodeID") return subj @@ -243,8 +240,8 @@ class NTriplesParser(object): raise ParseError("Predicate must be uriref") return pred - def object(self): - objt = self.uriref() or self.nodeid() or self.literal() + def object(self, bnode_context=None): + objt = self.uriref() or self.nodeid(bnode_context) or self.literal() if objt is False: raise ParseError("Unrecognised object type") return objt @@ -257,15 +254,13 @@ class NTriplesParser(object): return URI(uri) return False - def nodeid(self): + def nodeid(self, bnode_context=None): if self.peek("_"): # Fix for https://github.com/RDFLib/rdflib/issues/204 - if self._parse_bnode_ids is not None: - bnode_ids = self._parse_bnode_ids - else: - bnode_ids = self._bnode_ids + if bnode_context is None: + bnode_context = self._bnode_ids bnode_id = self.eat(r_nodeid).group(1) - new_id = bnode_ids.get(bnode_id, None) + new_id = bnode_context.get(bnode_id, None) if new_id is not None: # Re-map to id specfic to this doc return bNode(new_id) @@ -273,7 +268,7 @@ class NTriplesParser(object): # Replace with freshly-generated document-specific BNode id bnode = bNode() # Store the mapping - self._bnode_ids[bnode_id] = bnode + bnode_context[bnode_id] = bnode return bnode return False diff --git a/test/nquads.rdflib/bnode_context.nquads b/test/nquads.rdflib/bnode_context.nquads new file mode 100644 index 00000000..59e9350f --- /dev/null +++ b/test/nquads.rdflib/bnode_context.nquads @@ -0,0 +1,3 @@ +_:bnode1 <http://xmlns.com/foaf/0.1/Friend> "Michele" _:blah . +_:bnode2 <http://xmlns.com/foaf/0.1/Friend> "Kevin" _:bluh . + diff --git a/test/nquads.rdflib/bnode_context_obj_bnodes.nquads b/test/nquads.rdflib/bnode_context_obj_bnodes.nquads new file mode 100644 index 00000000..5b90fb00 --- /dev/null +++ b/test/nquads.rdflib/bnode_context_obj_bnodes.nquads @@ -0,0 +1,3 @@ +_:bnode1 <http://xmlns.com/foaf/0.1/Friend> "Michele" <http://example.org/alice/foaf2.rdf> . +<http://example.org/Kevin> <http://xmlns.com/foaf/0.1/Friend> _:bnode2 <http://example.org/alice/foaf3.rdf> . + diff --git a/test/test_nquads.py b/test/test_nquads.py index c25bc7ed..da9e8e2d 100644 --- a/test/test_nquads.py +++ b/test/test_nquads.py @@ -67,5 +67,69 @@ class NQuadsParserTest(unittest.TestCase): ) +class BnodeContextTest(unittest.TestCase): + def setUp(self): + self.data = open("test/nquads.rdflib/bnode_context.nquads", "rb") + self.data_obnodes = open("test/nquads.rdflib/bnode_context_obj_bnodes.nquads", "rb") + + def tearDown(self): + self.data.close() + + def test_parse_shared_bnode_context(self): + bnode_ctx = dict() + g = ConjunctiveGraph() + h = ConjunctiveGraph() + g.parse(self.data, format="nquads", bnode_context=bnode_ctx) + self.data.seek(0) + h.parse(self.data, format="nquads", bnode_context=bnode_ctx) + self.assertEqual(set(h.subjects()), set(g.subjects())) + + def test_parse_shared_bnode_context_same_graph(self): + bnode_ctx = dict() + g = ConjunctiveGraph() + g.parse(self.data_obnodes, format="nquads", bnode_context=bnode_ctx) + o1 = set(g.objects()) + self.data_obnodes.seek(0) + g.parse(self.data_obnodes, format="nquads", bnode_context=bnode_ctx) + o2 = set(g.objects()) + self.assertEqual(o1, o2) + + def test_parse_distinct_bnode_context(self): + g = ConjunctiveGraph() + g.parse(self.data, format="nquads", bnode_context=dict()) + s1 = set(g.subjects()) + self.data.seek(0) + g.parse(self.data, format="nquads", bnode_context=dict()) + s2 = set(g.subjects()) + self.assertNotEqual(set(), s2 - s1) + + def test_parse_distinct_bnode_contexts_between_graphs(self): + g = ConjunctiveGraph() + h = ConjunctiveGraph() + g.parse(self.data, format="nquads") + s1 = set(g.subjects()) + self.data.seek(0) + h.parse(self.data, format="nquads") + s2 = set(h.subjects()) + self.assertNotEqual(s1, s2) + + def test_parse_distinct_bnode_contexts_named_graphs(self): + g = ConjunctiveGraph() + h = ConjunctiveGraph() + g.parse(self.data, format="nquads") + self.data.seek(0) + h.parse(self.data, format="nquads") + self.assertNotEqual(set(h.contexts()), set(g.contexts())) + + def test_parse_shared_bnode_contexts_named_graphs(self): + bnode_ctx = dict() + g = ConjunctiveGraph() + h = ConjunctiveGraph() + g.parse(self.data, format="nquads", bnode_context=bnode_ctx) + self.data.seek(0) + h.parse(self.data, format="nquads", bnode_context=bnode_ctx) + self.assertEqual(set(h.contexts()), set(g.contexts())) + + if __name__ == "__main__": unittest.main() diff --git a/test/test_nt_misc.py b/test/test_nt_misc.py index 4a21fed9..af7049d8 100644 --- a/test/test_nt_misc.py +++ b/test/test_nt_misc.py @@ -188,6 +188,22 @@ class BNodeContextTestCase(unittest.TestCase): self.assertEqual(len(my_sink.subs), 1) + def test_bnode_shared_across_instances_with_parse_option(self): + my_sink = FakeSink() + bnode_ctx = dict() + + p = ntriples.NTriplesParser(my_sink) + p.parsestring(''' + _:0 <http://purl.obolibrary.org/obo/RO_0002350> <http://www.gbif.org/species/0000001> . + ''', bnode_context=bnode_ctx) + + q = ntriples.NTriplesParser(my_sink) + q.parsestring(''' + _:0 <http://purl.obolibrary.org/obo/RO_0002350> <http://www.gbif.org/species/0000002> . + ''', bnode_context=bnode_ctx) + + self.assertEqual(len(my_sink.subs), 1) + class FakeSink(object): def __init__(self): |