diff options
author | Mark Watts <watts.mark2015@gmail.com> | 2019-08-31 09:12:43 -0500 |
---|---|---|
committer | Mark Watts <watts.mark2015@gmail.com> | 2020-03-16 19:20:49 -0500 |
commit | 48cd7df77e71d8382847872fe7a3371477828df4 (patch) | |
tree | 5fbad9d3ee6f8ba7d04a618f738b5f9a64860160 | |
parent | 107ffb6467f813980dcccd1f652ccde6b97d8d32 (diff) | |
download | rdflib-48cd7df77e71d8382847872fe7a3371477828df4.tar.gz |
Adding a wrapper for batching add() calls to a Graph
- Should address RDFLib/rdflib#357
-rw-r--r-- | rdflib/graph.py | 56 | ||||
-rw-r--r-- | test/test_batch_add.py | 24 |
2 files changed, 79 insertions, 1 deletions
class BatchAddGraph(object):
    '''
    Wrapper around graph that turns calls to :meth:`add` into calls to
    :meth:`~rdflib.graph.Graph.addN`

    Triples given to :meth:`add` are buffered; once the buffer holds
    `batchsize` entries it is passed to the wrapped graph's ``addN`` in a
    single call and a fresh buffer is started. When used as a context
    manager, whatever remains in the buffer is submitted on a clean exit
    (nothing is submitted if the ``with`` body raised).

    :Parameters:

      - `graph`: The graph to wrap
      - `batchsize`: The maximum number of triples to buffer before passing to
        `graph`'s `addN`

    :ivar graph: The wrapped graph
    :ivar count: The number of triples buffered since initialization or the
                 last call to :meth:`reset`
    :ivar batch: The current buffer; each entry is the added triple extended
                 with the wrapped graph as a fourth element

    '''

    def __init__(self, graph, batchsize=1000):
        '''
        :param graph: The graph to wrap
        :param batchsize: Maximum number of buffered triples; must be a
                          positive number
        :raises ValueError: if `batchsize` is falsy (``0``, ``None``) or
                            negative
        '''
        if not batchsize or batchsize < 0:
            raise ValueError("batchsize must be a positive number")
        self.graph = graph
        # Pre-built 1-tuple so add() can extend a triple with the graph
        # through a single tuple concatenation.
        self.__graph_tuple = (graph,)
        self.__batchsize = batchsize
        self.reset()

    def reset(self):
        '''
        Manually clear the buffered triples and reset the count to zero
        '''
        self.batch = []
        self.count = 0

    def add(self, triple):
        '''
        Add a triple to the buffer

        If the buffer already holds `batchsize` entries it is first passed to
        the wrapped graph's ``addN`` and replaced with an empty one, so the
        buffer never grows beyond `batchsize`.

        :param triple: The triple to add
        '''
        # The previous condition was the chained comparison
        # ``len(batch) >= batchsize == 0``, which can never be true for a
        # positive batchsize, so the buffer was never flushed from here.
        if len(self.batch) >= self.__batchsize:
            self.graph.addN(self.batch)
            # Rebind instead of clearing in place: the list just handed to
            # addN must not be mutated behind the graph's back.
            self.batch = []
        self.count += 1
        self.batch.append(triple + self.__graph_tuple)

    def __enter__(self):
        '''
        Start from an empty buffer and a zero count; return the wrapper
        '''
        self.reset()
        return self

    def __exit__(self, *exc):
        '''
        Submit the remaining buffered triples unless an exception is
        propagating out of the ``with`` block
        '''
        if exc[0] is None:
            self.graph.addN(self.batch)
class TestBatchAddGraph(unittest.TestCase):
    '''
    Checks for :class:`BatchAddGraph`: constructor validation of
    `batchsize` and submission of buffered triples on context exit
    '''

    def test_batchsize_zero(self):
        # A batch size of zero must be rejected by the constructor.
        self.assertRaises(ValueError, BatchAddGraph, Graph(), batchsize=0)

    def test_batchsize_none(self):
        # None is not an acceptable batch size.
        self.assertRaises(ValueError, BatchAddGraph, Graph(), batchsize=None)

    def test_batchsize_negative(self):
        # Negative batch sizes must be rejected as well.
        self.assertRaises(ValueError, BatchAddGraph, Graph(), batchsize=-12)

    def test_exit_submits_partial_batch(self):
        # A single buffered triple (far below the default batch size) must
        # still be in the wrapped graph once the ``with`` block has exited.
        target = Graph()
        triple = (URIRef('a'), URIRef('b'), URIRef('c'))
        with BatchAddGraph(target) as batcher:
            batcher.add(triple)
        self.assertIn(triple, target)