summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMark Watts <watts.mark2015@gmail.com>2019-08-31 09:12:43 -0500
committerMark Watts <watts.mark2015@gmail.com>2020-03-16 19:20:49 -0500
commit48cd7df77e71d8382847872fe7a3371477828df4 (patch)
tree5fbad9d3ee6f8ba7d04a618f738b5f9a64860160
parent107ffb6467f813980dcccd1f652ccde6b97d8d32 (diff)
downloadrdflib-48cd7df77e71d8382847872fe7a3371477828df4.tar.gz
Adding a wrapper for batching add() calls to a Graph
- Should address RDFLib/rdflib#357
-rw-r--r--rdflib/graph.py56
-rw-r--r--test/test_batch_add.py24
2 files changed, 79 insertions, 1 deletions
diff --git a/rdflib/graph.py b/rdflib/graph.py
index 4a27e6de..7de79e29 100644
--- a/rdflib/graph.py
+++ b/rdflib/graph.py
@@ -273,7 +273,8 @@ from six.moves.urllib.parse import urlparse
__all__ = [
'Graph', 'ConjunctiveGraph', 'QuotedGraph', 'Seq',
'ModificationException', 'Dataset',
- 'UnSupportedAggregateOperation', 'ReadOnlyGraphAggregate']
+ 'UnSupportedAggregateOperation', 'ReadOnlyGraphAggregate',
+ 'BatchAddGraph']
class Graph(Node):
@@ -1930,6 +1931,59 @@ def _assertnode(*terms):
return True
class BatchAddGraph(object):
    '''
    Wrapper around graph that turns calls to :meth:`add` into calls to :meth:`~rdflib.graph.Graph.addN`

    Triples passed to :meth:`add` are collected in a buffer. Once the buffer
    holds `batchsize` entries, the next call to :meth:`add` hands the whole
    buffer to the wrapped graph's `addN` and starts a new one. When used as a
    context manager, whatever is still buffered is submitted on a clean exit.

    :Parameters:

     - `graph`: The graph to wrap
     - `batchsize`: The maximum number of triples to buffer before passing to
       `graph`'s `addN`

    :ivar graph: The wrapped graph
    :ivar count: The number of triples passed to :meth:`add` since
                 initialization or the last call to :meth:`reset`
    :ivar batch: The current buffer of triples

    :raises ValueError: if `batchsize` is zero, `None` or negative
    '''

    def __init__(self, graph, batchsize=1000):
        # `not batchsize` rejects both 0 and None before the `< 0` comparison
        # is evaluated, so None never reaches the numeric check.
        if not batchsize or batchsize < 0:
            raise ValueError("batchsize must be a positive number")
        self.graph = graph
        # Pre-built 1-tuple so each add() can extend a triple with the wrapped
        # graph by plain tuple concatenation.
        self.__graph_tuple = (graph,)
        self.__batchsize = batchsize
        self.reset()

    def reset(self):
        '''
        Manually clear the buffered triples and reset the count to zero
        '''
        self.batch = []
        self.count = 0

    def add(self, triple):
        '''
        Add a triple to the buffer

        If the buffer already holds `batchsize` entries it is first passed to
        the wrapped graph's `addN` and replaced by a fresh list.

        :param triple: The triple to add; it is concatenated with a 1-tuple
                       holding the wrapped graph before being buffered
        '''
        # A full buffer is flushed *before* appending, so the buffer never
        # holds more than `batchsize` entries.
        if len(self.batch) >= self.__batchsize:
            self.graph.addN(self.batch)
            # Rebind rather than clear in place: the list just handed to
            # addN() is left untouched.
            self.batch = []
        self.count += 1
        self.batch.append(triple + self.__graph_tuple)

    def __enter__(self):
        # Entering the context always starts from an empty buffer and a zero
        # count, discarding anything buffered beforehand.
        self.reset()
        return self

    def __exit__(self, *exc):
        # Submit the remaining partial batch only when the `with` body
        # finished without raising. Returns None, so an exception from the
        # body still propagates. The buffer itself is not cleared here.
        if exc[0] is None:
            self.graph.addN(self.batch)
+
+
def test():
    '''Run the doctests found in the main module.'''
    from doctest import testmod
    testmod()
diff --git a/test/test_batch_add.py b/test/test_batch_add.py
new file mode 100644
index 00000000..b1e711f7
--- /dev/null
+++ b/test/test_batch_add.py
@@ -0,0 +1,24 @@
+import unittest
+from rdflib.graph import Graph, BatchAddGraph
+from rdflib.term import URIRef
+
+
class TestBatchAddGraph(unittest.TestCase):
    '''Checks for constructor validation and context-manager flushing of BatchAddGraph.'''

    def test_batchsize_zero(self):
        # A batch size of zero must be rejected at construction time.
        self.assertRaises(ValueError, BatchAddGraph, Graph(), batchsize=0)

    def test_batchsize_none(self):
        # None is not an acceptable batch size either.
        self.assertRaises(ValueError, BatchAddGraph, Graph(), batchsize=None)

    def test_batchsize_negative(self):
        # Negative batch sizes are rejected.
        self.assertRaises(ValueError, BatchAddGraph, Graph(), batchsize=-12)

    def test_exit_submits_partial_batch(self):
        # One triple is far below the default batch size, so it can only
        # reach the graph through the flush performed on leaving the block.
        triple = (URIRef('a'), URIRef('b'), URIRef('c'))
        graph = Graph()
        with BatchAddGraph(graph) as batcher:
            batcher.add(triple)
        self.assertIn(triple, graph)