summaryrefslogtreecommitdiff
path: root/bzrlib/tests/test__simple_set.py
diff options
context:
space:
mode:
Diffstat (limited to 'bzrlib/tests/test__simple_set.py')
-rw-r--r--bzrlib/tests/test__simple_set.py392
1 files changed, 392 insertions, 0 deletions
diff --git a/bzrlib/tests/test__simple_set.py b/bzrlib/tests/test__simple_set.py
new file mode 100644
index 0000000..3e9b6c4
--- /dev/null
+++ b/bzrlib/tests/test__simple_set.py
@@ -0,0 +1,392 @@
+# Copyright (C) 2009, 2010, 2011 Canonical Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+"""Tests for the SimpleSet type."""
+
+import sys
+
+from bzrlib import (
+ tests,
+ )
+from bzrlib.tests import (
+ features,
+ )
+
# The compiled extension is optional: bind the name to None when it cannot
# be imported so that this test module itself still imports.  TestSimpleSet
# lists compiled_simpleset_feature in its _test_needs_features.
try:
    from bzrlib import _simple_set_pyx
except ImportError:
    _simple_set_pyx = None
+
+
class _Hashable(object):
    """A simple object which has a fixed hash value.

    We could have used an 'int', but it turns out that Int objects don't
    implement tp_richcompare...

    Two instances compare equal when they carry the same hash value.
    Comparison against anything that is not a _Hashable is declined by
    returning NotImplemented.

    :ivar hash: The value returned by hash() for this object.
    """

    def __init__(self, the_hash):
        """Remember the_hash as the fixed hash value of this object."""
        self.hash = the_hash

    def __hash__(self):
        return self.hash

    def __eq__(self, other):
        if not isinstance(other, _Hashable):
            return NotImplemented
        return other.hash == self.hash

    def __ne__(self, other):
        # Python 2 does not derive != from ==; without this, two equal
        # instances would still report a != b (identity fallback).  Going
        # through __eq__ keeps subclasses that override it consistent.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __repr__(self):
        # Show the hash so failed lookup assertions are readable instead of
        # printing an object address.
        return '%s(%r)' % (self.__class__.__name__, self.hash)
+
+
class _BadSecondHash(_Hashable):
    """A _Hashable that can be hashed successfully exactly once.

    The first hash() call returns the stored hash value; every later call
    raises ValueError.
    """

    def __init__(self, the_hash):
        super(_BadSecondHash, self).__init__(the_hash)
        # True until __hash__ has been called for the first time.
        self._first = True

    def __hash__(self):
        if not self._first:
            raise ValueError('I can only be hashed once.')
        self._first = False
        return self.hash
+
+
class _BadCompare(_Hashable):
    """A _Hashable whose equality comparison always raises RuntimeError."""

    def __eq__(self, other):
        raise RuntimeError('I refuse to play nice')

    # A class that overrides __eq__ without defining __hash__ has its
    # __hash__ set to None on Python 3, which would make instances
    # unhashable.  Re-bind the inherited implementation explicitly; on
    # Python 2 this changes nothing.
    __hash__ = _Hashable.__hash__
+
+
class _NoImplementCompare(_Hashable):
    """A _Hashable whose equality comparison always returns NotImplemented."""

    def __eq__(self, other):
        return NotImplemented

    # A class that overrides __eq__ without defining __hash__ has its
    # __hash__ set to None on Python 3, which would make instances
    # unhashable.  Re-bind the inherited implementation explicitly; on
    # Python 2 this changes nothing.
    __hash__ = _Hashable.__hash__
+
+
# Even though this is an extension, we don't permute the tests against a
# pure-Python version, because the plain Python equivalent is just a dict or
# set.
compiled_simpleset_feature = features.ModuleAvailableFeature(
    'bzrlib._simple_set_pyx')
+
+
class TestSimpleSet(tests.TestCase):
    """Tests for the SimpleSet type of the compiled _simple_set_pyx module.

    The class lists compiled_simpleset_feature in _test_needs_features.
    Many tests depend on a freshly created set reporting a mask of 0x3ff,
    i.e. a table of 1024 slots, and pick hash values that collide modulo
    that size.
    """

    _test_needs_features = [compiled_simpleset_feature]
    module = _simple_set_pyx

    def assertIn(self, obj, container):
        """Fail unless obj is found in container."""
        self.assertTrue(obj in container,
            '%s not found in %s' % (obj, container))

    def assertNotIn(self, obj, container):
        """Fail if obj is found in container."""
        self.assertTrue(obj not in container,
            'We found %s in %s' % (obj, container))

    def assertFillState(self, used, fill, mask, obj):
        """Assert the (used, fill, mask) attributes of the set obj."""
        self.assertEqual((used, fill, mask), (obj.used, obj.fill, obj.mask))

    def assertLookup(self, offset, value, obj, key):
        """Assert that obj._test_lookup(key) returns (offset, value).

        In these tests value is either the stored key object, or one of the
        marker strings '<null>' and '<dummy>'.
        """
        self.assertEqual((offset, value), obj._test_lookup(key))

    def assertRefcount(self, count, obj):
        """Assert that the refcount for obj is what we expect.

        Note that this automatically adjusts for the fact that calling
        assertRefcount actually creates a new pointer, as does calling
        sys.getrefcount. So pass the expected value *before* the call.
        """
        # I'm not sure why the offset is 3, but I've checked that in the
        # caller an offset of 1 works, which is expected. Not sure why
        # assertRefcount is incrementing/decrementing 2 times.
        self.assertEqual(count, sys.getrefcount(obj) - 3)

    def test_initial(self):
        # A new set is empty and starts with the 1024-slot table.
        obj = self.module.SimpleSet()
        self.assertEqual(0, len(obj))
        self.assertFillState(0, 0, 0x3ff, obj)

    def test__lookup(self):
        # These are carefully chosen integers to force hash collisions in the
        # algorithm, based on the initial set size of 1024
        obj = self.module.SimpleSet()
        self.assertLookup(643, '<null>', obj, _Hashable(643))
        self.assertLookup(643, '<null>', obj, _Hashable(643 + 1024))
        self.assertLookup(643, '<null>', obj, _Hashable(643 + 50*1024))

    def test__lookup_collision(self):
        obj = self.module.SimpleSet()
        k1 = _Hashable(643)
        k2 = _Hashable(643 + 1024)
        self.assertLookup(643, '<null>', obj, k1)
        self.assertLookup(643, '<null>', obj, k2)
        obj.add(k1)
        # With slot 643 taken by k1, the colliding k2 moves to the next slot.
        self.assertLookup(643, k1, obj, k1)
        self.assertLookup(644, '<null>', obj, k2)

    def test__lookup_after_resize(self):
        obj = self.module.SimpleSet()
        k1 = _Hashable(643)
        k2 = _Hashable(643 + 1024)
        obj.add(k1)
        obj.add(k2)
        self.assertLookup(643, k1, obj, k1)
        self.assertLookup(644, k2, obj, k2)
        obj._py_resize(2047) # resized to 2048
        self.assertEqual(2048, obj.mask + 1)
        # In the larger table the two hashes no longer collide.
        self.assertLookup(643, k1, obj, k1)
        self.assertLookup(643+1024, k2, obj, k2)
        obj._py_resize(1023) # resized back to 1024
        self.assertEqual(1024, obj.mask + 1)
        self.assertLookup(643, k1, obj, k1)
        self.assertLookup(644, k2, obj, k2)

    def test_get_set_del_with_collisions(self):
        obj = self.module.SimpleSet()

        # h1-h4 all land on slot 643 of a 1024-slot table, h5 and h6 on 644.
        h1 = 643
        h2 = 643 + 1024
        h3 = 643 + 1024*50
        h4 = 643 + 1024*25
        h5 = 644
        h6 = 644 + 1024

        k1 = _Hashable(h1)
        k2 = _Hashable(h2)
        k3 = _Hashable(h3)
        k4 = _Hashable(h4)
        k5 = _Hashable(h5)
        k6 = _Hashable(h6)
        self.assertLookup(643, '<null>', obj, k1)
        self.assertLookup(643, '<null>', obj, k2)
        self.assertLookup(643, '<null>', obj, k3)
        self.assertLookup(643, '<null>', obj, k4)
        self.assertLookup(644, '<null>', obj, k5)
        self.assertLookup(644, '<null>', obj, k6)
        obj.add(k1)
        self.assertIn(k1, obj)
        self.assertNotIn(k2, obj)
        self.assertNotIn(k3, obj)
        self.assertNotIn(k4, obj)
        self.assertLookup(643, k1, obj, k1)
        self.assertLookup(644, '<null>', obj, k2)
        self.assertLookup(644, '<null>', obj, k3)
        self.assertLookup(644, '<null>', obj, k4)
        self.assertLookup(644, '<null>', obj, k5)
        self.assertLookup(644, '<null>', obj, k6)
        self.assertIs(k1, obj[k1])
        self.assertIs(k2, obj.add(k2))
        self.assertIs(k2, obj[k2])
        self.assertLookup(643, k1, obj, k1)
        self.assertLookup(644, k2, obj, k2)
        self.assertLookup(646, '<null>', obj, k3)
        self.assertLookup(646, '<null>', obj, k4)
        self.assertLookup(645, '<null>', obj, k5)
        self.assertLookup(645, '<null>', obj, k6)
        # Lookups go by hash/equality, so fresh equal objects find the same
        # slots and stored keys.
        self.assertLookup(643, k1, obj, _Hashable(h1))
        self.assertLookup(644, k2, obj, _Hashable(h2))
        self.assertLookup(646, '<null>', obj, _Hashable(h3))
        self.assertLookup(646, '<null>', obj, _Hashable(h4))
        self.assertLookup(645, '<null>', obj, _Hashable(h5))
        self.assertLookup(645, '<null>', obj, _Hashable(h6))
        obj.add(k3)
        self.assertIs(k3, obj[k3])
        self.assertIn(k1, obj)
        self.assertIn(k2, obj)
        self.assertIn(k3, obj)
        self.assertNotIn(k4, obj)

        # Discarding k1 leaves a '<dummy>' marker in its slot; the keys
        # stored past it must still be reachable.
        obj.discard(k1)
        self.assertLookup(643, '<dummy>', obj, k1)
        self.assertLookup(644, k2, obj, k2)
        self.assertLookup(646, k3, obj, k3)
        self.assertLookup(643, '<dummy>', obj, k4)
        self.assertNotIn(k1, obj)
        self.assertIn(k2, obj)
        self.assertIn(k3, obj)
        self.assertNotIn(k4, obj)

    def test_add(self):
        obj = self.module.SimpleSet()
        self.assertFillState(0, 0, 0x3ff, obj)
        # We use this clumsy notation, because otherwise the refcounts are off.
        # I'm guessing the python compiler sees it is a static tuple, and adds
        # it to the function variables, or somesuch
        k1 = tuple(['foo'])
        self.assertRefcount(1, k1)
        self.assertIs(k1, obj.add(k1))
        self.assertFillState(1, 1, 0x3ff, obj)
        self.assertRefcount(2, k1)
        ktest = obj[k1]
        self.assertRefcount(3, k1)
        self.assertIs(k1, ktest)
        del ktest
        self.assertRefcount(2, k1)
        k2 = tuple(['foo'])
        self.assertRefcount(1, k2)
        self.assertIsNot(k1, k2)
        # doesn't add anything, so the counters shouldn't be adjusted
        self.assertIs(k1, obj.add(k2))
        self.assertFillState(1, 1, 0x3ff, obj)
        self.assertRefcount(2, k1) # not changed
        self.assertRefcount(1, k2) # not incremented
        self.assertIs(k1, obj[k1])
        self.assertIs(k1, obj[k2])
        self.assertRefcount(2, k1)
        self.assertRefcount(1, k2)
        # Discarding an entry decrements 'used' but not 'fill'
        obj.discard(k1)
        self.assertFillState(0, 1, 0x3ff, obj)
        self.assertRefcount(1, k1)
        k3 = tuple(['bar'])
        self.assertRefcount(1, k3)
        self.assertIs(k3, obj.add(k3))
        self.assertFillState(1, 2, 0x3ff, obj)
        self.assertRefcount(2, k3)
        # Adding k2 raises 'used' but leaves 'fill' at 2.
        self.assertIs(k2, obj.add(k2))
        self.assertFillState(2, 2, 0x3ff, obj)
        self.assertRefcount(1, k1)
        self.assertRefcount(2, k2)
        self.assertRefcount(2, k3)

    def test_discard(self):
        obj = self.module.SimpleSet()
        k1 = tuple(['foo'])
        k2 = tuple(['foo'])
        k3 = tuple(['bar'])
        self.assertRefcount(1, k1)
        self.assertRefcount(1, k2)
        self.assertRefcount(1, k3)
        obj.add(k1)
        self.assertRefcount(2, k1)
        # discard() returns 0 when the key was absent and 1 when it was
        # removed.
        self.assertEqual(0, obj.discard(k3))
        self.assertRefcount(1, k3)
        obj.add(k3)
        self.assertRefcount(2, k3)
        self.assertEqual(1, obj.discard(k3))
        self.assertRefcount(1, k3)

    def test__resize(self):
        obj = self.module.SimpleSet()
        k1 = ('foo',)
        k2 = ('bar',)
        k3 = ('baz',)
        obj.add(k1)
        obj.add(k2)
        obj.add(k3)
        obj.discard(k2)
        self.assertFillState(2, 3, 0x3ff, obj)
        self.assertEqual(1024, obj._py_resize(500))
        # Doesn't change the size, but does change the content
        self.assertFillState(2, 2, 0x3ff, obj)
        obj.add(k2)
        obj.discard(k3)
        self.assertFillState(2, 3, 0x3ff, obj)
        self.assertEqual(4096, obj._py_resize(4095))
        self.assertFillState(2, 2, 0xfff, obj)
        self.assertIn(k1, obj)
        self.assertIn(k2, obj)
        self.assertNotIn(k3, obj)
        obj.add(k2)
        self.assertIn(k2, obj)
        obj.discard(k2)
        self.assertEqual((591, '<dummy>'), obj._test_lookup(k2))
        self.assertFillState(1, 2, 0xfff, obj)
        self.assertEqual(2048, obj._py_resize(1024))
        # After the resize the dummy left by k2 is gone.
        self.assertFillState(1, 1, 0x7ff, obj)
        self.assertEqual((591, '<null>'), obj._test_lookup(k2))

    def test_second_hash_failure(self):
        obj = self.module.SimpleSet()
        k1 = _BadSecondHash(200)
        k2 = _Hashable(200)
        # Should only call hash() one time
        obj.add(k1)
        self.assertFalse(k1._first)
        # NOTE(review): presumably adding the colliding k2 hashes k1 again,
        # which is what raises here -- confirm against the implementation.
        self.assertRaises(ValueError, obj.add, k2)

    def test_richcompare_failure(self):
        obj = self.module.SimpleSet()
        k1 = _Hashable(200)
        k2 = _BadCompare(200)
        obj.add(k1)
        # Tries to compare with k1, fails
        self.assertRaises(RuntimeError, obj.add, k2)

    def test_richcompare_not_implemented(self):
        obj = self.module.SimpleSet()
        # Even though their hashes are the same, tp_richcompare returns
        # NotImplemented, which means we treat them as not equal
        k1 = _NoImplementCompare(200)
        k2 = _NoImplementCompare(200)
        self.assertLookup(200, '<null>', obj, k1)
        self.assertLookup(200, '<null>', obj, k2)
        self.assertIs(k1, obj.add(k1))
        self.assertLookup(200, k1, obj, k1)
        self.assertLookup(201, '<null>', obj, k2)
        self.assertIs(k2, obj.add(k2))
        self.assertIs(k1, obj[k1])

    def test_add_and_remove_lots_of_items(self):
        obj = self.module.SimpleSet()
        chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890'
        for i in chars:
            for j in chars:
                k = (i, j)
                obj.add(k)
        num = len(chars)*len(chars)
        self.assertFillState(num, num, 0x1fff, obj)
        # Now delete all of the entries and it should shrink again
        for i in chars:
            for j in chars:
                k = (i, j)
                obj.discard(k)
        # It should be back to 1024 wide mask, though there may still be some
        # dummy values in there
        self.assertFillState(0, obj.fill, 0x3ff, obj)
        # but there should be fewer than 1/5th dummy entries
        self.assertTrue(obj.fill < 1024 / 5)

    def test__iter__(self):
        obj = self.module.SimpleSet()
        k1 = ('1',)
        k2 = ('1', '2')
        k3 = ('3', '4')
        obj.add(k1)
        obj.add(k2)
        obj.add(k3)
        # Collect through a plain for-loop so iteration itself is exercised.
        seen = set()
        for key in obj:
            seen.add(key)
        self.assertEqual(sorted([k1, k2, k3]), sorted(seen))
        iterator = iter(obj)
        iterator.next()
        obj.add(('foo',))
        # Set changed size
        self.assertRaises(RuntimeError, iterator.next)
        # And even removing an item still causes it to fail
        obj.discard(k2)
        self.assertRaises(RuntimeError, iterator.next)

    def test__sizeof__(self):
        # SimpleSet needs a custom sizeof implementation, because it allocates
        # memory that Python cannot directly see (_table).
        # Too much variability in platform sizes for us to give a fixed size
        # here. However without a custom implementation, __sizeof__ would give
        # us only the size of the object, and not its table. We know the table
        # is at least 4bytes*1024entries in size.
        obj = self.module.SimpleSet()
        self.assertTrue(obj.__sizeof__() > 4096)