diff options
author | John Anderson <sontek@gmail.com> | 2015-07-02 10:39:43 -0700 |
---|---|---|
committer | John Anderson <sontek@gmail.com> | 2015-07-02 10:39:43 -0700 |
commit | cff5abfad59baa593996fe7e240fd3944e2c4af2 (patch) | |
tree | 5f91ba8f0c04790309b4b00027810da749025369 | |
parent | a935261830671ba84e8c15f1c9f031512e926338 (diff) | |
download | pymemcache-cff5abfad59baa593996fe7e240fd3944e2c4af2.tar.gz |
Remove dependency on clandestined and use pure python murmur3
-rw-r--r-- | pymemcache/client/hash.py | 12 | ||||
-rw-r--r-- | pymemcache/client/murmur3.py | 51 | ||||
-rw-r--r-- | pymemcache/client/rendezvous.py | 46 | ||||
-rw-r--r-- | pymemcache/test/test_rendezvous.py | 203 | ||||
-rw-r--r-- | setup.py | 7 |
5 files changed, 311 insertions, 8 deletions
diff --git a/pymemcache/client/hash.py b/pymemcache/client/hash.py index 7abf83b..925a72d 100644 --- a/pymemcache/client/hash.py +++ b/pymemcache/client/hash.py @@ -3,16 +3,11 @@ import time import logging from pymemcache.client.base import Client, PooledClient, _check_key -from clandestined import RendezvousHash as RH +from pymemcache.client.rendezvous import RendezvousHash logger = logging.getLogger(__name__) -class RendezvousHash(RH): - def get_node(self, key): - return self.find_node(key) - - class HashClient(object): """ A client for communicating with a cluster of memcached servers @@ -57,6 +52,11 @@ class HashClient(object): Further arguments are interpreted as for :py:class:`.Client` constructor. + + The default ``hasher`` is using a pure python implementation that can be + significantly improved performance wise by switching to a C based + version. We recommend using ``python-clandestined`` if having a C + dependency is acceptable. """ self.clients = {} self.retry_attempts = retry_attempts diff --git a/pymemcache/client/murmur3.py b/pymemcache/client/murmur3.py new file mode 100644 index 0000000..787eeaf --- /dev/null +++ b/pymemcache/client/murmur3.py @@ -0,0 +1,51 @@ +def murmur3_32(data, seed=0): + """MurmurHash3 was written by Austin Appleby, and is placed in the + public domain. The author hereby disclaims copyright to this source + code.""" + + c1 = 0xcc9e2d51 + c2 = 0x1b873593 + + length = len(data) + h1 = seed + roundedEnd = (length & 0xfffffffc) # round down to 4 byte block + for i in range(0, roundedEnd, 4): + # little endian load order + k1 = (ord(data[i]) & 0xff) | ((ord(data[i + 1]) & 0xff) << 8) | \ + ((ord(data[i + 2]) & 0xff) << 16) | (ord(data[i + 3]) << 24) + k1 *= c1 + k1 = (k1 << 15) | ((k1 & 0xffffffff) >> 17) # ROTL32(k1,15) + k1 *= c2 + + h1 ^= k1 + h1 = (h1 << 13) | ((h1 & 0xffffffff) >> 19) # ROTL32(h1,13) + h1 = h1 * 5 + 0xe6546b64 + + # tail + k1 = 0 + + val = length & 0x03 + if val == 3: + k1 = (ord(data[roundedEnd + 2]) & 0xff) << 16 + # fallthrough + if val in [2, 3]: + k1 |= (ord(data[roundedEnd + 1]) & 0xff) << 8 + # fallthrough + if val in [1, 2, 3]: + k1 |= ord(data[roundedEnd]) & 0xff + k1 *= c1 + k1 = (k1 << 15) | ((k1 & 0xffffffff) >> 17) # ROTL32(k1,15) + k1 *= c2 + h1 ^= k1 + + # finalization + h1 ^= length + + # fmix(h1) + h1 ^= ((h1 & 0xffffffff) >> 16) + h1 *= 0x85ebca6b + h1 ^= ((h1 & 0xffffffff) >> 13) + h1 *= 0xc2b2ae35 + h1 ^= ((h1 & 0xffffffff) >> 16) + + return h1 & 0xffffffff diff --git a/pymemcache/client/rendezvous.py b/pymemcache/client/rendezvous.py new file mode 100644 index 0000000..32ecc2b --- /dev/null +++ b/pymemcache/client/rendezvous.py @@ -0,0 +1,46 @@ +from pymemcache.client.murmur3 import murmur3_32 + + +class RendezvousHash(object): + """ + Implements the Highest Random Weight (HRW) hashing algorithm most + commonly referred to as rendezvous hashing. + + Originally developed as part of python-clandestined. + + Copyright (c) 2014 Ernest W. Durbin III + """ + def __init__(self, nodes=None, seed=0, hash_function=murmur3_32): + """ + Constructor. + """ + self.nodes = [] + self.seed = seed + if nodes is not None: + self.nodes = nodes + self.hash_function = lambda x: hash_function(x, seed) + + def add_node(self, node): + if node not in self.nodes: + self.nodes.append(node) + + def remove_node(self, node): + if node in self.nodes: + self.nodes.remove(node) + else: + raise ValueError("No such node %s to remove" % (node)) + + def get_node(self, key): + high_score = -1 + winner = None + + for node in self.nodes: + score = self.hash_function( + "%s-%s" % (str(node), str(key))) + + if score > high_score: + (high_score, winner) = (score, node) + elif score == high_score: + (high_score, winner) = (score, max(str(node), str(winner))) + + return winner diff --git a/pymemcache/test/test_rendezvous.py b/pymemcache/test/test_rendezvous.py new file mode 100644 index 0000000..067b760 --- /dev/null +++ b/pymemcache/test/test_rendezvous.py @@ -0,0 +1,203 @@ +from pymemcache.client.rendezvous import RendezvousHash +import pytest + + +@pytest.mark.unit() +def test_init_no_options(): + rendezvous = RendezvousHash() + assert 0 == len(rendezvous.nodes) + assert 1361238019 == rendezvous.hash_function('6666') + + +@pytest.mark.unit() +def test_init(): + nodes = ['0', '1', '2'] + rendezvous = RendezvousHash(nodes=nodes) + assert 3 == len(rendezvous.nodes) + assert 1361238019 == rendezvous.hash_function('6666') + + +@pytest.mark.unit() +def test_seed(): + rendezvous = RendezvousHash(seed=10) + assert 2981722772 == rendezvous.hash_function('6666') + + +@pytest.mark.unit() +def test_add_node(): + rendezvous = RendezvousHash() + rendezvous.add_node('1') + + assert 1 == len(rendezvous.nodes) + rendezvous.add_node('1') + + assert 1 == len(rendezvous.nodes) + rendezvous.add_node('2') + + assert 2 == len(rendezvous.nodes) + rendezvous.add_node('1') + + assert 2 == len(rendezvous.nodes) + + +@pytest.mark.unit() +def test_remove_node(): + nodes = ['0', '1', '2'] + rendezvous = RendezvousHash(nodes=nodes) + rendezvous.remove_node('2') + + assert 2 == len(rendezvous.nodes) + + with pytest.raises(ValueError): + rendezvous.remove_node('2') + + assert 2 == len(rendezvous.nodes) + + rendezvous.remove_node('1') + assert 1 == len(rendezvous.nodes) + + rendezvous.remove_node('0') + assert 0 == len(rendezvous.nodes) + + +@pytest.mark.unit() +def test_get_node(): + nodes = ['0', '1', '2'] + rendezvous = RendezvousHash(nodes=nodes) + assert '0' == rendezvous.get_node('ok') + assert '1' == rendezvous.get_node('mykey') + assert '2' == rendezvous.get_node('wat') + + +@pytest.mark.unit() +def test_get_node_after_removal(): + nodes = ['0', '1', '2'] + rendezvous = RendezvousHash(nodes=nodes) + rendezvous.remove_node('1') + + assert '0' == rendezvous.get_node('ok') + assert '0' == rendezvous.get_node('mykey') + assert '2' == rendezvous.get_node('wat') + + +@pytest.mark.unit() +def test_get_node_after_addition(): + nodes = ['0', '1', '2'] + rendezvous = RendezvousHash(nodes=nodes) + assert '0' == rendezvous.get_node('ok') + assert '1' == rendezvous.get_node('mykey') + assert '2' == rendezvous.get_node('wat') + assert '2' == rendezvous.get_node('lol') + rendezvous.add_node('3') + + assert '0' == rendezvous.get_node('ok') + assert '1' == rendezvous.get_node('mykey') + assert '2' == rendezvous.get_node('wat') + assert '3' == rendezvous.get_node('lol') + + +@pytest.mark.unit() +def test_grow(): + rendezvous = RendezvousHash() + + placements = {} + + for i in range(10): + rendezvous.add_node(str(i)) + placements[str(i)] = [] + + for i in range(1000): + node = rendezvous.get_node(str(i)) + placements[node].append(i) + + new_placements = {} + + for i in range(20): + rendezvous.add_node(str(i)) + new_placements[str(i)] = [] + + for i in range(1000): + node = rendezvous.get_node(str(i)) + new_placements[node].append(i) + + keys = [k for sublist in placements.values() for k in sublist] + new_keys = [k for sublist in new_placements.values() for k in sublist] + assert sorted(keys) == sorted(new_keys) + + added = 0 + removed = 0 + + for node, assignments in new_placements.items(): + after = set(assignments) + before = set(placements.get(node, [])) + removed += len(before.difference(after)) + added += len(after.difference(before)) + + assert added == removed + assert 1062 == (added + removed) + + +@pytest.mark.unit() +def test_shrink(): + rendezvous = RendezvousHash() + + placements = {} + for i in range(10): + rendezvous.add_node(str(i)) + placements[str(i)] = [] + + for i in range(1000): + node = rendezvous.get_node(str(i)) + placements[node].append(i) + + rendezvous.remove_node('9') + new_placements = {} + for i in range(9): + new_placements[str(i)] = [] + + for i in range(1000): + node = rendezvous.get_node(str(i)) + new_placements[node].append(i) + + keys = [k for sublist in placements.values() for k in sublist] + new_keys = [k for sublist in new_placements.values() for k in sublist] + assert sorted(keys) == sorted(new_keys) + + added = 0 + removed = 0 + for node, assignments in placements.items(): + after = set(assignments) + before = set(new_placements.get(node, [])) + removed += len(before.difference(after)) + added += len(after.difference(before)) + + assert added == removed + assert 202 == (added + removed) + + +def collide(key, seed): + return 1337 + + +@pytest.mark.unit() +def test_rendezvous_collision(): + nodes = ['c', 'b', 'a'] + rendezvous = RendezvousHash(nodes, hash_function=collide) + + for i in range(1000): + assert 'c' == rendezvous.get_node(i) + + +@pytest.mark.unit() +def test_rendezvous_names(): + nodes = [1, 2, 3, 'a', 'b', 'lol.wat.com'] + rendezvous = RendezvousHash(nodes, hash_function=collide) + + for i in range(10): + assert 'lol.wat.com' == rendezvous.get_node(i) + + nodes = [1, 'a', '0'] + rendezvous = RendezvousHash(nodes, hash_function=collide) + + for i in range(10): + assert 'a' == rendezvous.get_node(i) @@ -1,16 +1,16 @@ #!/usr/bin/env python from setuptools import setup, find_packages - from pymemcache import __version__ + setup( name='pymemcache', version=__version__, author='Charles Gordon', author_email='charles@pinterest.com', packages=find_packages(), - install_requires=['six', 'clandestined'], + install_requires=['six'], description='A comprehensive, fast, pure Python memcached client', long_description=open('README.md').read(), license='Apache License 2.0', @@ -20,6 +20,9 @@ setup( 'Programming Language :: Python :: 2.6', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3.3', + 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: PyPy', + 'Programming Language :: Python :: PyPy3', 'License :: OSI Approved :: Apache Software License', 'Topic :: Database', ], |