summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn Anderson <sontek@gmail.com>2015-07-02 10:39:43 -0700
committerJohn Anderson <sontek@gmail.com>2015-07-02 10:39:43 -0700
commitcff5abfad59baa593996fe7e240fd3944e2c4af2 (patch)
tree5f91ba8f0c04790309b4b00027810da749025369
parenta935261830671ba84e8c15f1c9f031512e926338 (diff)
downloadpymemcache-cff5abfad59baa593996fe7e240fd3944e2c4af2.tar.gz
Remove dependency on clandestined and use pure python murmur3
-rw-r--r--pymemcache/client/hash.py12
-rw-r--r--pymemcache/client/murmur3.py51
-rw-r--r--pymemcache/client/rendezvous.py46
-rw-r--r--pymemcache/test/test_rendezvous.py203
-rw-r--r--setup.py7
5 files changed, 311 insertions, 8 deletions
diff --git a/pymemcache/client/hash.py b/pymemcache/client/hash.py
index 7abf83b..925a72d 100644
--- a/pymemcache/client/hash.py
+++ b/pymemcache/client/hash.py
@@ -3,16 +3,11 @@ import time
import logging
from pymemcache.client.base import Client, PooledClient, _check_key
-from clandestined import RendezvousHash as RH
+from pymemcache.client.rendezvous import RendezvousHash
logger = logging.getLogger(__name__)
-class RendezvousHash(RH):
- def get_node(self, key):
- return self.find_node(key)
-
-
class HashClient(object):
"""
A client for communicating with a cluster of memcached servers
@@ -57,6 +52,11 @@ class HashClient(object):
Further arguments are interpreted as for :py:class:`.Client`
constructor.
+
+ The default ``hasher`` is using a pure python implementation that can be
+ significantly improved performance wise by switching to a C based
+ version. We recommend using ``python-clandestined`` if having a C
+ dependency is acceptable.
"""
self.clients = {}
self.retry_attempts = retry_attempts
diff --git a/pymemcache/client/murmur3.py b/pymemcache/client/murmur3.py
new file mode 100644
index 0000000..787eeaf
--- /dev/null
+++ b/pymemcache/client/murmur3.py
@@ -0,0 +1,51 @@
+def murmur3_32(data, seed=0):
+ """MurmurHash3 was written by Austin Appleby, and is placed in the
+ public domain. The author hereby disclaims copyright to this source
+ code."""
+
+ c1 = 0xcc9e2d51
+ c2 = 0x1b873593
+
+ length = len(data)
+ h1 = seed
+ roundedEnd = (length & 0xfffffffc) # round down to 4 byte block
+ for i in range(0, roundedEnd, 4):
+ # little endian load order
+ k1 = (ord(data[i]) & 0xff) | ((ord(data[i + 1]) & 0xff) << 8) | \
+ ((ord(data[i + 2]) & 0xff) << 16) | (ord(data[i + 3]) << 24)
+ k1 *= c1
+ k1 = (k1 << 15) | ((k1 & 0xffffffff) >> 17) # ROTL32(k1,15)
+ k1 *= c2
+
+ h1 ^= k1
+ h1 = (h1 << 13) | ((h1 & 0xffffffff) >> 19) # ROTL32(h1,13)
+ h1 = h1 * 5 + 0xe6546b64
+
+ # tail
+ k1 = 0
+
+ val = length & 0x03
+ if val == 3:
+ k1 = (ord(data[roundedEnd + 2]) & 0xff) << 16
+ # fallthrough
+ if val in [2, 3]:
+ k1 |= (ord(data[roundedEnd + 1]) & 0xff) << 8
+ # fallthrough
+ if val in [1, 2, 3]:
+ k1 |= ord(data[roundedEnd]) & 0xff
+ k1 *= c1
+ k1 = (k1 << 15) | ((k1 & 0xffffffff) >> 17) # ROTL32(k1,15)
+ k1 *= c2
+ h1 ^= k1
+
+ # finalization
+ h1 ^= length
+
+ # fmix(h1)
+ h1 ^= ((h1 & 0xffffffff) >> 16)
+ h1 *= 0x85ebca6b
+ h1 ^= ((h1 & 0xffffffff) >> 13)
+ h1 *= 0xc2b2ae35
+ h1 ^= ((h1 & 0xffffffff) >> 16)
+
+ return h1 & 0xffffffff
diff --git a/pymemcache/client/rendezvous.py b/pymemcache/client/rendezvous.py
new file mode 100644
index 0000000..32ecc2b
--- /dev/null
+++ b/pymemcache/client/rendezvous.py
@@ -0,0 +1,46 @@
+from pymemcache.client.murmur3 import murmur3_32
+
+
+class RendezvousHash(object):
+ """
+ Implements the Highest Random Weight (HRW) hashing algorithm most
+ commonly referred to as rendezvous hashing.
+
+ Originally developed as part of python-clandestined.
+
+ Copyright (c) 2014 Ernest W. Durbin III
+ """
+ def __init__(self, nodes=None, seed=0, hash_function=murmur3_32):
+ """
+ Constructor.
+ """
+ self.nodes = []
+ self.seed = seed
+ if nodes is not None:
+ self.nodes = nodes
+ self.hash_function = lambda x: hash_function(x, seed)
+
+ def add_node(self, node):
+ if node not in self.nodes:
+ self.nodes.append(node)
+
+ def remove_node(self, node):
+ if node in self.nodes:
+ self.nodes.remove(node)
+ else:
+ raise ValueError("No such node %s to remove" % (node))
+
+ def get_node(self, key):
+ high_score = -1
+ winner = None
+
+ for node in self.nodes:
+ score = self.hash_function(
+ "%s-%s" % (str(node), str(key)))
+
+ if score > high_score:
+ (high_score, winner) = (score, node)
+ elif score == high_score:
+ (high_score, winner) = (score, max(str(node), str(winner)))
+
+ return winner
diff --git a/pymemcache/test/test_rendezvous.py b/pymemcache/test/test_rendezvous.py
new file mode 100644
index 0000000..067b760
--- /dev/null
+++ b/pymemcache/test/test_rendezvous.py
@@ -0,0 +1,203 @@
+from pymemcache.client.rendezvous import RendezvousHash
+import pytest
+
+
+@pytest.mark.unit()
+def test_init_no_options():
+ rendezvous = RendezvousHash()
+ assert 0 == len(rendezvous.nodes)
+ assert 1361238019 == rendezvous.hash_function('6666')
+
+
+@pytest.mark.unit()
+def test_init():
+ nodes = ['0', '1', '2']
+ rendezvous = RendezvousHash(nodes=nodes)
+ assert 3 == len(rendezvous.nodes)
+ assert 1361238019 == rendezvous.hash_function('6666')
+
+
+@pytest.mark.unit()
+def test_seed():
+ rendezvous = RendezvousHash(seed=10)
+ assert 2981722772 == rendezvous.hash_function('6666')
+
+
+@pytest.mark.unit()
+def test_add_node():
+ rendezvous = RendezvousHash()
+ rendezvous.add_node('1')
+
+ assert 1 == len(rendezvous.nodes)
+ rendezvous.add_node('1')
+
+ assert 1 == len(rendezvous.nodes)
+ rendezvous.add_node('2')
+
+ assert 2 == len(rendezvous.nodes)
+ rendezvous.add_node('1')
+
+ assert 2 == len(rendezvous.nodes)
+
+
+@pytest.mark.unit()
+def test_remove_node():
+ nodes = ['0', '1', '2']
+ rendezvous = RendezvousHash(nodes=nodes)
+ rendezvous.remove_node('2')
+
+ assert 2 == len(rendezvous.nodes)
+
+ with pytest.raises(ValueError):
+ rendezvous.remove_node('2')
+
+ assert 2 == len(rendezvous.nodes)
+
+ rendezvous.remove_node('1')
+ assert 1 == len(rendezvous.nodes)
+
+ rendezvous.remove_node('0')
+ assert 0 == len(rendezvous.nodes)
+
+
+@pytest.mark.unit()
+def test_get_node():
+ nodes = ['0', '1', '2']
+ rendezvous = RendezvousHash(nodes=nodes)
+ assert '0' == rendezvous.get_node('ok')
+ assert '1' == rendezvous.get_node('mykey')
+ assert '2' == rendezvous.get_node('wat')
+
+
+@pytest.mark.unit()
+def test_get_node_after_removal():
+ nodes = ['0', '1', '2']
+ rendezvous = RendezvousHash(nodes=nodes)
+ rendezvous.remove_node('1')
+
+ assert '0' == rendezvous.get_node('ok')
+ assert '0' == rendezvous.get_node('mykey')
+ assert '2' == rendezvous.get_node('wat')
+
+
+@pytest.mark.unit()
+def test_get_node_after_addition():
+ nodes = ['0', '1', '2']
+ rendezvous = RendezvousHash(nodes=nodes)
+ assert '0' == rendezvous.get_node('ok')
+ assert '1' == rendezvous.get_node('mykey')
+ assert '2' == rendezvous.get_node('wat')
+ assert '2' == rendezvous.get_node('lol')
+ rendezvous.add_node('3')
+
+ assert '0' == rendezvous.get_node('ok')
+ assert '1' == rendezvous.get_node('mykey')
+ assert '2' == rendezvous.get_node('wat')
+ assert '3' == rendezvous.get_node('lol')
+
+
+@pytest.mark.unit()
+def test_grow():
+ rendezvous = RendezvousHash()
+
+ placements = {}
+
+ for i in range(10):
+ rendezvous.add_node(str(i))
+ placements[str(i)] = []
+
+ for i in range(1000):
+ node = rendezvous.get_node(str(i))
+ placements[node].append(i)
+
+ new_placements = {}
+
+ for i in range(20):
+ rendezvous.add_node(str(i))
+ new_placements[str(i)] = []
+
+ for i in range(1000):
+ node = rendezvous.get_node(str(i))
+ new_placements[node].append(i)
+
+ keys = [k for sublist in placements.values() for k in sublist]
+ new_keys = [k for sublist in new_placements.values() for k in sublist]
+ assert sorted(keys) == sorted(new_keys)
+
+ added = 0
+ removed = 0
+
+ for node, assignments in new_placements.items():
+ after = set(assignments)
+ before = set(placements.get(node, []))
+ removed += len(before.difference(after))
+ added += len(after.difference(before))
+
+ assert added == removed
+ assert 1062 == (added + removed)
+
+
+@pytest.mark.unit()
+def test_shrink():
+ rendezvous = RendezvousHash()
+
+ placements = {}
+ for i in range(10):
+ rendezvous.add_node(str(i))
+ placements[str(i)] = []
+
+ for i in range(1000):
+ node = rendezvous.get_node(str(i))
+ placements[node].append(i)
+
+ rendezvous.remove_node('9')
+ new_placements = {}
+ for i in range(9):
+ new_placements[str(i)] = []
+
+ for i in range(1000):
+ node = rendezvous.get_node(str(i))
+ new_placements[node].append(i)
+
+ keys = [k for sublist in placements.values() for k in sublist]
+ new_keys = [k for sublist in new_placements.values() for k in sublist]
+ assert sorted(keys) == sorted(new_keys)
+
+ added = 0
+ removed = 0
+ for node, assignments in placements.items():
+ after = set(assignments)
+ before = set(new_placements.get(node, []))
+ removed += len(before.difference(after))
+ added += len(after.difference(before))
+
+ assert added == removed
+ assert 202 == (added + removed)
+
+
+def collide(key, seed):
+ return 1337
+
+
+@pytest.mark.unit()
+def test_rendezvous_collision():
+ nodes = ['c', 'b', 'a']
+ rendezvous = RendezvousHash(nodes, hash_function=collide)
+
+ for i in range(1000):
+ assert 'c' == rendezvous.get_node(i)
+
+
+@pytest.mark.unit()
+def test_rendezvous_names():
+ nodes = [1, 2, 3, 'a', 'b', 'lol.wat.com']
+ rendezvous = RendezvousHash(nodes, hash_function=collide)
+
+ for i in range(10):
+ assert 'lol.wat.com' == rendezvous.get_node(i)
+
+ nodes = [1, 'a', '0']
+ rendezvous = RendezvousHash(nodes, hash_function=collide)
+
+ for i in range(10):
+ assert 'a' == rendezvous.get_node(i)
diff --git a/setup.py b/setup.py
index f053090..9183a9d 100644
--- a/setup.py
+++ b/setup.py
@@ -1,16 +1,16 @@
#!/usr/bin/env python
from setuptools import setup, find_packages
-
from pymemcache import __version__
+
setup(
name='pymemcache',
version=__version__,
author='Charles Gordon',
author_email='charles@pinterest.com',
packages=find_packages(),
- install_requires=['six', 'clandestined'],
+ install_requires=['six'],
description='A comprehensive, fast, pure Python memcached client',
long_description=open('README.md').read(),
license='Apache License 2.0',
@@ -20,6 +20,9 @@ setup(
'Programming Language :: Python :: 2.6',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3.3',
+ 'Programming Language :: Python :: 3.4',
+ 'Programming Language :: Python :: PyPy',
+ 'Programming Language :: Python :: PyPy3',
'License :: OSI Approved :: Apache Software License',
'Topic :: Database',
],