summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStephen Rosen <sirosen@globus.org>2019-08-26 14:11:31 -0400
committerJon Parise <jon@pinterest.com>2019-08-26 11:11:31 -0700
commit5699c9dfa7067a99000e281091dd6400a1e84122 (patch)
tree92b8c74e4303906b6e8feab1263f35230bd840dd
parentf6ca790148cacb9b1144248d531adeffac0f7d3c (diff)
downloadpymemcache-5699c9dfa7067a99000e281091dd6400a1e84122.tar.gz
Change serialization interface to be an object (#245)
* Change serialization interface to be an object Rather than passing separate serialization and deserialization methods to a pymemcache client, pass an object implementing a very simple two-method interface. This is a rather significant breaking change and should be part of an x.0.0 major release. Resolves #56 As suggested in that issue, this is a cleaner interface, as there's no sensible context in which you would provide only one of these two methods and it should therefore be thought of as a serialization/deserialization protocol. Also adds a note to the documentation's Best Practices list that you should use the built-in serializer object unless you have a reason to do otherwise. * Support "de/serializer" in addition to "serde" In order to support older client usage in addition to the new serialization object (protocol), restore the "serializer" and "deserializer" arguments to the Client classes. These are marked as deprecated and will be automatically wrapped into a small "serde" object. In order to make the various object names more distinguishable and more informative, the built-in default serializer is now called "python_memcache_pickle_serde" Additionally, default client.serde to a "no-op serializer". This object does no transforms on the data. By putting this in place, we can skip some conditionals in the code around presence or absence of a serializer and therefore simplify internally (at the cost of an extra, unnecessary, functional call in some cases). It also simplifies logic around the handling of flags because we are now *guaranteed* the presence of a serializer object which returns some flags. i.e. "default flags" are no longer the responsibility of the various serializer usage sites. This is done carefully to ensure that passing a `serializer` without a `deserializer` is respected.
-rw-r--r--ChangeLog.rst6
-rw-r--r--README.rst1
-rw-r--r--docs/getting_started.rst66
-rw-r--r--pymemcache/client/base.py68
-rw-r--r--pymemcache/client/hash.py2
-rw-r--r--pymemcache/serde.py52
-rw-r--r--pymemcache/test/test_client.py29
-rw-r--r--pymemcache/test/test_integration.py31
-rw-r--r--pymemcache/test/test_serde.py23
-rw-r--r--pymemcache/test/utils.py15
10 files changed, 184 insertions, 109 deletions
diff --git a/ChangeLog.rst b/ChangeLog.rst
index 321e51c..fb8b628 100644
--- a/ChangeLog.rst
+++ b/ChangeLog.rst
@@ -7,6 +7,12 @@ New in version 3.0.0 (unreleased)
* Validate integer inputs for ``expire``, ``delay``, ``incr``, ``decr``, and
``memlimit`` -- non-integer values now raise ``MemcacheIllegalInputError``
+* The serialization API has been reworked. Instead of consuming a serializer
+ and deserializer as separate arguments, client objects now expect an argument
+ `serde` to be an object which implements `serialize` and `deserialize`
+ as methods. (The `serializer` and `deserializer` arguments are still
+ supported but considered deprecated.)
+
New in version 2.2.2
--------------------
* Fix ``long_description`` string in Python packaging.
diff --git a/README.rst b/README.rst
index 8355544..4ddf3d1 100644
--- a/README.rst
+++ b/README.rst
@@ -126,6 +126,7 @@ Credits
* `Nicholas Charriere <https://github.com/nichochar>`_
* `Joe Gordon <https://github.com/jogo>`_
* `Jon Parise <https://github.com/jparise>`_
+* `Stephen Rosen <https://github.com/sirosen>`_
We're Hiring!
=============
diff --git a/docs/getting_started.rst b/docs/getting_started.rst
index be9e0c1..4e1b9ef 100644
--- a/docs/getting_started.rst
+++ b/docs/getting_started.rst
@@ -49,20 +49,20 @@ Serialization
import json
from pymemcache.client.base import Client
- def json_serializer(key, value):
- if type(value) == str:
- return value, 1
- return json.dumps(value), 2
-
- def json_deserializer(key, value, flags):
- if flags == 1:
- return value
- if flags == 2:
- return json.loads(value)
- raise Exception("Unknown serialization format")
-
- client = Client(('localhost', 11211), serializer=json_serializer,
- deserializer=json_deserializer)
+ class JsonSerde(object):
+ def serialize(self, key, value):
+ if isinstance(value, str):
+ return value, 1
+ return json.dumps(value), 2
+
+ def deserialize(self, key, value, flags):
+ if flags == 1:
+ return value
+ if flags == 2:
+ return json.loads(value)
+ raise Exception("Unknown serialization format")
+
+ client = Client(('localhost', 11211), serde=JsonSerde())
client.set('key', {'a':'b', 'c':'d'})
result = client.get('key')
@@ -77,34 +77,44 @@ pymemcache provides a default
class Foo(object):
pass
- client = Client(('localhost', 11211),
- serializer=serde.python_memcache_serializer,
- deserializer=serde.python_memcache_deserializer)
+ client = Client(('localhost', 11211), serde=serde.pickle_serde)
client.set('key', Foo())
result client.get('key')
The serializer uses the highest pickle protocol available. In order to make
sure multiple versions of Python can read the protocol version, you can specify
-the version with :func:`pymemcache.serde.get_python_memcache_serializer`.
+the version by explicitly instantiating :class:`pymemcache.serde.PickleSerde`:
.. code-block:: python
- client = Client(('localhost', 11211),
- serializer=serde.get_python_memcache_serializer(pickle_version=2),
- deserializer=serde.python_memcache_deserializer)
+ client = Client(
+ ('localhost', 11211),
+ serde=serde.PickleSerde(pickle_version=2)
+ )
Deserialization with Python 3
-----------------------------
+Values passed to the `serde.deserialize()` method will be bytestrings. It is
+therefore necessary to encode and decode them correctly. Here's a version of
+the `JsonSerde` from above which is more careful with encodings:
+
.. code-block:: python
- def json_deserializer(key, value, flags):
- if flags == 1:
- return value.decode('utf-8')
- if flags == 2:
- return json.loads(value.decode('utf-8'))
- raise Exception("Unknown serialization format")
+ class JsonSerde(object):
+ def serialize(self, key, value):
+ if isinstance(value, str):
+ return value.encode('utf-8'), 1
+ return json.dumps(value).encode('utf-8'), 2
+
+ def deserialize(self, key, value, flags):
+ if flags == 1:
+ return value.decode('utf-8')
+ if flags == 2:
+ return json.loads(value.decode('utf-8'))
+ raise Exception("Unknown serialization format")
+
Key Constraints
---------------
@@ -140,6 +150,8 @@ Best Practices
errors, from killing your web requests. Do not use this flag if you need to
know about errors from memcache, and make sure you have some other way to
detect memcache server failures.
+ - Unless you have a known reason to do otherwise, use the provided serializer
+ in `pymemcache.serde.pickle_serde` for any de/serialization of objects.
.. WARNING::
diff --git a/pymemcache/client/base.py b/pymemcache/client/base.py
index cd3f644..1214afc 100644
--- a/pymemcache/client/base.py
+++ b/pymemcache/client/base.py
@@ -17,6 +17,7 @@ import six
from pymemcache import pool
+from pymemcache.serde import LegacyWrappingSerde
from pymemcache.exceptions import (
MemcacheClientError,
MemcacheUnknownCommandError,
@@ -144,36 +145,38 @@ class Client(object):
just calling encode on the string (using UTF-8, for instance).
If you intend to use anything but str as a value, it is a good idea to use
- a serializer and deserializer. The pymemcache.serde library has some
- already implemented serializers, including one that is compatible with
- the python-memcache library.
+ a serializer. The pymemcache.serde library has an already implemented
+ serializer which pickles and unpickles data.
*Serialization and Deserialization*
- The constructor takes two optional functions, one for "serialization" of
- values, and one for "deserialization". The serialization function takes
- two arguments, a key and a value, and returns a tuple of two elements, the
- serialized value, and an integer in the range 0-65535 (the "flags"). The
- deserialization function takes three parameters, a key, value and flags
- and returns the deserialized value.
+ The constructor takes an optional object, the "serializer/deserializer"
+ ("serde"), which is responsible for both serialization and deserialization
+ of objects. That object must satisfy the serializer interface by providing
+ two methods: `serialize` and `deserialize`. `serialize` takes two
+ arguments, a key and a value, and returns a tuple of two elements, the
+ serialized value, and an integer in the range 0-65535 (the "flags").
+ `deserialize` takes three parameters, a key, value, and flags, and returns
+ the deserialized value.
Here is an example using JSON for non-str values:
.. code-block:: python
- def serialize_json(key, value):
- if type(value) == str:
- return value, 1
- return json.dumps(value), 2
+ class JSONSerde(object):
+ def serialize(self, key, value):
+ if isinstance(value, str):
+ return value, 1
+ return json.dumps(value), 2
- def deserialize_json(key, value, flags):
- if flags == 1:
- return value
+ def deserialize(self, key, value, flags):
+ if flags == 1:
+ return value
- if flags == 2:
- return json.loads(value)
+ if flags == 2:
+ return json.loads(value)
- raise Exception("Unknown flags for value: {1}".format(flags))
+ raise Exception("Unknown flags for value: {0}".format(flags))
.. note::
@@ -205,6 +208,7 @@ class Client(object):
def __init__(self,
server,
+ serde=None,
serializer=None,
deserializer=None,
connect_timeout=None,
@@ -221,8 +225,9 @@ class Client(object):
Args:
server: tuple(hostname, port) or string containing a UNIX socket path.
- serializer: optional function, see notes in the class docs.
- deserializer: optional function, see notes in the class docs.
+ serde: optional serializer object, see notes in the class docs.
+ serializer: deprecated serialization function
+ deserializer: deprecated deserialization function
connect_timeout: optional float, seconds to wait for a connection to
the memcached server. Defaults to "forever" (uses the underlying
default socket timeout, which can be very long).
@@ -249,8 +254,7 @@ class Client(object):
call to a method on the object will do that.
"""
self.server = server
- self.serializer = serializer
- self.deserializer = deserializer
+ self.serde = serde or LegacyWrappingSerde(serializer, deserializer)
self.connect_timeout = connect_timeout
self.timeout = timeout
self.no_delay = no_delay
@@ -800,8 +804,7 @@ class Client(object):
buf, value = _readvalue(self.sock, buf, int(size))
key = remapped_keys[key]
- if self.deserializer:
- value = self.deserializer(key, value, int(flags))
+ value = self.serde.deserialize(key, value, int(flags))
if expect_cas:
return key, (value, cas), buf
@@ -864,10 +867,7 @@ class Client(object):
keys.append(key)
key = self.check_key(key)
- if self.serializer:
- data, data_flags = self.serializer(key, data)
- else:
- data_flags = 0
+ data, data_flags = self.serde.serialize(key, data)
# If 'flags' was explicitly provided, it overrides the value
# returned by the serializer.
@@ -965,10 +965,14 @@ class PooledClient(object):
eventlet lock or semaphore could be used instead)
Further arguments are interpreted as for :py:class:`.Client` constructor.
+
+ Note: if `serde` is given, the same object will be used for *all* clients
+ in the pool. Your serde object must therefore be thread-safe.
"""
def __init__(self,
server,
+ serde=None,
serializer=None,
deserializer=None,
connect_timeout=None,
@@ -983,8 +987,7 @@ class PooledClient(object):
allow_unicode_keys=False,
encoding='ascii'):
self.server = server
- self.serializer = serializer
- self.deserializer = deserializer
+ self.serde = serde or LegacyWrappingSerde(serializer, deserializer)
self.connect_timeout = connect_timeout
self.timeout = timeout
self.no_delay = no_delay
@@ -1011,8 +1014,7 @@ class PooledClient(object):
def _create_client(self):
client = Client(self.server,
- serializer=self.serializer,
- deserializer=self.deserializer,
+ serde=self.serde,
connect_timeout=self.connect_timeout,
timeout=self.timeout,
no_delay=self.no_delay,
diff --git a/pymemcache/client/hash.py b/pymemcache/client/hash.py
index cd147b2..d2a5489 100644
--- a/pymemcache/client/hash.py
+++ b/pymemcache/client/hash.py
@@ -19,6 +19,7 @@ class HashClient(object):
self,
servers,
hasher=RendezvousHash,
+ serde=None,
serializer=None,
deserializer=None,
connect_timeout=None,
@@ -81,6 +82,7 @@ class HashClient(object):
'no_delay': no_delay,
'socket_module': socket_module,
'key_prefix': key_prefix,
+ 'serde': serde,
'serializer': serializer,
'deserializer': deserializer,
'allow_unicode_keys': allow_unicode_keys,
diff --git a/pymemcache/serde.py b/pymemcache/serde.py
index 333ca3b..f2714f3 100644
--- a/pymemcache/serde.py
+++ b/pymemcache/serde.py
@@ -105,3 +105,55 @@ def python_memcache_deserializer(key, value, flags):
return None
return value
+
+
+class PickleSerde(object):
+ """
+ An object which implements the serialization/deserialization protocol for
+ :py:class:`pymemcache.client.base.Client` and its descendants using pickle_.
+
+ Serialization and deserialization are implemented as methods of this class.
+ To implement a custom serialization/deserialization method for pymemcache,
+ you should implement the same interface as the one provided by this object
+ -- :py:meth:`pymemcache.serde.PickleSerde.serialize` and
+ :py:meth:`pymemcache.serde.PickleSerde.deserialize`. Then,
+ pass your custom object to the pymemcache client object in place of
+ `PickleSerde`.
+
+ For more details on the serialization protocol, see the class documentation
+ for :py:class:`pymemcache.client.base.Client`
+
+ .. _pickle: https://docs.python.org/3/library/pickle.html
+ """
+ def __init__(self, pickle_version=DEFAULT_PICKLE_VERSION):
+ self._serialize_func = get_python_memcache_serializer(pickle_version)
+
+ def serialize(self, key, value):
+ return self._serialize_func(key, value)
+
+ def deserialize(self, key, value, flags):
+ return python_memcache_deserializer(key, value, flags)
+
+
+class LegacyWrappingSerde(object):
+ """
+ This class defines how to wrap legacy de/serialization functions into a
+ 'serde' object which implements '.serialize' and '.deserialize' methods.
+ It is used automatically by pymemcache.client.base.Client whenever no
+ 'serde' is given, wrapping its 'serializer' and 'deserializer' arguments.
+
+ The serializer_func and deserializer_func are expected to be None in the
+ case that they are missing.
+ """
+ def __init__(self, serializer_func, deserializer_func):
+ self.serialize = serializer_func or self._default_serialize
+ self.deserialize = deserializer_func or self._default_deserialize
+
+ def _default_serialize(self, key, value):
+ return value, 0
+
+ def _default_deserialize(self, key, value, flags):
+ return value
+
+
+pickle_serde = PickleSerde()
diff --git a/pymemcache/test/test_client.py b/pymemcache/test/test_client.py
index a84e9c5..bfee6e1 100644
--- a/pymemcache/test/test_client.py
+++ b/pymemcache/test/test_client.py
@@ -665,10 +665,14 @@ class TestClient(ClientTestMixin, unittest.TestCase):
assert result is False
def test_serialization(self):
- def _ser(key, value):
- return json.dumps(value), 0
+ class JsonSerde(object):
+ def serialize(self, key, value):
+ return json.dumps(value).encode('ascii'), 0
+
+ def deserialize(self, key, value, flags):
+ return json.loads(value.decode('ascii'))
- client = self.make_client([b'STORED\r\n'], serializer=_ser)
+ client = self.make_client([b'STORED\r\n'], serde=JsonSerde())
client.set('key', {'c': 'd'})
assert client.sock.send_bufs == [
b'set key 0 0 10 noreply\r\n{"c": "d"}\r\n'
@@ -1205,22 +1209,23 @@ class TestMockClient(ClientTestMixin, unittest.TestCase):
assert result == b'value'
def test_deserialization(self):
- def _serializer(key, value):
- if isinstance(value, dict):
- return json.dumps(value).encode('UTF-8'), 1
- return value, 0
+ class JsonSerde(object):
+ def serialize(self, key, value):
+ if isinstance(value, dict):
+ return json.dumps(value).encode('UTF-8'), 1
+ return value, 0
- def _deserializer(key, value, flags):
- if flags == 1:
- return json.loads(value.decode('UTF-8'))
- return value
+ def deserialize(self, key, value, flags):
+ if flags == 1:
+ return json.loads(value.decode('UTF-8'))
+ return value
client = self.make_client([
b'STORED\r\n',
b'VALUE key1 0 5\r\nhello\r\nEND\r\n',
b'STORED\r\n',
b'VALUE key2 0 18\r\n{"hello": "world"}\r\nEND\r\n',
- ], serializer=_serializer, deserializer=_deserializer)
+ ], serde=JsonSerde())
result = client.set(b'key1', b'hello', noreply=False)
result = client.get(b'key1')
diff --git a/pymemcache/test/test_integration.py b/pymemcache/test/test_integration.py
index ad7a6fd..9ba6f97 100644
--- a/pymemcache/test/test_integration.py
+++ b/pymemcache/test/test_integration.py
@@ -26,9 +26,8 @@ from pymemcache.exceptions import (
MemcacheClientError
)
from pymemcache.serde import (
- get_python_memcache_serializer,
- python_memcache_serializer,
- python_memcache_deserializer
+ PickleSerde,
+ pickle_serde
)
@@ -233,15 +232,16 @@ def test_misc(client_class, host, port, socket_module):
@pytest.mark.integration()
def test_serialization_deserialization(host, port, socket_module):
- def _ser(key, value):
- return json.dumps(value).encode('ascii'), 1
+ class JsonSerde(object):
+ def serialize(self, key, value):
+ return json.dumps(value).encode('ascii'), 1
- def _des(key, value, flags):
- if flags == 1:
- return json.loads(value.decode('ascii'))
- return value
+ def deserialize(self, key, value, flags):
+ if flags == 1:
+ return json.loads(value.decode('ascii'))
+ return value
- client = Client((host, port), serializer=_ser, deserializer=_des,
+ client = Client((host, port), serde=JsonSerde(),
socket_module=socket_module)
client.flush_all()
@@ -252,15 +252,14 @@ def test_serialization_deserialization(host, port, socket_module):
def serde_serialization_helper(client_class, host, port,
- socket_module, serializer):
+ socket_module, serde):
def check(value):
client.set(b'key', value, noreply=False)
result = client.get(b'key')
assert result == value
assert type(result) is type(value)
- client = client_class((host, port), serializer=serializer,
- deserializer=python_memcache_deserializer,
+ client = client_class((host, port), serde=serde,
socket_module=socket_module)
client.flush_all()
@@ -281,7 +280,7 @@ def serde_serialization_helper(client_class, host, port,
@pytest.mark.integration()
def test_serde_serialization(client_class, host, port, socket_module):
serde_serialization_helper(client_class, host, port,
- socket_module, python_memcache_serializer)
+ socket_module, pickle_serde)
@pytest.mark.integration()
@@ -289,7 +288,7 @@ def test_serde_serialization0(client_class, host, port, socket_module):
serde_serialization_helper(
client_class, host, port,
socket_module,
- get_python_memcache_serializer(pickle_version=0))
+ PickleSerde(pickle_version=0))
@pytest.mark.integration()
@@ -297,7 +296,7 @@ def test_serde_serialization2(client_class, host, port, socket_module):
serde_serialization_helper(
client_class, host, port,
socket_module,
- get_python_memcache_serializer(pickle_version=2))
+ PickleSerde(pickle_version=2))
@pytest.mark.integration()
diff --git a/pymemcache/test/test_serde.py b/pymemcache/test/test_serde.py
index e14568b..1850160 100644
--- a/pymemcache/test/test_serde.py
+++ b/pymemcache/test/test_serde.py
@@ -1,9 +1,9 @@
# -*- coding: utf-8 -*-
from unittest import TestCase
-from pymemcache.serde import (python_memcache_serializer,
- get_python_memcache_serializer,
- python_memcache_deserializer, FLAG_BYTES,
+from pymemcache.serde import (pickle_serde,
+ PickleSerde,
+ FLAG_BYTES,
FLAG_PICKLE, FLAG_INTEGER, FLAG_LONG, FLAG_TEXT)
import pytest
import six
@@ -22,10 +22,10 @@ class CustomInt(int):
@pytest.mark.unit()
class TestSerde(TestCase):
- serializer = python_memcache_serializer
+ serde = pickle_serde
def check(self, value, expected_flags):
- serialized, flags = self.serializer(b'key', value)
+ serialized, flags = self.serde.serialize(b'key', value)
assert flags == expected_flags
# pymemcache stores values as byte strings, so we immediately the value
@@ -33,7 +33,7 @@ class TestSerde(TestCase):
if not isinstance(serialized, six.binary_type):
serialized = six.text_type(serialized).encode('ascii')
- deserialized = python_memcache_deserializer(b'key', serialized, flags)
+ deserialized = self.serde.deserialize(b'key', serialized, flags)
assert deserialized == value
def test_bytes(self):
@@ -66,20 +66,21 @@ class TestSerde(TestCase):
@pytest.mark.unit()
class TestSerdePickleVersion0(TestCase):
- serializer = get_python_memcache_serializer(pickle_version=0)
+ serde = PickleSerde(pickle_version=0)
@pytest.mark.unit()
class TestSerdePickleVersion1(TestCase):
- serializer = get_python_memcache_serializer(pickle_version=1)
+ serde = PickleSerde(pickle_version=1)
@pytest.mark.unit()
class TestSerdePickleVersion2(TestCase):
- serializer = get_python_memcache_serializer(pickle_version=2)
+ serde = PickleSerde(pickle_version=2)
@pytest.mark.unit()
class TestSerdePickleVersionHighest(TestCase):
- serializer = get_python_memcache_serializer(
- pickle_version=pickle.HIGHEST_PROTOCOL)
+ serde = PickleSerde(
+ pickle_version=pickle.HIGHEST_PROTOCOL
+ )
diff --git a/pymemcache/test/utils.py b/pymemcache/test/utils.py
index 301e2e6..b5ff72c 100644
--- a/pymemcache/test/utils.py
+++ b/pymemcache/test/utils.py
@@ -10,6 +10,7 @@ import time
import six
from pymemcache.exceptions import MemcacheIllegalInputError
+from pymemcache.serde import LegacyWrappingSerde
class MockMemcacheClient(object):
@@ -20,8 +21,7 @@ class MockMemcacheClient(object):
def __init__(self,
server=None,
- serializer=None,
- deserializer=None,
+ serde=None,
connect_timeout=None,
timeout=None,
no_delay=False,
@@ -32,8 +32,7 @@ class MockMemcacheClient(object):
self._contents = {}
- self.serializer = serializer
- self.deserializer = deserializer
+ self.serde = serde or LegacyWrappingSerde(None, None)
self.allow_unicode_keys = allow_unicode_keys
# Unused, but present for interface compatibility
@@ -63,9 +62,7 @@ class MockMemcacheClient(object):
del self._contents[key]
return default
- if self.deserializer:
- return self.deserializer(key, value, flags)
- return value
+ return self.serde.deserialize(key, value, flags)
def get_many(self, keys):
out = {}
@@ -94,9 +91,7 @@ class MockMemcacheClient(object):
except (UnicodeEncodeError, UnicodeDecodeError):
raise MemcacheIllegalInputError
- flags = 0
- if self.serializer:
- value, flags = self.serializer(key, value)
+ value, flags = self.serde.serialize(key, value)
if expire:
expire += time.time()