diff options
-rw-r--r-- | .travis.yml | 40 | ||||
-rw-r--r-- | ChangeLog.rst | 28 | ||||
-rw-r--r-- | Makefile | 6 | ||||
-rw-r--r-- | README.rst | 2 | ||||
-rwxr-xr-x | docker/runtests.sh | 14 | ||||
-rw-r--r-- | msgpack/_packer.pyx | 95 | ||||
-rw-r--r-- | msgpack/_unpacker.pyx | 160 | ||||
-rw-r--r-- | msgpack/exceptions.py | 22 | ||||
-rw-r--r-- | msgpack/fallback.py | 584 | ||||
-rw-r--r-- | msgpack/pack.h | 2 | ||||
-rwxr-xr-x | setup.py | 7 | ||||
-rw-r--r-- | test/test_buffer.py | 9 | ||||
-rw-r--r-- | test/test_limits.py | 23 | ||||
-rw-r--r-- | test/test_memoryview.py | 112 | ||||
-rw-r--r-- | test/test_stricttype.py | 15 | ||||
-rw-r--r-- | tox.ini | 3 |
16 files changed, 754 insertions, 368 deletions
diff --git a/.travis.yml b/.travis.yml index 2ba2caa..b4396cb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,25 +1,39 @@ sudo: false +language: python +python: 3.5 cache: directories: - $HOME/.cache/pip -language: python -python: - - 3.5 branches: - only: - - master + only: + - master env: - - TOXENV=py26-c,py27-c - - TOXENV=py32-c,py33-c,py34-c,py35-c - - TOXENV=py26-pure,py27-pure - - TOXENV=py32-pure,py33-pure,py34-pure,py35-pure - - TOXENV=pypy-pure,pypy3-pure + - TOXENV=py27-c,py33-c,py34-c,py35-c + - TOXENV=py27-pure,py33-pure,py34-pure,py35-pure + - TOXENV=pypy-pure,pypy3-pure + +matrix: + include: + - sudo: required + services: + - docker + env: + - DOCKER_IMAGE=quay.io/pypa/manylinux1_i686 + install: + - pip install -U pip + - pip install cython + - cython --cplus msgpack/_packer.pyx msgpack/_unpacker.pyx + - docker pull $DOCKER_IMAGE + script: + - docker run --rm -v `pwd`:/io -w /io $DOCKER_IMAGE /io/docker/runtests.sh install: - - pip install tox - - pip install cython --install-option=--cython-with-refnanny --install-option=--no-cython-compile - - cython --cplus msgpack/_packer.pyx msgpack/_unpacker.pyx + - pip install -U pip + - pip install tox cython + - cython --cplus msgpack/_packer.pyx msgpack/_unpacker.pyx script: tox + +# vim: sw=2 ts=2 diff --git a/ChangeLog.rst b/ChangeLog.rst index 35535b4..396ccb7 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,7 +1,33 @@ -0.4.7 +0.5.0 ===== :release date: TBD +0.5 is important step toward 1.0. There are some deprecations. +Please read changes carefully. + +Changes +------- + +* Drop Python 2.6 and 3.2 support + +* Deprecate useless custom exceptions. Use ValueError instead of PackValueError, + Exception instead of PackException and UnpackException, etc... + See msgpack/exceptions.py + +* Add `strict_types` option to packer. It can be used to serialize subclass of + builtin types. For example, when packing object which type is subclass of dict, + `default()` is called. + +* Pure Python implementation supports packing memoryview object. + +Bugs fixed +---------- + + +0.4.7 +===== +:release date: 2016-01-25 + Bugs fixed ---------- @@ -20,3 +20,9 @@ python3: cython test: py.test test + +.PHONY: clean +clean: + rm -rf build + rm msgpack/*.so + rm -rf msgpack/__pycache__ @@ -6,7 +6,7 @@ MessagePack for Python :version: 0.4.6 :date: 2015-03-13 -.. image:: https://secure.travis-ci.org/msgpack/msgpack-python.png +.. image:: https://secure.travis-ci.org/msgpack/msgpack-python.svg :target: https://travis-ci.org/#!/msgpack/msgpack-python diff --git a/docker/runtests.sh b/docker/runtests.sh new file mode 100755 index 0000000..0d74802 --- /dev/null +++ b/docker/runtests.sh @@ -0,0 +1,14 @@ +#!/bin/bash +set -e -x + +for V in cp35-cp35m cp34-cp34m cp27-cp27m cp27-cp27mu; do + PYBIN=/opt/python/$V/bin + $PYBIN/python setup.py install + rm -rf build/ # Avoid lib build by narrow Python is used by wide python + $PYBIN/pip install pytest + pushd test # prevent importing msgpack package in current directory. + $PYBIN/python -c 'import sys; print(hex(sys.maxsize))' + $PYBIN/python -c 'from msgpack import _packer, _unpacker' + $PYBIN/py.test -v + popd +done diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 872465b..5c950ce 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -2,14 +2,16 @@ #cython: embedsignature=True from cpython cimport * -from libc.stdlib cimport * -from libc.string cimport * -from libc.limits cimport * -from msgpack.exceptions import PackValueError +from msgpack.exceptions import PackValueError, PackOverflowError from msgpack import ExtType +cdef extern from "Python.h": + + int PyMemoryView_Check(object obj) + + cdef extern from "pack.h": struct msgpack_packer: char* buf @@ -34,6 +36,7 @@ cdef extern from "pack.h": int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l) cdef int DEFAULT_RECURSE_LIMIT=511 +cdef size_t ITEM_LIMIT = (2**32)-1 cdef class Packer(object): @@ -63,6 +66,13 @@ cdef class Packer(object): :param bool use_bin_type: Use bin type introduced in msgpack spec 2.0 for bytes. It also enable str8 type for unicode. + :param bool strict_types: + If set to true, types will be checked to be exact. Derived classes + from serializeable types will not be serialized and will be + treated as unsupported type and forwarded to default. + Additionally tuples will not be serialized as lists. + This is useful when trying to implement accurate serialization + for python types. """ cdef msgpack_packer pk cdef object _default @@ -70,22 +80,23 @@ cdef class Packer(object): cdef object _berrors cdef char *encoding cdef char *unicode_errors + cdef bint strict_types cdef bool use_float cdef bint autoreset def __cinit__(self): cdef int buf_size = 1024*1024 - self.pk.buf = <char*> malloc(buf_size); + self.pk.buf = <char*> PyMem_Malloc(buf_size) if self.pk.buf == NULL: raise MemoryError("Unable to allocate internal buffer.") self.pk.buf_size = buf_size self.pk.length = 0 def __init__(self, default=None, encoding='utf-8', unicode_errors='strict', - use_single_float=False, bint autoreset=1, bint use_bin_type=0): - """ - """ + use_single_float=False, bint autoreset=1, bint use_bin_type=0, + bint strict_types=0): self.use_float = use_single_float + self.strict_types = strict_types self.autoreset = autoreset self.pk.use_bin_type = use_bin_type if default is not None: @@ -108,7 +119,8 @@ cdef class Packer(object): self.unicode_errors = PyBytes_AsString(self._berrors) def __dealloc__(self): - free(self.pk.buf); + PyMem_Free(self.pk.buf) + self.pk.buf = NULL cdef int _pack(self, object o, int nest_limit=DEFAULT_RECURSE_LIMIT) except -1: cdef long long llval @@ -121,6 +133,8 @@ cdef class Packer(object): cdef dict d cdef size_t L cdef int default_used = 0 + cdef bint strict_types = self.strict_types + cdef Py_buffer view if nest_limit < 0: raise PackValueError("recursion limit exceeded.") @@ -128,12 +142,12 @@ cdef class Packer(object): while True: if o is None: ret = msgpack_pack_nil(&self.pk) - elif isinstance(o, bool): + elif PyBool_Check(o) if strict_types else isinstance(o, bool): if o: ret = msgpack_pack_true(&self.pk) else: ret = msgpack_pack_false(&self.pk) - elif PyLong_Check(o): + elif PyLong_CheckExact(o) if strict_types else PyLong_Check(o): # PyInt_Check(long) is True for Python 3. # So we should test long before int. try: @@ -149,32 +163,32 @@ cdef class Packer(object): default_used = True continue else: - raise - elif PyInt_Check(o): + raise PackOverflowError("Integer value out of range") + elif PyInt_CheckExact(o) if strict_types else PyInt_Check(o): longval = o ret = msgpack_pack_long(&self.pk, longval) - elif PyFloat_Check(o): + elif PyFloat_CheckExact(o) if strict_types else PyFloat_Check(o): if self.use_float: fval = o ret = msgpack_pack_float(&self.pk, fval) else: dval = o ret = msgpack_pack_double(&self.pk, dval) - elif PyBytes_Check(o): + elif PyBytes_CheckExact(o) if strict_types else PyBytes_Check(o): L = len(o) - if L > (2**32)-1: - raise ValueError("bytes is too large") + if L > ITEM_LIMIT: + raise PackValueError("bytes is too large") rawval = o ret = msgpack_pack_bin(&self.pk, L) if ret == 0: ret = msgpack_pack_raw_body(&self.pk, rawval, L) - elif PyUnicode_Check(o): + elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o): if not self.encoding: raise TypeError("Can't encode unicode string: no encoding is specified") o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors) L = len(o) - if L > (2**32)-1: - raise ValueError("unicode string is too large") + if L > ITEM_LIMIT: + raise PackValueError("unicode string is too large") rawval = o ret = msgpack_pack_raw(&self.pk, L) if ret == 0: @@ -182,8 +196,8 @@ cdef class Packer(object): elif PyDict_CheckExact(o): d = <dict>o L = len(d) - if L > (2**32)-1: - raise ValueError("dict is too large") + if L > ITEM_LIMIT: + raise PackValueError("dict is too large") ret = msgpack_pack_map(&self.pk, L) if ret == 0: for k, v in d.iteritems(): @@ -191,10 +205,10 @@ cdef class Packer(object): if ret != 0: break ret = self._pack(v, nest_limit-1) if ret != 0: break - elif PyDict_Check(o): + elif not strict_types and PyDict_Check(o): L = len(o) - if L > (2**32)-1: - raise ValueError("dict is too large") + if L > ITEM_LIMIT: + raise PackValueError("dict is too large") ret = msgpack_pack_map(&self.pk, L) if ret == 0: for k, v in o.items(): @@ -202,24 +216,35 @@ cdef class Packer(object): if ret != 0: break ret = self._pack(v, nest_limit-1) if ret != 0: break - elif isinstance(o, ExtType): + elif type(o) is ExtType if strict_types else isinstance(o, ExtType): # This should be before Tuple because ExtType is namedtuple. longval = o.code rawval = o.data L = len(o.data) - if L > (2**32)-1: - raise ValueError("EXT data is too large") + if L > ITEM_LIMIT: + raise PackValueError("EXT data is too large") ret = msgpack_pack_ext(&self.pk, longval, L) ret = msgpack_pack_raw_body(&self.pk, rawval, L) - elif PyTuple_Check(o) or PyList_Check(o): + elif PyList_CheckExact(o) if strict_types else (PyTuple_Check(o) or PyList_Check(o)): L = len(o) - if L > (2**32)-1: - raise ValueError("list is too large") + if L > ITEM_LIMIT: + raise PackValueError("list is too large") ret = msgpack_pack_array(&self.pk, L) if ret == 0: for v in o: ret = self._pack(v, nest_limit-1) if ret != 0: break + elif PyMemoryView_Check(o): + if PyObject_GetBuffer(o, &view, PyBUF_SIMPLE) != 0: + raise PackValueError("could not get buffer for memoryview") + L = view.len + if L > ITEM_LIMIT: + PyBuffer_Release(&view); + raise PackValueError("memoryview is too large") + ret = msgpack_pack_bin(&self.pk, L) + if ret == 0: + ret = msgpack_pack_raw_body(&self.pk, <char*>view.buf, L) + PyBuffer_Release(&view); elif not default_used and self._default: o = self._default(o) default_used = 1 @@ -245,8 +270,8 @@ cdef class Packer(object): msgpack_pack_raw_body(&self.pk, data, len(data)) def pack_array_header(self, long long size): - if size > (2**32-1): - raise ValueError + if size > ITEM_LIMIT: + raise PackValueError cdef int ret = msgpack_pack_array(&self.pk, size) if ret == -1: raise MemoryError @@ -258,8 +283,8 @@ cdef class Packer(object): return buf def pack_map_header(self, long long size): - if size > (2**32-1): - raise ValueError + if size > ITEM_LIMIT: + raise PackValueError cdef int ret = msgpack_pack_map(&self.pk, size) if ret == -1: raise MemoryError diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 1aefc64..f6e06b0 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -1,21 +1,41 @@ # coding: utf-8 #cython: embedsignature=True -from cpython cimport * +from cpython.bytes cimport ( + PyBytes_AsString, + PyBytes_FromStringAndSize, + PyBytes_Size, +) +from cpython.buffer cimport ( + Py_buffer, + PyObject_CheckBuffer, + PyObject_GetBuffer, + PyBuffer_Release, + PyBuffer_IsContiguous, + PyBUF_READ, + PyBUF_SIMPLE, + PyBUF_FULL_RO, +) +from cpython.mem cimport PyMem_Malloc, PyMem_Free +from cpython.object cimport PyCallable_Check +from cpython.ref cimport Py_DECREF +from cpython.exc cimport PyErr_WarnEx + cdef extern from "Python.h": ctypedef struct PyObject cdef int PyObject_AsReadBuffer(object o, const void** buff, Py_ssize_t* buf_len) except -1 + object PyMemoryView_GetContiguous(object obj, int buffertype, char order) from libc.stdlib cimport * from libc.string cimport * from libc.limits cimport * from msgpack.exceptions import ( - BufferFull, - OutOfData, - UnpackValueError, - ExtraData, - ) + BufferFull, + OutOfData, + UnpackValueError, + ExtraData, +) from msgpack import ExtType @@ -97,6 +117,42 @@ cdef inline init_ctx(unpack_context *ctx, def default_read_extended_type(typecode, data): raise NotImplementedError("Cannot decode extended type with typecode=%d" % typecode) +cdef inline int get_data_from_buffer(object obj, + Py_buffer *view, + char **buf, + Py_ssize_t *buffer_len, + int *new_protocol) except 0: + cdef object contiguous + cdef Py_buffer tmp + if PyObject_CheckBuffer(obj): + new_protocol[0] = 1 + if PyObject_GetBuffer(obj, view, PyBUF_FULL_RO) == -1: + raise + if view.itemsize != 1: + PyBuffer_Release(view) + raise BufferError("cannot unpack from multi-byte object") + if PyBuffer_IsContiguous(view, 'A') == 0: + PyBuffer_Release(view) + # create a contiguous copy and get buffer + contiguous = PyMemoryView_GetContiguous(obj, PyBUF_READ, 'C') + PyObject_GetBuffer(contiguous, view, PyBUF_SIMPLE) + # view must hold the only reference to contiguous, + # so memory is freed when view is released + Py_DECREF(contiguous) + buffer_len[0] = view.len + buf[0] = <char*> view.buf + return 1 + else: + new_protocol[0] = 0 + if PyObject_AsReadBuffer(obj, <const void**> buf, buffer_len) == -1: + raise BufferError("could not get memoryview") + PyErr_WarnEx(RuntimeWarning, + "using old buffer interface to unpack %s; " + "this leads to unpacking errors if slicing is used and " + "will be removed in a future version" % type(obj), + 1) + return 1 + def unpackb(object packed, object object_hook=None, object list_hook=None, bint use_list=1, encoding=None, unicode_errors="strict", object_pairs_hook=None, ext_hook=ExtType, @@ -116,27 +172,34 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, cdef Py_ssize_t off = 0 cdef int ret - cdef char* buf + cdef Py_buffer view + cdef char* buf = NULL cdef Py_ssize_t buf_len cdef char* cenc = NULL cdef char* cerr = NULL + cdef int new_protocol = 0 - PyObject_AsReadBuffer(packed, <const void**>&buf, &buf_len) + get_data_from_buffer(packed, &view, &buf, &buf_len, &new_protocol) - if encoding is not None: - if isinstance(encoding, unicode): - encoding = encoding.encode('ascii') - cenc = PyBytes_AsString(encoding) + try: + if encoding is not None: + if isinstance(encoding, unicode): + encoding = encoding.encode('ascii') + cenc = PyBytes_AsString(encoding) - if unicode_errors is not None: - if isinstance(unicode_errors, unicode): - unicode_errors = unicode_errors.encode('ascii') - cerr = PyBytes_AsString(unicode_errors) + if unicode_errors is not None: + if isinstance(unicode_errors, unicode): + unicode_errors = unicode_errors.encode('ascii') + cerr = PyBytes_AsString(unicode_errors) + + init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook, + use_list, cenc, cerr, + max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len) + ret = unpack_construct(&ctx, buf, buf_len, &off) + finally: + if new_protocol: + PyBuffer_Release(&view); - init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook, - use_list, cenc, cerr, - max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len) - ret = unpack_construct(&ctx, buf, buf_len, &off) if ret == 1: obj = unpack_data(&ctx) if off < buf_len: @@ -256,7 +319,7 @@ cdef class Unpacker(object): self.buf = NULL def __dealloc__(self): - free(self.buf) + PyMem_Free(self.buf) self.buf = NULL def __init__(self, file_like=None, Py_ssize_t read_size=0, bint use_list=1, @@ -289,7 +352,7 @@ cdef class Unpacker(object): read_size = min(max_buffer_size, 1024**2) self.max_buffer_size = max_buffer_size self.read_size = read_size - self.buf = <char*>malloc(read_size) + self.buf = <char*>PyMem_Malloc(read_size) if self.buf == NULL: raise MemoryError("Unable to allocate internal buffer.") self.buf_size = read_size @@ -322,14 +385,20 @@ cdef class Unpacker(object): def feed(self, object next_bytes): """Append `next_bytes` to internal buffer.""" cdef Py_buffer pybuff + cdef int new_protocol = 0 + cdef char* buf + cdef Py_ssize_t buf_len + if self.file_like is not None: raise AssertionError( "unpacker.feed() is not be able to use with `file_like`.") - PyObject_GetBuffer(next_bytes, &pybuff, PyBUF_SIMPLE) + + get_data_from_buffer(next_bytes, &pybuff, &buf, &buf_len, &new_protocol) try: - self.append_buffer(<char*>pybuff.buf, pybuff.len) + self.append_buffer(buf, buf_len) finally: - PyBuffer_Release(&pybuff) + if new_protocol: + PyBuffer_Release(&pybuff) cdef append_buffer(self, void* _buf, Py_ssize_t _buf_len): cdef: @@ -352,13 +421,13 @@ cdef class Unpacker(object): if new_size > self.max_buffer_size: raise BufferFull new_size = min(new_size*2, self.max_buffer_size) - new_buf = <char*>malloc(new_size) + new_buf = <char*>PyMem_Malloc(new_size) if new_buf == NULL: # self.buf still holds old buffer and will be freed during # obj destruction raise MemoryError("Unable to enlarge internal buffer.") memcpy(new_buf, buf + head, tail - head) - free(buf) + PyMem_Free(buf) buf = new_buf buf_size = new_size @@ -397,24 +466,27 @@ cdef class Unpacker(object): else: raise OutOfData("No more data to unpack.") - ret = execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head) - if write_bytes is not None: - write_bytes(PyBytes_FromStringAndSize(self.buf + prev_head, self.buf_head - prev_head)) - - if ret == 1: - obj = unpack_data(&self.ctx) - unpack_init(&self.ctx) - return obj - elif ret == 0: - if self.file_like is not None: - self.read_from_file() - continue - if iter: - raise StopIteration("No more data to unpack.") + try: + ret = execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head) + if write_bytes is not None: + write_bytes(PyBytes_FromStringAndSize(self.buf + prev_head, self.buf_head - prev_head)) + + if ret == 1: + obj = unpack_data(&self.ctx) + unpack_init(&self.ctx) + return obj + elif ret == 0: + if self.file_like is not None: + self.read_from_file() + continue + if iter: + raise StopIteration("No more data to unpack.") + else: + raise OutOfData("No more data to unpack.") else: - raise OutOfData("No more data to unpack.") - else: - raise ValueError("Unpack failed: error = %d" % (ret,)) + raise UnpackValueError("Unpack failed: error = %d" % (ret,)) + except ValueError as e: + raise UnpackValueError(e) def read_bytes(self, Py_ssize_t nbytes): """Read a specified number of raw bytes from the stream""" diff --git a/msgpack/exceptions.py b/msgpack/exceptions.py index f7678f1..9766881 100644 --- a/msgpack/exceptions.py +++ b/msgpack/exceptions.py @@ -1,5 +1,5 @@ class UnpackException(Exception): - pass + """Deprecated. Use Exception instead to catch all exception during unpacking.""" class BufferFull(UnpackException): @@ -11,10 +11,10 @@ class OutOfData(UnpackException): class UnpackValueError(UnpackException, ValueError): - pass + """Deprecated. Use ValueError instead.""" -class ExtraData(ValueError): +class ExtraData(UnpackValueError): def __init__(self, unpacked, extra): self.unpacked = unpacked self.extra = extra @@ -22,8 +22,20 @@ class ExtraData(ValueError): def __str__(self): return "unpack(b) received extra data." + class PackException(Exception): - pass + """Deprecated. Use Exception instead to catch all exception during packing.""" + class PackValueError(PackException, ValueError): - pass + """PackValueError is raised when type of input data is supported but it's value is unsupported. + + Deprecated. Use ValueError instead. + """ + + +class PackOverflowError(PackValueError, OverflowError): + """PackOverflowError is raised when integer value is out of range of msgpack support [-2**31, 2**32). + + Deprecated. Use ValueError instead. + """ diff --git a/msgpack/fallback.py b/msgpack/fallback.py index f682611..11087eb 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -1,8 +1,8 @@ """Fallback pure Python implementation of msgpack""" import sys -import array import struct +import warnings if sys.version_info[0] == 3: PY3 = True @@ -36,6 +36,8 @@ if hasattr(sys, 'pypy_version_info'): else: self.builder = StringBuilder() def write(self, s): + if isinstance(s, memoryview): + s = s.tobytes() self.builder.append(s) def getvalue(self): return self.builder.build() @@ -44,11 +46,13 @@ else: from io import BytesIO as StringIO newlist_hint = lambda size: [] + from msgpack.exceptions import ( BufferFull, OutOfData, UnpackValueError, PackValueError, + PackOverflowError, ExtraData) from msgpack import ExtType @@ -69,6 +73,31 @@ TYPE_EXT = 5 DEFAULT_RECURSE_LIMIT = 511 +def _check_type_strict(obj, t, type=type, tuple=tuple): + if type(t) is tuple: + return type(obj) in t + else: + return type(obj) is t + + +def _get_data_from_buffer(obj): + try: + view = memoryview(obj) + except TypeError: + # try to use legacy buffer protocol if 2.7, otherwise re-raise + if not PY3: + view = memoryview(buffer(obj)) + warnings.warn("using old buffer interface to unpack %s; " + "this leads to unpacking errors if slicing is used and " + "will be removed in a future version" % type(obj), + RuntimeWarning) + else: + raise + if view.itemsize != 1: + raise ValueError("cannot unpack from multi-byte object") + return view + + def unpack(stream, **kwargs): """ Unpack an object from `stream`. @@ -76,11 +105,8 @@ def unpack(stream, **kwargs): Raises `ExtraData` when `packed` contains extra bytes. See :class:`Unpacker` for options. """ - unpacker = Unpacker(stream, **kwargs) - ret = unpacker._fb_unpack() - if unpacker._fb_got_extradata(): - raise ExtraData(ret, unpacker._fb_get_extradata()) - return ret + data = stream.read() + return unpackb(data, **kwargs) def unpackb(packed, **kwargs): @@ -93,11 +119,11 @@ def unpackb(packed, **kwargs): unpacker = Unpacker(None, **kwargs) unpacker.feed(packed) try: - ret = unpacker._fb_unpack() + ret = unpacker._unpack() except OutOfData: raise UnpackValueError("Data is not enough.") - if unpacker._fb_got_extradata(): - raise ExtraData(ret, unpacker._fb_get_extradata()) + if unpacker._got_extradata(): + raise ExtraData(ret, unpacker._get_extradata()) return ret @@ -111,7 +137,7 @@ class Unpacker(object): If specified, unpacker reads serialized data from it and :meth:`feed()` is not usable. :param int read_size: - Used as `file_like.read(read_size)`. (default: `min(1024**2, max_buffer_size)`) + Used as `file_like.read(read_size)`. (default: `min(16*1024, max_buffer_size)`) :param bool use_list: If true, unpack msgpack array to Python list. @@ -181,21 +207,17 @@ class Unpacker(object): max_map_len=2147483647, max_ext_len=2147483647): if file_like is None: - self._fb_feeding = True + self._feeding = True else: if not callable(file_like.read): raise TypeError("`file_like.read` must be callable") self.file_like = file_like - self._fb_feeding = False + self._feeding = False #: array of bytes feeded. - self._fb_buffers = [] - #: Which buffer we currently reads - self._fb_buf_i = 0 + self._buffer = bytearray() #: Which position we currently reads - self._fb_buf_o = 0 - #: Total size of _fb_bufferes - self._fb_buf_n = 0 + self._buff_i = 0 # When Unpacker is used as an iterable, between the calls to next(), # the buffer is not "consumed" completely, for efficiency sake. @@ -203,13 +225,13 @@ class Unpacker(object): # the correct moments, we have to keep track of how sloppy we were. # Furthermore, when the buffer is incomplete (that is: in the case # we raise an OutOfData) we need to rollback the buffer to the correct - # state, which _fb_slopiness records. - self._fb_sloppiness = 0 + # state, which _buf_checkpoint records. + self._buf_checkpoint = 0 self._max_buffer_size = max_buffer_size or 2**31-1 if read_size > self._max_buffer_size: raise ValueError("read_size must be smaller than max_buffer_size") - self._read_size = read_size or min(self._max_buffer_size, 4096) + self._read_size = read_size or min(self._max_buffer_size, 16*1024) self._encoding = encoding self._unicode_errors = unicode_errors self._use_list = use_list @@ -236,135 +258,91 @@ class Unpacker(object): raise TypeError("`ext_hook` is not callable") def feed(self, next_bytes): - if isinstance(next_bytes, array.array): - next_bytes = next_bytes.tostring() - elif isinstance(next_bytes, bytearray): - next_bytes = bytes(next_bytes) - assert self._fb_feeding - if (self._fb_buf_n + len(next_bytes) - self._fb_sloppiness - > self._max_buffer_size): + assert self._feeding + view = _get_data_from_buffer(next_bytes) + if (len(self._buffer) - self._buff_i + len(view) > self._max_buffer_size): raise BufferFull - self._fb_buf_n += len(next_bytes) - self._fb_buffers.append(next_bytes) - - def _fb_sloppy_consume(self): - """ Gets rid of some of the used parts of the buffer. """ - if self._fb_buf_i: - for i in xrange(self._fb_buf_i): - self._fb_buf_n -= len(self._fb_buffers[i]) - self._fb_buffers = self._fb_buffers[self._fb_buf_i:] - self._fb_buf_i = 0 - if self._fb_buffers: - self._fb_sloppiness = self._fb_buf_o - else: - self._fb_sloppiness = 0 + self._buffer += view - def _fb_consume(self): + def _consume(self): """ Gets rid of the used parts of the buffer. """ - if self._fb_buf_i: - for i in xrange(self._fb_buf_i): - self._fb_buf_n -= len(self._fb_buffers[i]) - self._fb_buffers = self._fb_buffers[self._fb_buf_i:] - self._fb_buf_i = 0 - if self._fb_buffers: - self._fb_buffers[0] = self._fb_buffers[0][self._fb_buf_o:] - self._fb_buf_n -= self._fb_buf_o - else: - self._fb_buf_n = 0 - self._fb_buf_o = 0 - self._fb_sloppiness = 0 - - def _fb_got_extradata(self): - if self._fb_buf_i != len(self._fb_buffers): - return True - if self._fb_feeding: - return False - if not self.file_like: - return False - if self.file_like.read(1): - return True - return False + self._buf_checkpoint = self._buff_i - def __iter__(self): - return self + def _got_extradata(self): + return self._buff_i < len(self._buffer) + + def _get_extradata(self): + return self._buffer[self._buff_i:] def read_bytes(self, n): - return self._fb_read(n) - - def _fb_rollback(self): - self._fb_buf_i = 0 - self._fb_buf_o = self._fb_sloppiness - - def _fb_get_extradata(self): - bufs = self._fb_buffers[self._fb_buf_i:] - if bufs: - bufs[0] = bufs[0][self._fb_buf_o:] - return b''.join(bufs) - - def _fb_read(self, n, write_bytes=None): - buffs = self._fb_buffers - # We have a redundant codepath for the most common case, such that - # pypy optimizes it properly. This is the case that the read fits - # in the current buffer. - if (write_bytes is None and self._fb_buf_i < len(buffs) and - self._fb_buf_o + n < len(buffs[self._fb_buf_i])): - self._fb_buf_o += n - return buffs[self._fb_buf_i][self._fb_buf_o - n:self._fb_buf_o] - - # The remaining cases. - ret = b'' - while len(ret) != n: - sliced = n - len(ret) - if self._fb_buf_i == len(buffs): - if self._fb_feeding: - break - to_read = sliced - if self._read_size > to_read: - to_read = self._read_size - tmp = self.file_like.read(to_read) - if not tmp: - break - buffs.append(tmp) - self._fb_buf_n += len(tmp) - continue - ret += buffs[self._fb_buf_i][self._fb_buf_o:self._fb_buf_o + sliced] - self._fb_buf_o += sliced - if self._fb_buf_o >= len(buffs[self._fb_buf_i]): - self._fb_buf_o = 0 - self._fb_buf_i += 1 - if len(ret) != n: - self._fb_rollback() + return self._read(n) + + def _read(self, n): + # (int) -> bytearray + self._reserve(n) + i = self._buff_i + self._buff_i = i+n + return self._buffer[i:i+n] + + def _reserve(self, n): + remain_bytes = len(self._buffer) - self._buff_i - n + + # Fast path: buffer has n bytes already + if remain_bytes >= 0: + return + + if self._feeding: + self._buff_i = self._buf_checkpoint raise OutOfData - if write_bytes is not None: - write_bytes(ret) - return ret - def _read_header(self, execute=EX_CONSTRUCT, write_bytes=None): + # Strip buffer before checkpoint before reading file. + if self._buf_checkpoint > 0: + del self._buffer[:self._buf_checkpoint] + self._buff_i -= self._buf_checkpoint + self._buf_checkpoint = 0 + + # Read from file + remain_bytes = -remain_bytes + while remain_bytes > 0: + to_read_bytes = max(self._read_size, remain_bytes) + read_data = self.file_like.read(to_read_bytes) + if not read_data: + break + assert isinstance(read_data, bytes) + self._buffer += read_data + remain_bytes -= len(read_data) + + if len(self._buffer) < n + self._buff_i: + self._buff_i = 0 # rollback + raise OutOfData + + def _read_header(self, execute=EX_CONSTRUCT): typ = TYPE_IMMEDIATE n = 0 obj = None - c = self._fb_read(1, write_bytes) - b = ord(c) - if b & 0b10000000 == 0: + self._reserve(1) + b = self._buffer[self._buff_i] + self._buff_i += 1 + if b & 0b10000000 == 0: obj = b elif b & 0b11100000 == 0b11100000: - obj = struct.unpack("b", c)[0] + obj = -1 - (b ^ 0xff) elif b & 0b11100000 == 0b10100000: n = b & 0b00011111 - obj = self._fb_read(n, write_bytes) typ = TYPE_RAW if n > self._max_str_len: - raise ValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) + raise UnpackValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) + obj = self._read(n) elif b & 0b11110000 == 0b10010000: n = b & 0b00001111 typ = TYPE_ARRAY if n > self._max_array_len: - raise ValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) + raise UnpackValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) elif b & 0b11110000 == 0b10000000: n = b & 0b00001111 typ = TYPE_MAP if n > self._max_map_len: - raise ValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) + raise UnpackValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) elif b == 0xc0: obj = None elif b == 0xc2: @@ -373,129 +351,185 @@ class Unpacker(object): obj = True elif b == 0xc4: typ = TYPE_BIN - n = struct.unpack("B", self._fb_read(1, write_bytes))[0] + self._reserve(1) + n = self._buffer[self._buff_i] + self._buff_i += 1 if n > self._max_bin_len: - raise ValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) - obj = self._fb_read(n, write_bytes) + raise UnpackValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) + obj = self._read(n) elif b == 0xc5: typ = TYPE_BIN - n = struct.unpack(">H", self._fb_read(2, write_bytes))[0] + self._reserve(2) + n = struct.unpack_from(">H", self._buffer, self._buff_i)[0] + self._buff_i += 2 if n > self._max_bin_len: - raise ValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) - obj = self._fb_read(n, write_bytes) + raise UnpackValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) + obj = self._read(n) elif b == 0xc6: typ = TYPE_BIN - n = struct.unpack(">I", self._fb_read(4, write_bytes))[0] + self._reserve(4) + n = struct.unpack_from(">I", self._buffer, self._buff_i)[0] + self._buff_i += 4 if n > self._max_bin_len: - raise ValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) - obj = self._fb_read(n, write_bytes) + raise UnpackValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) + obj = self._read(n) elif b == 0xc7: # ext 8 typ = TYPE_EXT - L, n = struct.unpack('Bb', self._fb_read(2, write_bytes)) + self._reserve(2) + L, n = struct.unpack_from('Bb', self._buffer, self._buff_i) + self._buff_i += 2 if L > self._max_ext_len: - raise ValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) - obj = self._fb_read(L, write_bytes) + raise UnpackValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) + obj = self._read(L) elif b == 0xc8: # ext 16 typ = TYPE_EXT - L, n = struct.unpack('>Hb', self._fb_read(3, write_bytes)) + self._reserve(3) + L, n = struct.unpack_from('>Hb', self._buffer, self._buff_i) + self._buff_i += 3 if L > self._max_ext_len: - raise ValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) - obj = self._fb_read(L, write_bytes) + raise UnpackValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) + obj = self._read(L) elif b == 0xc9: # ext 32 typ = TYPE_EXT - L, n = struct.unpack('>Ib', self._fb_read(5, write_bytes)) + self._reserve(5) + L, n = struct.unpack_from('>Ib', self._buffer, self._buff_i) + self._buff_i += 5 if L > self._max_ext_len: - raise ValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) - obj = self._fb_read(L, write_bytes) + raise UnpackValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) + obj = self._read(L) elif b == 0xca: - obj = struct.unpack(">f", self._fb_read(4, write_bytes))[0] + self._reserve(4) + obj = struct.unpack_from(">f", self._buffer, self._buff_i)[0] + self._buff_i += 4 elif b == 0xcb: - obj = struct.unpack(">d", self._fb_read(8, write_bytes))[0] + self._reserve(8) + obj = struct.unpack_from(">d", self._buffer, self._buff_i)[0] + self._buff_i += 8 elif b == 0xcc: - obj = struct.unpack("B", self._fb_read(1, write_bytes))[0] + self._reserve(1) + obj = self._buffer[self._buff_i] + self._buff_i += 1 elif b == 0xcd: - obj = struct.unpack(">H", self._fb_read(2, write_bytes))[0] + self._reserve(2) + obj = struct.unpack_from(">H", self._buffer, self._buff_i)[0] + self._buff_i += 2 elif b == 0xce: - obj = struct.unpack(">I", self._fb_read(4, write_bytes))[0] + self._reserve(4) + obj = struct.unpack_from(">I", self._buffer, self._buff_i)[0] + self._buff_i += 4 elif b == 0xcf: - obj = struct.unpack(">Q", self._fb_read(8, write_bytes))[0] + self._reserve(8) + obj = struct.unpack_from(">Q", self._buffer, self._buff_i)[0] + self._buff_i += 8 elif b == 0xd0: - obj = struct.unpack("b", self._fb_read(1, write_bytes))[0] + self._reserve(1) + obj = struct.unpack_from("b", self._buffer, self._buff_i)[0] + self._buff_i += 1 elif b == 0xd1: - obj = struct.unpack(">h", self._fb_read(2, write_bytes))[0] + self._reserve(2) + obj = struct.unpack_from(">h", self._buffer, self._buff_i)[0] + self._buff_i += 2 elif b == 0xd2: - obj = struct.unpack(">i", self._fb_read(4, write_bytes))[0] + self._reserve(4) + obj = struct.unpack_from(">i", self._buffer, self._buff_i)[0] + self._buff_i += 4 elif b == 0xd3: - obj = struct.unpack(">q", self._fb_read(8, write_bytes))[0] + self._reserve(8) + obj = struct.unpack_from(">q", self._buffer, self._buff_i)[0] + self._buff_i += 8 elif b == 0xd4: # fixext 1 typ = TYPE_EXT if self._max_ext_len < 1: - raise ValueError("%s exceeds max_ext_len(%s)" % (1, self._max_ext_len)) - n, obj = struct.unpack('b1s', self._fb_read(2, write_bytes)) + raise UnpackValueError("%s exceeds max_ext_len(%s)" % (1, self._max_ext_len)) + self._reserve(2) + n, obj = struct.unpack_from("b1s", self._buffer, self._buff_i) + self._buff_i += 2 elif b == 0xd5: # fixext 2 typ = TYPE_EXT if self._max_ext_len < 2: - raise ValueError("%s exceeds max_ext_len(%s)" % (2, self._max_ext_len)) - n, obj = struct.unpack('b2s', self._fb_read(3, write_bytes)) + raise UnpackValueError("%s exceeds max_ext_len(%s)" % (2, self._max_ext_len)) + self._reserve(3) + n, obj = struct.unpack_from("b2s", self._buffer, self._buff_i) + self._buff_i += 3 elif b == 0xd6: # fixext 4 typ = TYPE_EXT if self._max_ext_len < 4: - raise ValueError("%s exceeds max_ext_len(%s)" % (4, self._max_ext_len)) - n, obj = struct.unpack('b4s', self._fb_read(5, write_bytes)) + raise UnpackValueError("%s exceeds max_ext_len(%s)" % (4, self._max_ext_len)) + self._reserve(5) + n, obj = struct.unpack_from("b4s", self._buffer, self._buff_i) + self._buff_i += 5 elif b == 0xd7: # fixext 8 typ = TYPE_EXT if self._max_ext_len < 8: - raise ValueError("%s exceeds max_ext_len(%s)" % (8, self._max_ext_len)) - n, obj = struct.unpack('b8s', self._fb_read(9, write_bytes)) + raise UnpackValueError("%s exceeds max_ext_len(%s)" % (8, self._max_ext_len)) + self._reserve(9) + n, obj = struct.unpack_from("b8s", self._buffer, self._buff_i) + self._buff_i += 9 elif b == 0xd8: # fixext 16 typ = TYPE_EXT if self._max_ext_len < 16: - raise ValueError("%s exceeds max_ext_len(%s)" % (16, self._max_ext_len)) - n, obj = struct.unpack('b16s', self._fb_read(17, write_bytes)) + raise UnpackValueError("%s exceeds max_ext_len(%s)" % (16, self._max_ext_len)) + self._reserve(17) + n, obj = struct.unpack_from("b16s", self._buffer, self._buff_i) + self._buff_i += 17 elif b == 0xd9: typ = TYPE_RAW - n = struct.unpack("B", self._fb_read(1, write_bytes))[0] + self._reserve(1) + n = self._buffer[self._buff_i] + self._buff_i += 1 if n > self._max_str_len: - raise ValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) - obj = self._fb_read(n, write_bytes) + raise UnpackValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) + obj = self._read(n) elif b == 0xda: typ = TYPE_RAW - n = struct.unpack(">H", self._fb_read(2, write_bytes))[0] + self._reserve(2) + n, = struct.unpack_from(">H", self._buffer, self._buff_i) + self._buff_i += 2 if n > self._max_str_len: - raise ValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) - obj = self._fb_read(n, write_bytes) + raise UnpackValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) + obj = self._read(n) elif b == 0xdb: typ = TYPE_RAW - n = struct.unpack(">I", self._fb_read(4, write_bytes))[0] + self._reserve(4) + n, = struct.unpack_from(">I", self._buffer, self._buff_i) + self._buff_i += 4 if n > self._max_str_len: - raise ValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) - obj = self._fb_read(n, write_bytes) + raise UnpackValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) + obj = self._read(n) elif b == 0xdc: - n = struct.unpack(">H", self._fb_read(2, write_bytes))[0] - if n > self._max_array_len: - raise ValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) typ = TYPE_ARRAY - elif b == 0xdd: - n = struct.unpack(">I", self._fb_read(4, write_bytes))[0] + self._reserve(2) + n, = struct.unpack_from(">H", self._buffer, self._buff_i) + self._buff_i += 2 if n > self._max_array_len: - raise ValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) + raise UnpackValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) + elif b == 0xdd: typ = TYPE_ARRAY + self._reserve(4) + n, = struct.unpack_from(">I", self._buffer, self._buff_i) + self._buff_i += 4 + if n > self._max_array_len: + raise UnpackValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) elif b == 0xde: - n = struct.unpack(">H", self._fb_read(2, write_bytes))[0] + self._reserve(2) + n, = struct.unpack_from(">H", self._buffer, self._buff_i) + self._buff_i += 2 if n > self._max_map_len: - raise ValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) + raise UnpackValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) typ = TYPE_MAP elif b == 0xdf: - n = struct.unpack(">I", self._fb_read(4, write_bytes))[0] + self._reserve(4) + n, = struct.unpack_from(">I", self._buffer, self._buff_i) + self._buff_i += 4 if n > self._max_map_len: - raise ValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) + raise UnpackValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) typ = TYPE_MAP else: raise UnpackValueError("Unknown header: 0x%x" % b) return typ, n, obj - def _fb_unpack(self, execute=EX_CONSTRUCT, write_bytes=None): - typ, n, obj = self._read_header(execute, write_bytes) + def _unpack(self, execute=EX_CONSTRUCT): + typ, n, obj = self._read_header(execute) if execute == EX_READ_ARRAY_HEADER: if typ != TYPE_ARRAY: @@ -510,11 +544,11 @@ class Unpacker(object): if execute == EX_SKIP: for i in xrange(n): # TODO check whether we need to call `list_hook` - self._fb_unpack(EX_SKIP, write_bytes) + self._unpack(EX_SKIP) return ret = newlist_hint(n) for i in xrange(n): - ret.append(self._fb_unpack(EX_CONSTRUCT, write_bytes)) + ret.append(self._unpack(EX_CONSTRUCT)) if self._list_hook is not None: ret = self._list_hook(ret) # TODO is the interaction between `list_hook` and `use_list` ok? @@ -523,19 +557,19 @@ class Unpacker(object): if execute == EX_SKIP: for i in xrange(n): # TODO check whether we need to call hooks - self._fb_unpack(EX_SKIP, write_bytes) - self._fb_unpack(EX_SKIP, write_bytes) + self._unpack(EX_SKIP) + self._unpack(EX_SKIP) return if self._object_pairs_hook is not None: ret = self._object_pairs_hook( - (self._fb_unpack(EX_CONSTRUCT, write_bytes), - self._fb_unpack(EX_CONSTRUCT, write_bytes)) + (self._unpack(EX_CONSTRUCT), + self._unpack(EX_CONSTRUCT)) for _ in xrange(n)) else: ret = {} for _ in xrange(n): - key = self._fb_unpack(EX_CONSTRUCT, write_bytes) - ret[key] = self._fb_unpack(EX_CONSTRUCT, write_bytes) + key = self._unpack(EX_CONSTRUCT) + ret[key] = self._unpack(EX_CONSTRUCT) if self._object_hook is not None: ret = self._object_hook(ret) return ret @@ -544,41 +578,55 @@ class Unpacker(object): if typ == TYPE_RAW: if self._encoding is not None: obj = obj.decode(self._encoding, self._unicode_errors) + else: + obj = bytes(obj) return obj if typ == TYPE_EXT: - return self._ext_hook(n, obj) + return self._ext_hook(n, bytes(obj)) if typ == TYPE_BIN: - return obj + return bytes(obj) assert typ == TYPE_IMMEDIATE return obj - def next(self): + def __iter__(self): + return self + + def __next__(self): try: - ret = self._fb_unpack(EX_CONSTRUCT, None) - self._fb_sloppy_consume() + ret = self._unpack(EX_CONSTRUCT) + self._consume() return ret except OutOfData: - self._fb_consume() + self._consume() raise StopIteration - __next__ = next + + next = __next__ def skip(self, write_bytes=None): - self._fb_unpack(EX_SKIP, write_bytes) - self._fb_consume() + self._unpack(EX_SKIP) + if write_bytes is not None: + write_bytes(self._buffer[self._buf_checkpoint:self._buff_i]) + self._consume() def unpack(self, write_bytes=None): - ret = self._fb_unpack(EX_CONSTRUCT, write_bytes) - self._fb_consume() + ret = self._unpack(EX_CONSTRUCT) + if write_bytes is not None: + write_bytes(self._buffer[self._buf_checkpoint:self._buff_i]) + self._consume() return ret def read_array_header(self, write_bytes=None): - ret = self._fb_unpack(EX_READ_ARRAY_HEADER, write_bytes) - self._fb_consume() + ret = self._unpack(EX_READ_ARRAY_HEADER) + if write_bytes is not None: + write_bytes(self._buffer[self._buf_checkpoint:self._buff_i]) + self._consume() return ret def read_map_header(self, write_bytes=None): - ret = self._fb_unpack(EX_READ_MAP_HEADER, write_bytes) - self._fb_consume() + ret = self._unpack(EX_READ_MAP_HEADER) + if write_bytes is not None: + write_bytes(self._buffer[self._buf_checkpoint:self._buff_i]) + self._consume() return ret @@ -609,9 +657,18 @@ class Packer(object): :param bool use_bin_type: Use bin type introduced in msgpack spec 2.0 for bytes. It also enable str8 type for unicode. + :param bool strict_types: + If set to true, types will be checked to be exact. Derived classes + from serializeable types will not be serialized and will be + treated as unsupported type and forwarded to default. + Additionally tuples will not be serialized as lists. + This is useful when trying to implement accurate serialization + for python types. """ def __init__(self, default=None, encoding='utf-8', unicode_errors='strict', - use_single_float=False, autoreset=True, use_bin_type=False): + use_single_float=False, autoreset=True, use_bin_type=False, + strict_types=False): + self._strict_types = strict_types self._use_float = use_single_float self._autoreset = autoreset self._use_bin_type = use_bin_type @@ -623,18 +680,24 @@ class Packer(object): raise TypeError("default must be callable") self._default = default - def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, isinstance=isinstance): + def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, + check=isinstance, check_type_strict=_check_type_strict): default_used = False + if self._strict_types: + check = check_type_strict + list_types = list + else: + list_types = (list, tuple) while True: if nest_limit < 0: raise PackValueError("recursion limit exceeded") if obj is None: return self._buffer.write(b"\xc0") - if isinstance(obj, bool): + if check(obj, bool): if obj: return self._buffer.write(b"\xc3") return self._buffer.write(b"\xc2") - if isinstance(obj, int_types): + if check(obj, int_types): if 0 <= obj < 0x80: return self._buffer.write(struct.pack("B", obj)) if -0x20 <= obj < 0: @@ -659,42 +722,35 @@ class Packer(object): obj = self._default(obj) default_used = True continue - raise PackValueError("Integer value out of range") - if self._use_bin_type and isinstance(obj, bytes): + raise PackOverflowError("Integer value out of range") + if check(obj, bytes): n = len(obj) - if n <= 0xff: - self._buffer.write(struct.pack('>BB', 0xc4, n)) - elif n <= 0xffff: - self._buffer.write(struct.pack(">BH", 0xc5, n)) - elif n <= 0xffffffff: - self._buffer.write(struct.pack(">BI", 0xc6, n)) - else: + if n >= 2**32: raise PackValueError("Bytes is too large") + self._pack_bin_header(n) return self._buffer.write(obj) - if isinstance(obj, (Unicode, bytes)): - if isinstance(obj, Unicode): - if self._encoding is None: - raise TypeError( - "Can't encode unicode string: " - "no encoding is specified") - obj = obj.encode(self._encoding, self._unicode_errors) + if check(obj, Unicode): + if self._encoding is None: + raise TypeError( + "Can't encode unicode string: " + "no encoding is specified") + obj = obj.encode(self._encoding, self._unicode_errors) n = len(obj) - if n <= 0x1f: - self._buffer.write(struct.pack('B', 0xa0 + n)) - elif self._use_bin_type and n <= 0xff: - self._buffer.write(struct.pack('>BB', 0xd9, n)) - elif n <= 0xffff: - self._buffer.write(struct.pack(">BH", 0xda, n)) - elif n <= 0xffffffff: - self._buffer.write(struct.pack(">BI", 0xdb, n)) - else: + if n >= 2**32: raise PackValueError("String is too large") + self._pack_raw_header(n) + return self._buffer.write(obj) + if check(obj, memoryview): + n = len(obj) * obj.itemsize + if n >= 2**32: + raise PackValueError("Memoryview is too large") + self._pack_bin_header(n) return self._buffer.write(obj) - if isinstance(obj, float): + if check(obj, float): if self._use_float: return self._buffer.write(struct.pack(">Bf", 0xca, obj)) return self._buffer.write(struct.pack(">Bd", 0xcb, obj)) - if isinstance(obj, ExtType): + if check(obj, ExtType): code = obj.code data = obj.data assert isinstance(code, int) @@ -719,14 +775,14 @@ class Packer(object): self._buffer.write(struct.pack("b", code)) self._buffer.write(data) return - if isinstance(obj, (list, tuple)): + if check(obj, list_types): n = len(obj) - self._fb_pack_array_header(n) + self._pack_array_header(n) for i in xrange(n): self._pack(obj[i], nest_limit - 1) return - if isinstance(obj, dict): - return self._fb_pack_map_pairs(len(obj), dict_iteritems(obj), + if check(obj, dict): + return self._pack_map_pairs(len(obj), dict_iteritems(obj), nest_limit - 1) if not default_used and self._default is not None: obj = self._default(obj) @@ -744,7 +800,7 @@ class Packer(object): return ret def pack_map_pairs(self, pairs): - self._fb_pack_map_pairs(len(pairs), pairs) + self._pack_map_pairs(len(pairs), pairs) ret = self._buffer.getvalue() if self._autoreset: self._buffer = StringIO() @@ -754,8 +810,8 @@ class Packer(object): def pack_array_header(self, n): if n >= 2**32: - raise ValueError - self._fb_pack_array_header(n) + raise PackValueError + self._pack_array_header(n) ret = self._buffer.getvalue() if self._autoreset: self._buffer = StringIO() @@ -765,8 +821,8 @@ class Packer(object): def pack_map_header(self, n): if n >= 2**32: - raise ValueError - self._fb_pack_map_header(n) + raise PackValueError + self._pack_map_header(n) ret = self._buffer.getvalue() if self._autoreset: self._buffer = StringIO() @@ -783,7 +839,7 @@ class Packer(object): raise TypeError("data must have bytes type") L = len(data) if L > 0xffffffff: - raise ValueError("Too large data") + raise PackValueError("Too large data") if L == 1: self._buffer.write(b'\xd4') elif L == 2: @@ -803,7 +859,7 @@ class Packer(object): self._buffer.write(struct.pack('B', typecode)) self._buffer.write(data) - def _fb_pack_array_header(self, n): + def _pack_array_header(self, n): if n <= 0x0f: return self._buffer.write(struct.pack('B', 0x90 + n)) if n <= 0xffff: @@ -812,7 +868,7 @@ class Packer(object): return self._buffer.write(struct.pack(">BI", 0xdd, n)) raise PackValueError("Array is too large") - def _fb_pack_map_header(self, n): + def _pack_map_header(self, n): if n <= 0x0f: return self._buffer.write(struct.pack('B', 0x80 + n)) if n <= 0xffff: @@ -821,12 +877,36 @@ class Packer(object): return self._buffer.write(struct.pack(">BI", 0xdf, n)) raise PackValueError("Dict is too large") - def _fb_pack_map_pairs(self, n, pairs, nest_limit=DEFAULT_RECURSE_LIMIT): - self._fb_pack_map_header(n) + def _pack_map_pairs(self, n, pairs, nest_limit=DEFAULT_RECURSE_LIMIT): + self._pack_map_header(n) for (k, v) in pairs: self._pack(k, nest_limit - 1) self._pack(v, nest_limit - 1) + def _pack_raw_header(self, n): + if n <= 0x1f: + self._buffer.write(struct.pack('B', 0xa0 + n)) + elif self._use_bin_type and n <= 0xff: + self._buffer.write(struct.pack('>BB', 0xd9, n)) + elif n <= 0xffff: + self._buffer.write(struct.pack(">BH", 0xda, n)) + elif n <= 0xffffffff: + self._buffer.write(struct.pack(">BI", 0xdb, n)) + else: + raise PackValueError('Raw is too large') + + def _pack_bin_header(self, n): + if not self._use_bin_type: + return self._pack_raw_header(n) + elif n <= 0xff: + return self._buffer.write(struct.pack('>BB', 0xc4, n)) + elif n <= 0xffff: + return self._buffer.write(struct.pack(">BH", 0xc5, n)) + elif n <= 0xffffffff: + return self._buffer.write(struct.pack(">BI", 0xc6, n)) + else: + raise PackValueError('Bin is too large') + def bytes(self): return self._buffer.getvalue() diff --git a/msgpack/pack.h b/msgpack/pack.h index a75bdb0..d3aeff7 100644 --- a/msgpack/pack.h +++ b/msgpack/pack.h @@ -47,7 +47,7 @@ static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_ if (len + l > bs) { bs = (len + l) * 2; - buf = (char*)realloc(buf, bs); + buf = (char*)PyMem_Realloc(buf, bs); if (!buf) return -1; } memcpy(buf + len, data, l); @@ -1,5 +1,6 @@ #!/usr/bin/env python # coding: utf-8 +import io import os import sys from glob import glob @@ -97,9 +98,8 @@ del libraries, macros desc = 'MessagePack (de)serializer.' -f = open('README.rst') -long_desc = f.read() -f.close() +with io.open('README.rst', encoding='utf-8') as f: + long_desc = f.read() del f setup(name='msgpack-python', @@ -112,7 +112,6 @@ setup(name='msgpack-python', description=desc, long_description=long_desc, url='http://msgpack.org/', - download_url='http://pypi.python.org/pypi/msgpack/', classifiers=[ 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 3', diff --git a/test/test_buffer.py b/test/test_buffer.py index 5a71f90..87f359f 100644 --- a/test/test_buffer.py +++ b/test/test_buffer.py @@ -18,3 +18,12 @@ def test_unpack_bytearray(): assert [b'foo', b'bar'] == obj expected_type = bytes assert all(type(s) == expected_type for s in obj) + + +def test_unpack_memoryview(): + buf = bytearray(packb(('foo', 'bar'))) + view = memoryview(buf) + obj = unpackb(view, use_list=1) + assert [b'foo', b'bar'] == obj + expected_type = bytes + assert all(type(s) == expected_type for s in obj) diff --git a/test/test_limits.py b/test/test_limits.py index 3c1cf2a..197ef46 100644 --- a/test/test_limits.py +++ b/test/test_limits.py @@ -3,32 +3,35 @@ from __future__ import absolute_import, division, print_function, unicode_literals import pytest -from msgpack import packb, unpackb, Packer, Unpacker, ExtType +from msgpack import ( + packb, unpackb, Packer, Unpacker, ExtType, + PackOverflowError, PackValueError, UnpackValueError, +) def test_integer(): x = -(2 ** 63) assert unpackb(packb(x)) == x - with pytest.raises((OverflowError, ValueError)): + with pytest.raises(PackOverflowError): packb(x-1) x = 2 ** 64 - 1 assert unpackb(packb(x)) == x - with pytest.raises((OverflowError, ValueError)): + with pytest.raises(PackOverflowError): packb(x+1) def test_array_header(): packer = Packer() packer.pack_array_header(2**32-1) - with pytest.raises((OverflowError, ValueError)): + with pytest.raises(PackValueError): packer.pack_array_header(2**32) def test_map_header(): packer = Packer() packer.pack_map_header(2**32-1) - with pytest.raises((OverflowError, ValueError)): + with pytest.raises(PackValueError): packer.pack_array_header(2**32) @@ -41,7 +44,7 @@ def test_max_str_len(): assert unpacker.unpack() == d unpacker = Unpacker(max_str_len=2, encoding='utf-8') - with pytest.raises(ValueError): + with pytest.raises(UnpackValueError): unpacker.feed(packed) unpacker.unpack() @@ -55,7 +58,7 @@ def test_max_bin_len(): assert unpacker.unpack() == d unpacker = Unpacker(max_bin_len=2) - with pytest.raises(ValueError): + with pytest.raises(UnpackValueError): unpacker.feed(packed) unpacker.unpack() @@ -69,7 +72,7 @@ def test_max_array_len(): assert unpacker.unpack() == d unpacker = Unpacker(max_array_len=2) - with pytest.raises(ValueError): + with pytest.raises(UnpackValueError): unpacker.feed(packed) unpacker.unpack() @@ -83,7 +86,7 @@ def test_max_map_len(): assert unpacker.unpack() == d unpacker = Unpacker(max_map_len=2) - with pytest.raises(ValueError): + with pytest.raises(UnpackValueError): unpacker.feed(packed) unpacker.unpack() @@ -97,7 +100,7 @@ def test_max_ext_len(): assert unpacker.unpack() == d unpacker = Unpacker(max_ext_len=2) - with pytest.raises(ValueError): + with pytest.raises(UnpackValueError): unpacker.feed(packed) unpacker.unpack() diff --git a/test/test_memoryview.py b/test/test_memoryview.py new file mode 100644 index 0000000..f6d74ed --- /dev/null +++ b/test/test_memoryview.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python +# coding: utf-8 + +from array import array +from msgpack import packb, unpackb +import sys + + +# For Python < 3: +# - array type only supports old buffer interface +# - array.frombytes is not available, must use deprecated array.fromstring +if sys.version_info[0] < 3: + def make_memoryview(obj): + return memoryview(buffer(obj)) + + def make_array(f, data): + a = array(f) + a.fromstring(data) + return a + + def get_data(a): + return a.tostring() +else: + make_memoryview = memoryview + + def make_array(f, data): + a = array(f) + a.frombytes(data) + return a + + def get_data(a): + return a.tobytes() + + +def _runtest(format, nbytes, expected_header, expected_prefix, use_bin_type): + # create a new array + original_array = array(format) + original_array.fromlist([255] * (nbytes // original_array.itemsize)) + original_data = get_data(original_array) + view = make_memoryview(original_array) + + # pack, unpack, and reconstruct array + packed = packb(view, use_bin_type=use_bin_type) + unpacked = unpackb(packed) + reconstructed_array = make_array(format, unpacked) + + # check that we got the right amount of data + assert len(original_data) == nbytes + # check packed header + assert packed[:1] == expected_header + # check packed length prefix, if any + assert packed[1:1+len(expected_prefix)] == expected_prefix + # check packed data + assert packed[1+len(expected_prefix):] == original_data + # check array unpacked correctly + assert original_array == reconstructed_array + + +def test_fixstr_from_byte(): + _runtest('B', 1, b'\xa1', b'', False) + _runtest('B', 31, b'\xbf', b'', False) + + +def test_fixstr_from_float(): + _runtest('f', 4, b'\xa4', b'', False) + _runtest('f', 28, b'\xbc', b'', False) + + +def test_str16_from_byte(): + _runtest('B', 2**8, b'\xda', b'\x01\x00', False) + _runtest('B', 2**16-1, b'\xda', b'\xff\xff', False) + + +def test_str16_from_float(): + _runtest('f', 2**8, b'\xda', b'\x01\x00', False) + _runtest('f', 2**16-4, b'\xda', b'\xff\xfc', False) + + +def test_str32_from_byte(): + _runtest('B', 2**16, b'\xdb', b'\x00\x01\x00\x00', False) + + +def test_str32_from_float(): + _runtest('f', 2**16, b'\xdb', b'\x00\x01\x00\x00', False) + + +def test_bin8_from_byte(): + _runtest('B', 1, b'\xc4', b'\x01', True) + _runtest('B', 2**8-1, b'\xc4', b'\xff', True) + + +def test_bin8_from_float(): + _runtest('f', 4, b'\xc4', b'\x04', True) + _runtest('f', 2**8-4, b'\xc4', b'\xfc', True) + + +def test_bin16_from_byte(): + _runtest('B', 2**8, b'\xc5', b'\x01\x00', True) + _runtest('B', 2**16-1, b'\xc5', b'\xff\xff', True) + + +def test_bin16_from_float(): + _runtest('f', 2**8, b'\xc5', b'\x01\x00', True) + _runtest('f', 2**16-4, b'\xc5', b'\xff\xfc', True) + + +def test_bin32_from_byte(): + _runtest('B', 2**16, b'\xc6', b'\x00\x01\x00\x00', True) + + +def test_bin32_from_float(): + _runtest('f', 2**16, b'\xc6', b'\x00\x01\x00\x00', True) diff --git a/test/test_stricttype.py b/test/test_stricttype.py new file mode 100644 index 0000000..a20b5eb --- /dev/null +++ b/test/test_stricttype.py @@ -0,0 +1,15 @@ +# coding: utf-8 + +from collections import namedtuple +from msgpack import packb, unpackb + + +def test_namedtuple(): + T = namedtuple('T', "foo bar") + def default(o): + if isinstance(o, T): + return dict(o._asdict()) + raise TypeError('Unsupported type %s' % (type(o),)) + packed = packb(T(1, 42), strict_types=True, use_bin_type=True, default=default) + unpacked = unpackb(packed, encoding='utf-8') + assert unpacked == {'foo': 1, 'bar': 42} @@ -1,5 +1,5 @@ [tox] -envlist = {py26,py27,py32,py33,py34,py35}-{c,pure},{pypy,pypy3}-pure,py27-x86,py34-x86 +envlist = {py27,py33,py34,py35}-{c,pure},{pypy,pypy3}-pure,py27-x86,py34-x86 [variants:pure] setenv= @@ -36,4 +36,3 @@ commands= python -c 'import sys; print(hex(sys.maxsize))' python -c 'from msgpack import _packer, _unpacker' py.test - |