diff options
author | folz <joachim.folz@dfki.de> | 2016-05-07 15:18:20 +0200 |
---|---|---|
committer | folz <joachim.folz@dfki.de> | 2016-06-13 15:37:33 +0200 |
commit | 2b63e9fbbb3440d73d6638ec8af6315aeb8ecd97 (patch) | |
tree | 44375cacb6b02cf66c2a3fcc3aaa5dbed83e49be | |
parent | b887c1a4ad7cec8dae05d0e293e4757ce5b2c896 (diff) | |
download | msgpack-python-2b63e9fbbb3440d73d6638ec8af6315aeb8ecd97.tar.gz |
enable unpacking from memoryview
-rw-r--r-- | msgpack/_unpacker.pyx | 92 | ||||
-rw-r--r-- | msgpack/fallback.py | 37 | ||||
-rw-r--r-- | test/test_buffer.py | 9 |
3 files changed, 106 insertions, 32 deletions
diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 23f6478..f6e06b0 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -8,16 +8,23 @@ from cpython.bytes cimport ( ) from cpython.buffer cimport ( Py_buffer, - PyBuffer_Release, + PyObject_CheckBuffer, PyObject_GetBuffer, + PyBuffer_Release, + PyBuffer_IsContiguous, + PyBUF_READ, PyBUF_SIMPLE, + PyBUF_FULL_RO, ) from cpython.mem cimport PyMem_Malloc, PyMem_Free from cpython.object cimport PyCallable_Check +from cpython.ref cimport Py_DECREF +from cpython.exc cimport PyErr_WarnEx cdef extern from "Python.h": ctypedef struct PyObject cdef int PyObject_AsReadBuffer(object o, const void** buff, Py_ssize_t* buf_len) except -1 + object PyMemoryView_GetContiguous(object obj, int buffertype, char order) from libc.stdlib cimport * from libc.string cimport * @@ -110,6 +117,42 @@ cdef inline init_ctx(unpack_context *ctx, def default_read_extended_type(typecode, data): raise NotImplementedError("Cannot decode extended type with typecode=%d" % typecode) +cdef inline int get_data_from_buffer(object obj, + Py_buffer *view, + char **buf, + Py_ssize_t *buffer_len, + int *new_protocol) except 0: + cdef object contiguous + cdef Py_buffer tmp + if PyObject_CheckBuffer(obj): + new_protocol[0] = 1 + if PyObject_GetBuffer(obj, view, PyBUF_FULL_RO) == -1: + raise + if view.itemsize != 1: + PyBuffer_Release(view) + raise BufferError("cannot unpack from multi-byte object") + if PyBuffer_IsContiguous(view, 'A') == 0: + PyBuffer_Release(view) + # create a contiguous copy and get buffer + contiguous = PyMemoryView_GetContiguous(obj, PyBUF_READ, 'C') + PyObject_GetBuffer(contiguous, view, PyBUF_SIMPLE) + # view must hold the only reference to contiguous, + # so memory is freed when view is released + Py_DECREF(contiguous) + buffer_len[0] = view.len + buf[0] = <char*> view.buf + return 1 + else: + new_protocol[0] = 0 + if PyObject_AsReadBuffer(obj, <const void**> buf, buffer_len) == -1: + raise BufferError("could not get memoryview") + PyErr_WarnEx(RuntimeWarning, + "using old buffer interface to unpack %s; " + "this leads to unpacking errors if slicing is used and " + "will be removed in a future version" % type(obj), + 1) + return 1 + def unpackb(object packed, object object_hook=None, object list_hook=None, bint use_list=1, encoding=None, unicode_errors="strict", object_pairs_hook=None, ext_hook=ExtType, @@ -129,27 +172,34 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, cdef Py_ssize_t off = 0 cdef int ret - cdef char* buf + cdef Py_buffer view + cdef char* buf = NULL cdef Py_ssize_t buf_len cdef char* cenc = NULL cdef char* cerr = NULL + cdef int new_protocol = 0 + + get_data_from_buffer(packed, &view, &buf, &buf_len, &new_protocol) - PyObject_AsReadBuffer(packed, <const void**>&buf, &buf_len) + try: + if encoding is not None: + if isinstance(encoding, unicode): + encoding = encoding.encode('ascii') + cenc = PyBytes_AsString(encoding) - if encoding is not None: - if isinstance(encoding, unicode): - encoding = encoding.encode('ascii') - cenc = PyBytes_AsString(encoding) + if unicode_errors is not None: + if isinstance(unicode_errors, unicode): + unicode_errors = unicode_errors.encode('ascii') + cerr = PyBytes_AsString(unicode_errors) - if unicode_errors is not None: - if isinstance(unicode_errors, unicode): - unicode_errors = unicode_errors.encode('ascii') - cerr = PyBytes_AsString(unicode_errors) + init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook, + use_list, cenc, cerr, + max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len) + ret = unpack_construct(&ctx, buf, buf_len, &off) + finally: + if new_protocol: + PyBuffer_Release(&view); - init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook, - use_list, cenc, cerr, - max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len) - ret = unpack_construct(&ctx, buf, buf_len, &off) if ret == 1: obj = unpack_data(&ctx) if off < buf_len: @@ -335,14 +385,20 @@ cdef class Unpacker(object): def feed(self, object next_bytes): """Append `next_bytes` to internal buffer.""" cdef Py_buffer pybuff + cdef int new_protocol = 0 + cdef char* buf + cdef Py_ssize_t buf_len + if self.file_like is not None: raise AssertionError( "unpacker.feed() is not be able to use with `file_like`.") - PyObject_GetBuffer(next_bytes, &pybuff, PyBUF_SIMPLE) + + get_data_from_buffer(next_bytes, &pybuff, &buf, &buf_len, &new_protocol) try: - self.append_buffer(<char*>pybuff.buf, pybuff.len) + self.append_buffer(buf, buf_len) finally: - PyBuffer_Release(&pybuff) + if new_protocol: + PyBuffer_Release(&pybuff) cdef append_buffer(self, void* _buf, Py_ssize_t _buf_len): cdef: diff --git a/msgpack/fallback.py b/msgpack/fallback.py index a23ad8c..11087eb 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -1,8 +1,8 @@ """Fallback pure Python implementation of msgpack""" import sys -import array import struct +import warnings if sys.version_info[0] == 3: PY3 = True @@ -46,6 +46,7 @@ else: from io import BytesIO as StringIO newlist_hint = lambda size: [] + from msgpack.exceptions import ( BufferFull, OutOfData, @@ -79,6 +80,24 @@ def _check_type_strict(obj, t, type=type, tuple=tuple): return type(obj) is t +def _get_data_from_buffer(obj): + try: + view = memoryview(obj) + except TypeError: + # try to use legacy buffer protocol if 2.7, otherwise re-raise + if not PY3: + view = memoryview(buffer(obj)) + warnings.warn("using old buffer interface to unpack %s; " + "this leads to unpacking errors if slicing is used and " + "will be removed in a future version" % type(obj), + RuntimeWarning) + else: + raise + if view.itemsize != 1: + raise ValueError("cannot unpack from multi-byte object") + return view + + def unpack(stream, **kwargs): """ Unpack an object from `stream`. @@ -239,17 +258,11 @@ class Unpacker(object): raise TypeError("`ext_hook` is not callable") def feed(self, next_bytes): - if isinstance(next_bytes, array.array): - next_bytes = next_bytes.tostring() - if not isinstance(next_bytes, (bytes, bytearray)): - raise TypeError("next_bytes should be bytes, bytearray or array.array") assert self._feeding - - if (len(self._buffer) - self._buff_i + len(next_bytes) > self._max_buffer_size): + view = _get_data_from_buffer(next_bytes) + if (len(self._buffer) - self._buff_i + len(view) > self._max_buffer_size): raise BufferFull - # bytes + bytearray -> bytearray - # So cast before append - self._buffer += next_bytes + self._buffer += view def _consume(self): """ Gets rid of the used parts of the buffer. """ @@ -308,7 +321,6 @@ class Unpacker(object): n = 0 obj = None self._reserve(1) - #b = struct.unpack_from("B", self._buffer, self._buff_i)[0] b = self._buffer[self._buff_i] self._buff_i += 1 if b & 0b10000000 == 0: @@ -340,7 +352,6 @@ class Unpacker(object): elif b == 0xc4: typ = TYPE_BIN self._reserve(1) - #n = struct.unpack_from("B", self._buffer, self._buff_i)[0] n = self._buffer[self._buff_i] self._buff_i += 1 if n > self._max_bin_len: @@ -396,7 +407,6 @@ class Unpacker(object): self._buff_i += 8 elif b == 0xcc: self._reserve(1) - #obj = struct.unpack_from("B", self._buffer, self._buff_i)[0] obj = self._buffer[self._buff_i] self._buff_i += 1 elif b == 0xcd: @@ -465,7 +475,6 @@ class Unpacker(object): elif b == 0xd9: typ = TYPE_RAW self._reserve(1) - #n, = struct.unpack_from("B", self._buffer, self._buff_i) n = self._buffer[self._buff_i] self._buff_i += 1 if n > self._max_str_len: diff --git a/test/test_buffer.py b/test/test_buffer.py index 5a71f90..87f359f 100644 --- a/test/test_buffer.py +++ b/test/test_buffer.py @@ -18,3 +18,12 @@ def test_unpack_bytearray(): assert [b'foo', b'bar'] == obj expected_type = bytes assert all(type(s) == expected_type for s in obj) + + +def test_unpack_memoryview(): + buf = bytearray(packb(('foo', 'bar'))) + view = memoryview(buf) + obj = unpackb(view, use_list=1) + assert [b'foo', b'bar'] == obj + expected_type = bytes + assert all(type(s) == expected_type for s in obj) |