summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author folz <joachim.folz@dfki.de> 2016-05-07 15:18:20 +0200
committer folz <joachim.folz@dfki.de> 2016-06-13 15:37:33 +0200
commit 2b63e9fbbb3440d73d6638ec8af6315aeb8ecd97 (patch)
tree 44375cacb6b02cf66c2a3fcc3aaa5dbed83e49be
parent b887c1a4ad7cec8dae05d0e293e4757ce5b2c896 (diff)
download msgpack-python-2b63e9fbbb3440d73d6638ec8af6315aeb8ecd97.tar.gz
enable unpacking from memoryview
-rw-r--r-- msgpack/_unpacker.pyx 92
-rw-r--r-- msgpack/fallback.py 37
-rw-r--r-- test/test_buffer.py 9
3 files changed, 106 insertions, 32 deletions
diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx
index 23f6478..f6e06b0 100644
--- a/msgpack/_unpacker.pyx
+++ b/msgpack/_unpacker.pyx
@@ -8,16 +8,23 @@ from cpython.bytes cimport (
)
from cpython.buffer cimport (
Py_buffer,
- PyBuffer_Release,
+ PyObject_CheckBuffer,
PyObject_GetBuffer,
+ PyBuffer_Release,
+ PyBuffer_IsContiguous,
+ PyBUF_READ,
PyBUF_SIMPLE,
+ PyBUF_FULL_RO,
)
from cpython.mem cimport PyMem_Malloc, PyMem_Free
from cpython.object cimport PyCallable_Check
+from cpython.ref cimport Py_DECREF
+from cpython.exc cimport PyErr_WarnEx
cdef extern from "Python.h":
ctypedef struct PyObject
cdef int PyObject_AsReadBuffer(object o, const void** buff, Py_ssize_t* buf_len) except -1
+ object PyMemoryView_GetContiguous(object obj, int buffertype, char order)
from libc.stdlib cimport *
from libc.string cimport *
@@ -110,6 +117,42 @@ cdef inline init_ctx(unpack_context *ctx,
def default_read_extended_type(typecode, data):
raise NotImplementedError("Cannot decode extended type with typecode=%d" % typecode)
+cdef inline int get_data_from_buffer(object obj,
+ Py_buffer *view,
+ char **buf,
+ Py_ssize_t *buffer_len,
+ int *new_protocol) except 0:
+ cdef object contiguous
+ cdef Py_buffer tmp
+ if PyObject_CheckBuffer(obj):
+ new_protocol[0] = 1
+ if PyObject_GetBuffer(obj, view, PyBUF_FULL_RO) == -1:
+ raise
+ if view.itemsize != 1:
+ PyBuffer_Release(view)
+ raise BufferError("cannot unpack from multi-byte object")
+ if PyBuffer_IsContiguous(view, 'A') == 0:
+ PyBuffer_Release(view)
+ # create a contiguous copy and get buffer
+ contiguous = PyMemoryView_GetContiguous(obj, PyBUF_READ, 'C')
+ PyObject_GetBuffer(contiguous, view, PyBUF_SIMPLE)
+ # view must hold the only reference to contiguous,
+ # so memory is freed when view is released
+ Py_DECREF(contiguous)
+ buffer_len[0] = view.len
+ buf[0] = <char*> view.buf
+ return 1
+ else:
+ new_protocol[0] = 0
+ if PyObject_AsReadBuffer(obj, <const void**> buf, buffer_len) == -1:
+ raise BufferError("could not get memoryview")
+ PyErr_WarnEx(RuntimeWarning,
+ "using old buffer interface to unpack %s; "
+ "this leads to unpacking errors if slicing is used and "
+ "will be removed in a future version" % type(obj),
+ 1)
+ return 1
+
def unpackb(object packed, object object_hook=None, object list_hook=None,
bint use_list=1, encoding=None, unicode_errors="strict",
object_pairs_hook=None, ext_hook=ExtType,
@@ -129,27 +172,34 @@ def unpackb(object packed, object object_hook=None, object list_hook=None,
cdef Py_ssize_t off = 0
cdef int ret
- cdef char* buf
+ cdef Py_buffer view
+ cdef char* buf = NULL
cdef Py_ssize_t buf_len
cdef char* cenc = NULL
cdef char* cerr = NULL
+ cdef int new_protocol = 0
+
+ get_data_from_buffer(packed, &view, &buf, &buf_len, &new_protocol)
- PyObject_AsReadBuffer(packed, <const void**>&buf, &buf_len)
+ try:
+ if encoding is not None:
+ if isinstance(encoding, unicode):
+ encoding = encoding.encode('ascii')
+ cenc = PyBytes_AsString(encoding)
- if encoding is not None:
- if isinstance(encoding, unicode):
- encoding = encoding.encode('ascii')
- cenc = PyBytes_AsString(encoding)
+ if unicode_errors is not None:
+ if isinstance(unicode_errors, unicode):
+ unicode_errors = unicode_errors.encode('ascii')
+ cerr = PyBytes_AsString(unicode_errors)
- if unicode_errors is not None:
- if isinstance(unicode_errors, unicode):
- unicode_errors = unicode_errors.encode('ascii')
- cerr = PyBytes_AsString(unicode_errors)
+ init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook,
+ use_list, cenc, cerr,
+ max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len)
+ ret = unpack_construct(&ctx, buf, buf_len, &off)
+ finally:
+ if new_protocol:
+ PyBuffer_Release(&view);
- init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook,
- use_list, cenc, cerr,
- max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len)
- ret = unpack_construct(&ctx, buf, buf_len, &off)
if ret == 1:
obj = unpack_data(&ctx)
if off < buf_len:
@@ -335,14 +385,20 @@ cdef class Unpacker(object):
def feed(self, object next_bytes):
"""Append `next_bytes` to internal buffer."""
cdef Py_buffer pybuff
+ cdef int new_protocol = 0
+ cdef char* buf
+ cdef Py_ssize_t buf_len
+
if self.file_like is not None:
raise AssertionError(
"unpacker.feed() is not be able to use with `file_like`.")
- PyObject_GetBuffer(next_bytes, &pybuff, PyBUF_SIMPLE)
+
+ get_data_from_buffer(next_bytes, &pybuff, &buf, &buf_len, &new_protocol)
try:
- self.append_buffer(<char*>pybuff.buf, pybuff.len)
+ self.append_buffer(buf, buf_len)
finally:
- PyBuffer_Release(&pybuff)
+ if new_protocol:
+ PyBuffer_Release(&pybuff)
cdef append_buffer(self, void* _buf, Py_ssize_t _buf_len):
cdef:
diff --git a/msgpack/fallback.py b/msgpack/fallback.py
index a23ad8c..11087eb 100644
--- a/msgpack/fallback.py
+++ b/msgpack/fallback.py
@@ -1,8 +1,8 @@
"""Fallback pure Python implementation of msgpack"""
import sys
-import array
import struct
+import warnings
if sys.version_info[0] == 3:
PY3 = True
@@ -46,6 +46,7 @@ else:
from io import BytesIO as StringIO
newlist_hint = lambda size: []
+
from msgpack.exceptions import (
BufferFull,
OutOfData,
@@ -79,6 +80,24 @@ def _check_type_strict(obj, t, type=type, tuple=tuple):
return type(obj) is t
+def _get_data_from_buffer(obj):
+ try:
+ view = memoryview(obj)
+ except TypeError:
+ # try to use legacy buffer protocol if 2.7, otherwise re-raise
+ if not PY3:
+ view = memoryview(buffer(obj))
+ warnings.warn("using old buffer interface to unpack %s; "
+ "this leads to unpacking errors if slicing is used and "
+ "will be removed in a future version" % type(obj),
+ RuntimeWarning)
+ else:
+ raise
+ if view.itemsize != 1:
+ raise ValueError("cannot unpack from multi-byte object")
+ return view
+
+
def unpack(stream, **kwargs):
"""
Unpack an object from `stream`.
@@ -239,17 +258,11 @@ class Unpacker(object):
raise TypeError("`ext_hook` is not callable")
def feed(self, next_bytes):
- if isinstance(next_bytes, array.array):
- next_bytes = next_bytes.tostring()
- if not isinstance(next_bytes, (bytes, bytearray)):
- raise TypeError("next_bytes should be bytes, bytearray or array.array")
assert self._feeding
-
- if (len(self._buffer) - self._buff_i + len(next_bytes) > self._max_buffer_size):
+ view = _get_data_from_buffer(next_bytes)
+ if (len(self._buffer) - self._buff_i + len(view) > self._max_buffer_size):
raise BufferFull
- # bytes + bytearray -> bytearray
- # So cast before append
- self._buffer += next_bytes
+ self._buffer += view
def _consume(self):
""" Gets rid of the used parts of the buffer. """
@@ -308,7 +321,6 @@ class Unpacker(object):
n = 0
obj = None
self._reserve(1)
- #b = struct.unpack_from("B", self._buffer, self._buff_i)[0]
b = self._buffer[self._buff_i]
self._buff_i += 1
if b & 0b10000000 == 0:
@@ -340,7 +352,6 @@ class Unpacker(object):
elif b == 0xc4:
typ = TYPE_BIN
self._reserve(1)
- #n = struct.unpack_from("B", self._buffer, self._buff_i)[0]
n = self._buffer[self._buff_i]
self._buff_i += 1
if n > self._max_bin_len:
@@ -396,7 +407,6 @@ class Unpacker(object):
self._buff_i += 8
elif b == 0xcc:
self._reserve(1)
- #obj = struct.unpack_from("B", self._buffer, self._buff_i)[0]
obj = self._buffer[self._buff_i]
self._buff_i += 1
elif b == 0xcd:
@@ -465,7 +475,6 @@ class Unpacker(object):
elif b == 0xd9:
typ = TYPE_RAW
self._reserve(1)
- #n, = struct.unpack_from("B", self._buffer, self._buff_i)
n = self._buffer[self._buff_i]
self._buff_i += 1
if n > self._max_str_len:
diff --git a/test/test_buffer.py b/test/test_buffer.py
index 5a71f90..87f359f 100644
--- a/test/test_buffer.py
+++ b/test/test_buffer.py
@@ -18,3 +18,12 @@ def test_unpack_bytearray():
assert [b'foo', b'bar'] == obj
expected_type = bytes
assert all(type(s) == expected_type for s in obj)
+
+
+def test_unpack_memoryview():
+ buf = bytearray(packb(('foo', 'bar')))
+ view = memoryview(buf)
+ obj = unpackb(view, use_list=1)
+ assert [b'foo', b'bar'] == obj
+ expected_type = bytes
+ assert all(type(s) == expected_type for s in obj)