diff options
Diffstat (limited to 'msgpack')
-rw-r--r-- | msgpack/_packer.pyx | 18 | ||||
-rw-r--r-- | msgpack/_unpacker.pyx | 81 | ||||
-rw-r--r-- | msgpack/fallback.py | 52 | ||||
-rw-r--r-- | msgpack/unpack.h | 12 |
4 files changed, 118 insertions, 45 deletions
diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 13a18f6..39da91b 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -2,7 +2,7 @@ #cython: embedsignature=True from cpython cimport * -#from cpython.exc cimport PyErr_WarnEx +from cpython.exc cimport PyErr_WarnEx from msgpack.exceptions import PackValueError, PackOverflowError from msgpack import ExtType @@ -39,7 +39,7 @@ cdef extern from "pack.h": int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l) cdef int DEFAULT_RECURSE_LIMIT=511 -cdef size_t ITEM_LIMIT = (2**32)-1 +cdef long long ITEM_LIMIT = (2**32)-1 cdef inline int PyBytesLike_Check(object o): @@ -110,9 +110,13 @@ cdef class Packer(object): self.pk.buf_size = buf_size self.pk.length = 0 - def __init__(self, default=None, encoding='utf-8', unicode_errors='strict', + def __init__(self, default=None, encoding=None, unicode_errors=None, bint use_single_float=False, bint autoreset=True, bint use_bin_type=False, bint strict_types=False): + if encoding is not None: + PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated.", 1) + if unicode_errors is not None: + PyErr_WarnEx(PendingDeprecationWarning, "unicode_errors is deprecated.", 1) self.use_float = use_single_float self.strict_types = strict_types self.autoreset = autoreset @@ -122,7 +126,7 @@ cdef class Packer(object): raise TypeError("default must be a callable.") self._default = default if encoding is None: - self.encoding = NULL + self.encoding = 'utf_8' self.unicode_errors = NULL else: if isinstance(encoding, unicode): @@ -134,7 +138,8 @@ cdef class Packer(object): self._berrors = unicode_errors.encode('ascii') else: self._berrors = unicode_errors - self.unicode_errors = PyBytes_AsString(self._berrors) + if self._berrors is not None: + self.unicode_errors = PyBytes_AsString(self._berrors) def __dealloc__(self): PyMem_Free(self.pk.buf) @@ -149,7 +154,7 @@ cdef class Packer(object): cdef char* rawval cdef int ret cdef dict d - cdef size_t L + cdef Py_ssize_t L cdef int default_used = 0 cdef bint strict_types = self.strict_types cdef Py_buffer view @@ -203,6 +208,7 @@ cdef class Packer(object): elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o): if not self.encoding: raise TypeError("Can't encode unicode string: no encoding is specified") + #TODO: Use faster API for UTF-8 o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors) L = len(o) if L > ITEM_LIMIT: diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 564749e..b796d04 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -43,8 +43,9 @@ from msgpack import ExtType cdef extern from "unpack.h": ctypedef struct msgpack_user: bint use_list - PyObject* object_hook + bint raw_as_bytes bint has_pairs_hook # call object_hook with k-v pairs + PyObject* object_hook PyObject* list_hook PyObject* ext_hook char *encoding @@ -73,12 +74,14 @@ cdef extern from "unpack.h": cdef inline init_ctx(unpack_context *ctx, object object_hook, object object_pairs_hook, object list_hook, object ext_hook, - bint use_list, char* encoding, char* unicode_errors, + bint use_list, bint raw_as_bytes, + char* encoding, char* unicode_errors, Py_ssize_t max_str_len, Py_ssize_t max_bin_len, Py_ssize_t max_array_len, Py_ssize_t max_map_len, Py_ssize_t max_ext_len): unpack_init(ctx) ctx.user.use_list = use_list + ctx.user.raw_as_bytes = raw_as_bytes ctx.user.object_hook = ctx.user.list_hook = <PyObject*>NULL ctx.user.max_str_len = max_str_len ctx.user.max_bin_len = max_bin_len @@ -155,7 +158,8 @@ cdef inline int get_data_from_buffer(object obj, return 1 def unpackb(object packed, object object_hook=None, object list_hook=None, - bint use_list=1, encoding=None, unicode_errors="strict", + bint use_list=True, bint raw_as_bytes=True, + encoding=None, unicode_errors="strict", object_pairs_hook=None, ext_hook=ExtType, Py_ssize_t max_str_len=2147483647, # 2**32-1 Py_ssize_t max_bin_len=2147483647, @@ -180,21 +184,26 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, cdef char* cerr = NULL cdef int new_protocol = 0 - get_data_from_buffer(packed, &view, &buf, &buf_len, &new_protocol) + if encoding is not None: + PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated, Use raw_as_bytes=False instead.", 1) + if isinstance(encoding, unicode): + encoding = encoding.encode('ascii') + elif not isinstance(encoding, bytes): + raise TypeError("encoding should be bytes or unicode") + cenc = PyBytes_AsString(encoding) + + if unicode_errors is not None: + PyErr_WarnEx(PendingDeprecationWarning, "unicode_errors is deprecated", 1) + if isinstance(unicode_errors, unicode): + unicode_errors = unicode_errors.encode('ascii') + elif not isinstance(unicode_errors, bytes): + raise TypeError("unicode_errors should be bytes or unicode") + cerr = PyBytes_AsString(unicode_errors) + get_data_from_buffer(packed, &view, &buf, &buf_len, &new_protocol) try: - if encoding is not None: - if isinstance(encoding, unicode): - encoding = encoding.encode('ascii') - cenc = PyBytes_AsString(encoding) - - if unicode_errors is not None: - if isinstance(unicode_errors, unicode): - unicode_errors = unicode_errors.encode('ascii') - cerr = PyBytes_AsString(unicode_errors) - init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook, - use_list, cenc, cerr, + use_list, raw_as_bytes, cenc, cerr, max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len) ret = unpack_construct(&ctx, buf, buf_len, &off) finally: @@ -252,6 +261,16 @@ cdef class Unpacker(object): If true, unpack msgpack array to Python list. Otherwise, unpack to Python tuple. (default: True) + :param bool raw_as_bytes: + If true, unpack msgpack raw to Python bytes (default). + Otherwise, unpack to Python str (or unicode on Python 2) by decoding + with UTF-8 encoding (recommended). + Currently, the default is true, but it will be changed to false in + near future. So you must specify it explicitly for keeping backward + compatibility. + + *encoding* option which is deprecated overrides this option. + :param callable object_hook: When specified, it should be callable. Unpacker calls it with a dict argument after unpacking msgpack map. @@ -262,14 +281,6 @@ cdef class Unpacker(object): Unpacker calls it with a list of key-value pairs after unpacking msgpack map. (See also simplejson) - :param str encoding: - Encoding used for decoding msgpack raw. - If it is None (default), msgpack raw is deserialized to Python bytes. - - :param str unicode_errors: - Used for decoding msgpack raw with *encoding*. - (default: `'strict'`) - :param int max_buffer_size: Limits size of data waiting unpacked. 0 means system's INT_MAX (default). Raises `BufferFull` exception when it is insufficient. @@ -287,16 +298,25 @@ cdef class Unpacker(object): :param int max_map_len: Limits max length of map. (default: 2**31-1) + :param str encoding: + Deprecated, use raw_as_bytes instead. + Encoding used for decoding msgpack raw. + If it is None (default), msgpack raw is deserialized to Python bytes. + + :param str unicode_errors: + Deprecated. Used for decoding msgpack raw with *encoding*. + (default: `'strict'`) + - example of streaming deserialize from file-like object:: + Example of streaming deserialize from file-like object:: - unpacker = Unpacker(file_like) + unpacker = Unpacker(file_like, raw_as_bytes=False) for o in unpacker: process(o) - example of streaming deserialize from socket:: + Example of streaming deserialize from socket:: - unpacker = Unpacker() + unpacker = Unpacker(raw_as_bytes=False) while True: buf = sock.recv(1024**2) if not buf: @@ -324,7 +344,8 @@ cdef class Unpacker(object): PyMem_Free(self.buf) self.buf = NULL - def __init__(self, file_like=None, Py_ssize_t read_size=0, bint use_list=1, + def __init__(self, file_like=None, Py_ssize_t read_size=0, + bint use_list=True, bint raw_as_bytes=True, object object_hook=None, object object_pairs_hook=None, object list_hook=None, encoding=None, unicode_errors='strict', int max_buffer_size=0, object ext_hook=ExtType, @@ -363,6 +384,7 @@ cdef class Unpacker(object): self.stream_offset = 0 if encoding is not None: + PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated, Use raw_as_bytes=False instead.", 1) if isinstance(encoding, unicode): self.encoding = encoding.encode('ascii') elif isinstance(encoding, bytes): @@ -372,6 +394,7 @@ cdef class Unpacker(object): cenc = PyBytes_AsString(self.encoding) if unicode_errors is not None: + PyErr_WarnEx(PendingDeprecationWarning, "unicode_errors is deprecated", 1) if isinstance(unicode_errors, unicode): self.unicode_errors = unicode_errors.encode('ascii') elif isinstance(unicode_errors, bytes): @@ -381,7 +404,7 @@ cdef class Unpacker(object): cerr = PyBytes_AsString(self.unicode_errors) init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook, - ext_hook, use_list, cenc, cerr, + ext_hook, use_list, raw_as_bytes, cenc, cerr, max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 5447b53..d95f621 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -145,6 +145,16 @@ class Unpacker(object): If true, unpack msgpack array to Python list. Otherwise, unpack to Python tuple. (default: True) + :param bool raw_as_bytes: + If true, unpack msgpack raw to Python bytes (default). + Otherwise, unpack to Python str (or unicode on Python 2) by decoding + with UTF-8 encoding (recommended). + Currently, the default is true, but it will be changed to false in + near future. So you must specify it explicitly for keeping backward + compatibility. + + *encoding* option which is deprecated overrides this option. + :param callable object_hook: When specified, it should be callable. Unpacker calls it with a dict argument after unpacking msgpack map. @@ -183,13 +193,13 @@ class Unpacker(object): example of streaming deserialize from file-like object:: - unpacker = Unpacker(file_like) + unpacker = Unpacker(file_like, raw_as_bytes=False) for o in unpacker: process(o) example of streaming deserialize from socket:: - unpacker = Unpacker() + unpacker = Unpacker(raw_as_bytes=False) while True: buf = sock.recv(1024**2) if not buf: @@ -199,15 +209,28 @@ class Unpacker(object): process(o) """ - def __init__(self, file_like=None, read_size=0, use_list=True, + def __init__(self, file_like=None, read_size=0, use_list=True, raw_as_bytes=True, object_hook=None, object_pairs_hook=None, list_hook=None, - encoding=None, unicode_errors='strict', max_buffer_size=0, + encoding=None, unicode_errors=None, max_buffer_size=0, ext_hook=ExtType, max_str_len=2147483647, # 2**32-1 max_bin_len=2147483647, max_array_len=2147483647, max_map_len=2147483647, max_ext_len=2147483647): + + if encoding is not None: + warnings.warn( + "encoding is deprecated, Use raw_as_bytes=False instead.", + PendingDeprecationWarning) + + if unicode_errors is not None: + warnings.warn( + "unicode_errors is deprecated.", + PendingDeprecationWarning) + else: + unicode_errors = 'strict' + if file_like is None: self._feeding = True else: @@ -234,6 +257,7 @@ class Unpacker(object): if read_size > self._max_buffer_size: raise ValueError("read_size must be smaller than max_buffer_size") self._read_size = read_size or min(self._max_buffer_size, 16*1024) + self._raw_as_bytes = bool(raw_as_bytes) self._encoding = encoding self._unicode_errors = unicode_errors self._use_list = use_list @@ -582,8 +606,10 @@ class Unpacker(object): if typ == TYPE_RAW: if self._encoding is not None: obj = obj.decode(self._encoding, self._unicode_errors) - else: + elif self._raw_as_bytes: obj = bytes(obj) + else: + obj = obj.decode('utf_8') return obj if typ == TYPE_EXT: return self._ext_hook(n, bytes(obj)) @@ -682,9 +708,23 @@ class Packer(object): :param str unicode_errors: (deprecated) Error handler for encoding unicode. (default: 'strict') """ - def __init__(self, default=None, encoding='utf-8', unicode_errors='strict', + def __init__(self, default=None, encoding=None, unicode_errors=None, use_single_float=False, autoreset=True, use_bin_type=False, strict_types=False): + if encoding is None: + encoding = 'utf_8' + else: + warnings.warn( + "encoding is deprecated, Use raw_as_bytes=False instead.", + PendingDeprecationWarning) + + if unicode_errors is None: + unicode_errors = 'strict' + else: + warnings.warn( + "unicode_errors is deprecated.", + PendingDeprecationWarning) + self._strict_types = strict_types self._use_float = use_single_float self._autoreset = autoreset diff --git a/msgpack/unpack.h b/msgpack/unpack.h index da2cfb6..8c2fc46 100644 --- a/msgpack/unpack.h +++ b/msgpack/unpack.h @@ -20,9 +20,10 @@ #include "unpack_define.h" typedef struct unpack_user { - int use_list; - PyObject *object_hook; + bool use_list; + bool raw_as_bytes; bool has_pairs_hook; + PyObject *object_hook; PyObject *list_hook; PyObject *ext_hook; const char *encoding; @@ -225,10 +226,13 @@ static inline int unpack_callback_raw(unpack_user* u, const char* b, const char* } PyObject *py; - if(u->encoding) { + + if (u->encoding) { py = PyUnicode_Decode(p, l, u->encoding, u->unicode_errors); - } else { + } else if (u->raw_as_bytes) { py = PyBytes_FromStringAndSize(p, l); + } else { + py = PyUnicode_DecodeUTF8(p, l, NULL); } if (!py) return -1; |