diff options
author | INADA Naoki <methane@users.noreply.github.com> | 2018-01-11 19:41:05 +0900 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-01-11 19:41:05 +0900 |
commit | 60ef3879d792ec92480cf9d6d610951657c2e8c7 (patch) | |
tree | 15d22f6c87d2da77feb86e8d09e8eb9a452bf4f5 /msgpack/_packer.pyx | |
parent | 5534d0c7af0114db3d27f7b96c82a7fe22ce1e40 (diff) | |
download | msgpack-python-60ef3879d792ec92480cf9d6d610951657c2e8c7.tar.gz |
packer: Use PyUnicode_AsUTF8AndSize() for utf-8 (#272)
Diffstat (limited to 'msgpack/_packer.pyx')
-rw-r--r-- | msgpack/_packer.pyx | 36 |
1 files changed, 23 insertions, 13 deletions
```diff
diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx
index 39da91b..a4913ab 100644
--- a/msgpack/_packer.pyx
+++ b/msgpack/_packer.pyx
@@ -13,6 +13,7 @@ cdef extern from "Python.h":
     int PyMemoryView_Check(object obj)
     int PyByteArray_Check(object obj)
     int PyByteArray_CheckExact(object obj)
+    char* PyUnicode_AsUTF8AndSize(object obj, Py_ssize_t *l) except NULL
 
 
 cdef extern from "pack.h":
@@ -37,6 +38,7 @@ cdef extern from "pack.h":
     int msgpack_pack_bin(msgpack_packer* pk, size_t l)
     int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l)
     int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l)
+    int msgpack_pack_unicode(msgpack_packer* pk, object o, long long limit)
 
 cdef int DEFAULT_RECURSE_LIMIT=511
 cdef long long ITEM_LIMIT = (2**32)-1
@@ -126,8 +128,12 @@ cdef class Packer(object):
                 raise TypeError("default must be a callable.")
         self._default = default
         if encoding is None:
-            self.encoding = 'utf_8'
-            self.unicode_errors = NULL
+            if unicode_errors is None:
+                self.encoding = NULL
+                self.unicode_errors = NULL
+            else:
+                self.encoding = "utf_8"
+                self.unicode_errors = unicode_errors
         else:
             if isinstance(encoding, unicode):
                 self._bencoding = encoding.encode('ascii')
@@ -140,6 +146,8 @@ cdef class Packer(object):
                 self._berrors = unicode_errors
             if self._berrors is not None:
                 self.unicode_errors = PyBytes_AsString(self._berrors)
+            else:
+                self.unicode_errors = NULL
 
     def __dealloc__(self):
         PyMem_Free(self.pk.buf)
@@ -206,17 +214,19 @@ cdef class Packer(object):
                 if ret == 0:
                     ret = msgpack_pack_raw_body(&self.pk, rawval, L)
             elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o):
-                if not self.encoding:
-                    raise TypeError("Can't encode unicode string: no encoding is specified")
-                #TODO: Use faster API for UTF-8
-                o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors)
-                L = len(o)
-                if L > ITEM_LIMIT:
-                    raise PackValueError("unicode string is too large")
-                rawval = o
-                ret = msgpack_pack_raw(&self.pk, L)
-                if ret == 0:
-                    ret = msgpack_pack_raw_body(&self.pk, rawval, L)
+                if self.encoding == NULL:
+                    ret = msgpack_pack_unicode(&self.pk, o, ITEM_LIMIT);
+                    if ret == -2:
+                        raise PackValueError("unicode string is too large")
+                else:
+                    o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors)
+                    L = len(o)
+                    if L > ITEM_LIMIT:
+                        raise PackValueError("unicode string is too large")
+                    ret = msgpack_pack_raw(&self.pk, L)
+                    if ret == 0:
+                        rawval = o
+                        ret = msgpack_pack_raw_body(&self.pk, rawval, L)
             elif PyDict_CheckExact(o):
                 d = <dict>o
                 L = len(d)
```