summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: INADA Naoki <methane@users.noreply.github.com>, 2018-01-11 19:41:05 +0900
committer: GitHub <noreply@github.com>, 2018-01-11 19:41:05 +0900
commit: 60ef3879d792ec92480cf9d6d610951657c2e8c7 (patch)
tree: 15d22f6c87d2da77feb86e8d09e8eb9a452bf4f5
parent: 5534d0c7af0114db3d27f7b96c82a7fe22ce1e40 (diff)
download: msgpack-python-60ef3879d792ec92480cf9d6d610951657c2e8c7.tar.gz
packer: Use PyUnicode_AsUTF8AndSize() for utf-8 (#272)
-rwxr-xr-x docker/runtests.sh 2
-rw-r--r-- msgpack/_packer.pyx 36
-rw-r--r-- msgpack/pack.h 47
3 files changed, 71 insertions, 14 deletions
diff --git a/docker/runtests.sh b/docker/runtests.sh
index 11ef9f4..113b630 100755
--- a/docker/runtests.sh
+++ b/docker/runtests.sh
@@ -9,6 +9,6 @@ for V in cp36-cp36m cp35-cp35m cp27-cp27m cp27-cp27mu; do
pushd test # prevent importing msgpack package in current directory.
$PYBIN/python -c 'import sys; print(hex(sys.maxsize))'
$PYBIN/python -c 'from msgpack import _packer, _unpacker'
- $PYBIN/py.test -v
+ $PYBIN/pytest -v .
popd
done
diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx
index 39da91b..a4913ab 100644
--- a/msgpack/_packer.pyx
+++ b/msgpack/_packer.pyx
@@ -13,6 +13,7 @@ cdef extern from "Python.h":
int PyMemoryView_Check(object obj)
int PyByteArray_Check(object obj)
int PyByteArray_CheckExact(object obj)
+ char* PyUnicode_AsUTF8AndSize(object obj, Py_ssize_t *l) except NULL
cdef extern from "pack.h":
@@ -37,6 +38,7 @@ cdef extern from "pack.h":
int msgpack_pack_bin(msgpack_packer* pk, size_t l)
int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l)
int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l)
+ int msgpack_pack_unicode(msgpack_packer* pk, object o, long long limit)
cdef int DEFAULT_RECURSE_LIMIT=511
cdef long long ITEM_LIMIT = (2**32)-1
@@ -126,8 +128,12 @@ cdef class Packer(object):
raise TypeError("default must be a callable.")
self._default = default
if encoding is None:
- self.encoding = 'utf_8'
- self.unicode_errors = NULL
+ if unicode_errors is None:
+ self.encoding = NULL
+ self.unicode_errors = NULL
+ else:
+ self.encoding = "utf_8"
+ self.unicode_errors = unicode_errors
else:
if isinstance(encoding, unicode):
self._bencoding = encoding.encode('ascii')
@@ -140,6 +146,8 @@ cdef class Packer(object):
self._berrors = unicode_errors
if self._berrors is not None:
self.unicode_errors = PyBytes_AsString(self._berrors)
+ else:
+ self.unicode_errors = NULL
def __dealloc__(self):
PyMem_Free(self.pk.buf)
@@ -206,17 +214,19 @@ cdef class Packer(object):
if ret == 0:
ret = msgpack_pack_raw_body(&self.pk, rawval, L)
elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o):
- if not self.encoding:
- raise TypeError("Can't encode unicode string: no encoding is specified")
- #TODO: Use faster API for UTF-8
- o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors)
- L = len(o)
- if L > ITEM_LIMIT:
- raise PackValueError("unicode string is too large")
- rawval = o
- ret = msgpack_pack_raw(&self.pk, L)
- if ret == 0:
- ret = msgpack_pack_raw_body(&self.pk, rawval, L)
+ if self.encoding == NULL:
+ ret = msgpack_pack_unicode(&self.pk, o, ITEM_LIMIT);
+ if ret == -2:
+ raise PackValueError("unicode string is too large")
+ else:
+ o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors)
+ L = len(o)
+ if L > ITEM_LIMIT:
+ raise PackValueError("unicode string is too large")
+ ret = msgpack_pack_raw(&self.pk, L)
+ if ret == 0:
+ rawval = o
+ ret = msgpack_pack_raw_body(&self.pk, rawval, L)
elif PyDict_CheckExact(o):
d = <dict>o
L = len(d)
diff --git a/msgpack/pack.h b/msgpack/pack.h
index 3bc21ea..4f3ce1d 100644
--- a/msgpack/pack.h
+++ b/msgpack/pack.h
@@ -67,6 +67,53 @@ static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_
#include "pack_template.h"
+/*
+ * Pack the unicode object `o` into `pk` as a msgpack raw item holding its
+ * UTF-8 encoding.
+ *
+ * pk:    packer that receives the raw header and then the raw body.
+ * o:     unicode object to pack (asserted with PyUnicode_Check on Python 3).
+ * limit: maximum accepted length, in bytes, of the UTF-8 encoding.
+ *
+ * Returns -2 when the UTF-8 encoding of o is longer than limit, -1 when the
+ * UTF-8 conversion returns NULL, and otherwise the result of
+ * msgpack_pack_raw() / msgpack_pack_raw_body() (on Python 2 a non-zero
+ * result from msgpack_pack_raw() is reported as -1).
+ */
+static inline int
+msgpack_pack_unicode(msgpack_packer *pk, PyObject *o, long long limit)
+{
+#if PY_MAJOR_VERSION >= 3
+ assert(PyUnicode_Check(o));
+ // py3: obtain the UTF-8 bytes and their length directly from o.
+ Py_ssize_t len;
+ const char* buf = PyUnicode_AsUTF8AndSize(o, &len);
+ if (buf == NULL)
+ return -1;
+ // buf is not released here; presumably it is owned by o -- confirm in the CPython docs.
+ if (len > limit) {
+ return -2;
+ }
+ // Write the raw header first; the body is written only if that succeeded.
+ int ret = msgpack_pack_raw(pk, len);
+ if (ret) return ret;
+
+ return msgpack_pack_raw_body(pk, buf, len);
+#else
+ PyObject *bytes;
+ Py_ssize_t len;
+ int ret;
+
+ // py2: encode into a temporary object that every later exit path must Py_DECREF.
+ bytes = PyUnicode_AsUTF8String(o);
+ if (bytes == NULL)
+ return -1;
+
+ len = PyString_GET_SIZE(bytes);
+ if (len > limit) {
+ Py_DECREF(bytes);
+ return -2;
+ }
+ // Write the raw header first; the body is written only if that succeeded.
+ ret = msgpack_pack_raw(pk, len);
+ if (ret) {
+ Py_DECREF(bytes);
+ return -1;
+ }
+ ret = msgpack_pack_raw_body(pk, PyString_AS_STRING(bytes), len);
+ Py_DECREF(bytes);
+ return ret;
+#endif
+}
+
#ifdef __cplusplus
}
#endif