summaryrefslogtreecommitdiff
path: root/msgpack
diff options
context:
space:
mode:
authorInada Naoki <songofacandy@gmail.com>2019-12-03 20:53:11 +0900
committerGitHub <noreply@github.com>2019-12-03 20:53:11 +0900
commit83ebb63c447a99c81d043eb6808bbfb50697a751 (patch)
tree1f31aa6d43adccf27d236f3b63adeb71aa933a26 /msgpack
parenta0480c760256b4afc18beaebd5e3c79de1d4ce56 (diff)
downloadmsgpack-python-83ebb63c447a99c81d043eb6808bbfb50697a751.tar.gz
Resurrect unicode_errors of the Packer. (#379)
Diffstat (limited to 'msgpack')
-rw-r--r--msgpack/_packer.pyx34
-rw-r--r--msgpack/fallback.py11
2 files changed, 35 insertions, 10 deletions
diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx
index 2e698e1..8b1a392 100644
--- a/msgpack/_packer.pyx
+++ b/msgpack/_packer.pyx
@@ -89,9 +89,15 @@ cdef class Packer(object):
Additionally tuples will not be serialized as lists.
This is useful when trying to implement accurate serialization
for python types.
+
+ :param str unicode_errors:
+ The error handler for encoding unicode. (default: 'strict')
+ DO NOT USE THIS!! This option is kept for very specific usage.
"""
cdef msgpack_packer pk
cdef object _default
+ cdef object _berrors
+ cdef const char *unicode_errors
cdef bint strict_types
cdef bool use_float
cdef bint autoreset
@@ -104,10 +110,8 @@ cdef class Packer(object):
self.pk.buf_size = buf_size
self.pk.length = 0
- def __init__(self, default=None,
- bint use_single_float=False,
- bint autoreset=True,
- bint use_bin_type=False,
+ def __init__(self, *, default=None, unicode_errors=None,
+ bint use_single_float=False, bint autoreset=True, bint use_bin_type=False,
bint strict_types=False):
self.use_float = use_single_float
self.strict_types = strict_types
@@ -118,6 +122,12 @@ cdef class Packer(object):
raise TypeError("default must be a callable.")
self._default = default
+ self._berrors = unicode_errors
+ if unicode_errors is None:
+ self.unicode_errors = NULL
+ else:
+ self.unicode_errors = self._berrors
+
def __dealloc__(self):
PyMem_Free(self.pk.buf)
self.pk.buf = NULL
@@ -183,9 +193,19 @@ cdef class Packer(object):
if ret == 0:
ret = msgpack_pack_raw_body(&self.pk, rawval, L)
elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o):
- ret = msgpack_pack_unicode(&self.pk, o, ITEM_LIMIT);
- if ret == -2:
- raise ValueError("unicode string is too large")
+ if self.unicode_errors == NULL:
+ ret = msgpack_pack_unicode(&self.pk, o, ITEM_LIMIT);
+ if ret == -2:
+ raise ValueError("unicode string is too large")
+ else:
+ o = PyUnicode_AsEncodedString(o, NULL, self.unicode_errors)
+ L = Py_SIZE(o)
+ if L > ITEM_LIMIT:
+ raise ValueError("unicode string is too large")
+ ret = msgpack_pack_raw(&self.pk, L)
+ if ret == 0:
+ rawval = o
+ ret = msgpack_pack_raw_body(&self.pk, rawval, L)
elif PyDict_CheckExact(o):
d = <dict>o
L = len(d)
diff --git a/msgpack/fallback.py b/msgpack/fallback.py
index 5dab906..0c0c101 100644
--- a/msgpack/fallback.py
+++ b/msgpack/fallback.py
@@ -667,7 +667,7 @@ class Unpacker(object):
elif self._raw:
obj = bytes(obj)
else:
- obj = obj.decode('utf_8')
+ obj = obj.decode('utf_8', self._unicode_errors)
return obj
if typ == TYPE_EXT:
return self._ext_hook(n, bytes(obj))
@@ -752,14 +752,19 @@ class Packer(object):
Additionally tuples will not be serialized as lists.
This is useful when trying to implement accurate serialization
for python types.
+
+ :param str unicode_errors:
+ The error handler for encoding unicode. (default: 'strict')
+ DO NOT USE THIS!! This option is kept for very specific usage.
"""
- def __init__(self, default=None,
+ def __init__(self, default=None, unicode_errors=None,
use_single_float=False, autoreset=True, use_bin_type=False,
strict_types=False):
self._strict_types = strict_types
self._use_float = use_single_float
self._autoreset = autoreset
self._use_bin_type = use_bin_type
+ self._unicode_errors = unicode_errors or "strict"
self._buffer = StringIO()
if default is not None:
if not callable(default):
@@ -816,7 +821,7 @@ class Packer(object):
self._pack_bin_header(n)
return self._buffer.write(obj)
if check(obj, unicode):
- obj = obj.encode("utf-8")
+ obj = obj.encode("utf-8", self._unicode_errors)
n = len(obj)
if n >= 2**32:
raise ValueError("String is too large")