diff options
author | Inada Naoki <songofacandy@gmail.com> | 2019-12-05 21:34:10 +0900 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-12-05 21:34:10 +0900 |
commit | 7e9905bdfaecde83ddb1a4575e734a10b055fde9 (patch) | |
tree | 2b9900fcdf0b5b1986b024edb2802971399b8fb8 | |
parent | de320488ae494b85a03b60dd33f91b650033d775 (diff) | |
download | msgpack-python-7e9905bdfaecde83ddb1a4575e734a10b055fde9.tar.gz |
Use new msgpack spec by default. (#386)
-rw-r--r-- | README.rst | 70 | ||||
-rw-r--r-- | msgpack/_packer.pyx | 6 | ||||
-rw-r--r-- | msgpack/_unpacker.pyx | 16 | ||||
-rw-r--r-- | msgpack/fallback.py | 20 | ||||
-rw-r--r-- | test/test_buffer.py | 4 | ||||
-rw-r--r-- | test/test_case.py | 11 | ||||
-rw-r--r-- | test/test_format.py | 10 | ||||
-rw-r--r-- | test/test_memoryview.py | 39 | ||||
-rw-r--r-- | test/test_newspec.py | 6 | ||||
-rw-r--r-- | test/test_read_size.py | 10 | ||||
-rw-r--r-- | test/test_sequnpack.py | 9 |
11 files changed, 75 insertions, 126 deletions
@@ -37,36 +37,16 @@ Sadly, this doesn't work for upgrade install. After `pip install -U msgpack-pyt msgpack is removed and `import msgpack` fail. -Deprecating encoding option -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Compatibility with old format +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -encoding and unicode_errors options are deprecated. +You can use ``use_bin_type=False`` option to pack ``bytes`` +object into raw type in old msgpack spec, instead of bin type in new msgpack spec. -In case of packer, use UTF-8 always. Storing other than UTF-8 is not recommended. +You can unpack old msgpack format using ``raw=True`` option. +It unpacks str (raw) type in msgpack into Python bytes. -For backward compatibility, you can use ``use_bin_type=False`` and pack ``bytes`` -object into msgpack raw type. - -In case of unpacker, there is new ``raw`` option. It is ``True`` by default -for backward compatibility, but it is changed to ``False`` in near future. -You can use ``raw=False`` instead of ``encoding='utf-8'``. - -Planned backward incompatible changes -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -When msgpack 1.0, I planning these breaking changes: - -* packer and unpacker: Remove ``encoding`` and ``unicode_errors`` option. -* packer: Change default of ``use_bin_type`` option from False to True. -* unpacker: Change default of ``raw`` option from True to False. -* unpacker: Reduce all ``max_xxx_len`` options for typical usage. -* unpacker: Remove ``write_bytes`` option from all methods. - -To avoid these breaking changes breaks your application, please: - -* Don't use deprecated options. -* Pass ``use_bin_type`` and ``raw`` options explicitly. -* If your application handle large (>1MB) data, specify ``max_xxx_len`` options too. +See the note below for details. Install @@ -76,6 +56,7 @@ Install $ pip install msgpack + Pure Python implementation ^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -100,6 +81,13 @@ Without extension, using pure Python implementation on CPython runs slowly. How to use ---------- +.. 
note:: + + In examples below, I use ``raw=False`` and ``use_bin_type=True`` for users + using msgpack < 1.0. + These options are default from msgpack 1.0 so you can omit them. + + One-shot pack & unpack ^^^^^^^^^^^^^^^^^^^^^^ @@ -252,36 +240,18 @@ Notes string and binary type ^^^^^^^^^^^^^^^^^^^^^^ -Early versions of msgpack didn't distinguish string and binary types (like Python 1). +Early versions of msgpack didn't distinguish string and binary types. The type for representing both string and binary types was named **raw**. -For backward compatibility reasons, msgpack-python will still default all -strings to byte strings, unless you specify the ``use_bin_type=True`` option in -the packer. If you do so, it will use a non-standard type called **bin** to -serialize byte arrays, and **raw** becomes to mean **str**. If you want to -distinguish **bin** and **raw** in the unpacker, specify ``raw=False``. - -Note that Python 2 defaults to byte-arrays over Unicode strings: - -.. code-block:: pycon - - >>> import msgpack - >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'])) - ['spam', 'eggs'] - >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True), - raw=False) - ['spam', u'eggs'] - -This is the same code in Python 3 (same behaviour, but Python 3 has a -different default): +You can pack into and unpack from this old spec using ``use_bin_type=False`` +and ``raw=True`` options. .. 
code-block:: pycon >>> import msgpack - >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'])) + >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=False), raw=True) [b'spam', b'eggs'] - >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True), - raw=False) + >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True), raw=False) [b'spam', 'eggs'] diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index f3bde3f..8cf3c05 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -80,9 +80,7 @@ cdef class Packer(object): :param bool use_bin_type: Use bin type introduced in msgpack spec 2.0 for bytes. - It also enables str8 type for unicode. - Current default value is false, but it will be changed to true - in future version. You should specify it explicitly. + It also enables str8 type for unicode. (default: True) :param bool strict_types: If set to true, types will be checked to be exact. Derived classes @@ -113,7 +111,7 @@ cdef class Packer(object): self.pk.length = 0 def __init__(self, *, default=None, unicode_errors=None, - bint use_single_float=False, bint autoreset=True, bint use_bin_type=False, + bint use_single_float=False, bint autoreset=True, bint use_bin_type=True, bint strict_types=False): self.use_float = use_single_float self.strict_types = strict_types diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index 3a9d494..f10e99d 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -131,7 +131,7 @@ cdef inline int get_data_from_buffer(object obj, def unpackb(object packed, *, object object_hook=None, object list_hook=None, - bint use_list=True, bint raw=True, bint strict_map_key=False, + bint use_list=True, bint raw=False, bint strict_map_key=False, unicode_errors=None, object_pairs_hook=None, ext_hook=ExtType, Py_ssize_t max_str_len=-1, @@ -217,12 +217,8 @@ cdef class Unpacker(object): Otherwise, unpack to Python tuple. 
(default: True) :param bool raw: - If true, unpack msgpack raw to Python bytes (default). - Otherwise, unpack to Python str (or unicode on Python 2) by decoding - with UTF-8 encoding (recommended). - Currently, the default is true, but it will be changed to false in - near future. So you must specify it explicitly for keeping backward - compatibility. + If true, unpack msgpack raw to Python bytes. + Otherwise, unpack to Python str by decoding with UTF-8 encoding (default). :param bool strict_map_key: If true, only str or bytes are accepted for map (dict) keys. @@ -268,13 +264,13 @@ cdef class Unpacker(object): Example of streaming deserialize from file-like object:: - unpacker = Unpacker(file_like, raw=False, max_buffer_size=10*1024*1024) + unpacker = Unpacker(file_like, max_buffer_size=10*1024*1024) for o in unpacker: process(o) Example of streaming deserialize from socket:: - unpacker = Unpacker(raw=False, max_buffer_size=10*1024*1024) + unpacker = Unpacker(max_buffer_size=10*1024*1024) while True: buf = sock.recv(1024**2) if not buf: @@ -309,7 +305,7 @@ cdef class Unpacker(object): self.buf = NULL def __init__(self, file_like=None, *, Py_ssize_t read_size=0, - bint use_list=True, bint raw=True, bint strict_map_key=False, + bint use_list=True, bint raw=False, bint strict_map_key=False, object object_hook=None, object object_pairs_hook=None, object list_hook=None, unicode_errors=None, Py_ssize_t max_buffer_size=0, object ext_hook=ExtType, diff --git a/msgpack/fallback.py b/msgpack/fallback.py index 9de3553..fa2f3a8 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -158,7 +158,7 @@ else: class Unpacker(object): """Streaming unpacker. - arguments: + Arguments: :param file_like: File-like object having `.read(n)` method. @@ -172,12 +172,8 @@ class Unpacker(object): Otherwise, unpack to Python tuple. (default: True) :param bool raw: - If true, unpack msgpack raw to Python bytes (default). 
- Otherwise, unpack to Python str (or unicode on Python 2) by decoding - with UTF-8 encoding (recommended). - Currently, the default is true, but it will be changed to false in - near future. So you must specify it explicitly for keeping backward - compatibility. + If true, unpack msgpack raw to Python bytes. + Otherwise, unpack to Python str by decoding with UTF-8 encoding (default). :param bool strict_map_key: If true, only str or bytes are accepted for map (dict) keys. @@ -226,13 +222,13 @@ class Unpacker(object): Example of streaming deserialize from file-like object:: - unpacker = Unpacker(file_like, raw=False, max_buffer_size=10*1024*1024) + unpacker = Unpacker(file_like, max_buffer_size=10*1024*1024) for o in unpacker: process(o) Example of streaming deserialize from socket:: - unpacker = Unpacker(raw=False, max_buffer_size=10*1024*1024) + unpacker = Unpacker(max_buffer_size=10*1024*1024) while True: buf = sock.recv(1024**2) if not buf: @@ -253,7 +249,7 @@ class Unpacker(object): file_like=None, read_size=0, use_list=True, - raw=True, + raw=False, strict_map_key=False, object_hook=None, object_pairs_hook=None, @@ -748,7 +744,7 @@ class Packer(object): :param bool use_bin_type: Use bin type introduced in msgpack spec 2.0 for bytes. - It also enables str8 type for unicode. + It also enables str8 type for unicode. (default: True) :param bool strict_types: If set to true, types will be checked to be exact. 
Derived classes @@ -769,7 +765,7 @@ class Packer(object): unicode_errors=None, use_single_float=False, autoreset=True, - use_bin_type=False, + use_bin_type=True, strict_types=False, ): self._strict_types = strict_types diff --git a/test/test_buffer.py b/test/test_buffer.py index da68b27..62507cf 100644 --- a/test/test_buffer.py +++ b/test/test_buffer.py @@ -17,7 +17,7 @@ def test_unpack_buffer(): def test_unpack_bytearray(): - buf = bytearray(packb(("foo", "bar"))) + buf = bytearray(packb((b"foo", b"bar"))) obj = unpackb(buf, use_list=1) assert [b"foo", b"bar"] == obj expected_type = bytes @@ -25,7 +25,7 @@ def test_unpack_bytearray(): def test_unpack_memoryview(): - buf = bytearray(packb(("foo", "bar"))) + buf = bytearray(packb((b"foo", b"bar"))) view = memoryview(buf) obj = unpackb(view, use_list=1) assert [b"foo", b"bar"] == obj diff --git a/test/test_case.py b/test/test_case.py index 3bc1b26..3e60e59 100644 --- a/test/test_case.py +++ b/test/test_case.py @@ -1,13 +1,12 @@ #!/usr/bin/env python # coding: utf-8 - from msgpack import packb, unpackb -def check(length, obj): - v = packb(obj) +def check(length, obj, use_bin_type=True): + v = packb(obj, use_bin_type=use_bin_type) assert len(v) == length, "%r length should be %r but get %r" % (obj, length, len(v)) - assert unpackb(v, use_list=0) == obj + assert unpackb(v, use_list=0, raw=not use_bin_type) == obj def test_1(): @@ -56,7 +55,7 @@ def test_9(): def check_raw(overhead, num): - check(num + overhead, b" " * num) + check(num + overhead, b" " * num, use_bin_type=False) def test_fixraw(): @@ -135,4 +134,4 @@ def test_match(): def test_unicode(): - assert unpackb(packb("foobar"), use_list=1) == b"foobar" + assert unpackb(packb(u"foobar"), use_list=1) == u"foobar" diff --git a/test/test_format.py b/test/test_format.py index c2cdfbd..8c2f03f 100644 --- a/test/test_format.py +++ b/test/test_format.py @@ -4,8 +4,8 @@ from msgpack import unpackb -def check(src, should, use_list=0): - assert unpackb(src, 
use_list=use_list) == should +def check(src, should, use_list=0, raw=True): + assert unpackb(src, use_list=use_list, raw=raw) == should def testSimpleValue(): @@ -59,6 +59,12 @@ def testRaw(): b"\x00\x00\xdb\x00\x00\x00\x01a\xdb\x00\x00\x00\x02ab", (b"", b"a", b"ab", b"", b"a", b"ab"), ) + check( + b"\x96\xda\x00\x00\xda\x00\x01a\xda\x00\x02ab\xdb\x00\x00" + b"\x00\x00\xdb\x00\x00\x00\x01a\xdb\x00\x00\x00\x02ab", + ("", "a", "ab", "", "a", "ab"), + raw=False, + ) def testArray(): diff --git a/test/test_memoryview.py b/test/test_memoryview.py index e1b63b8..86b2c1f 100644 --- a/test/test_memoryview.py +++ b/test/test_memoryview.py @@ -1,50 +1,33 @@ #!/usr/bin/env python # coding: utf-8 +import pytest from array import array from msgpack import packb, unpackb import sys -# For Python < 3: -# - array type only supports old buffer interface -# - array.frombytes is not available, must use deprecated array.fromstring -if sys.version_info[0] < 3: +pytestmark = pytest.mark.skipif( + sys.version_info[0] < 3, reason="Only Python 3 supports buffer protocol" +) - def make_memoryview(obj): - return memoryview(buffer(obj)) - def make_array(f, data): - a = array(f) - a.fromstring(data) - return a - - def get_data(a): - return a.tostring() - - -else: - make_memoryview = memoryview - - def make_array(f, data): - a = array(f) - a.frombytes(data) - return a - - def get_data(a): - return a.tobytes() +def make_array(f, data): + a = array(f) + a.frombytes(data) + return a def _runtest(format, nbytes, expected_header, expected_prefix, use_bin_type): # create a new array original_array = array(format) original_array.fromlist([255] * (nbytes // original_array.itemsize)) - original_data = get_data(original_array) - view = make_memoryview(original_array) + original_data = original_array.tobytes() + view = memoryview(original_array) # pack, unpack, and reconstruct array packed = packb(view, use_bin_type=use_bin_type) - unpacked = unpackb(packed) + unpacked = unpackb(packed, raw=(not 
use_bin_type)) reconstructed_array = make_array(format, unpacked) # check that we got the right amount of data diff --git a/test/test_newspec.py b/test/test_newspec.py index f4f2a23..b7da486 100644 --- a/test/test_newspec.py +++ b/test/test_newspec.py @@ -10,14 +10,16 @@ def test_str8(): assert len(b) == len(data) + 2 assert b[0:2] == header + b"\x20" assert b[2:] == data - assert unpackb(b) == data + assert unpackb(b, raw=True) == data + assert unpackb(b, raw=False) == data.decode() data = b"x" * 255 b = packb(data.decode(), use_bin_type=True) assert len(b) == len(data) + 2 assert b[0:2] == header + b"\xff" assert b[2:] == data - assert unpackb(b) == data + assert unpackb(b, raw=True) == data + assert unpackb(b, raw=False) == data.decode() def test_bin8(): diff --git a/test/test_read_size.py b/test/test_read_size.py index 8d8df64..33a7e7d 100644 --- a/test/test_read_size.py +++ b/test/test_read_size.py @@ -8,9 +8,9 @@ def test_read_array_header(): unpacker = Unpacker() unpacker.feed(packb(["a", "b", "c"])) assert unpacker.read_array_header() == 3 - assert unpacker.unpack() == b"a" - assert unpacker.unpack() == b"b" - assert unpacker.unpack() == b"c" + assert unpacker.unpack() == "a" + assert unpacker.unpack() == "b" + assert unpacker.unpack() == "c" try: unpacker.unpack() assert 0, "should raise exception" @@ -22,8 +22,8 @@ def test_read_map_header(): unpacker = Unpacker() unpacker.feed(packb({"a": "A"})) assert unpacker.read_map_header() == 1 - assert unpacker.unpack() == b"a" - assert unpacker.unpack() == b"A" + assert unpacker.unpack() == "a" + assert unpacker.unpack() == "A" try: unpacker.unpack() assert 0, "should raise exception" diff --git a/test/test_sequnpack.py b/test/test_sequnpack.py index e576571..9b69479 100644 --- a/test/test_sequnpack.py +++ b/test/test_sequnpack.py @@ -1,6 +1,5 @@ #!/usr/bin/env python # coding: utf-8 - import io from msgpack import Unpacker, BufferFull from msgpack import pack @@ -26,7 +25,7 @@ def test_partialdata(): with 
raises(StopIteration): next(iter(unpacker)) unpacker.feed(b"o") - assert next(iter(unpacker)) == b"hallo" + assert next(iter(unpacker)) == "hallo" def test_foobar(): @@ -98,13 +97,13 @@ def test_readbytes(): def test_issue124(): unpacker = Unpacker() unpacker.feed(b"\xa1?\xa1!") - assert tuple(unpacker) == (b"?", b"!") + assert tuple(unpacker) == ("?", "!") assert tuple(unpacker) == () unpacker.feed(b"\xa1?\xa1") - assert tuple(unpacker) == (b"?",) + assert tuple(unpacker) == ("?",) assert tuple(unpacker) == () unpacker.feed(b"!") - assert tuple(unpacker) == (b"!",) + assert tuple(unpacker) == ("!",) assert tuple(unpacker) == () |