summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorInada Naoki <songofacandy@gmail.com>2019-12-05 21:34:10 +0900
committerGitHub <noreply@github.com>2019-12-05 21:34:10 +0900
commit7e9905bdfaecde83ddb1a4575e734a10b055fde9 (patch)
tree2b9900fcdf0b5b1986b024edb2802971399b8fb8
parentde320488ae494b85a03b60dd33f91b650033d775 (diff)
downloadmsgpack-python-7e9905bdfaecde83ddb1a4575e734a10b055fde9.tar.gz
Use new msgpack spec by default. (#386)
-rw-r--r--README.rst70
-rw-r--r--msgpack/_packer.pyx6
-rw-r--r--msgpack/_unpacker.pyx16
-rw-r--r--msgpack/fallback.py20
-rw-r--r--test/test_buffer.py4
-rw-r--r--test/test_case.py11
-rw-r--r--test/test_format.py10
-rw-r--r--test/test_memoryview.py39
-rw-r--r--test/test_newspec.py6
-rw-r--r--test/test_read_size.py10
-rw-r--r--test/test_sequnpack.py9
11 files changed, 75 insertions, 126 deletions
diff --git a/README.rst b/README.rst
index 82b6c02..f9f074f 100644
--- a/README.rst
+++ b/README.rst
@@ -37,36 +37,16 @@ Sadly, this doesn't work for upgrade install. After `pip install -U msgpack-pyt
msgpack is removed and `import msgpack` fail.
-Deprecating encoding option
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Compatibility with old format
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-encoding and unicode_errors options are deprecated.
+You can use the ``use_bin_type=False`` option to pack ``bytes``
+objects into the raw type of the old msgpack spec, instead of the bin type of the new msgpack spec.
-In case of packer, use UTF-8 always. Storing other than UTF-8 is not recommended.
+You can unpack the old msgpack format using the ``raw=True`` option.
+It unpacks the str (raw) type in msgpack into Python bytes.
-For backward compatibility, you can use ``use_bin_type=False`` and pack ``bytes``
-object into msgpack raw type.
-
-In case of unpacker, there is new ``raw`` option. It is ``True`` by default
-for backward compatibility, but it is changed to ``False`` in near future.
-You can use ``raw=False`` instead of ``encoding='utf-8'``.
-
-Planned backward incompatible changes
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-When msgpack 1.0, I planning these breaking changes:
-
-* packer and unpacker: Remove ``encoding`` and ``unicode_errors`` option.
-* packer: Change default of ``use_bin_type`` option from False to True.
-* unpacker: Change default of ``raw`` option from True to False.
-* unpacker: Reduce all ``max_xxx_len`` options for typical usage.
-* unpacker: Remove ``write_bytes`` option from all methods.
-
-To avoid these breaking changes breaks your application, please:
-
-* Don't use deprecated options.
-* Pass ``use_bin_type`` and ``raw`` options explicitly.
-* If your application handle large (>1MB) data, specify ``max_xxx_len`` options too.
+See the note below for details.
Install
@@ -76,6 +56,7 @@ Install
$ pip install msgpack
+
Pure Python implementation
^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -100,6 +81,13 @@ Without extension, using pure Python implementation on CPython runs slowly.
How to use
----------
+.. note::
+
+ In the examples below, I pass ``raw=False`` and ``use_bin_type=True`` explicitly
+ for users of msgpack < 1.0.
+ These options are the defaults since msgpack 1.0, so you can omit them.
+
+
One-shot pack & unpack
^^^^^^^^^^^^^^^^^^^^^^
@@ -252,36 +240,18 @@ Notes
string and binary type
^^^^^^^^^^^^^^^^^^^^^^
-Early versions of msgpack didn't distinguish string and binary types (like Python 1).
+Early versions of msgpack didn't distinguish string and binary types.
The type for representing both string and binary types was named **raw**.
-For backward compatibility reasons, msgpack-python will still default all
-strings to byte strings, unless you specify the ``use_bin_type=True`` option in
-the packer. If you do so, it will use a non-standard type called **bin** to
-serialize byte arrays, and **raw** becomes to mean **str**. If you want to
-distinguish **bin** and **raw** in the unpacker, specify ``raw=False``.
-
-Note that Python 2 defaults to byte-arrays over Unicode strings:
-
-.. code-block:: pycon
-
- >>> import msgpack
- >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs']))
- ['spam', 'eggs']
- >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True),
- raw=False)
- ['spam', u'eggs']
-
-This is the same code in Python 3 (same behaviour, but Python 3 has a
-different default):
+You can pack into and unpack from this old spec using ``use_bin_type=False``
+and ``raw=True`` options.
.. code-block:: pycon
>>> import msgpack
- >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs']))
+ >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=False), raw=True)
[b'spam', b'eggs']
- >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True),
- raw=False)
+ >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True), raw=False)
[b'spam', 'eggs']
diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx
index f3bde3f..8cf3c05 100644
--- a/msgpack/_packer.pyx
+++ b/msgpack/_packer.pyx
@@ -80,9 +80,7 @@ cdef class Packer(object):
:param bool use_bin_type:
Use bin type introduced in msgpack spec 2.0 for bytes.
- It also enables str8 type for unicode.
- Current default value is false, but it will be changed to true
- in future version. You should specify it explicitly.
+ It also enables str8 type for unicode. (default: True)
:param bool strict_types:
If set to true, types will be checked to be exact. Derived classes
@@ -113,7 +111,7 @@ cdef class Packer(object):
self.pk.length = 0
def __init__(self, *, default=None, unicode_errors=None,
- bint use_single_float=False, bint autoreset=True, bint use_bin_type=False,
+ bint use_single_float=False, bint autoreset=True, bint use_bin_type=True,
bint strict_types=False):
self.use_float = use_single_float
self.strict_types = strict_types
diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx
index 3a9d494..f10e99d 100644
--- a/msgpack/_unpacker.pyx
+++ b/msgpack/_unpacker.pyx
@@ -131,7 +131,7 @@ cdef inline int get_data_from_buffer(object obj,
def unpackb(object packed, *, object object_hook=None, object list_hook=None,
- bint use_list=True, bint raw=True, bint strict_map_key=False,
+ bint use_list=True, bint raw=False, bint strict_map_key=False,
unicode_errors=None,
object_pairs_hook=None, ext_hook=ExtType,
Py_ssize_t max_str_len=-1,
@@ -217,12 +217,8 @@ cdef class Unpacker(object):
Otherwise, unpack to Python tuple. (default: True)
:param bool raw:
- If true, unpack msgpack raw to Python bytes (default).
- Otherwise, unpack to Python str (or unicode on Python 2) by decoding
- with UTF-8 encoding (recommended).
- Currently, the default is true, but it will be changed to false in
- near future. So you must specify it explicitly for keeping backward
- compatibility.
+ If true, unpack msgpack raw to Python bytes.
+ Otherwise, unpack to Python str by decoding with UTF-8 encoding (default).
:param bool strict_map_key:
If true, only str or bytes are accepted for map (dict) keys.
@@ -268,13 +264,13 @@ cdef class Unpacker(object):
Example of streaming deserialize from file-like object::
- unpacker = Unpacker(file_like, raw=False, max_buffer_size=10*1024*1024)
+ unpacker = Unpacker(file_like, max_buffer_size=10*1024*1024)
for o in unpacker:
process(o)
Example of streaming deserialize from socket::
- unpacker = Unpacker(raw=False, max_buffer_size=10*1024*1024)
+ unpacker = Unpacker(max_buffer_size=10*1024*1024)
while True:
buf = sock.recv(1024**2)
if not buf:
@@ -309,7 +305,7 @@ cdef class Unpacker(object):
self.buf = NULL
def __init__(self, file_like=None, *, Py_ssize_t read_size=0,
- bint use_list=True, bint raw=True, bint strict_map_key=False,
+ bint use_list=True, bint raw=False, bint strict_map_key=False,
object object_hook=None, object object_pairs_hook=None, object list_hook=None,
unicode_errors=None, Py_ssize_t max_buffer_size=0,
object ext_hook=ExtType,
diff --git a/msgpack/fallback.py b/msgpack/fallback.py
index 9de3553..fa2f3a8 100644
--- a/msgpack/fallback.py
+++ b/msgpack/fallback.py
@@ -158,7 +158,7 @@ else:
class Unpacker(object):
"""Streaming unpacker.
- arguments:
+ Arguments:
:param file_like:
File-like object having `.read(n)` method.
@@ -172,12 +172,8 @@ class Unpacker(object):
Otherwise, unpack to Python tuple. (default: True)
:param bool raw:
- If true, unpack msgpack raw to Python bytes (default).
- Otherwise, unpack to Python str (or unicode on Python 2) by decoding
- with UTF-8 encoding (recommended).
- Currently, the default is true, but it will be changed to false in
- near future. So you must specify it explicitly for keeping backward
- compatibility.
+ If true, unpack msgpack raw to Python bytes.
+ Otherwise, unpack to Python str by decoding with UTF-8 encoding (default).
:param bool strict_map_key:
If true, only str or bytes are accepted for map (dict) keys.
@@ -226,13 +222,13 @@ class Unpacker(object):
Example of streaming deserialize from file-like object::
- unpacker = Unpacker(file_like, raw=False, max_buffer_size=10*1024*1024)
+ unpacker = Unpacker(file_like, max_buffer_size=10*1024*1024)
for o in unpacker:
process(o)
Example of streaming deserialize from socket::
- unpacker = Unpacker(raw=False, max_buffer_size=10*1024*1024)
+ unpacker = Unpacker(max_buffer_size=10*1024*1024)
while True:
buf = sock.recv(1024**2)
if not buf:
@@ -253,7 +249,7 @@ class Unpacker(object):
file_like=None,
read_size=0,
use_list=True,
- raw=True,
+ raw=False,
strict_map_key=False,
object_hook=None,
object_pairs_hook=None,
@@ -748,7 +744,7 @@ class Packer(object):
:param bool use_bin_type:
Use bin type introduced in msgpack spec 2.0 for bytes.
- It also enables str8 type for unicode.
+ It also enables str8 type for unicode. (default: True)
:param bool strict_types:
If set to true, types will be checked to be exact. Derived classes
@@ -769,7 +765,7 @@ class Packer(object):
unicode_errors=None,
use_single_float=False,
autoreset=True,
- use_bin_type=False,
+ use_bin_type=True,
strict_types=False,
):
self._strict_types = strict_types
diff --git a/test/test_buffer.py b/test/test_buffer.py
index da68b27..62507cf 100644
--- a/test/test_buffer.py
+++ b/test/test_buffer.py
@@ -17,7 +17,7 @@ def test_unpack_buffer():
def test_unpack_bytearray():
- buf = bytearray(packb(("foo", "bar")))
+ buf = bytearray(packb((b"foo", b"bar")))
obj = unpackb(buf, use_list=1)
assert [b"foo", b"bar"] == obj
expected_type = bytes
@@ -25,7 +25,7 @@ def test_unpack_bytearray():
def test_unpack_memoryview():
- buf = bytearray(packb(("foo", "bar")))
+ buf = bytearray(packb((b"foo", b"bar")))
view = memoryview(buf)
obj = unpackb(view, use_list=1)
assert [b"foo", b"bar"] == obj
diff --git a/test/test_case.py b/test/test_case.py
index 3bc1b26..3e60e59 100644
--- a/test/test_case.py
+++ b/test/test_case.py
@@ -1,13 +1,12 @@
#!/usr/bin/env python
# coding: utf-8
-
from msgpack import packb, unpackb
-def check(length, obj):
- v = packb(obj)
+def check(length, obj, use_bin_type=True):
+ v = packb(obj, use_bin_type=use_bin_type)
assert len(v) == length, "%r length should be %r but get %r" % (obj, length, len(v))
- assert unpackb(v, use_list=0) == obj
+ assert unpackb(v, use_list=0, raw=not use_bin_type) == obj
def test_1():
@@ -56,7 +55,7 @@ def test_9():
def check_raw(overhead, num):
- check(num + overhead, b" " * num)
+ check(num + overhead, b" " * num, use_bin_type=False)
def test_fixraw():
@@ -135,4 +134,4 @@ def test_match():
def test_unicode():
- assert unpackb(packb("foobar"), use_list=1) == b"foobar"
+ assert unpackb(packb(u"foobar"), use_list=1) == u"foobar"
diff --git a/test/test_format.py b/test/test_format.py
index c2cdfbd..8c2f03f 100644
--- a/test/test_format.py
+++ b/test/test_format.py
@@ -4,8 +4,8 @@
from msgpack import unpackb
-def check(src, should, use_list=0):
- assert unpackb(src, use_list=use_list) == should
+def check(src, should, use_list=0, raw=True):
+ assert unpackb(src, use_list=use_list, raw=raw) == should
def testSimpleValue():
@@ -59,6 +59,12 @@ def testRaw():
b"\x00\x00\xdb\x00\x00\x00\x01a\xdb\x00\x00\x00\x02ab",
(b"", b"a", b"ab", b"", b"a", b"ab"),
)
+ check(
+ b"\x96\xda\x00\x00\xda\x00\x01a\xda\x00\x02ab\xdb\x00\x00"
+ b"\x00\x00\xdb\x00\x00\x00\x01a\xdb\x00\x00\x00\x02ab",
+ ("", "a", "ab", "", "a", "ab"),
+ raw=False,
+ )
def testArray():
diff --git a/test/test_memoryview.py b/test/test_memoryview.py
index e1b63b8..86b2c1f 100644
--- a/test/test_memoryview.py
+++ b/test/test_memoryview.py
@@ -1,50 +1,33 @@
#!/usr/bin/env python
# coding: utf-8
+import pytest
from array import array
from msgpack import packb, unpackb
import sys
-# For Python < 3:
-# - array type only supports old buffer interface
-# - array.frombytes is not available, must use deprecated array.fromstring
-if sys.version_info[0] < 3:
+pytestmark = pytest.mark.skipif(
+ sys.version_info[0] < 3, reason="Only Python 3 supports buffer protocol"
+)
- def make_memoryview(obj):
- return memoryview(buffer(obj))
- def make_array(f, data):
- a = array(f)
- a.fromstring(data)
- return a
-
- def get_data(a):
- return a.tostring()
-
-
-else:
- make_memoryview = memoryview
-
- def make_array(f, data):
- a = array(f)
- a.frombytes(data)
- return a
-
- def get_data(a):
- return a.tobytes()
+def make_array(f, data):
+ a = array(f)
+ a.frombytes(data)
+ return a
def _runtest(format, nbytes, expected_header, expected_prefix, use_bin_type):
# create a new array
original_array = array(format)
original_array.fromlist([255] * (nbytes // original_array.itemsize))
- original_data = get_data(original_array)
- view = make_memoryview(original_array)
+ original_data = original_array.tobytes()
+ view = memoryview(original_array)
# pack, unpack, and reconstruct array
packed = packb(view, use_bin_type=use_bin_type)
- unpacked = unpackb(packed)
+ unpacked = unpackb(packed, raw=(not use_bin_type))
reconstructed_array = make_array(format, unpacked)
# check that we got the right amount of data
diff --git a/test/test_newspec.py b/test/test_newspec.py
index f4f2a23..b7da486 100644
--- a/test/test_newspec.py
+++ b/test/test_newspec.py
@@ -10,14 +10,16 @@ def test_str8():
assert len(b) == len(data) + 2
assert b[0:2] == header + b"\x20"
assert b[2:] == data
- assert unpackb(b) == data
+ assert unpackb(b, raw=True) == data
+ assert unpackb(b, raw=False) == data.decode()
data = b"x" * 255
b = packb(data.decode(), use_bin_type=True)
assert len(b) == len(data) + 2
assert b[0:2] == header + b"\xff"
assert b[2:] == data
- assert unpackb(b) == data
+ assert unpackb(b, raw=True) == data
+ assert unpackb(b, raw=False) == data.decode()
def test_bin8():
diff --git a/test/test_read_size.py b/test/test_read_size.py
index 8d8df64..33a7e7d 100644
--- a/test/test_read_size.py
+++ b/test/test_read_size.py
@@ -8,9 +8,9 @@ def test_read_array_header():
unpacker = Unpacker()
unpacker.feed(packb(["a", "b", "c"]))
assert unpacker.read_array_header() == 3
- assert unpacker.unpack() == b"a"
- assert unpacker.unpack() == b"b"
- assert unpacker.unpack() == b"c"
+ assert unpacker.unpack() == "a"
+ assert unpacker.unpack() == "b"
+ assert unpacker.unpack() == "c"
try:
unpacker.unpack()
assert 0, "should raise exception"
@@ -22,8 +22,8 @@ def test_read_map_header():
unpacker = Unpacker()
unpacker.feed(packb({"a": "A"}))
assert unpacker.read_map_header() == 1
- assert unpacker.unpack() == b"a"
- assert unpacker.unpack() == b"A"
+ assert unpacker.unpack() == "a"
+ assert unpacker.unpack() == "A"
try:
unpacker.unpack()
assert 0, "should raise exception"
diff --git a/test/test_sequnpack.py b/test/test_sequnpack.py
index e576571..9b69479 100644
--- a/test/test_sequnpack.py
+++ b/test/test_sequnpack.py
@@ -1,6 +1,5 @@
#!/usr/bin/env python
# coding: utf-8
-
import io
from msgpack import Unpacker, BufferFull
from msgpack import pack
@@ -26,7 +25,7 @@ def test_partialdata():
with raises(StopIteration):
next(iter(unpacker))
unpacker.feed(b"o")
- assert next(iter(unpacker)) == b"hallo"
+ assert next(iter(unpacker)) == "hallo"
def test_foobar():
@@ -98,13 +97,13 @@ def test_readbytes():
def test_issue124():
unpacker = Unpacker()
unpacker.feed(b"\xa1?\xa1!")
- assert tuple(unpacker) == (b"?", b"!")
+ assert tuple(unpacker) == ("?", "!")
assert tuple(unpacker) == ()
unpacker.feed(b"\xa1?\xa1")
- assert tuple(unpacker) == (b"?",)
+ assert tuple(unpacker) == ("?",)
assert tuple(unpacker) == ()
unpacker.feed(b"!")
- assert tuple(unpacker) == (b"!",)
+ assert tuple(unpacker) == ("!",)
assert tuple(unpacker) == ()