Use new msgpack spec by default. (#386)

author: Inada Naoki <songofacandy@gmail.com> 2019-12-05 21:34:10 +0900
committer: GitHub <noreply@github.com> 2019-12-05 21:34:10 +0900
commit: 7e9905bdfaecde83ddb1a4575e734a10b055fde9 (patch)
tree: 2b9900fcdf0b5b1986b024edb2802971399b8fb8
parent: de320488ae494b85a03b60dd33f91b650033d775 (diff)
download: msgpack-python-7e9905bdfaecde83ddb1a4575e734a10b055fde9.tar.gz
11 files changed, 75 insertions, 126 deletions
diff --git a/README.rst b/README.rst
index 82b6c02..f9f074f 100644
--- a/README.rst
+++ b/README.rst
@@ -37,36 +37,16 @@ Sadly, this doesn't work for upgrade install.  After `pip install -U msgpack-pyt
 msgpack is removed and `import msgpack` fail.
 
 
-Deprecating encoding option
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Compatibility with old format
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-encoding and unicode_errors options are deprecated.
+You can use the ``use_bin_type=False`` option to pack ``bytes``
+objects into the raw type of the old msgpack spec, instead of the bin type of the new msgpack spec.
 
-In case of packer, use UTF-8 always.  Storing other than UTF-8 is not recommended.
+You can unpack the old msgpack format using the ``raw=True`` option.
+It unpacks str (raw) type in msgpack into Python bytes.
 
-For backward compatibility, you can use ``use_bin_type=False`` and pack ``bytes``
-object into msgpack raw type.
-
-In case of unpacker, there is new ``raw`` option.  It is ``True`` by default
-for backward compatibility, but it is changed to ``False`` in near future.
-You can use ``raw=False`` instead of ``encoding='utf-8'``.
-
-Planned backward incompatible changes
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-When msgpack 1.0, I planning these breaking changes:
-
-* packer and unpacker: Remove ``encoding`` and ``unicode_errors`` option.
-* packer: Change default of ``use_bin_type`` option from False to True.
-* unpacker: Change default of ``raw`` option from True to False.
-* unpacker: Reduce all ``max_xxx_len`` options for typical usage.
-* unpacker: Remove ``write_bytes`` option from all methods.
-
-To avoid these breaking changes breaks your application, please:
-
-* Don't use deprecated options.
-* Pass ``use_bin_type`` and ``raw`` options explicitly.
-* If your application handle large (>1MB) data, specify ``max_xxx_len`` options too.
+See the note below for details.
 
 
 Install
@@ -76,6 +56,7 @@ Install
 
    $ pip install msgpack
 
+
 Pure Python implementation
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -100,6 +81,13 @@ Without extension, using pure Python implementation on CPython runs slowly.
 How to use
 ----------
 
+.. note::
+
+   In examples below, I use ``raw=False`` and ``use_bin_type=True`` for users
+   using msgpack < 1.0.
+   These options are default from msgpack 1.0 so you can omit them.
+
+
 One-shot pack & unpack
 ^^^^^^^^^^^^^^^^^^^^^^
 
@@ -252,36 +240,18 @@ Notes
 string and binary type
 ^^^^^^^^^^^^^^^^^^^^^^
 
-Early versions of msgpack didn't distinguish string and binary types (like Python 1).
+Early versions of msgpack didn't distinguish string and binary types.
 The type for representing both string and binary types was named **raw**.
 
-For backward compatibility reasons, msgpack-python will still default all
-strings to byte strings, unless you specify the ``use_bin_type=True`` option in
-the packer. If you do so, it will use a non-standard type called **bin** to
-serialize byte arrays, and **raw** becomes to mean **str**. If you want to
-distinguish **bin** and **raw** in the unpacker, specify ``raw=False``.
-
-Note that Python 2 defaults to byte-arrays over Unicode strings:
-
-.. code-block:: pycon
-
-    >>> import msgpack
-    >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs']))
-    ['spam', 'eggs']
-    >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True),
-                        raw=False)
-    ['spam', u'eggs']
-
-This is the same code in Python 3 (same behaviour, but Python 3 has a
-different default):
+You can pack into and unpack from this old spec using ``use_bin_type=False``
+and ``raw=True`` options.
 
 .. code-block:: pycon
 
     >>> import msgpack
-    >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs']))
+    >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=False), raw=True)
     [b'spam', b'eggs']
-    >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True),
-                        raw=False)
+    >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True), raw=False)
     [b'spam', 'eggs']
 
 
diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx
index f3bde3f..8cf3c05 100644
--- a/msgpack/_packer.pyx
+++ b/msgpack/_packer.pyx
@@ -80,9 +80,7 @@ cdef class Packer(object):
 
     :param bool use_bin_type:
         Use bin type introduced in msgpack spec 2.0 for bytes.
-        It also enables str8 type for unicode.
-        Current default value is false, but it will be changed to true
-        in future version.  You should specify it explicitly.
+        It also enables str8 type for unicode. (default: True)
 
     :param bool strict_types:
         If set to true, types will be checked to be exact. Derived classes
@@ -113,7 +111,7 @@ cdef class Packer(object):
         self.pk.length = 0
 
     def __init__(self, *, default=None, unicode_errors=None,
-                 bint use_single_float=False, bint autoreset=True, bint use_bin_type=False,
+                 bint use_single_float=False, bint autoreset=True, bint use_bin_type=True,
                  bint strict_types=False):
         self.use_float = use_single_float
         self.strict_types = strict_types
diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx
index 3a9d494..f10e99d 100644
--- a/msgpack/_unpacker.pyx
+++ b/msgpack/_unpacker.pyx
@@ -131,7 +131,7 @@ cdef inline int get_data_from_buffer(object obj,
 
 
 def unpackb(object packed, *, object object_hook=None, object list_hook=None,
-            bint use_list=True, bint raw=True, bint strict_map_key=False,
+            bint use_list=True, bint raw=False, bint strict_map_key=False,
             unicode_errors=None,
             object_pairs_hook=None, ext_hook=ExtType,
             Py_ssize_t max_str_len=-1,
@@ -217,12 +217,8 @@ cdef class Unpacker(object):
         Otherwise, unpack to Python tuple. (default: True)
 
     :param bool raw:
-        If true, unpack msgpack raw to Python bytes (default).
-        Otherwise, unpack to Python str (or unicode on Python 2) by decoding
-        with UTF-8 encoding (recommended).
-        Currently, the default is true, but it will be changed to false in
-        near future.  So you must specify it explicitly for keeping backward
-        compatibility.
+        If true, unpack msgpack raw to Python bytes.
+        Otherwise, unpack to Python str by decoding with UTF-8 encoding (default).
 
     :param bool strict_map_key:
         If true, only str or bytes are accepted for map (dict) keys.
@@ -268,13 +264,13 @@ cdef class Unpacker(object):
 
     Example of streaming deserialize from file-like object::
 
-        unpacker = Unpacker(file_like, raw=False, max_buffer_size=10*1024*1024)
+        unpacker = Unpacker(file_like, max_buffer_size=10*1024*1024)
         for o in unpacker:
             process(o)
 
     Example of streaming deserialize from socket::
 
-        unpacker = Unpacker(raw=False, max_buffer_size=10*1024*1024)
+        unpacker = Unpacker(max_buffer_size=10*1024*1024)
         while True:
             buf = sock.recv(1024**2)
             if not buf:
@@ -309,7 +305,7 @@ cdef class Unpacker(object):
         self.buf = NULL
 
     def __init__(self, file_like=None, *, Py_ssize_t read_size=0,
-                 bint use_list=True, bint raw=True, bint strict_map_key=False,
+                 bint use_list=True, bint raw=False, bint strict_map_key=False,
                  object object_hook=None, object object_pairs_hook=None, object list_hook=None,
                  unicode_errors=None, Py_ssize_t max_buffer_size=0,
                  object ext_hook=ExtType,
diff --git a/msgpack/fallback.py b/msgpack/fallback.py
index 9de3553..fa2f3a8 100644
--- a/msgpack/fallback.py
+++ b/msgpack/fallback.py
@@ -158,7 +158,7 @@ else:
 class Unpacker(object):
     """Streaming unpacker.
 
-    arguments:
+    Arguments:
 
     :param file_like:
         File-like object having `.read(n)` method.
@@ -172,12 +172,8 @@ class Unpacker(object):
         Otherwise, unpack to Python tuple. (default: True)
 
     :param bool raw:
-        If true, unpack msgpack raw to Python bytes (default).
-        Otherwise, unpack to Python str (or unicode on Python 2) by decoding
-        with UTF-8 encoding (recommended).
-        Currently, the default is true, but it will be changed to false in
-        near future.  So you must specify it explicitly for keeping backward
-        compatibility.
+        If true, unpack msgpack raw to Python bytes.
+        Otherwise, unpack to Python str by decoding with UTF-8 encoding (default).
 
     :param bool strict_map_key:
         If true, only str or bytes are accepted for map (dict) keys.
@@ -226,13 +222,13 @@ class Unpacker(object):
 
     Example of streaming deserialize from file-like object::
 
-        unpacker = Unpacker(file_like, raw=False, max_buffer_size=10*1024*1024)
+        unpacker = Unpacker(file_like, max_buffer_size=10*1024*1024)
         for o in unpacker:
             process(o)
 
     Example of streaming deserialize from socket::
 
-        unpacker = Unpacker(raw=False, max_buffer_size=10*1024*1024)
+        unpacker = Unpacker(max_buffer_size=10*1024*1024)
         while True:
             buf = sock.recv(1024**2)
             if not buf:
@@ -253,7 +249,7 @@ class Unpacker(object):
         file_like=None,
         read_size=0,
         use_list=True,
-        raw=True,
+        raw=False,
         strict_map_key=False,
         object_hook=None,
         object_pairs_hook=None,
@@ -748,7 +744,7 @@ class Packer(object):
 
     :param bool use_bin_type:
         Use bin type introduced in msgpack spec 2.0 for bytes.
-        It also enables str8 type for unicode.
+        It also enables str8 type for unicode. (default: True)
 
     :param bool strict_types:
         If set to true, types will be checked to be exact. Derived classes
@@ -769,7 +765,7 @@ class Packer(object):
         unicode_errors=None,
         use_single_float=False,
         autoreset=True,
-        use_bin_type=False,
+        use_bin_type=True,
         strict_types=False,
     ):
         self._strict_types = strict_types
diff --git a/test/test_buffer.py b/test/test_buffer.py
index da68b27..62507cf 100644
--- a/test/test_buffer.py
+++ b/test/test_buffer.py
@@ -17,7 +17,7 @@ def test_unpack_buffer():
 
 
 def test_unpack_bytearray():
-    buf = bytearray(packb(("foo", "bar")))
+    buf = bytearray(packb((b"foo", b"bar")))
     obj = unpackb(buf, use_list=1)
     assert [b"foo", b"bar"] == obj
     expected_type = bytes
@@ -25,7 +25,7 @@ def test_unpack_bytearray():
 
 
 def test_unpack_memoryview():
-    buf = bytearray(packb(("foo", "bar")))
+    buf = bytearray(packb((b"foo", b"bar")))
     view = memoryview(buf)
     obj = unpackb(view, use_list=1)
     assert [b"foo", b"bar"] == obj
diff --git a/test/test_case.py b/test/test_case.py
index 3bc1b26..3e60e59 100644
--- a/test/test_case.py
+++ b/test/test_case.py
@@ -1,13 +1,12 @@
 #!/usr/bin/env python
 # coding: utf-8
-
 from msgpack import packb, unpackb
 
 
-def check(length, obj):
-    v = packb(obj)
+def check(length, obj, use_bin_type=True):
+    v = packb(obj, use_bin_type=use_bin_type)
     assert len(v) == length, "%r length should be %r but get %r" % (obj, length, len(v))
-    assert unpackb(v, use_list=0) == obj
+    assert unpackb(v, use_list=0, raw=not use_bin_type) == obj
 
 
 def test_1():
@@ -56,7 +55,7 @@ def test_9():
 
 
 def check_raw(overhead, num):
-    check(num + overhead, b" " * num)
+    check(num + overhead, b" " * num, use_bin_type=False)
 
 
 def test_fixraw():
@@ -135,4 +134,4 @@ def test_match():
 
 
 def test_unicode():
-    assert unpackb(packb("foobar"), use_list=1) == b"foobar"
+    assert unpackb(packb(u"foobar"), use_list=1) == u"foobar"
diff --git a/test/test_format.py b/test/test_format.py
index c2cdfbd..8c2f03f 100644
--- a/test/test_format.py
+++ b/test/test_format.py
@@ -4,8 +4,8 @@
 from msgpack import unpackb
 
 
-def check(src, should, use_list=0):
-    assert unpackb(src, use_list=use_list) == should
+def check(src, should, use_list=0, raw=True):
+    assert unpackb(src, use_list=use_list, raw=raw) == should
 
 
 def testSimpleValue():
@@ -59,6 +59,12 @@ def testRaw():
         b"\x00\x00\xdb\x00\x00\x00\x01a\xdb\x00\x00\x00\x02ab",
         (b"", b"a", b"ab", b"", b"a", b"ab"),
     )
+    check(
+        b"\x96\xda\x00\x00\xda\x00\x01a\xda\x00\x02ab\xdb\x00\x00"
+        b"\x00\x00\xdb\x00\x00\x00\x01a\xdb\x00\x00\x00\x02ab",
+        ("", "a", "ab", "", "a", "ab"),
+        raw=False,
+    )
 
 
 def testArray():
diff --git a/test/test_memoryview.py b/test/test_memoryview.py
index e1b63b8..86b2c1f 100644
--- a/test/test_memoryview.py
+++ b/test/test_memoryview.py
@@ -1,50 +1,33 @@
 #!/usr/bin/env python
 # coding: utf-8
 
+import pytest
 from array import array
 from msgpack import packb, unpackb
 import sys
 
 
-# For Python < 3:
-#  - array type only supports old buffer interface
-#  - array.frombytes is not available, must use deprecated array.fromstring
-if sys.version_info[0] < 3:
+pytestmark = pytest.mark.skipif(
+    sys.version_info[0] < 3, reason="Only Python 3 supports buffer protocol"
+)
 
-    def make_memoryview(obj):
-        return memoryview(buffer(obj))
 
-    def make_array(f, data):
-        a = array(f)
-        a.fromstring(data)
-        return a
-
-    def get_data(a):
-        return a.tostring()
-
-
-else:
-    make_memoryview = memoryview
-
-    def make_array(f, data):
-        a = array(f)
-        a.frombytes(data)
-        return a
-
-    def get_data(a):
-        return a.tobytes()
+def make_array(f, data):
+    a = array(f)
+    a.frombytes(data)
+    return a
 
 
 def _runtest(format, nbytes, expected_header, expected_prefix, use_bin_type):
     # create a new array
     original_array = array(format)
     original_array.fromlist([255] * (nbytes // original_array.itemsize))
-    original_data = get_data(original_array)
-    view = make_memoryview(original_array)
+    original_data = original_array.tobytes()
+    view = memoryview(original_array)
 
     # pack, unpack, and reconstruct array
     packed = packb(view, use_bin_type=use_bin_type)
-    unpacked = unpackb(packed)
+    unpacked = unpackb(packed, raw=(not use_bin_type))
     reconstructed_array = make_array(format, unpacked)
 
     # check that we got the right amount of data
diff --git a/test/test_newspec.py b/test/test_newspec.py
index f4f2a23..b7da486 100644
--- a/test/test_newspec.py
+++ b/test/test_newspec.py
@@ -10,14 +10,16 @@ def test_str8():
     assert len(b) == len(data) + 2
     assert b[0:2] == header + b"\x20"
     assert b[2:] == data
-    assert unpackb(b) == data
+    assert unpackb(b, raw=True) == data
+    assert unpackb(b, raw=False) == data.decode()
 
     data = b"x" * 255
     b = packb(data.decode(), use_bin_type=True)
     assert len(b) == len(data) + 2
     assert b[0:2] == header + b"\xff"
     assert b[2:] == data
-    assert unpackb(b) == data
+    assert unpackb(b, raw=True) == data
+    assert unpackb(b, raw=False) == data.decode()
 
 
 def test_bin8():
diff --git a/test/test_read_size.py b/test/test_read_size.py
index 8d8df64..33a7e7d 100644
--- a/test/test_read_size.py
+++ b/test/test_read_size.py
@@ -8,9 +8,9 @@ def test_read_array_header():
     unpacker = Unpacker()
     unpacker.feed(packb(["a", "b", "c"]))
     assert unpacker.read_array_header() == 3
-    assert unpacker.unpack() == b"a"
-    assert unpacker.unpack() == b"b"
-    assert unpacker.unpack() == b"c"
+    assert unpacker.unpack() == "a"
+    assert unpacker.unpack() == "b"
+    assert unpacker.unpack() == "c"
     try:
         unpacker.unpack()
         assert 0, "should raise exception"
@@ -22,8 +22,8 @@ def test_read_map_header():
     unpacker = Unpacker()
     unpacker.feed(packb({"a": "A"}))
     assert unpacker.read_map_header() == 1
-    assert unpacker.unpack() == b"a"
-    assert unpacker.unpack() == b"A"
+    assert unpacker.unpack() == "a"
+    assert unpacker.unpack() == "A"
     try:
         unpacker.unpack()
         assert 0, "should raise exception"
diff --git a/test/test_sequnpack.py b/test/test_sequnpack.py
index e576571..9b69479 100644
--- a/test/test_sequnpack.py
+++ b/test/test_sequnpack.py
@@ -1,6 +1,5 @@
 #!/usr/bin/env python
 # coding: utf-8
-
 import io
 from msgpack import Unpacker, BufferFull
 from msgpack import pack
@@ -26,7 +25,7 @@ def test_partialdata():
     with raises(StopIteration):
         next(iter(unpacker))
     unpacker.feed(b"o")
-    assert next(iter(unpacker)) == b"hallo"
+    assert next(iter(unpacker)) == "hallo"
 
 
 def test_foobar():
@@ -98,13 +97,13 @@ def test_readbytes():
 def test_issue124():
     unpacker = Unpacker()
     unpacker.feed(b"\xa1?\xa1!")
-    assert tuple(unpacker) == (b"?", b"!")
+    assert tuple(unpacker) == ("?", "!")
     assert tuple(unpacker) == ()
     unpacker.feed(b"\xa1?\xa1")
-    assert tuple(unpacker) == (b"?",)
+    assert tuple(unpacker) == ("?",)
     assert tuple(unpacker) == ()
     unpacker.feed(b"!")
-    assert tuple(unpacker) == (b"!",)
+    assert tuple(unpacker) == ("!",)
     assert tuple(unpacker) == ()
author	Inada Naoki <songofacandy@gmail.com>	2019-12-05 21:34:10 +0900
committer	GitHub <noreply@github.com>	2019-12-05 21:34:10 +0900
commit	7e9905bdfaecde83ddb1a4575e734a10b055fde9 (patch)
tree	2b9900fcdf0b5b1986b024edb2802971399b8fb8
parent	de320488ae494b85a03b60dd33f91b650033d775 (diff)
download	msgpack-python-7e9905bdfaecde83ddb1a4575e734a10b055fde9.tar.gz