From 1c22a813294d98176decb516dcd5834a09f79c32 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Wed, 25 Apr 2018 12:10:39 +0300
Subject: Bypass the decode() method in bytes subclasses.

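The Python and the C implementations produce different results for
bytes subclasses that override decode(), because the Python code calls
the instance's decode() method. Build the text with str(), unicode() or
text_type() from the bytes and the encoding instead.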
---
 simplejson/decoder.py           |  2 +-
 simplejson/encoder.py           | 34 ++++++++++++++++++++++------------
 simplejson/tests/test_decode.py | 22 +++++++++++++++++++++-
 simplejson/tests/test_dump.py   | 33 ++++++++++++++++++++++++++++++++-
 4 files changed, 76 insertions(+), 15 deletions(-)

(limited to 'simplejson')

diff --git a/simplejson/decoder.py b/simplejson/decoder.py
index 0c3a45c..7f0b056 100644
--- a/simplejson/decoder.py
+++ b/simplejson/decoder.py
@@ -366,7 +366,7 @@ class JSONDecoder(object):
 
         """
         if _PY3 and isinstance(s, bytes):
-            s = s.decode(self.encoding)
+            s = str(s, self.encoding)
         obj, end = self.raw_decode(s)
         end = _w(s, end).end()
         if end != len(s):
diff --git a/simplejson/encoder.py b/simplejson/encoder.py
index d2b6bca..fa45f50 100644
--- a/simplejson/encoder.py
+++ b/simplejson/encoder.py
@@ -5,7 +5,7 @@ import re
 from operator import itemgetter
 # Do not import Decimal directly to avoid reload issues
 import decimal
-from .compat import unichr, binary_type, string_types, integer_types, PY3
+from .compat import unichr, binary_type, text_type, string_types, integer_types, PY3
 def _import_speedups():
     try:
         from . import _speedups
@@ -41,13 +41,18 @@ def encode_basestring(s, _PY3=PY3, _q=u'"'):
     """
     if _PY3:
         if isinstance(s, bytes):
-            s = s.decode('utf-8')
-        if type(s) is not str:
+            s = str(s, 'utf-8')
+        elif type(s) is not str:
+            # convetr an str subclass instance to exact str
+            # raise a TypeError otherwise
             s = str.__str__(s)
     else:
         if isinstance(s, str) and HAS_UTF8.search(s) is not None:
-            s = s.decode('utf-8')
-        if type(s) not in (str, unicode):
+            s = unicode(s, 'utf-8')
+        elif type(s) not in (str, unicode):
+            # convetr an str subclass instance to exact str
+            # convetr a unicode subclass instance to exact unicode
+            # raise a TypeError otherwise
             if isinstance(s, str):
                 s = str.__str__(s)
             else:
@@ -63,13 +68,18 @@ def py_encode_basestring_ascii(s, _PY3=PY3):
     """
     if _PY3:
         if isinstance(s, bytes):
-            s = s.decode('utf-8')
-        if type(s) is not str:
+            s = str(s, 'utf-8')
+        elif type(s) is not str:
+            # convetr an str subclass instance to exact str
+            # raise a TypeError otherwise
             s = str.__str__(s)
     else:
         if isinstance(s, str) and HAS_UTF8.search(s) is not None:
-            s = s.decode('utf-8')
-        if type(s) not in (str, unicode):
+            s = unicode(s, 'utf-8')
+        elif type(s) not in (str, unicode):
+            # convetr an str subclass instance to exact str
+            # convetr a unicode subclass instance to exact unicode
+            # raise a TypeError otherwise
             if isinstance(s, str):
                 s = str.__str__(s)
             else:
@@ -274,7 +284,7 @@ class JSONEncoder(object):
         if isinstance(o, binary_type):
             _encoding = self.encoding
             if (_encoding is not None and not (_encoding == 'utf-8')):
-                o = o.decode(_encoding)
+                o = text_type(o, _encoding)
         if isinstance(o, string_types):
             if self.ensure_ascii:
                 return encode_basestring_ascii(o)
@@ -312,7 +322,7 @@ class JSONEncoder(object):
         if self.encoding != 'utf-8' and self.encoding is not None:
             def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
                 if isinstance(o, binary_type):
-                    o = o.decode(_encoding)
+                    o = text_type(o, _encoding)
                 return _orig_encoder(o)
 
         def floatstr(o, allow_nan=self.allow_nan, ignore_nan=self.ignore_nan,
@@ -535,7 +545,7 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
         if isinstance(key, string_types): # pragma: no cover
             pass
         elif _PY3 and isinstance(key, bytes) and _encoding is not None:
-            key = key.decode(_encoding)
+            key = str(key, _encoding)
         elif isinstance(key, float):
             key = _floatstr(key)
         elif key is True:
diff --git a/simplejson/tests/test_decode.py b/simplejson/tests/test_decode.py
index 30b692a..6960ee5 100644
--- a/simplejson/tests/test_decode.py
+++ b/simplejson/tests/test_decode.py
@@ -3,9 +3,17 @@ import decimal
 from unittest import TestCase
 
 import simplejson as json
-from simplejson.compat import StringIO
+from simplejson.compat import StringIO, b, binary_type
 from simplejson import OrderedDict
 
+class MisbehavingBytesSubtype(binary_type):
+    def decode(self, encoding=None):
+        return "bad decode"
+    def __str__(self):
+        return "bad __str__"
+    def __bytes__(self):
+        return b("bad __bytes__")
+
 class TestDecode(TestCase):
     if not hasattr(TestCase, 'assertIs'):
         def assertIs(self, a, b):
@@ -87,6 +95,18 @@ class TestDecode(TestCase):
             ({'a': {}}, 11),
             cls().raw_decode(" \n{\"a\": {}}"))
 
+    def test_bytes_decode(self):
+        cls = json.decoder.JSONDecoder
+        data = b('"\xe2\x82\xac"')
+        self.assertEqual(cls().decode(data), u'\u20ac')
+        self.assertEqual(cls(encoding='latin1').decode(data), u'\xe2\x82\xac')
+        self.assertEqual(cls(encoding=None).decode(data), u'\u20ac')
+
+        data = MisbehavingBytesSubtype(b('"\xe2\x82\xac"'))
+        self.assertEqual(cls().decode(data), u'\u20ac')
+        self.assertEqual(cls(encoding='latin1').decode(data), u'\xe2\x82\xac')
+        self.assertEqual(cls(encoding=None).decode(data), u'\u20ac')
+
     def test_bounds_checking(self):
         # https://github.com/simplejson/simplejson/issues/98
         j = json.decoder.JSONDecoder()
diff --git a/simplejson/tests/test_dump.py b/simplejson/tests/test_dump.py
index 6b36c20..eff24c2 100644
--- a/simplejson/tests/test_dump.py
+++ b/simplejson/tests/test_dump.py
@@ -1,11 +1,19 @@
 from unittest import TestCase
-from simplejson.compat import StringIO, long_type, b, text_type, PY3
+from simplejson.compat import StringIO, long_type, b, binary_type, text_type, PY3
 import simplejson as json
 
 class MisbehavingTextSubtype(text_type):
     def __str__(self):
         return "FAIL!"
 
+class MisbehavingBytesSubtype(binary_type):
+    def decode(self, encoding=None):
+        return "bad decode"
+    def __str__(self):
+        return "bad __str__"
+    def __bytes__(self):
+        return b("bad __bytes__")
+
 def as_text_type(s):
     if PY3 and isinstance(s, bytes):
         return s.decode('ascii')
@@ -143,6 +151,29 @@ class TestDump(TestCase):
             json.dumps(MisbehavingTextSubtype(text)),
             json.dumps(text)
         )
+        self.assertEqual(
+            json.dumps([MisbehavingTextSubtype(text)]),
+            json.dumps([text])
+        )
+        self.assertEqual(
+            json.dumps({MisbehavingTextSubtype(text): 42}),
+            json.dumps({text: 42})
+        )
+
+    def test_misbehaving_bytes_subtype(self):
+        data = b("this is some data \xe2\x82\xac")
+        self.assertEqual(
+            json.dumps(MisbehavingBytesSubtype(data)),
+            json.dumps(data)
+        )
+        self.assertEqual(
+            json.dumps([MisbehavingBytesSubtype(data)]),
+            json.dumps([data])
+        )
+        self.assertEqual(
+            json.dumps({MisbehavingBytesSubtype(data): 42}),
+            json.dumps({data: 42})
+        )
 
     def test_bytes_toplevel(self):
         self.assertEqual(json.dumps(b('\xe2\x82\xac')), r'"\u20ac"')
-- 
cgit v1.2.1


From 3e5e4420f7de79956c1ec2d909e994ab142c2930 Mon Sep 17 00:00:00 2001
From: Bob Ippolito <bob@redivi.com>
Date: Wed, 25 Apr 2018 09:36:05 -0700
Subject: fix comment typos

---
 simplejson/encoder.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'simplejson')

diff --git a/simplejson/encoder.py b/simplejson/encoder.py
index fa45f50..2cf5e3a 100644
--- a/simplejson/encoder.py
+++ b/simplejson/encoder.py
@@ -43,15 +43,15 @@ def encode_basestring(s, _PY3=PY3, _q=u'"'):
         if isinstance(s, bytes):
             s = str(s, 'utf-8')
         elif type(s) is not str:
-            # convetr an str subclass instance to exact str
+            # convert an str subclass instance to exact str
             # raise a TypeError otherwise
             s = str.__str__(s)
     else:
         if isinstance(s, str) and HAS_UTF8.search(s) is not None:
             s = unicode(s, 'utf-8')
         elif type(s) not in (str, unicode):
-            # convetr an str subclass instance to exact str
-            # convetr a unicode subclass instance to exact unicode
+            # convert an str subclass instance to exact str
+            # convert a unicode subclass instance to exact unicode
             # raise a TypeError otherwise
             if isinstance(s, str):
                 s = str.__str__(s)
-- 
cgit v1.2.1


From 831764b485060d11f903e050904d53ac42337d16 Mon Sep 17 00:00:00 2001
From: Bob Ippolito <bob@redivi.com>
Date: Wed, 25 Apr 2018 09:36:42 -0700
Subject: Fix the other comment typos

---
 simplejson/encoder.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'simplejson')

diff --git a/simplejson/encoder.py b/simplejson/encoder.py
index 2cf5e3a..7ea172e 100644
--- a/simplejson/encoder.py
+++ b/simplejson/encoder.py
@@ -70,15 +70,15 @@ def py_encode_basestring_ascii(s, _PY3=PY3):
         if isinstance(s, bytes):
             s = str(s, 'utf-8')
         elif type(s) is not str:
-            # convetr an str subclass instance to exact str
+            # convert an str subclass instance to exact str
             # raise a TypeError otherwise
             s = str.__str__(s)
     else:
         if isinstance(s, str) and HAS_UTF8.search(s) is not None:
             s = unicode(s, 'utf-8')
         elif type(s) not in (str, unicode):
-            # convetr an str subclass instance to exact str
-            # convetr a unicode subclass instance to exact unicode
+            # convert an str subclass instance to exact str
+            # convert a unicode subclass instance to exact unicode
             # raise a TypeError otherwise
             if isinstance(s, str):
                 s = str.__str__(s)
-- 
cgit v1.2.1