From 1c22a813294d98176decb516dcd5834a09f79c32 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 25 Apr 2018 12:10:39 +0300 Subject: Bypass the decode() method in bytes subclasses. The Python and the C implementations produce different results due to using the decode() method. --- simplejson/decoder.py | 2 +- simplejson/encoder.py | 34 ++++++++++++++++++++++------------ simplejson/tests/test_decode.py | 22 +++++++++++++++++++++- simplejson/tests/test_dump.py | 33 ++++++++++++++++++++++++++++++++- 4 files changed, 76 insertions(+), 15 deletions(-) (limited to 'simplejson') diff --git a/simplejson/decoder.py b/simplejson/decoder.py index 0c3a45c..7f0b056 100644 --- a/simplejson/decoder.py +++ b/simplejson/decoder.py @@ -366,7 +366,7 @@ class JSONDecoder(object): """ if _PY3 and isinstance(s, bytes): - s = s.decode(self.encoding) + s = str(s, self.encoding) obj, end = self.raw_decode(s) end = _w(s, end).end() if end != len(s): diff --git a/simplejson/encoder.py b/simplejson/encoder.py index d2b6bca..fa45f50 100644 --- a/simplejson/encoder.py +++ b/simplejson/encoder.py @@ -5,7 +5,7 @@ import re from operator import itemgetter # Do not import Decimal directly to avoid reload issues import decimal -from .compat import unichr, binary_type, string_types, integer_types, PY3 +from .compat import unichr, binary_type, text_type, string_types, integer_types, PY3 def _import_speedups(): try: from . import _speedups @@ -41,13 +41,18 @@ def encode_basestring(s, _PY3=PY3, _q=u'"'): """ if _PY3: if isinstance(s, bytes): - s = s.decode('utf-8') - if type(s) is not str: + s = str(s, 'utf-8') + elif type(s) is not str: + # convetr an str subclass instance to exact str + # raise a TypeError otherwise s = str.__str__(s) else: if isinstance(s, str) and HAS_UTF8.search(s) is not None: - s = s.decode('utf-8') - if type(s) not in (str, unicode): + s = unicode(s, 'utf-8') + elif type(s) not in (str, unicode): + # convetr an str subclass instance to exact str + # convetr a unicode subclass instance to exact unicode + # raise a TypeError otherwise if isinstance(s, str): s = str.__str__(s) else: @@ -63,13 +68,18 @@ def py_encode_basestring_ascii(s, _PY3=PY3): """ if _PY3: if isinstance(s, bytes): - s = s.decode('utf-8') - if type(s) is not str: + s = str(s, 'utf-8') + elif type(s) is not str: + # convetr an str subclass instance to exact str + # raise a TypeError otherwise s = str.__str__(s) else: if isinstance(s, str) and HAS_UTF8.search(s) is not None: - s = s.decode('utf-8') - if type(s) not in (str, unicode): + s = unicode(s, 'utf-8') + elif type(s) not in (str, unicode): + # convetr an str subclass instance to exact str + # convetr a unicode subclass instance to exact unicode + # raise a TypeError otherwise if isinstance(s, str): s = str.__str__(s) else: @@ -274,7 +284,7 @@ class JSONEncoder(object): if isinstance(o, binary_type): _encoding = self.encoding if (_encoding is not None and not (_encoding == 'utf-8')): - o = o.decode(_encoding) + o = text_type(o, _encoding) if isinstance(o, string_types): if self.ensure_ascii: return encode_basestring_ascii(o) @@ -312,7 +322,7 @@ class JSONEncoder(object): if self.encoding != 'utf-8' and self.encoding is not None: def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding): if isinstance(o, binary_type): - o = o.decode(_encoding) + o = text_type(o, _encoding) return _orig_encoder(o) def floatstr(o, allow_nan=self.allow_nan, ignore_nan=self.ignore_nan, @@ -535,7 +545,7 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, if isinstance(key, string_types): # pragma: no cover pass elif _PY3 and isinstance(key, bytes) and _encoding is not None: - key = key.decode(_encoding) + key = str(key, _encoding) elif isinstance(key, float): key = _floatstr(key) elif key is True: diff --git a/simplejson/tests/test_decode.py b/simplejson/tests/test_decode.py index 30b692a..6960ee5 100644 --- a/simplejson/tests/test_decode.py +++ b/simplejson/tests/test_decode.py @@ -3,9 +3,17 @@ import decimal from unittest import TestCase import simplejson as json -from simplejson.compat import StringIO +from simplejson.compat import StringIO, b, binary_type from simplejson import OrderedDict +class MisbehavingBytesSubtype(binary_type): + def decode(self, encoding=None): + return "bad decode" + def __str__(self): + return "bad __str__" + def __bytes__(self): + return b("bad __bytes__") + class TestDecode(TestCase): if not hasattr(TestCase, 'assertIs'): def assertIs(self, a, b): @@ -87,6 +95,18 @@ class TestDecode(TestCase): ({'a': {}}, 11), cls().raw_decode(" \n{\"a\": {}}")) + def test_bytes_decode(self): + cls = json.decoder.JSONDecoder + data = b('"\xe2\x82\xac"') + self.assertEqual(cls().decode(data), u'\u20ac') + self.assertEqual(cls(encoding='latin1').decode(data), u'\xe2\x82\xac') + self.assertEqual(cls(encoding=None).decode(data), u'\u20ac') + + data = MisbehavingBytesSubtype(b('"\xe2\x82\xac"')) + self.assertEqual(cls().decode(data), u'\u20ac') + self.assertEqual(cls(encoding='latin1').decode(data), u'\xe2\x82\xac') + self.assertEqual(cls(encoding=None).decode(data), u'\u20ac') + def test_bounds_checking(self): # https://github.com/simplejson/simplejson/issues/98 j = json.decoder.JSONDecoder() diff --git a/simplejson/tests/test_dump.py b/simplejson/tests/test_dump.py index 6b36c20..eff24c2 100644 --- a/simplejson/tests/test_dump.py +++ b/simplejson/tests/test_dump.py @@ -1,11 +1,19 @@ from unittest import TestCase -from simplejson.compat import StringIO, long_type, b, text_type, PY3 +from simplejson.compat import StringIO, long_type, b, binary_type, text_type, PY3 import simplejson as json class MisbehavingTextSubtype(text_type): def __str__(self): return "FAIL!" +class MisbehavingBytesSubtype(binary_type): + def decode(self, encoding=None): + return "bad decode" + def __str__(self): + return "bad __str__" + def __bytes__(self): + return b("bad __bytes__") + def as_text_type(s): if PY3 and isinstance(s, bytes): return s.decode('ascii') @@ -143,6 +151,29 @@ class TestDump(TestCase): json.dumps(MisbehavingTextSubtype(text)), json.dumps(text) ) + self.assertEqual( + json.dumps([MisbehavingTextSubtype(text)]), + json.dumps([text]) + ) + self.assertEqual( + json.dumps({MisbehavingTextSubtype(text): 42}), + json.dumps({text: 42}) + ) + + def test_misbehaving_bytes_subtype(self): + data = b("this is some data \xe2\x82\xac") + self.assertEqual( + json.dumps(MisbehavingBytesSubtype(data)), + json.dumps(data) + ) + self.assertEqual( + json.dumps([MisbehavingBytesSubtype(data)]), + json.dumps([data]) + ) + self.assertEqual( + json.dumps({MisbehavingBytesSubtype(data): 42}), + json.dumps({data: 42}) + ) def test_bytes_toplevel(self): self.assertEqual(json.dumps(b('\xe2\x82\xac')), r'"\u20ac"') -- cgit v1.2.1 From 3e5e4420f7de79956c1ec2d909e994ab142c2930 Mon Sep 17 00:00:00 2001 From: Bob Ippolito Date: Wed, 25 Apr 2018 09:36:05 -0700 Subject: fix comment typos --- simplejson/encoder.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'simplejson') diff --git a/simplejson/encoder.py b/simplejson/encoder.py index fa45f50..2cf5e3a 100644 --- a/simplejson/encoder.py +++ b/simplejson/encoder.py @@ -43,15 +43,15 @@ def encode_basestring(s, _PY3=PY3, _q=u'"'): if isinstance(s, bytes): s = str(s, 'utf-8') elif type(s) is not str: - # convetr an str subclass instance to exact str + # convert an str subclass instance to exact str # raise a TypeError otherwise s = str.__str__(s) else: if isinstance(s, str) and HAS_UTF8.search(s) is not None: s = unicode(s, 'utf-8') elif type(s) not in (str, unicode): - # convetr an str subclass instance to exact str - # convetr a unicode subclass instance to exact unicode + # convert an str subclass instance to exact str + # convert a unicode subclass instance to exact unicode # raise a TypeError otherwise if isinstance(s, str): s = str.__str__(s) -- cgit v1.2.1 From 831764b485060d11f903e050904d53ac42337d16 Mon Sep 17 00:00:00 2001 From: Bob Ippolito Date: Wed, 25 Apr 2018 09:36:42 -0700 Subject: Fix the other comment typos --- simplejson/encoder.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'simplejson') diff --git a/simplejson/encoder.py b/simplejson/encoder.py index 2cf5e3a..7ea172e 100644 --- a/simplejson/encoder.py +++ b/simplejson/encoder.py @@ -70,15 +70,15 @@ def py_encode_basestring_ascii(s, _PY3=PY3): if isinstance(s, bytes): s = str(s, 'utf-8') elif type(s) is not str: - # convetr an str subclass instance to exact str + # convert an str subclass instance to exact str # raise a TypeError otherwise s = str.__str__(s) else: if isinstance(s, str) and HAS_UTF8.search(s) is not None: s = unicode(s, 'utf-8') elif type(s) not in (str, unicode): - # convetr an str subclass instance to exact str - # convetr a unicode subclass instance to exact unicode + # convert an str subclass instance to exact str + # convert a unicode subclass instance to exact unicode # raise a TypeError otherwise if isinstance(s, str): s = str.__str__(s) -- cgit v1.2.1