diff options
author | Richard van der Hoff <richard@matrix.org> | 2018-03-29 00:17:55 +0100 |
---|---|---|
committer | Richard van der Hoff <richard@matrix.org> | 2018-03-29 00:17:55 +0100 |
commit | 19012377f349da419f80e91ecd41d5f09f90d32b (patch) | |
tree | faa09ee0557ef874d70e96d83d1de68976765e8e | |
parent | 0406430cbcb0e31d27bd064db57837b94ed05294 (diff) | |
download | simplejson-19012377f349da419f80e91ecd41d5f09f90d32b.tar.gz |
Avoid escaping U+2028 and U+2029 when ensure_ascii is false
There is no need to escape U+2028 and U+2029 when ensure_ascii is false, and
doing so makes us inconsistent with the json module in the Python standard
library. JSONEncoderForHTML still escapes both characters, regardless of
ensure_ascii.
-rw-r--r-- | simplejson/encoder.py | 9 | ||||
-rw-r--r-- | simplejson/tests/test_encode_for_html.py | 8 | ||||
-rw-r--r-- | simplejson/tests/test_unicode.py | 5 |
3 files changed, 18 insertions, 4 deletions
diff --git a/simplejson/encoder.py b/simplejson/encoder.py index ae76ae3..b2b8775 100644 --- a/simplejson/encoder.py +++ b/simplejson/encoder.py @@ -17,10 +17,10 @@ c_encode_basestring_ascii, c_make_encoder = _import_speedups() from .decoder import PosInf from .raw_json import RawJSON -#ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]') +#ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t]') # This is required because u() will mangle the string and ur'' isn't valid # python3 syntax -ESCAPE = re.compile(u'[\\x00-\\x1f\\\\"\\b\\f\\n\\r\\t\u2028\u2029]') +ESCAPE = re.compile(u'[\\x00-\\x1f\\\\"\\b\\f\\n\\r\\t]') ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') HAS_UTF8 = re.compile(r'[\x80-\xff]') ESCAPE_DCT = { @@ -399,6 +399,11 @@ class JSONEncoderForHTML(JSONEncoder): chunk = chunk.replace('&', '\\u0026') chunk = chunk.replace('<', '\\u003c') chunk = chunk.replace('>', '\\u003e') + + if not self.ensure_ascii: + chunk = chunk.replace(u'\u2028', '\\u2028') + chunk = chunk.replace(u'\u2029', '\\u2029') + yield chunk diff --git a/simplejson/tests/test_encode_for_html.py b/simplejson/tests/test_encode_for_html.py index f995254..3a840aa 100644 --- a/simplejson/tests/test_encode_for_html.py +++ b/simplejson/tests/test_encode_for_html.py @@ -7,11 +7,19 @@ class TestEncodeForHTML(unittest.TestCase): def setUp(self): self.decoder = json.JSONDecoder() self.encoder = json.JSONEncoderForHTML() + self.non_ascii_encoder = json.JSONEncoderForHTML(ensure_ascii=False) def test_basic_encode(self): self.assertEqual(r'"\u0026"', self.encoder.encode('&')) self.assertEqual(r'"\u003c"', self.encoder.encode('<')) self.assertEqual(r'"\u003e"', self.encoder.encode('>')) + self.assertEqual(r'"\u2028"', self.encoder.encode(u'\u2028')) + + def test_non_ascii_basic_encode(self): + self.assertEqual(r'"\u0026"', self.non_ascii_encoder.encode('&')) + self.assertEqual(r'"\u003c"', self.non_ascii_encoder.encode('<')) + self.assertEqual(r'"\u003e"', self.non_ascii_encoder.encode('>')) + 
self.assertEqual(r'"\u2028"', self.non_ascii_encoder.encode(u'\u2028')) def test_basic_roundtrip(self): for char in '&<>': diff --git a/simplejson/tests/test_unicode.py b/simplejson/tests/test_unicode.py index 3b37f65..1c7e95e 100644 --- a/simplejson/tests/test_unicode.py +++ b/simplejson/tests/test_unicode.py @@ -106,10 +106,11 @@ class TestUnicode(TestCase): s1 = u'\u2029\u2028' s2 = s1.encode('utf8') expect = '"\\u2029\\u2028"' + expect_non_ascii = u'"\u2029\u2028"' self.assertEqual(json.dumps(s1), expect) self.assertEqual(json.dumps(s2), expect) - self.assertEqual(json.dumps(s1, ensure_ascii=False), expect) - self.assertEqual(json.dumps(s2, ensure_ascii=False), expect) + self.assertEqual(json.dumps(s1, ensure_ascii=False), expect_non_ascii) + self.assertEqual(json.dumps(s2, ensure_ascii=False), expect_non_ascii) def test_invalid_escape_sequences(self): # incomplete escape sequence |