summary refs log tree commit diff
diff options
context:
space:
mode:
author Richard van der Hoff <richard@matrix.org> 2018-03-29 00:17:55 +0100
committer Richard van der Hoff <richard@matrix.org> 2018-03-29 00:17:55 +0100
commit 19012377f349da419f80e91ecd41d5f09f90d32b (patch)
tree faa09ee0557ef874d70e96d83d1de68976765e8e
parent 0406430cbcb0e31d27bd064db57837b94ed05294 (diff)
download simplejson-19012377f349da419f80e91ecd41d5f09f90d32b.tar.gz
Avoid escaping U+2028 and U+2029 without ensure_ascii
There is no need to escape U+2028 and U+2029 when ensure_ascii is false, and doing so makes us inconsistent with the standard JSON library.
-rw-r--r-- simplejson/encoder.py 9
-rw-r--r-- simplejson/tests/test_encode_for_html.py 8
-rw-r--r-- simplejson/tests/test_unicode.py 5
3 files changed, 18 insertions, 4 deletions
diff --git a/simplejson/encoder.py b/simplejson/encoder.py
index ae76ae3..b2b8775 100644
--- a/simplejson/encoder.py
+++ b/simplejson/encoder.py
@@ -17,10 +17,10 @@ c_encode_basestring_ascii, c_make_encoder = _import_speedups()
from .decoder import PosInf
from .raw_json import RawJSON
-#ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]')
+#ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t]')
# This is required because u() will mangle the string and ur'' isn't valid
# python3 syntax
-ESCAPE = re.compile(u'[\\x00-\\x1f\\\\"\\b\\f\\n\\r\\t\u2028\u2029]')
+ESCAPE = re.compile(u'[\\x00-\\x1f\\\\"\\b\\f\\n\\r\\t]')
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
HAS_UTF8 = re.compile(r'[\x80-\xff]')
ESCAPE_DCT = {
@@ -399,6 +399,11 @@ class JSONEncoderForHTML(JSONEncoder):
chunk = chunk.replace('&', '\\u0026')
chunk = chunk.replace('<', '\\u003c')
chunk = chunk.replace('>', '\\u003e')
+
+ if not self.ensure_ascii:
+ chunk = chunk.replace(u'\u2028', '\\u2028')
+ chunk = chunk.replace(u'\u2029', '\\u2029')
+
yield chunk
diff --git a/simplejson/tests/test_encode_for_html.py b/simplejson/tests/test_encode_for_html.py
index f995254..3a840aa 100644
--- a/simplejson/tests/test_encode_for_html.py
+++ b/simplejson/tests/test_encode_for_html.py
@@ -7,11 +7,19 @@ class TestEncodeForHTML(unittest.TestCase):
def setUp(self):
self.decoder = json.JSONDecoder()
self.encoder = json.JSONEncoderForHTML()
+ self.non_ascii_encoder = json.JSONEncoderForHTML(ensure_ascii=False)
def test_basic_encode(self):
self.assertEqual(r'"\u0026"', self.encoder.encode('&'))
self.assertEqual(r'"\u003c"', self.encoder.encode('<'))
self.assertEqual(r'"\u003e"', self.encoder.encode('>'))
+ self.assertEqual(r'"\u2028"', self.encoder.encode(u'\u2028'))
+
+ def test_non_ascii_basic_encode(self):
+ self.assertEqual(r'"\u0026"', self.non_ascii_encoder.encode('&'))
+ self.assertEqual(r'"\u003c"', self.non_ascii_encoder.encode('<'))
+ self.assertEqual(r'"\u003e"', self.non_ascii_encoder.encode('>'))
+ self.assertEqual(r'"\u2028"', self.non_ascii_encoder.encode(u'\u2028'))
def test_basic_roundtrip(self):
for char in '&<>':
diff --git a/simplejson/tests/test_unicode.py b/simplejson/tests/test_unicode.py
index 3b37f65..1c7e95e 100644
--- a/simplejson/tests/test_unicode.py
+++ b/simplejson/tests/test_unicode.py
@@ -106,10 +106,11 @@ class TestUnicode(TestCase):
s1 = u'\u2029\u2028'
s2 = s1.encode('utf8')
expect = '"\\u2029\\u2028"'
+ expect_non_ascii = u'"\u2029\u2028"'
self.assertEqual(json.dumps(s1), expect)
self.assertEqual(json.dumps(s2), expect)
- self.assertEqual(json.dumps(s1, ensure_ascii=False), expect)
- self.assertEqual(json.dumps(s2, ensure_ascii=False), expect)
+ self.assertEqual(json.dumps(s1, ensure_ascii=False), expect_non_ascii)
+ self.assertEqual(json.dumps(s2, ensure_ascii=False), expect_non_ascii)
def test_invalid_escape_sequences(self):
# incomplete escape sequence