diff options
author | Bob Ippolito <bob@redivi.com> | 2018-04-02 10:40:47 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-04-02 10:40:47 -0700 |
commit | ca2f85ed1f6a7d7679af942faa695f95c30ccbda (patch) | |
tree | d887b379d3ba115d7b7c9cad48f134d17e4a6d4f | |
parent | 0406430cbcb0e31d27bd064db57837b94ed05294 (diff) | |
parent | 3309d122f922540102da1463744e1ad54c3ccb18 (diff) | |
download | simplejson-ca2f85ed1f6a7d7679af942faa695f95c30ccbda.tar.gz |
Merge pull request #211 from richvdh/rav/fix_linebreak_encoding
Avoid escaping U+2028 and U+2029 without ensure_ascii
-rw-r--r-- | index.rst | 5 | ||||
-rw-r--r-- | simplejson/encoder.py | 17 | ||||
-rw-r--r-- | simplejson/tests/test_encode_for_html.py | 8 | ||||
-rw-r--r-- | simplejson/tests/test_unicode.py | 5 |
4 files changed, 27 insertions, 8 deletions
@@ -767,6 +767,11 @@ Encoders and decoders Subclass of :class:`JSONEncoder` that escapes &, <, and > for embedding in HTML. + It also escapes the characters U+2028 (LINE SEPARATOR) and + U+2029 (PARAGRAPH SEPARATOR), irrespective of the *ensure_ascii* setting, + as these characters are not valid in JavaScript strings (see + http://timelessrepo.com/json-isnt-a-javascript-subset). + .. versionchanged:: 2.1.0 New in 2.1.0 diff --git a/simplejson/encoder.py b/simplejson/encoder.py index ae76ae3..ec73ce3 100644 --- a/simplejson/encoder.py +++ b/simplejson/encoder.py @@ -17,10 +17,7 @@ c_encode_basestring_ascii, c_make_encoder = _import_speedups() from .decoder import PosInf from .raw_json import RawJSON -#ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]') -# This is required because u() will mangle the string and ur'' isn't valid -# python3 syntax -ESCAPE = re.compile(u'[\\x00-\\x1f\\\\"\\b\\f\\n\\r\\t\u2028\u2029]') +ESCAPE = re.compile(r'[\x00-\x1f\\"]') ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') HAS_UTF8 = re.compile(r'[\x80-\xff]') ESCAPE_DCT = { @@ -35,8 +32,6 @@ ESCAPE_DCT = { for i in range(0x20): #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i)) ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) -for i in [0x2028, 0x2029]: - ESCAPE_DCT.setdefault(unichr(i), '\\u%04x' % (i,)) FLOAT_REPR = repr @@ -382,6 +377,11 @@ class JSONEncoderForHTML(JSONEncoder): characters &, < and > should be escaped. They cannot be escaped with the usual entities (e.g. &amp;) because they are not expanded within <script> tags. + + This class also escapes the line separator and paragraph separator + characters U+2028 and U+2029, irrespective of the ensure_ascii setting, + as these characters are not valid in JavaScript strings (see + http://timelessrepo.com/json-isnt-a-javascript-subset). 
""" def encode(self, o): @@ -399,6 +399,11 @@ class JSONEncoderForHTML(JSONEncoder): chunk = chunk.replace('&', '\\u0026') chunk = chunk.replace('<', '\\u003c') chunk = chunk.replace('>', '\\u003e') + + if not self.ensure_ascii: + chunk = chunk.replace(u'\u2028', '\\u2028') + chunk = chunk.replace(u'\u2029', '\\u2029') + yield chunk diff --git a/simplejson/tests/test_encode_for_html.py b/simplejson/tests/test_encode_for_html.py index f995254..3a840aa 100644 --- a/simplejson/tests/test_encode_for_html.py +++ b/simplejson/tests/test_encode_for_html.py @@ -7,11 +7,19 @@ class TestEncodeForHTML(unittest.TestCase): def setUp(self): self.decoder = json.JSONDecoder() self.encoder = json.JSONEncoderForHTML() + self.non_ascii_encoder = json.JSONEncoderForHTML(ensure_ascii=False) def test_basic_encode(self): self.assertEqual(r'"\u0026"', self.encoder.encode('&')) self.assertEqual(r'"\u003c"', self.encoder.encode('<')) self.assertEqual(r'"\u003e"', self.encoder.encode('>')) + self.assertEqual(r'"\u2028"', self.encoder.encode(u'\u2028')) + + def test_non_ascii_basic_encode(self): + self.assertEqual(r'"\u0026"', self.non_ascii_encoder.encode('&')) + self.assertEqual(r'"\u003c"', self.non_ascii_encoder.encode('<')) + self.assertEqual(r'"\u003e"', self.non_ascii_encoder.encode('>')) + self.assertEqual(r'"\u2028"', self.non_ascii_encoder.encode(u'\u2028')) def test_basic_roundtrip(self): for char in '&<>': diff --git a/simplejson/tests/test_unicode.py b/simplejson/tests/test_unicode.py index 3b37f65..1c7e95e 100644 --- a/simplejson/tests/test_unicode.py +++ b/simplejson/tests/test_unicode.py @@ -106,10 +106,11 @@ class TestUnicode(TestCase): s1 = u'\u2029\u2028' s2 = s1.encode('utf8') expect = '"\\u2029\\u2028"' + expect_non_ascii = u'"\u2029\u2028"' self.assertEqual(json.dumps(s1), expect) self.assertEqual(json.dumps(s2), expect) - self.assertEqual(json.dumps(s1, ensure_ascii=False), expect) - self.assertEqual(json.dumps(s2, ensure_ascii=False), expect) + 
self.assertEqual(json.dumps(s1, ensure_ascii=False), expect_non_ascii) + self.assertEqual(json.dumps(s2, ensure_ascii=False), expect_non_ascii) def test_invalid_escape_sequences(self): # incomplete escape sequence |