summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Bob Ippolito <bob@redivi.com> 2011-05-15 19:23:52 -0700
committer: Bob Ippolito <bob@redivi.com> 2011-05-15 19:23:52 -0700
commit: 4989e693bab39b1ce5cf6fc0b21dbacd108c312c (patch)
tree: bea0b1a1500c460474daff76173107182a901bbb
parent: e27ec198ec6d28bd0d211939122523a781414318 (diff)
download: simplejson-4989e693bab39b1ce5cf6fc0b21dbacd108c312c.tar.gz
Force unicode linebreak characters to be escaped (U+2028 and U+2029)
-rw-r--r-- CHANGES.txt 2
-rw-r--r-- simplejson/encoder.py 4
-rw-r--r-- simplejson/tests/test_unicode.py 10
3 files changed, 15 insertions, 1 deletion
diff --git a/CHANGES.txt b/CHANGES.txt
index 03d86ec..c391be5 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,7 @@
Version 2.1.7 released 2011-XX-XX
+* Force unicode linebreak characters to be escaped (U+2028 and U+2029)
+ http://timelessrepo.com/json-isnt-a-javascript-subset
* Moved documentation from a git submodule to
http://simplejson.readthedocs.org/
diff --git a/simplejson/encoder.py b/simplejson/encoder.py
index f43f6f4..c72bd7f 100644
--- a/simplejson/encoder.py
+++ b/simplejson/encoder.py
@@ -13,7 +13,7 @@ c_encode_basestring_ascii, c_make_encoder = _import_speedups()
from simplejson.decoder import PosInf
-ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
+ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]')
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
HAS_UTF8 = re.compile(r'[\x80-\xff]')
ESCAPE_DCT = {
@@ -24,6 +24,8 @@ ESCAPE_DCT = {
'\n': '\\n',
'\r': '\\r',
'\t': '\\t',
+ u'\u2028': '\\u2028',
+ u'\u2029': '\\u2029',
}
for i in range(0x20):
#ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
diff --git a/simplejson/tests/test_unicode.py b/simplejson/tests/test_unicode.py
index f73e5bf..83fe65b 100644
--- a/simplejson/tests/test_unicode.py
+++ b/simplejson/tests/test_unicode.py
@@ -97,3 +97,13 @@ class TestUnicode(TestCase):
self.assertEquals(json.dumps(doc2), doc_ascii)
self.assertEquals(json.dumps(doc1, ensure_ascii=False), doc_unicode)
self.assertEquals(json.dumps(doc2, ensure_ascii=False), doc_unicode)
+
+ def test_ensure_ascii_linebreak_encoding(self):
+ # http://timelessrepo.com/json-isnt-a-javascript-subset
+ s1 = u'\u2029\u2028'
+ s2 = s1.encode('utf8')
+ expect = '"\\u2029\\u2028"'
+ self.assertEquals(json.dumps(s1), expect)
+ self.assertEquals(json.dumps(s2), expect)
+ self.assertEquals(json.dumps(s1, ensure_ascii=False), expect)
+ self.assertEquals(json.dumps(s2, ensure_ascii=False), expect)