diff options
author | Bob Ippolito <bob@redivi.com> | 2012-12-30 19:38:52 -0800 |
---|---|---|
committer | Bob Ippolito <bob@redivi.com> | 2012-12-30 19:38:52 -0800 |
commit | f8cdf39583e2400e5f461ab85459d4d21807a4a0 (patch) | |
tree | d4cf382894dda810df840a66edac90553062c4c1 /simplejson/decoder.py | |
parent | 5ed2285e1d88f8075d47b10f941e7581d78828ff (diff) | |
download | simplejson-f8cdf39583e2400e5f461ab85459d4d21807a4a0.tar.gz |
better test coverage for invalid surrogates
Diffstat (limited to 'simplejson/decoder.py')
-rw-r--r-- | simplejson/decoder.py | 35 |
1 files changed, 25 insertions, 10 deletions
```diff
diff --git a/simplejson/decoder.py b/simplejson/decoder.py
index c844b3c..546a168 100644
--- a/simplejson/decoder.py
+++ b/simplejson/decoder.py
@@ -154,18 +154,33 @@ def py_scanstring(s, end, encoding=None, strict=True,
             if len(esc) != 4:
                 msg = "Invalid \\uXXXX escape"
                 raise JSONDecodeError(msg, s, end)
-            uni = int(esc, 16)
+            try:
+                uni = int(esc, 16)
+            except ValueError:
+                msg = "Invalid \\uXXXX escape"
+                raise JSONDecodeError(msg, s, end)
             # Check for surrogate pair on UCS-4 systems
-            if 0xd800 <= uni <= 0xdbff and _maxunicode > 65535:
-                msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
-                if not s[end + 5:end + 7] == '\\u':
-                    raise JSONDecodeError(msg, s, end)
-                esc2 = s[end + 7:end + 11]
-                if len(esc2) != 4:
+            if _maxunicode > 65535:
+                unimask = uni & 0xfc00
+                if unimask == 0xd800:
+                    msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
+                    if not s[end + 5:end + 7] == '\\u':
+                        raise JSONDecodeError(msg, s, end)
+                    esc2 = s[end + 7:end + 11]
+                    if len(esc2) != 4:
+                        raise JSONDecodeError(msg, s, end)
+                    try:
+                        uni2 = int(esc2, 16)
+                    except ValueError:
+                        raise JSONDecodeError(msg, s, end)
+                    if uni2 & 0xfc00 != 0xdc00:
+                        msg = "Unpaired high surrogate"
+                        raise JSONDecodeError(msg, s, end)
+                    uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
+                    next_end += 6
+                elif unimask == 0xdc00:
+                    msg = "Unpaired low surrogate"
                     raise JSONDecodeError(msg, s, end)
-                uni2 = int(esc2, 16)
-                uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
-                next_end += 6
             char = unichr(uni)
             end = next_end
         # Append the unescaped character
```