summary | refs | log | tree | commit | diff
path: root/Lib/json
diff options
context:
space:
mode:
author Serhiy Storchaka <storchaka@gmail.com> 2013-11-26 21:27:11 +0200
committer Serhiy Storchaka <storchaka@gmail.com> 2013-11-26 21:27:11 +0200
commit 69dec63c6909ed56f12cb86ce8da610e12910727 (patch)
tree e6961d340d6feeda954fded6ce3ea2d92869f1ef /Lib/json
parent dc58945c84ffbfcef5b6ed13ab89338e6a2c06b6 (diff)
parent 7131376f0ecacba73a5ff8098a5a44d113fe45ea (diff)
download cpython-69dec63c6909ed56f12cb86ce8da610e12910727.tar.gz
Issue #11489: JSON decoder now accepts lone surrogates.
Diffstat (limited to 'Lib/json')
-rw-r--r-- Lib/json/decoder.py 35
1 files changed, 17 insertions, 18 deletions
diff --git a/Lib/json/decoder.py b/Lib/json/decoder.py
index da7ef9c819..59e5f41f4d 100644
--- a/Lib/json/decoder.py
+++ b/Lib/json/decoder.py
@@ -58,6 +58,16 @@ BACKSLASH = {
'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t',
}
+def _decode_uXXXX(s, pos):
+ esc = s[pos + 1:pos + 5]
+ if len(esc) == 4 and esc[1] not in 'xX':
+ try:
+ return int(esc, 16)
+ except ValueError:
+ pass
+ msg = "Invalid \\uXXXX escape"
+ raise ValueError(errmsg(msg, s, pos))
+
def py_scanstring(s, end, strict=True,
_b=BACKSLASH, _m=STRINGCHUNK.match):
"""Scan the string s for a JSON string. End is the index of the
@@ -107,25 +117,14 @@ def py_scanstring(s, end, strict=True,
raise ValueError(errmsg(msg, s, end))
end += 1
else:
- esc = s[end + 1:end + 5]
- next_end = end + 5
- if len(esc) != 4:
- msg = "Invalid \\uXXXX escape"
- raise ValueError(errmsg(msg, s, end))
- uni = int(esc, 16)
- if 0xd800 <= uni <= 0xdbff:
- msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
- if not s[end + 5:end + 7] == '\\u':
- raise ValueError(errmsg(msg, s, end))
- esc2 = s[end + 7:end + 11]
- if len(esc2) != 4:
- raise ValueError(errmsg(msg, s, end))
- uni2 = int(esc2, 16)
- uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
- next_end += 6
+ uni = _decode_uXXXX(s, end)
+ end += 5
+ if 0xd800 <= uni <= 0xdbff and s[end:end + 2] == '\\u':
+ uni2 = _decode_uXXXX(s, end + 1)
+ if 0xdc00 <= uni2 <= 0xdfff:
+ uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
+ end += 6
char = chr(uni)
-
- end = next_end
_append(char)
return ''.join(chunks), end