diff options
Diffstat (limited to 'simplejson/decoder.py')
-rw-r--r-- | simplejson/decoder.py | 44 |
1 files changed, 20 insertions, 24 deletions
diff --git a/simplejson/decoder.py b/simplejson/decoder.py index 1a8f772..e1f10ae 100644 --- a/simplejson/decoder.py +++ b/simplejson/decoder.py @@ -46,9 +46,22 @@ BACKSLASH = { DEFAULT_ENCODING = "utf-8" +def scan_four_digit_hex(s, end, _m=re.compile(r'^[0-9a-fA-F]{4}$').match): + """Scan a four digit hex number from s[end:end + 4] + """ + msg = "Invalid \\uXXXX escape sequence" + esc = s[end:end + 4] + if not _m(esc): + raise JSONDecodeError(msg, s, end - 2) + try: + return int(esc, 16), end + 4 + except ValueError: + raise JSONDecodeError(msg, s, end - 2) + def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match, _join=u''.join, - _PY3=PY3, _maxunicode=sys.maxunicode): + _PY3=PY3, _maxunicode=sys.maxunicode, + _scan_four_digit_hex=scan_four_digit_hex): """Scan the string s for a JSON string. End is the index of the character in s after the quote that started the JSON string. Unescapes all valid JSON string escape sequences and raises ValueError @@ -100,35 +113,18 @@ def py_scanstring(s, end, encoding=None, strict=True, end += 1 else: # Unicode escape sequence - msg = "Invalid \\uXXXX escape sequence" - esc = s[end + 1:end + 5] - escX = esc[1:2] - if len(esc) != 4 or escX == 'x' or escX == 'X': - raise JSONDecodeError(msg, s, end - 1) - try: - uni = int(esc, 16) - except ValueError: - raise JSONDecodeError(msg, s, end - 1) - if uni < 0 or uni > _maxunicode: - raise JSONDecodeError(msg, s, end - 1) - end += 5 + uni, end = _scan_four_digit_hex(s, end + 1) # Check for surrogate pair on UCS-4 systems # Note that this will join high/low surrogate pairs # but will also pass unpaired surrogates through if (_maxunicode > 65535 and uni & 0xfc00 == 0xd800 and s[end:end + 2] == '\\u'): - esc2 = s[end + 2:end + 6] - escX = esc2[1:2] - if len(esc2) == 4 and not (escX == 'x' or escX == 'X'): - try: - uni2 = int(esc2, 16) - except ValueError: - raise JSONDecodeError(msg, s, end) - if uni2 & 0xfc00 == 0xdc00: - uni = 0x10000 + (((uni - 0xd800) << 10) | - (uni2 - 0xdc00)) - end += 6 + uni2, end2 = _scan_four_digit_hex(s, end + 2) + if uni2 & 0xfc00 == 0xdc00: + uni = 0x10000 + (((uni - 0xd800) << 10) | + (uni2 - 0xdc00)) + end = end2 char = unichr(uni) # Append the unescaped character _append(char) |