summary | refs | log | tree | commit | diff
path: root/simplejson/decoder.py
diff options
context:
space:
mode:
Diffstat (limited to 'simplejson/decoder.py')
-rw-r--r-- simplejson/decoder.py | 44
1 files changed, 20 insertions, 24 deletions
diff --git a/simplejson/decoder.py b/simplejson/decoder.py
index 1a8f772..e1f10ae 100644
--- a/simplejson/decoder.py
+++ b/simplejson/decoder.py
@@ -46,9 +46,22 @@ BACKSLASH = {
DEFAULT_ENCODING = "utf-8"
+def scan_four_digit_hex(s, end, _m=re.compile(r'^[0-9a-fA-F]{4}$').match):
+ """Scan a four digit hex number from s[end:end + 4]
+ """
+ msg = "Invalid \\uXXXX escape sequence"
+ esc = s[end:end + 4]
+ if not _m(esc):
+ raise JSONDecodeError(msg, s, end - 2)
+ try:
+ return int(esc, 16), end + 4
+ except ValueError:
+ raise JSONDecodeError(msg, s, end - 2)
+
def py_scanstring(s, end, encoding=None, strict=True,
_b=BACKSLASH, _m=STRINGCHUNK.match, _join=u''.join,
- _PY3=PY3, _maxunicode=sys.maxunicode):
+ _PY3=PY3, _maxunicode=sys.maxunicode,
+ _scan_four_digit_hex=scan_four_digit_hex):
"""Scan the string s for a JSON string. End is the index of the
character in s after the quote that started the JSON string.
Unescapes all valid JSON string escape sequences and raises ValueError
@@ -100,35 +113,18 @@ def py_scanstring(s, end, encoding=None, strict=True,
end += 1
else:
# Unicode escape sequence
- msg = "Invalid \\uXXXX escape sequence"
- esc = s[end + 1:end + 5]
- escX = esc[1:2]
- if len(esc) != 4 or escX == 'x' or escX == 'X':
- raise JSONDecodeError(msg, s, end - 1)
- try:
- uni = int(esc, 16)
- except ValueError:
- raise JSONDecodeError(msg, s, end - 1)
- if uni < 0 or uni > _maxunicode:
- raise JSONDecodeError(msg, s, end - 1)
- end += 5
+ uni, end = _scan_four_digit_hex(s, end + 1)
# Check for surrogate pair on UCS-4 systems
# Note that this will join high/low surrogate pairs
# but will also pass unpaired surrogates through
if (_maxunicode > 65535 and
uni & 0xfc00 == 0xd800 and
s[end:end + 2] == '\\u'):
- esc2 = s[end + 2:end + 6]
- escX = esc2[1:2]
- if len(esc2) == 4 and not (escX == 'x' or escX == 'X'):
- try:
- uni2 = int(esc2, 16)
- except ValueError:
- raise JSONDecodeError(msg, s, end)
- if uni2 & 0xfc00 == 0xdc00:
- uni = 0x10000 + (((uni - 0xd800) << 10) |
- (uni2 - 0xdc00))
- end += 6
+ uni2, end2 = _scan_four_digit_hex(s, end + 2)
+ if uni2 & 0xfc00 == 0xdc00:
+ uni = 0x10000 + (((uni - 0xd800) << 10) |
+ (uni2 - 0xdc00))
+ end = end2
char = unichr(uni)
# Append the unescaped character
_append(char)