diff options
author | Bob Ippolito <bob@redivi.com> | 2008-03-23 09:43:55 +0000 |
---|---|---|
committer | Bob Ippolito <bob@redivi.com> | 2008-03-23 09:43:55 +0000 |
commit | 609133a6f03397e5e6fe8e688438e548379bf2af (patch) | |
tree | 70acc4c17bf547021e8ae49cc542a2e21da11484 /simplejson/decoder.py | |
parent | 95dd20b6b733197d8a0443af9fc277d7a31eb335 (diff) | |
download | simplejson-609133a6f03397e5e6fe8e688438e548379bf2af.tar.gz |
surrogate pair decoding
git-svn-id: http://simplejson.googlecode.com/svn/trunk@72 a4795897-2c25-0410-b006-0d3caba88fa1
Diffstat (limited to 'simplejson/decoder.py')
-rw-r--r-- | simplejson/decoder.py | 19 |
1 files changed, 16 insertions, 3 deletions
```diff
diff --git a/simplejson/decoder.py b/simplejson/decoder.py
index 162483c..97d3077 100644
--- a/simplejson/decoder.py
+++ b/simplejson/decoder.py
@@ -106,13 +106,26 @@ def scanstring(s, end, encoding=None, _b=BACKSLASH, _m=STRINGCHUNK.match):
                 end += 1
             else:
                 esc = s[end + 1:end + 5]
+                next_end = end + 5
+                msg = "Invalid \\uXXXX escape"
                 try:
-                    m = unichr(int(esc, 16))
                     if len(esc) != 4 or not esc.isalnum():
                         raise ValueError
+                    uni = int(esc, 16)
+                    if 0xd800 <= uni <= 0xdbff:
+                        msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
+                        if not s[end + 5:end + 7] == '\\u':
+                            raise ValueError
+                        esc2 = s[end + 7:end + 11]
+                        if len(esc2) != 4 or not esc2.isalnum():
+                            raise ValueError
+                        uni2 = int(esc2, 16)
+                        uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
+                        next_end += 6
+                    m = unichr(uni)
                 except ValueError:
-                    raise ValueError(errmsg("Invalid \\uXXXX escape", s, end))
-                end += 5
+                    raise ValueError(errmsg(msg, s, end))
+                end = next_end
             _append(m)
     return u''.join(chunks), end
```