diff options
author | Bob Ippolito <bob@redivi.com> | 2013-05-07 16:38:34 -0700 |
---|---|---|
committer | Bob Ippolito <bob@redivi.com> | 2013-05-07 23:02:33 -0700 |
commit | 35816bfe2d0ddeb5ddcc68239683cbb35b7e3ff2 (patch) | |
tree | fb0698c12a52678392b8ad1a0393f5186138899a /simplejson/tests | |
parent | fc7b04d6bb1bfc0ffdddbd7d53ffd56f4142ea34 (diff) | |
download | simplejson-35816bfe2d0ddeb5ddcc68239683cbb35b7e3ff2.tar.gz |
pass-through in decoder for lone surrogates #62 (refs: v3.3.0, surrogate-62, baserock/morph)
Diffstat (limited to 'simplejson/tests')
-rw-r--r-- | simplejson/tests/test_scanstring.py | 61 |
-rw-r--r-- | simplejson/tests/test_unicode.py | 13 |
2 files changed, 55 insertions, 19 deletions
diff --git a/simplejson/tests/test_scanstring.py b/simplejson/tests/test_scanstring.py index 3b63d6b..3d98f0d 100644 --- a/simplejson/tests/test_scanstring.py +++ b/simplejson/tests/test_scanstring.py @@ -23,10 +23,6 @@ class TestScanString(TestCase): self._test_scanstring(simplejson.decoder.c_scanstring) def _test_scanstring(self, scanstring): - self.assertEqual( - scanstring('"z\\ud834\\udd20x"', 1, None, True), - (u'z\U0001d120x', 16)) - if sys.maxunicode == 65535: self.assertEqual( scanstring(u'"z\U0001d120x"', 1, None, True), @@ -129,9 +125,10 @@ class TestScanString(TestCase): self.assertRaises(ValueError, scanstring, '\\u012', 0, None, True) self.assertRaises(ValueError, scanstring, '\\u0123', 0, None, True) if sys.maxunicode > 65535: - self.assertRaises(ValueError, scanstring, '\\ud834"', 0, None, True), - self.assertRaises(ValueError, scanstring, '\\ud834\\u"', 0, None, True), - self.assertRaises(ValueError, scanstring, '\\ud834\\x0123"', 0, None, True), + self.assertRaises(ValueError, + scanstring, '\\ud834\\u"', 0, None, True) + self.assertRaises(ValueError, + scanstring, '\\ud834\\x0123"', 0, None, True) def test_issue3623(self): self.assertRaises(ValueError, json.decoder.scanstring, "xxx", 1, @@ -145,3 +142,53 @@ class TestScanString(TestCase): assert maxsize is not None self.assertRaises(OverflowError, json.decoder.scanstring, "xxx", maxsize + 1) + + def test_surrogates(self): + scanstring = json.decoder.scanstring + + def assertScan(given, expect, test_utf8=True): + givens = [given] + if not PY3 and test_utf8: + givens.append(given.encode('utf8')) + for given in givens: + (res, count) = scanstring(given, 1, None, True) + self.assertEqual(len(given), count) + self.assertEqual(res, expect) + + assertScan( + u'"z\\ud834\\u0079x"', + u'z\ud834yx') + assertScan( + u'"z\\ud834\\udd20x"', + u'z\U0001d120x') + assertScan( + u'"z\\ud834\\ud834\\udd20x"', + u'z\ud834\U0001d120x') + assertScan( + u'"z\\ud834x"', + u'z\ud834x') + assertScan( + u'"z\\udd20x"', + 
u'z\udd20x') + assertScan( + u'"z\ud834x"', + u'z\ud834x') + # It may look strange to join strings together, but Python is drunk. + # https://gist.github.com/etrepum/5538443 + assertScan( + u'"z\\ud834\udd20x12345"', + u''.join([u'z\ud834', u'\udd20x12345'])) + assertScan( + u'"z\ud834\\udd20x"', + u''.join([u'z\ud834', u'\udd20x'])) + # these have different behavior given UTF8 input, because the surrogate + # pair may be joined (in maxunicode > 65535 builds) + assertScan( + u''.join([u'"z\ud834', u'\udd20x"']), + u''.join([u'z\ud834', u'\udd20x']), + test_utf8=False) + + self.assertRaises(ValueError, + scanstring, u'"z\\ud83x"', 1, None, True) + self.assertRaises(ValueError, + scanstring, u'"z\\ud834\\udd2x"', 1, None, True) diff --git a/simplejson/tests/test_unicode.py b/simplejson/tests/test_unicode.py index f240176..f04cc5c 100644 --- a/simplejson/tests/test_unicode.py +++ b/simplejson/tests/test_unicode.py @@ -123,26 +123,15 @@ class TestUnicode(TestCase): self.assertRaises(json.JSONDecodeError, json.loads, '"\\u1x34"') self.assertRaises(json.JSONDecodeError, json.loads, '"\\ux234"') if sys.maxunicode > 65535: - # unpaired low surrogate - self.assertRaises(json.JSONDecodeError, json.loads, '"\\udc00"') - self.assertRaises(json.JSONDecodeError, json.loads, '"\\udcff"') - # unpaired high surrogate - self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800"') - self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800x"') - self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800xx"') - self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800xxxxxx"') + # invalid escape sequence for low surrogate self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u"') self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u0"') self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u00"') self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u000"') - # invalid escape sequence for low surrogate 
self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u000x"') self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u00x0"') self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u0x00"') self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\ux000"') - # invalid value for low surrogate - self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u0000"') - self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\ufc00"') def test_ensure_ascii_still_works(self): # in the ascii range, ensure that everything is the same |