summary | refs | log | tree | commit | diff
path: root/simplejson/tests
diff options
context:
space:
mode:
author: Bob Ippolito <bob@redivi.com>, 2013-05-07 16:38:34 -0700
committer: Bob Ippolito <bob@redivi.com>, 2013-05-07 23:02:33 -0700
commit: 35816bfe2d0ddeb5ddcc68239683cbb35b7e3ff2 (patch)
tree: fb0698c12a52678392b8ad1a0393f5186138899a /simplejson/tests
parent: fc7b04d6bb1bfc0ffdddbd7d53ffd56f4142ea34 (diff)
download: simplejson-35816bfe2d0ddeb5ddcc68239683cbb35b7e3ff2.tar.gz
pass-through in decoder for lone surrogates #62 (refs: v3.3.0, surrogate-62, baserock/morph)
Diffstat (limited to 'simplejson/tests')
-rw-r--r-- simplejson/tests/test_scanstring.py | 61
-rw-r--r-- simplejson/tests/test_unicode.py | 13
2 files changed, 55 insertions, 19 deletions
diff --git a/simplejson/tests/test_scanstring.py b/simplejson/tests/test_scanstring.py
index 3b63d6b..3d98f0d 100644
--- a/simplejson/tests/test_scanstring.py
+++ b/simplejson/tests/test_scanstring.py
@@ -23,10 +23,6 @@ class TestScanString(TestCase):
self._test_scanstring(simplejson.decoder.c_scanstring)
def _test_scanstring(self, scanstring):
- self.assertEqual(
- scanstring('"z\\ud834\\udd20x"', 1, None, True),
- (u'z\U0001d120x', 16))
-
if sys.maxunicode == 65535:
self.assertEqual(
scanstring(u'"z\U0001d120x"', 1, None, True),
@@ -129,9 +125,10 @@ class TestScanString(TestCase):
self.assertRaises(ValueError, scanstring, '\\u012', 0, None, True)
self.assertRaises(ValueError, scanstring, '\\u0123', 0, None, True)
if sys.maxunicode > 65535:
- self.assertRaises(ValueError, scanstring, '\\ud834"', 0, None, True),
- self.assertRaises(ValueError, scanstring, '\\ud834\\u"', 0, None, True),
- self.assertRaises(ValueError, scanstring, '\\ud834\\x0123"', 0, None, True),
+ self.assertRaises(ValueError,
+ scanstring, '\\ud834\\u"', 0, None, True)
+ self.assertRaises(ValueError,
+ scanstring, '\\ud834\\x0123"', 0, None, True)
def test_issue3623(self):
self.assertRaises(ValueError, json.decoder.scanstring, "xxx", 1,
@@ -145,3 +142,53 @@ class TestScanString(TestCase):
assert maxsize is not None
self.assertRaises(OverflowError, json.decoder.scanstring, "xxx",
maxsize + 1)
+
+ def test_surrogates(self):
+ scanstring = json.decoder.scanstring
+
+ def assertScan(given, expect, test_utf8=True):
+ givens = [given]
+ if not PY3 and test_utf8:
+ givens.append(given.encode('utf8'))
+ for given in givens:
+ (res, count) = scanstring(given, 1, None, True)
+ self.assertEqual(len(given), count)
+ self.assertEqual(res, expect)
+
+ assertScan(
+ u'"z\\ud834\\u0079x"',
+ u'z\ud834yx')
+ assertScan(
+ u'"z\\ud834\\udd20x"',
+ u'z\U0001d120x')
+ assertScan(
+ u'"z\\ud834\\ud834\\udd20x"',
+ u'z\ud834\U0001d120x')
+ assertScan(
+ u'"z\\ud834x"',
+ u'z\ud834x')
+ assertScan(
+ u'"z\\udd20x"',
+ u'z\udd20x')
+ assertScan(
+ u'"z\ud834x"',
+ u'z\ud834x')
+ # It may look strange to join strings together, but Python is drunk.
+ # https://gist.github.com/etrepum/5538443
+ assertScan(
+ u'"z\\ud834\udd20x12345"',
+ u''.join([u'z\ud834', u'\udd20x12345']))
+ assertScan(
+ u'"z\ud834\\udd20x"',
+ u''.join([u'z\ud834', u'\udd20x']))
+ # these have different behavior given UTF8 input, because the surrogate
+ # pair may be joined (in maxunicode > 65535 builds)
+ assertScan(
+ u''.join([u'"z\ud834', u'\udd20x"']),
+ u''.join([u'z\ud834', u'\udd20x']),
+ test_utf8=False)
+
+ self.assertRaises(ValueError,
+ scanstring, u'"z\\ud83x"', 1, None, True)
+ self.assertRaises(ValueError,
+ scanstring, u'"z\\ud834\\udd2x"', 1, None, True)
diff --git a/simplejson/tests/test_unicode.py b/simplejson/tests/test_unicode.py
index f240176..f04cc5c 100644
--- a/simplejson/tests/test_unicode.py
+++ b/simplejson/tests/test_unicode.py
@@ -123,26 +123,15 @@ class TestUnicode(TestCase):
self.assertRaises(json.JSONDecodeError, json.loads, '"\\u1x34"')
self.assertRaises(json.JSONDecodeError, json.loads, '"\\ux234"')
if sys.maxunicode > 65535:
- # unpaired low surrogate
- self.assertRaises(json.JSONDecodeError, json.loads, '"\\udc00"')
- self.assertRaises(json.JSONDecodeError, json.loads, '"\\udcff"')
- # unpaired high surrogate
- self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800"')
- self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800x"')
- self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800xx"')
- self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800xxxxxx"')
+ # invalid escape sequence for low surrogate
self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u"')
self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u0"')
self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u00"')
self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u000"')
- # invalid escape sequence for low surrogate
self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u000x"')
self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u00x0"')
self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u0x00"')
self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\ux000"')
- # invalid value for low surrogate
- self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\u0000"')
- self.assertRaises(json.JSONDecodeError, json.loads, '"\\ud800\\ufc00"')
def test_ensure_ascii_still_works(self):
# in the ascii range, ensure that everything is the same