Merge 3.2: issue #14937.

author: Martin v. L?wis <martin@v.loewis.de> 2012-06-03 12:00:48 +0200
committer: Martin v. L?wis <martin@v.loewis.de> 2012-06-03 12:00:48 +0200
commit: 183bc7b78cb50c73b3142710b3b8fbafde4a3833 (patch)
tree: 8a29bbfd852dccb5f14340c8d1c6309cf8fdfb72 /Lib/test/test_unicode.py
parent: 15670dc71126aabce6daca6c8d32f55ea146cc11 (diff)
parent: 9fe4e3d311149c5f07073f7c55509fcfd7dde47e (diff)
download: cpython-183bc7b78cb50c73b3142710b3b8fbafde4a3833.tar.gz
1 files changed, 238 insertions, 95 deletions
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 19b06a034d..4024be7e6f 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -5,17 +5,13 @@ Written by Marc-Andre Lemburg (mal@lemburg.com).
 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
 
 """#"
+import _string
 import codecs
 import struct
 import sys
 import unittest
 import warnings
 from test import support, string_tests
-import _string
-
-# decorator to skip tests on narrow builds
-requires_wide_build = unittest.skipIf(sys.maxunicode == 65535,
-                                      'requires wide build')
 
 # Error handling (bad decoder return)
 def search_function(encoding):
@@ -175,6 +171,15 @@ class UnicodeTest(string_tests.CommonTest,
 
     def test_find(self):
         string_tests.CommonTest.test_find(self)
+        # test implementation details of the memchr fast path
+        self.checkequal(100, 'a' * 100 + '\u0102', 'find', '\u0102')
+        self.checkequal(-1, 'a' * 100 + '\u0102', 'find', '\u0201')
+        self.checkequal(-1, 'a' * 100 + '\u0102', 'find', '\u0120')
+        self.checkequal(-1, 'a' * 100 + '\u0102', 'find', '\u0220')
+        self.checkequal(100, 'a' * 100 + '\U00100304', 'find', '\U00100304')
+        self.checkequal(-1, 'a' * 100 + '\U00100304', 'find', '\U00100204')
+        self.checkequal(-1, 'a' * 100 + '\U00100304', 'find', '\U00102004')
+        # check mixed argument types
         self.checkequalnofix(0,  'abcdefghiabc', 'find', 'abc')
         self.checkequalnofix(9,  'abcdefghiabc', 'find', 'abc', 1)
         self.checkequalnofix(-1, 'abcdefghiabc', 'find', 'def', 4)
@@ -184,6 +189,14 @@ class UnicodeTest(string_tests.CommonTest,
 
     def test_rfind(self):
         string_tests.CommonTest.test_rfind(self)
+        # test implementation details of the memrchr fast path
+        self.checkequal(0, '\u0102' + 'a' * 100 , 'rfind', '\u0102')
+        self.checkequal(-1, '\u0102' + 'a' * 100 , 'rfind', '\u0201')
+        self.checkequal(-1, '\u0102' + 'a' * 100 , 'rfind', '\u0120')
+        self.checkequal(-1, '\u0102' + 'a' * 100 , 'rfind', '\u0220')
+        self.checkequal(0, '\U00100304' + 'a' * 100, 'rfind', '\U00100304')
+        self.checkequal(-1, '\U00100304' + 'a' * 100, 'rfind', '\U00100204')
+        self.checkequal(-1, '\U00100304' + 'a' * 100, 'rfind', '\U00102004')
         # check mixed argument types
         self.checkequalnofix(9,   'abcdefghiabc', 'rfind', 'abc')
         self.checkequalnofix(12,  'abcdefghiabc', 'rfind', '')
@@ -280,6 +293,12 @@ class UnicodeTest(string_tests.CommonTest,
         self.checkequalnofix('one@two!three!', 'one!two!three!', 'replace', '!', '@', 1)
         self.assertRaises(TypeError, 'replace'.replace, "r", 42)
 
+    @support.cpython_only
+    def test_replace_id(self):
+        pattern = 'abc'
+        text = 'abc def'
+        self.assertIs(text.replace(pattern, pattern), text)
+
     def test_bytes_comparison(self):
         with support.check_warnings():
             warnings.simplefilter('ignore', BytesWarning)
@@ -350,6 +369,8 @@ class UnicodeTest(string_tests.CommonTest,
     def test_islower(self):
         string_tests.MixinStrUnicodeUserStringTest.test_islower(self)
         self.checkequalnofix(False, '\u1FFc', 'islower')
+        self.assertFalse('\u2167'.islower())
+        self.assertTrue('\u2177'.islower())
         # non-BMP, uppercase
         self.assertFalse('\U00010401'.islower())
         self.assertFalse('\U00010427'.islower())
@@ -364,6 +385,8 @@ class UnicodeTest(string_tests.CommonTest,
         string_tests.MixinStrUnicodeUserStringTest.test_isupper(self)
         if not sys.platform.startswith('java'):
             self.checkequalnofix(False, '\u1FFc', 'isupper')
+        self.assertTrue('\u2167'.isupper())
+        self.assertFalse('\u2177'.isupper())
         # non-BMP, uppercase
         self.assertTrue('\U00010401'.isupper())
         self.assertTrue('\U00010427'.isupper())
@@ -520,7 +543,6 @@ class UnicodeTest(string_tests.CommonTest,
                 self.assertFalse(meth(s), '%a.%s() is False' % (s, meth_name))
 
 
-    @requires_wide_build
     def test_lower(self):
         string_tests.CommonTest.test_lower(self)
         self.assertEqual('\U00010427'.lower(), '\U0001044F')
@@ -530,8 +552,27 @@ class UnicodeTest(string_tests.CommonTest,
                          '\U0001044F\U0001044F')
         self.assertEqual('X\U00010427x\U0001044F'.lower(),
                          'x\U0001044Fx\U0001044F')
+        self.assertEqual('ﬁ'.lower(), 'ﬁ')
+        self.assertEqual('\u0130'.lower(), '\u0069\u0307')
+        # Special case for GREEK CAPITAL LETTER SIGMA U+03A3
+        self.assertEqual('\u03a3'.lower(), '\u03c3')
+        self.assertEqual('\u0345\u03a3'.lower(), '\u0345\u03c3')
+        self.assertEqual('A\u0345\u03a3'.lower(), 'a\u0345\u03c2')
+        self.assertEqual('A\u0345\u03a3a'.lower(), 'a\u0345\u03c3a')
+        self.assertEqual('A\u0345\u03a3'.lower(), 'a\u0345\u03c2')
+        self.assertEqual('A\u03a3\u0345'.lower(), 'a\u03c2\u0345')
+        self.assertEqual('\u03a3\u0345 '.lower(), '\u03c3\u0345 ')
+        self.assertEqual('\U0008fffe'.lower(), '\U0008fffe')
+        self.assertEqual('\u2177'.lower(), '\u2177')
+
+    def test_casefold(self):
+        self.assertEqual('hello'.casefold(), 'hello')
+        self.assertEqual('hELlo'.casefold(), 'hello')
+        self.assertEqual('ß'.casefold(), 'ss')
+        self.assertEqual('ﬁ'.casefold(), 'fi')
+        self.assertEqual('\u03a3'.casefold(), '\u03c3')
+        self.assertEqual('A\u0345\u03a3'.casefold(), 'a\u03b9\u03c3')
 
-    @requires_wide_build
     def test_upper(self):
         string_tests.CommonTest.test_upper(self)
         self.assertEqual('\U0001044F'.upper(), '\U00010427')
@@ -541,8 +582,14 @@ class UnicodeTest(string_tests.CommonTest,
                          '\U00010427\U00010427')
         self.assertEqual('X\U00010427x\U0001044F'.upper(),
                          'X\U00010427X\U00010427')
+        self.assertEqual('ﬁ'.upper(), 'FI')
+        self.assertEqual('\u0130'.upper(), '\u0130')
+        self.assertEqual('\u03a3'.upper(), '\u03a3')
+        self.assertEqual('ß'.upper(), 'SS')
+        self.assertEqual('\u1fd2'.upper(), '\u0399\u0308\u0300')
+        self.assertEqual('\U0008fffe'.upper(), '\U0008fffe')
+        self.assertEqual('\u2177'.upper(), '\u2167')
 
-    @requires_wide_build
     def test_capitalize(self):
         string_tests.CommonTest.test_capitalize(self)
         self.assertEqual('\U0001044F'.capitalize(), '\U00010427')
@@ -554,8 +601,12 @@ class UnicodeTest(string_tests.CommonTest,
                          '\U00010427\U0001044F')
         self.assertEqual('X\U00010427x\U0001044F'.capitalize(),
                          'X\U0001044Fx\U0001044F')
+        self.assertEqual('h\u0130'.capitalize(), 'H\u0069\u0307')
+        exp = '\u0399\u0308\u0300\u0069\u0307'
+        self.assertEqual('\u1fd2\u0130'.capitalize(), exp)
+        self.assertEqual('ﬁnnish'.capitalize(), 'FInnish')
+        self.assertEqual('A\u0345\u03a3'.capitalize(), 'A\u0345\u03c2')
 
-    @requires_wide_build
     def test_title(self):
         string_tests.MixinStrUnicodeUserStringTest.test_title(self)
         self.assertEqual('\U0001044F'.title(), '\U00010427')
@@ -569,8 +620,10 @@ class UnicodeTest(string_tests.CommonTest,
                          '\U00010427\U0001044F \U00010427\U0001044F')
         self.assertEqual('X\U00010427x\U0001044F X\U00010427x\U0001044F'.title(),
                          'X\U0001044Fx\U0001044F X\U0001044Fx\U0001044F')
+        self.assertEqual('ﬁNNISH'.title(), 'Finnish')
+        self.assertEqual('A\u03a3 \u1fa1xy'.title(), 'A\u03c2 \u1fa9xy')
+        self.assertEqual('A\u03a3A'.title(), 'A\u03c3a')
 
-    @requires_wide_build
     def test_swapcase(self):
         string_tests.CommonTest.test_swapcase(self)
         self.assertEqual('\U0001044F'.swapcase(), '\U00010427')
@@ -583,6 +636,19 @@ class UnicodeTest(string_tests.CommonTest,
                          '\U00010427\U0001044F')
         self.assertEqual('X\U00010427x\U0001044F'.swapcase(),
                          'x\U0001044FX\U00010427')
+        self.assertEqual('ﬁ'.swapcase(), 'FI')
+        self.assertEqual('\u0130'.swapcase(), '\u0069\u0307')
+        # Special case for GREEK CAPITAL LETTER SIGMA U+03A3
+        self.assertEqual('\u03a3'.swapcase(), '\u03c3')
+        self.assertEqual('\u0345\u03a3'.swapcase(), '\u0399\u03c3')
+        self.assertEqual('A\u0345\u03a3'.swapcase(), 'a\u0399\u03c2')
+        self.assertEqual('A\u0345\u03a3a'.swapcase(), 'a\u0399\u03c3A')
+        self.assertEqual('A\u0345\u03a3'.swapcase(), 'a\u0399\u03c2')
+        self.assertEqual('A\u03a3\u0345'.swapcase(), 'a\u03c2\u0399')
+        self.assertEqual('\u03a3\u0345 '.swapcase(), '\u03c3\u0399 ')
+        self.assertEqual('\u03a3'.swapcase(), '\u03c3')
+        self.assertEqual('ß'.swapcase(), 'SS')
+        self.assertEqual('\u1fd2'.swapcase(), '\u0399\u0308\u0300')
 
     def test_contains(self):
         # Testing Unicode contains method
@@ -776,7 +842,7 @@ class UnicodeTest(string_tests.CommonTest,
         self.assertEqual('{0!s}'.format(G('data')), 'string is data')
 
         msg = 'object.__format__ with a non-empty format string is deprecated'
-        with support.check_warnings((msg, PendingDeprecationWarning)):
+        with support.check_warnings((msg, DeprecationWarning)):
             self.assertEqual('{0:^10}'.format(E('data')), ' E(data)  ')
             self.assertEqual('{0:^10s}'.format(E('data')), ' E(data)  ')
             self.assertEqual('{0:>15s}'.format(G('data')), ' string is data')
@@ -858,6 +924,14 @@ class UnicodeTest(string_tests.CommonTest,
         self.assertRaises(ValueError, format, '', '#')
         self.assertRaises(ValueError, format, '', '#20')
 
+        # Non-ASCII
+        self.assertEqual("{0:s}{1:s}".format("ABC", "\u0410\u0411\u0412"),
+                         'ABC\u0410\u0411\u0412')
+        self.assertEqual("{0:.3s}".format("ABC\u0410\u0411\u0412"),
+                         'ABC')
+        self.assertEqual("{0:.0s}".format("ABC\u0410\u0411\u0412"),
+                         '')
+
     def test_format_map(self):
         self.assertEqual(''.format_map({}), '')
         self.assertEqual('a'.format_map({}), 'a')
@@ -990,6 +1064,10 @@ class UnicodeTest(string_tests.CommonTest,
         self.assertEqual('%f' % INF, 'inf')
         self.assertEqual('%F' % INF, 'INF')
 
+        # PEP 393
+        self.assertEqual('%.1s' % "a\xe9\u20ac", 'a')
+        self.assertEqual('%.2s' % "a\xe9\u20ac", 'a\xe9')
+
     def test_startswith_endswith_errors(self):
         for meth in ('foo'.startswith, 'foo'.endswith):
             with self.assertRaises(TypeError) as cm:
@@ -1014,10 +1092,13 @@ class UnicodeTest(string_tests.CommonTest,
         class UnicodeSubclass(str):
             pass
 
-        self.assertEqual(
-            str(UnicodeSubclass('unicode subclass becomes unicode')),
-            'unicode subclass becomes unicode'
-        )
+        for text in ('ascii', '\xe9', '\u20ac', '\U0010FFFF'):
+            subclass = UnicodeSubclass(text)
+            self.assertEqual(str(subclass), text)
+            self.assertEqual(len(subclass), len(text))
+            if text == 'ascii':
+                self.assertEqual(subclass.encode('ascii'), b'ascii')
+                self.assertEqual(subclass.encode('utf-8'), b'ascii')
 
         self.assertEqual(
             str('strings are converted to unicode'),
@@ -1123,15 +1204,12 @@ class UnicodeTest(string_tests.CommonTest,
     def test_codecs_utf8(self):
         self.assertEqual(''.encode('utf-8'), b'')
         self.assertEqual('\u20ac'.encode('utf-8'), b'\xe2\x82\xac')
-        if sys.maxunicode == 65535:
-            self.assertEqual('\ud800\udc02'.encode('utf-8'), b'\xf0\x90\x80\x82')
-            self.assertEqual('\ud84d\udc56'.encode('utf-8'), b'\xf0\xa3\x91\x96')
+        self.assertEqual('\U00010002'.encode('utf-8'), b'\xf0\x90\x80\x82')
+        self.assertEqual('\U00023456'.encode('utf-8'), b'\xf0\xa3\x91\x96')
         self.assertEqual('\ud800'.encode('utf-8', 'surrogatepass'), b'\xed\xa0\x80')
         self.assertEqual('\udc00'.encode('utf-8', 'surrogatepass'), b'\xed\xb0\x80')
-        if sys.maxunicode == 65535:
-            self.assertEqual(
-                ('\ud800\udc02'*1000).encode('utf-8'),
-                b'\xf0\x90\x80\x82'*1000)
+        self.assertEqual(('\U00010002'*10).encode('utf-8'),
+                         b'\xf0\x90\x80\x82'*10)
         self.assertEqual(
             '\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
             '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
@@ -1344,14 +1422,6 @@ class UnicodeTest(string_tests.CommonTest,
         self.assertRaises(TypeError, str, b"hello", "test.unicode2")
         self.assertRaises(TypeError, "hello".encode, "test.unicode1")
         self.assertRaises(TypeError, "hello".encode, "test.unicode2")
-        # executes PyUnicode_Encode()
-        import imp
-        self.assertRaises(
-            ImportError,
-            imp.find_module,
-            "non-existing module",
-            ["non-existing dir"]
-        )
 
         # Error handling (wrong arguments)
         self.assertRaises(TypeError, "hello".encode, 42, 42, 42)
@@ -1369,18 +1439,25 @@ class UnicodeTest(string_tests.CommonTest,
         self.assertEqual('hello'.encode('ascii'), b'hello')
         self.assertEqual('hello'.encode('utf-7'), b'hello')
         self.assertEqual('hello'.encode('utf-8'), b'hello')
-        self.assertEqual('hello'.encode('utf8'), b'hello')
+        self.assertEqual('hello'.encode('utf-8'), b'hello')
         self.assertEqual('hello'.encode('utf-16-le'), b'h\000e\000l\000l\000o\000')
         self.assertEqual('hello'.encode('utf-16-be'), b'\000h\000e\000l\000l\000o')
         self.assertEqual('hello'.encode('latin-1'), b'hello')
 
+        # Default encoding is utf-8
+        self.assertEqual('\u2603'.encode(), b'\xe2\x98\x83')
+
         # Roundtrip safety for BMP (just the first 1024 chars)
         for c in range(1024):
             u = chr(c)
             for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le',
                              'utf-16-be', 'raw_unicode_escape',
                              'unicode_escape', 'unicode_internal'):
-                self.assertEqual(str(u.encode(encoding),encoding), u)
+                with warnings.catch_warnings():
+                    # unicode-internal has been deprecated
+                    warnings.simplefilter("ignore", DeprecationWarning)
+
+                    self.assertEqual(str(u.encode(encoding),encoding), u)
 
         # Roundtrip safety for BMP (just the first 256 chars)
         for c in range(256):
@@ -1395,18 +1472,20 @@ class UnicodeTest(string_tests.CommonTest,
                 self.assertEqual(str(u.encode(encoding),encoding), u)
 
         # Roundtrip safety for non-BMP (just a few chars)
-        u = '\U00010001\U00020002\U00030003\U00040004\U00050005'
-        for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
-                         #'raw_unicode_escape',
-                         'unicode_escape', 'unicode_internal'):
-            self.assertEqual(str(u.encode(encoding),encoding), u)
+        with warnings.catch_warnings():
+            # unicode-internal has been deprecated
+            warnings.simplefilter("ignore", DeprecationWarning)
+
+            u = '\U00010001\U00020002\U00030003\U00040004\U00050005'
+            for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
+                             'raw_unicode_escape',
+                             'unicode_escape', 'unicode_internal'):
+                self.assertEqual(str(u.encode(encoding),encoding), u)
 
-        # UTF-8 must be roundtrip safe for all UCS-2 code points
-        # This excludes surrogates: in the full range, there would be
-        # a surrogate pair (\udbff\udc00), which gets converted back
-        # to a non-BMP character (\U0010fc00)
-        u = ''.join(map(chr, list(range(0,0xd800)) +
-                             list(range(0xe000,0x10000))))
+        # UTF-8 must be roundtrip safe for all code points
+        # (except surrogates, which are forbidden).
+        u = ''.join(map(chr, list(range(0, 0xd800)) +
+                             list(range(0xe000, 0x110000))))
         for encoding in ('utf-8',):
             self.assertEqual(str(u.encode(encoding),encoding), u)
 
@@ -1591,17 +1670,39 @@ class UnicodeTest(string_tests.CommonTest,
             return
         self.assertRaises(OverflowError, 't\tt\t'.expandtabs, sys.maxsize)
 
+    @support.cpython_only
+    def test_expandtabs_optimization(self):
+        s = 'abc'
+        self.assertIs(s.expandtabs(), s)
+
     def test_raiseMemError(self):
-        # Ensure that the freelist contains a consistent object, even
-        # when a string allocation fails with a MemoryError.
-        # This used to crash the interpreter,
-        # or leak references when the number was smaller.
-        charwidth = 4 if sys.maxunicode >= 0x10000 else 2
-        # Note: sys.maxsize is half of the actual max allocation because of
-        # the signedness of Py_ssize_t.
-        alloc = lambda: "a" * (sys.maxsize // charwidth * 2)
-        self.assertRaises(MemoryError, alloc)
-        self.assertRaises(MemoryError, alloc)
+        if struct.calcsize('P') == 8:
+            # 64 bits pointers
+            ascii_struct_size = 48
+            compact_struct_size = 72
+        else:
+            # 32 bits pointers
+            ascii_struct_size = 24
+            compact_struct_size = 36
+
+        for char in ('a', '\xe9', '\u20ac', '\U0010ffff'):
+            code = ord(char)
+            if code < 0x100:
+                char_size = 1  # sizeof(Py_UCS1)
+                struct_size = ascii_struct_size
+            elif code < 0x10000:
+                char_size = 2  # sizeof(Py_UCS2)
+                struct_size = compact_struct_size
+            else:
+                char_size = 4  # sizeof(Py_UCS4)
+                struct_size = compact_struct_size
+            # Note: sys.maxsize is half of the actual max allocation because of
+            # the signedness of Py_ssize_t. Strings of maxlen-1 should in principle
+            # be allocatable, given enough memory.
+            maxlen = ((sys.maxsize - struct_size) // char_size)
+            alloc = lambda: char * maxlen
+            self.assertRaises(MemoryError, alloc)
+            self.assertRaises(MemoryError, alloc)
 
     def test_format_subclass(self):
         class S(str):
@@ -1614,11 +1715,10 @@ class UnicodeTest(string_tests.CommonTest,
     # Test PyUnicode_FromFormat()
     def test_from_format(self):
         support.import_module('ctypes')
-        from ctypes import pythonapi, py_object, c_int
-        if sys.maxunicode == 65535:
-            name = "PyUnicodeUCS2_FromFormat"
-        else:
-            name = "PyUnicodeUCS4_FromFormat"
+        from ctypes import (pythonapi, py_object,
+            c_int, c_long, c_longlong, c_ssize_t,
+            c_uint, c_ulong, c_ulonglong, c_size_t)
+        name = "PyUnicode_FromFormat"
         _PyUnicode_FromFormat = getattr(pythonapi, name)
         _PyUnicode_FromFormat.restype = py_object
 
@@ -1639,13 +1739,40 @@ class UnicodeTest(string_tests.CommonTest,
             'string, got a non-ASCII byte: 0xe9$',
             PyUnicode_FromFormat, b'unicode\xe9=%s', 'ascii')
 
+        # test "%c"
         self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0xabcd)), '\uabcd')
         self.assertEqual(PyUnicode_FromFormat(b'%c', c_int(0x10ffff)), '\U0010ffff')
 
-        # other tests
+        # test "%"
+        self.assertEqual(PyUnicode_FromFormat(b'%'), '%')
+        self.assertEqual(PyUnicode_FromFormat(b'%%'), '%')
+        self.assertEqual(PyUnicode_FromFormat(b'%%s'), '%s')
+        self.assertEqual(PyUnicode_FromFormat(b'[%%]'), '[%]')
+        self.assertEqual(PyUnicode_FromFormat(b'%%%s', b'abc'), '%abc')
+
+        # test integer formats (%i, %d, %u)
+        self.assertEqual(PyUnicode_FromFormat(b'%03i', c_int(10)), '010')
+        self.assertEqual(PyUnicode_FromFormat(b'%0.4i', c_int(10)), '0010')
+        self.assertEqual(PyUnicode_FromFormat(b'%i', c_int(-123)), '-123')
+        self.assertEqual(PyUnicode_FromFormat(b'%li', c_long(-123)), '-123')
+        self.assertEqual(PyUnicode_FromFormat(b'%lli', c_longlong(-123)), '-123')
+        self.assertEqual(PyUnicode_FromFormat(b'%zi', c_ssize_t(-123)), '-123')
+
+        self.assertEqual(PyUnicode_FromFormat(b'%d', c_int(-123)), '-123')
+        self.assertEqual(PyUnicode_FromFormat(b'%ld', c_long(-123)), '-123')
+        self.assertEqual(PyUnicode_FromFormat(b'%lld', c_longlong(-123)), '-123')
+        self.assertEqual(PyUnicode_FromFormat(b'%zd', c_ssize_t(-123)), '-123')
+
+        self.assertEqual(PyUnicode_FromFormat(b'%u', c_uint(123)), '123')
+        self.assertEqual(PyUnicode_FromFormat(b'%lu', c_ulong(123)), '123')
+        self.assertEqual(PyUnicode_FromFormat(b'%llu', c_ulonglong(123)), '123')
+        self.assertEqual(PyUnicode_FromFormat(b'%zu', c_size_t(123)), '123')
+
+        # test %A
         text = PyUnicode_FromFormat(b'%%A:%A', 'abc\xe9\uabcd\U0010ffff')
         self.assertEqual(text, r"%A:'abc\xe9\uabcd\U0010ffff'")
 
+        # test %V
         text = PyUnicode_FromFormat(b'repr=%V', 'abc', b'xyz')
         self.assertEqual(text, 'repr=abc')
 
@@ -1659,6 +1786,13 @@ class UnicodeTest(string_tests.CommonTest,
         text = PyUnicode_FromFormat(b'repr=%V', None, b'abc\xff')
         self.assertEqual(text, 'repr=abc\ufffd')
 
+        # not supported: copy the raw format string. these tests are just here
+        # to check for crashs and should not be considered as specifications
+        self.assertEqual(PyUnicode_FromFormat(b'%1%s', b'abc'), '%s')
+        self.assertEqual(PyUnicode_FromFormat(b'%1abc'), '%1abc')
+        self.assertEqual(PyUnicode_FromFormat(b'%+i', c_int(10)), '%+i')
+        self.assertEqual(PyUnicode_FromFormat(b'%.%s', b'abc'), '%.%s')
+
     # Test PyUnicode_AsWideChar()
     def test_aswidechar(self):
         from _testcapi import unicode_aswidechar
@@ -1719,6 +1853,51 @@ class UnicodeTest(string_tests.CommonTest,
         self.assertEqual(size, nchar)
         self.assertEqual(wchar, nonbmp + '\0')
 
+    def test_subclass_add(self):
+        class S(str):
+            def __add__(self, o):
+                return "3"
+        self.assertEqual(S("4") + S("5"), "3")
+        class S(str):
+            def __iadd__(self, o):
+                return "3"
+        s = S("1")
+        s += "4"
+        self.assertEqual(s, "3")
+
+    def test_encode_decimal(self):
+        from _testcapi import unicode_encodedecimal
+        self.assertEqual(unicode_encodedecimal('123'),
+                         b'123')
+        self.assertEqual(unicode_encodedecimal('\u0663.\u0661\u0664'),
+                         b'3.14')
+        self.assertEqual(unicode_encodedecimal("\N{EM SPACE}3.14\N{EN SPACE}"),
+                         b' 3.14 ')
+        self.assertRaises(UnicodeEncodeError,
+                          unicode_encodedecimal, "123\u20ac", "strict")
+        self.assertRaisesRegex(
+            ValueError,
+            "^'decimal' codec can't encode character",
+            unicode_encodedecimal, "123\u20ac", "replace")
+
+    def test_transform_decimal(self):
+        from _testcapi import unicode_transformdecimaltoascii as transform_decimal
+        self.assertEqual(transform_decimal('123'),
+                         '123')
+        self.assertEqual(transform_decimal('\u0663.\u0661\u0664'),
+                         '3.14')
+        self.assertEqual(transform_decimal("\N{EM SPACE}3.14\N{EN SPACE}"),
+                         "\N{EM SPACE}3.14\N{EN SPACE}")
+        self.assertEqual(transform_decimal('123\u20ac'),
+                         '123\u20ac')
+
+    def test_getnewargs(self):
+        text = 'abc'
+        args = text.__getnewargs__()
+        self.assertIsNot(args[0], text)
+        self.assertEqual(args[0], text)
+        self.assertEqual(len(args), 1)
+
 
 class StringModuleTest(unittest.TestCase):
     def test_formatter_parser(self):
@@ -1770,42 +1949,6 @@ class StringModuleTest(unittest.TestCase):
             ]])
         self.assertRaises(TypeError, _string.formatter_field_name_split, 1)
 
-    def test_encode_decimal(self):
-        from _testcapi import unicode_encodedecimal
-        self.assertEqual(unicode_encodedecimal('123'),
-                         b'123')
-        self.assertEqual(unicode_encodedecimal('\u0663.\u0661\u0664'),
-                         b'3.14')
-        self.assertEqual(unicode_encodedecimal("\N{EM SPACE}3.14\N{EN SPACE}"),
-                         b' 3.14 ')
-        self.assertRaises(UnicodeEncodeError,
-                          unicode_encodedecimal, "123\u20ac", "strict")
-        self.assertEqual(unicode_encodedecimal("123\u20ac", "replace"),
-                         b'123?')
-        self.assertEqual(unicode_encodedecimal("123\u20ac", "ignore"),
-                         b'123')
-        self.assertEqual(unicode_encodedecimal("123\u20ac", "xmlcharrefreplace"),
-                         b'123&#8364;')
-        self.assertEqual(unicode_encodedecimal("123\u20ac", "backslashreplace"),
-                         b'123\\u20ac')
-        self.assertEqual(unicode_encodedecimal("123\u20ac\N{EM SPACE}", "replace"),
-                         b'123? ')
-        self.assertEqual(unicode_encodedecimal("123\u20ac\u20ac", "replace"),
-                         b'123??')
-        self.assertEqual(unicode_encodedecimal("123\u20ac\u0660", "replace"),
-                         b'123?0')
-
-    def test_transform_decimal(self):
-        from _testcapi import unicode_transformdecimaltoascii as transform_decimal
-        self.assertEqual(transform_decimal('123'),
-                         '123')
-        self.assertEqual(transform_decimal('\u0663.\u0661\u0664'),
-                         '3.14')
-        self.assertEqual(transform_decimal("\N{EM SPACE}3.14\N{EN SPACE}"),
-                         "\N{EM SPACE}3.14\N{EN SPACE}")
-        self.assertEqual(transform_decimal('123\u20ac'),
-                         '123\u20ac')
-
 
 def test_main():
     support.run_unittest(__name__)
author	Martin v. L?wis <martin@v.loewis.de>	2012-06-03 12:00:48 +0200
committer	Martin v. L?wis <martin@v.loewis.de>	2012-06-03 12:00:48 +0200
commit	183bc7b78cb50c73b3142710b3b8fbafde4a3833 (patch)
tree	8a29bbfd852dccb5f14340c8d1c6309cf8fdfb72 /Lib/test/test_unicode.py
parent	15670dc71126aabce6daca6c8d32f55ea146cc11 (diff)
parent	9fe4e3d311149c5f07073f7c55509fcfd7dde47e (diff)
download	cpython-183bc7b78cb50c73b3142710b3b8fbafde4a3833.tar.gz