summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Stefan Behnel <stefan_ml@behnel.de> 2019-01-03 09:49:35 +0100
committer Stefan Behnel <stefan_ml@behnel.de> 2019-01-03 09:50:23 +0100
commit 4b64bbe1c792c0b0f40d97bb98e4192e28923ca2 (patch)
tree 514fea12a5cf1e93ad14f5586ec570957889096d
parent 8103d0552c8129211241f047f3bfa10c43e3ab2a (diff)
download cython-4b64bbe1c792c0b0f40d97bb98e4192e28923ca2.tar.gz
De-optimise the unicode methods ".upper()", ".lower()" and ".title()" on single character values since they must still be able to return multiple characters.
-rw-r--r-- CHANGES.rst 5
-rw-r--r-- Cython/Compiler/Optimize.py 9
-rw-r--r-- tests/run/py_ucs4_type.pyx 29
-rw-r--r-- tests/run/py_unicode_type.pyx 4
4 files changed, 32 insertions, 15 deletions
diff --git a/CHANGES.rst b/CHANGES.rst
index a7468576b..a848617ef 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -20,6 +20,11 @@ Features added
Bugs fixed
----------
+* The unicode methods ``.upper()``, ``.lower()`` and ``.title()`` were
+ incorrectly optimised for single character input values and only returned
+ the first character if multiple characters should have been returned.
+ They now use the original Python methods again.
+
* The ``Py_hash_t`` type failed to accept arbitrary "index" values.
(Github issue #2752)
diff --git a/Cython/Compiler/Optimize.py b/Cython/Compiler/Optimize.py
index 4c306d566..a577facf1 100644
--- a/Cython/Compiler/Optimize.py
+++ b/Cython/Compiler/Optimize.py
@@ -3374,6 +3374,8 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
PyrexTypes.CFuncTypeArg("uchar", PyrexTypes.c_py_ucs4_type, None),
])
+ # DISABLED: Return value can only be one character, which is not correct.
+ '''
def _inject_unicode_character_conversion(self, node, function, args, is_unbound_method):
if is_unbound_method or len(args) != 1:
return node
@@ -3392,9 +3394,10 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
func_call = func_call.coerce_to_pyobject(self.current_env)
return func_call
- _handle_simple_method_unicode_lower = _inject_unicode_character_conversion
- _handle_simple_method_unicode_upper = _inject_unicode_character_conversion
- _handle_simple_method_unicode_title = _inject_unicode_character_conversion
+ #_handle_simple_method_unicode_lower = _inject_unicode_character_conversion
+ #_handle_simple_method_unicode_upper = _inject_unicode_character_conversion
+ #_handle_simple_method_unicode_title = _inject_unicode_character_conversion
+ '''
PyUnicode_Splitlines_func_type = PyrexTypes.CFuncType(
Builtin.list_type, [
diff --git a/tests/run/py_ucs4_type.pyx b/tests/run/py_ucs4_type.pyx
index 7193319c6..afd45fca3 100644
--- a/tests/run/py_ucs4_type.pyx
+++ b/tests/run/py_ucs4_type.pyx
@@ -132,15 +132,24 @@ def unicode_type_methods(Py_UCS4 uchar):
uchar.isupper(),
]
-@cython.test_assert_path_exists('//PythonCapiCallNode')
-@cython.test_fail_if_path_exists('//SimpleCallNode')
+#@cython.test_assert_path_exists('//PythonCapiCallNode')
+#@cython.test_fail_if_path_exists('//SimpleCallNode')
def unicode_methods(Py_UCS4 uchar):
"""
- >>> unicode_methods(ord('A')) == ['a', 'A', 'A']
+ >>> unicode_methods(ord('A')) == ['a', 'A', 'A'] or unicode_methods(ord('A'))
+ True
+ >>> unicode_methods(ord('a')) == ['a', 'A', 'A'] or unicode_methods(ord('a'))
True
- >>> unicode_methods(ord('a')) == ['a', 'A', 'A']
+ >>> unicode_methods(0x1E9E) == [u'\\xdf', u'\\u1e9e', u'\\u1e9e'] or unicode_methods(0x1E9E)
+ True
+ >>> unicode_methods(0x0130) in (
+ ... [u'i\\u0307', u'\\u0130', u'\\u0130'], # Py3
+ ... [u'i', u'\\u0130', u'\\u0130'], # Py2
+ ... ) or unicode_methods(0x0130)
True
"""
+ # \u1E9E == 'LATIN CAPITAL LETTER SHARP S'
+ # \u0130 == 'LATIN CAPITAL LETTER I WITH DOT ABOVE'
return [
# character conversion
uchar.lower(),
@@ -149,11 +158,11 @@ def unicode_methods(Py_UCS4 uchar):
]
-@cython.test_assert_path_exists('//PythonCapiCallNode')
-@cython.test_fail_if_path_exists(
- '//SimpleCallNode',
- '//CoerceFromPyTypeNode',
-)
+#@cython.test_assert_path_exists('//PythonCapiCallNode')
+#@cython.test_fail_if_path_exists(
+# '//SimpleCallNode',
+# '//CoerceFromPyTypeNode',
+#)
def unicode_method_return_type(Py_UCS4 uchar):
"""
>>> unicode_method_return_type(ord('A'))
@@ -366,5 +375,5 @@ def uchar_lookup_in_dict(obj, Py_UCS4 uchar):
_WARNINGS = """
-364:16: Item lookup of unicode character codes now always converts to a Unicode string. Use an explicit C integer cast to get back the previous integer lookup behaviour.
+373:16: Item lookup of unicode character codes now always converts to a Unicode string. Use an explicit C integer cast to get back the previous integer lookup behaviour.
"""
diff --git a/tests/run/py_unicode_type.pyx b/tests/run/py_unicode_type.pyx
index d8d172bc9..0d33be927 100644
--- a/tests/run/py_unicode_type.pyx
+++ b/tests/run/py_unicode_type.pyx
@@ -123,8 +123,8 @@ def unicode_type_methods(Py_UNICODE uchar):
uchar.isupper(),
]
-@cython.test_assert_path_exists('//PythonCapiCallNode')
-@cython.test_fail_if_path_exists('//SimpleCallNode')
+#@cython.test_assert_path_exists('//PythonCapiCallNode')
+#@cython.test_fail_if_path_exists('//SimpleCallNode')
def unicode_methods(Py_UNICODE uchar):
"""
>>> unicode_methods(ord('A')) == ['a', 'A', 'A']