diff options
author | Stefan Behnel <stefan_ml@behnel.de> | 2019-01-03 09:49:35 +0100 |
---|---|---|
committer | Stefan Behnel <stefan_ml@behnel.de> | 2019-01-03 09:50:23 +0100 |
commit | 4b64bbe1c792c0b0f40d97bb98e4192e28923ca2 (patch) | |
tree | 514fea12a5cf1e93ad14f5586ec570957889096d | |
parent | 8103d0552c8129211241f047f3bfa10c43e3ab2a (diff) | |
download | cython-4b64bbe1c792c0b0f40d97bb98e4192e28923ca2.tar.gz |
De-optimise the unicode methods ".upper()", ".lower()" and ".title()" on single character values since they must still be able to return multiple characters.
-rw-r--r-- | CHANGES.rst | 5 | ||||
-rw-r--r-- | Cython/Compiler/Optimize.py | 9 | ||||
-rw-r--r-- | tests/run/py_ucs4_type.pyx | 29 | ||||
-rw-r--r-- | tests/run/py_unicode_type.pyx | 4 |
4 files changed, 32 insertions, 15 deletions
diff --git a/CHANGES.rst b/CHANGES.rst
index a7468576b..a848617ef 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -20,6 +20,11 @@ Features added
 Bugs fixed
 ----------
 
+* The unicode methods ``.upper()``, ``.lower()`` and ``.title()`` were
+  incorrectly optimised for single character input values and only returned
+  the first character if multiple characters should have been returned.
+  They now use the original Python methods again.
+
 * The ``Py_hash_t`` type failed to accept arbitrary "index" values.
   (Github issue #2752)
 
diff --git a/Cython/Compiler/Optimize.py b/Cython/Compiler/Optimize.py
index 4c306d566..a577facf1 100644
--- a/Cython/Compiler/Optimize.py
+++ b/Cython/Compiler/Optimize.py
@@ -3374,6 +3374,8 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
             PyrexTypes.CFuncTypeArg("uchar", PyrexTypes.c_py_ucs4_type, None),
             ])
 
+    # DISABLED: Return value can only be one character, which is not correct.
+    '''
     def _inject_unicode_character_conversion(self, node, function, args, is_unbound_method):
         if is_unbound_method or len(args) != 1:
             return node
@@ -3392,9 +3394,10 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
             func_call = func_call.coerce_to_pyobject(self.current_env)
         return func_call
 
-    _handle_simple_method_unicode_lower = _inject_unicode_character_conversion
-    _handle_simple_method_unicode_upper = _inject_unicode_character_conversion
-    _handle_simple_method_unicode_title = _inject_unicode_character_conversion
+    #_handle_simple_method_unicode_lower = _inject_unicode_character_conversion
+    #_handle_simple_method_unicode_upper = _inject_unicode_character_conversion
+    #_handle_simple_method_unicode_title = _inject_unicode_character_conversion
+    '''
 
     PyUnicode_Splitlines_func_type = PyrexTypes.CFuncType(
         Builtin.list_type, [
diff --git a/tests/run/py_ucs4_type.pyx b/tests/run/py_ucs4_type.pyx
index 7193319c6..afd45fca3 100644
--- a/tests/run/py_ucs4_type.pyx
+++ b/tests/run/py_ucs4_type.pyx
@@ -132,15 +132,24 @@ def unicode_type_methods(Py_UCS4 uchar):
         uchar.isupper(),
         ]
 
-@cython.test_assert_path_exists('//PythonCapiCallNode')
-@cython.test_fail_if_path_exists('//SimpleCallNode')
+#@cython.test_assert_path_exists('//PythonCapiCallNode')
+#@cython.test_fail_if_path_exists('//SimpleCallNode')
 def unicode_methods(Py_UCS4 uchar):
     """
-    >>> unicode_methods(ord('A')) == ['a', 'A', 'A']
+    >>> unicode_methods(ord('A')) == ['a', 'A', 'A'] or unicode_methods(ord('A'))
+    True
+    >>> unicode_methods(ord('a')) == ['a', 'A', 'A'] or unicode_methods(ord('a'))
     True
-    >>> unicode_methods(ord('a')) == ['a', 'A', 'A']
+    >>> unicode_methods(0x1E9E) == [u'\\xdf', u'\\u1e9e', u'\\u1e9e'] or unicode_methods(0x1E9E)
+    True
+    >>> unicode_methods(0x0130) in (
+    ...     [u'i\\u0307', u'\\u0130', u'\\u0130'],  # Py3
+    ...     [u'i', u'\\u0130', u'\\u0130'],  # Py2
+    ... ) or unicode_methods(0x0130)
     True
     """
+    # \u1E9E == 'LATIN CAPITAL LETTER SHARP S'
+    # \u0130 == 'LATIN CAPITAL LETTER I WITH DOT ABOVE'
     return [
         # character conversion
         uchar.lower(),
@@ -149,11 +158,11 @@ def unicode_methods(Py_UCS4 uchar):
         ]
 
 
-@cython.test_assert_path_exists('//PythonCapiCallNode')
-@cython.test_fail_if_path_exists(
-    '//SimpleCallNode',
-    '//CoerceFromPyTypeNode',
-)
+#@cython.test_assert_path_exists('//PythonCapiCallNode')
+#@cython.test_fail_if_path_exists(
+#    '//SimpleCallNode',
+#    '//CoerceFromPyTypeNode',
+#)
 def unicode_method_return_type(Py_UCS4 uchar):
     """
     >>> unicode_method_return_type(ord('A'))
@@ -366,5 +375,5 @@ def uchar_lookup_in_dict(obj, Py_UCS4 uchar):
 
 
 _WARNINGS = """
-364:16: Item lookup of unicode character codes now always converts to a Unicode string. Use an explicit C integer cast to get back the previous integer lookup behaviour.
+373:16: Item lookup of unicode character codes now always converts to a Unicode string. Use an explicit C integer cast to get back the previous integer lookup behaviour.
 """
diff --git a/tests/run/py_unicode_type.pyx b/tests/run/py_unicode_type.pyx
index d8d172bc9..0d33be927 100644
--- a/tests/run/py_unicode_type.pyx
+++ b/tests/run/py_unicode_type.pyx
@@ -123,8 +123,8 @@ def unicode_type_methods(Py_UNICODE uchar):
         uchar.isupper(),
         ]
 
-@cython.test_assert_path_exists('//PythonCapiCallNode')
-@cython.test_fail_if_path_exists('//SimpleCallNode')
+#@cython.test_assert_path_exists('//PythonCapiCallNode')
+#@cython.test_fail_if_path_exists('//SimpleCallNode')
 def unicode_methods(Py_UNICODE uchar):
     """
     >>> unicode_methods(ord('A')) == ['a', 'A', 'A'] |