diff options
author | Stefan Behnel <stefan_ml@behnel.de> | 2019-01-20 21:05:04 +0100 |
---|---|---|
committer | Stefan Behnel <stefan_ml@behnel.de> | 2019-01-22 19:13:35 +0100 |
commit | 525d348874ee137c97e405e0779cb7ccc8cfb183 (patch) | |
tree | 63cf8117320424c846593490a5d59d82a92bae22 | |
parent | b48cf655522e632ab0193cd80b615a30cffe0dc3 (diff) | |
download | cython-faster_pymultiply.tar.gz |
Speed up multiplication of Python numbers with small integers (<= 2**30).faster_pymultiply
-rw-r--r-- | CHANGES.rst | 2 | ||||
-rw-r--r-- | Cython/Compiler/Optimize.py | 3 | ||||
-rw-r--r-- | Cython/Utility/Optimize.c | 45 | ||||
-rw-r--r-- | tests/run/consts.pyx | 11 | ||||
-rw-r--r-- | tests/run/mulop.pyx | 166 |
5 files changed, 214 insertions, 13 deletions
diff --git a/CHANGES.rst b/CHANGES.rst index cdbf4234c..8d41b86ff 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -17,6 +17,8 @@ Features added * Properties can be defined for external extension types. Patch by Matti Picus. (Github issue #2640) +* Multiplication of Python numbers with small constant integers is faster. + * The attributes ``gen.gi_frame`` and ``coro.cr_frame`` of Cython compiled generators and coroutines now return an actual frame object for introspection. (Github issue #2306) diff --git a/Cython/Compiler/Optimize.py b/Cython/Compiler/Optimize.py index a577facf1..fd7ef75c7 100644 --- a/Cython/Compiler/Optimize.py +++ b/Cython/Compiler/Optimize.py @@ -3177,6 +3177,9 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin, def _handle_simple_method_object___sub__(self, node, function, args, is_unbound_method): return self._optimise_num_binop('Subtract', node, function, args, is_unbound_method) + def _handle_simple_method_object___mul__(self, node, function, args, is_unbound_method): + return self._optimise_num_binop('Multiply', node, function, args, is_unbound_method) + def _handle_simple_method_object___eq__(self, node, function, args, is_unbound_method): return self._optimise_num_binop('Eq', node, function, args, is_unbound_method) diff --git a/Cython/Utility/Optimize.c b/Cython/Utility/Optimize.c index 6209fe814..db249063c 100644 --- a/Cython/Utility/Optimize.c +++ b/Cython/Utility/Optimize.c @@ -806,7 +806,7 @@ static {{c_ret_type}} __Pyx_PyInt_{{'' if ret_type.is_pyobject else 'Bool'}}{{op {{py: slot_name = {'TrueDivide': 'true_divide', 'FloorDivide': 'floor_divide'}.get(op, op.lower()) }} {{py: c_op = { - 'Add': '+', 'Subtract': '-', 'Remainder': '%', 'TrueDivide': '/', 'FloorDivide': '/', + 'Add': '+', 'Subtract': '-', 'Multiply': '*', 'Remainder': '%', 'TrueDivide': '/', 'FloorDivide': '/', 'Or': '|', 'Xor': '^', 'And': '&', 'Rshift': '>>', 'Lshift': '<<', 'Eq': '==', 'Ne': '!=', }[op] @@ -868,6 +868,19 @@ static {{c_ret_type}} __Pyx_PyInt_{{'' if ret_type.is_pyobject else 'Bool'}}{{op if (likely(b < (long) (sizeof(long)*8) && a == (a << b) >> b) || !a) { return PyInt_FromLong(a {{c_op}} b); } + {{elif c_op == '*'}} +#ifdef HAVE_LONG_LONG + if (sizeof(PY_LONG_LONG) > sizeof(long)) { + PY_LONG_LONG result = (PY_LONG_LONG)a {{c_op}} (PY_LONG_LONG)b; + return (result >= LONG_MIN && result <= LONG_MAX) ? + PyInt_FromLong((long)result) : PyLong_FromLongLong(result); + } +#endif +#if CYTHON_USE_TYPE_SLOTS + return PyInt_Type.tp_as_number->nb_{{slot_name}}(op1, op2); +#else + return PyNumber_{{op}}(op1, op2); +#endif {{else}} // other operations are safe, no overflow return PyInt_FromLong(a {{c_op}} b); @@ -893,13 +906,13 @@ static {{c_ret_type}} __Pyx_PyInt_{{'' if ret_type.is_pyobject else 'Bool'}}{{op return PyLong_FromLong(likely(size) ? digits[0] & intval : 0); } {{endif}} - // special cases for 0: + - % / // | ^ & >> << + // special cases for 0: + - * % / // | ^ & >> << if (unlikely(size == 0)) { {{if order == 'CObj' and c_op in '+-|^>><<'}} // x == x+0 == x-0 == x|0 == x^0 == x>>0 == x<<0 return __Pyx_NewRef(op1); - {{elif order == 'CObj' and c_op in '&'}} - // 0 == x&0 + {{elif order == 'CObj' and c_op in '*&'}} + // 0 == x*0 == x&0 return __Pyx_NewRef(op2); {{elif order == 'ObjC' and c_op in '+|^'}} // x == 0+x == 0|x == 0^x @@ -907,8 +920,8 @@ static {{c_ret_type}} __Pyx_PyInt_{{'' if ret_type.is_pyobject else 'Bool'}}{{op {{elif order == 'ObjC' and c_op == '-'}} // -x == 0-x return PyLong_FromLong(-intval); - {{elif order == 'ObjC' and (c_op in '%&>><<' or op == 'FloorDivide')}} - // 0 == 0%x == 0&x == 0>>x == 0<<x == 0//x + {{elif order == 'ObjC' and (c_op in '*%&>><<' or op == 'FloorDivide')}} + // 0 == 0*x == 0%x == 0&x == 0>>x == 0<<x == 0//x return __Pyx_NewRef(op1); {{endif}} } @@ -921,15 +934,15 @@ static {{c_ret_type}} __Pyx_PyInt_{{'' if ret_type.is_pyobject else 'Bool'}}{{op {{for _size in range(2, 5)}} {{for _case in (-_size, _size)}} case {{_case}}: - if (8 * sizeof(long) - 1 > {{_size}} * PyLong_SHIFT{{if op == 'TrueDivide'}} && {{_size-1}} * PyLong_SHIFT < 53{{endif}}) { + if (8 * sizeof(long) - 1 > {{_size}} * PyLong_SHIFT{{if c_op == '*'}}+30{{endif}}{{if op == 'TrueDivide'}} && {{_size-1}} * PyLong_SHIFT < 53{{endif}}) { {{ival}} = {{'-' if _case < 0 else ''}}(long) {{pylong_join(_size, 'digits')}}; break; {{if op not in ('Eq', 'Ne', 'TrueDivide')}} -#ifdef HAVE_LONG_LONG - } else if (8 * sizeof(PY_LONG_LONG) - 1 > {{_size}} * PyLong_SHIFT) { + #ifdef HAVE_LONG_LONG + } else if (8 * sizeof(PY_LONG_LONG) - 1 > {{_size}} * PyLong_SHIFT{{if c_op == '*'}}+30{{endif}}) { ll{{ival}} = {{'-' if _case < 0 else ''}}(PY_LONG_LONG) {{pylong_join(_size, 'digits', 'unsigned PY_LONG_LONG')}}; goto long_long; -#endif + #endif {{endif}} } // if size doesn't fit into a long or PY_LONG_LONG anymore, fall through to default @@ -958,7 +971,15 @@ static {{c_ret_type}} __Pyx_PyInt_{{'' if ret_type.is_pyobject else 'Bool'}}{{op {{return_false}}; } {{else}} - {{if c_op == '%'}} + {{if c_op == '*'}} + (void)a; (void)b; + #ifdef HAVE_LONG_LONG + ll{{ival}} = {{ival}}; + goto long_long; + #else + return PyLong_Type.tp_as_number->nb_{{slot_name}}(op1, op2); + #endif + {{elif c_op == '%'}} // see ExprNodes.py :: mod_int_utility_code x = a % b; x += ((x != 0) & ((x ^ b) < 0)) * b; @@ -1021,7 +1042,7 @@ static {{c_ret_type}} __Pyx_PyInt_{{'' if ret_type.is_pyobject else 'Bool'}}{{op } #endif - {{if c_op in '+-' or op in ('TrueDivide', 'Eq', 'Ne')}} + {{if c_op in '+-*' or op in ('TrueDivide', 'Eq', 'Ne')}} if (PyFloat_CheckExact({{pyval}})) { const long {{'a' if order == 'CObj' else 'b'}} = intval; double {{ival}} = PyFloat_AS_DOUBLE({{pyval}}); diff --git a/tests/run/consts.pyx b/tests/run/consts.pyx index 37772eaef..5f70e3ac2 100644 --- a/tests/run/consts.pyx +++ b/tests/run/consts.pyx @@ -156,8 +156,17 @@ def multiplied_lists_nonconst_left(x): """ return x * [1,2,3] + +@cython.test_fail_if_path_exists("//MulNode") +def multiplied_nonconst_list_const_int(x): + """ + >>> multiplied_nonconst_list_const_int(2) + [1, 2, 3, 1, 2, 3] + """ + return [1,x,3] * 2 + + @cython.test_fail_if_path_exists("//MulNode//ListNode") -@cython.test_assert_path_exists("//MulNode") def multiplied_lists_nonconst_expression(x): """ >>> multiplied_lists_nonconst_expression(5) == [1,2,3] * (5 * 2) diff --git a/tests/run/mulop.pyx b/tests/run/mulop.pyx new file mode 100644 index 000000000..0fba7dba9 --- /dev/null +++ b/tests/run/mulop.pyx @@ -0,0 +1,166 @@ +# mode: run +# tag: multiply + +import sys +IS_PY2 = sys.version_info[0] < 3 + + +def print_long(x): + if IS_PY2: + x = str(x).rstrip('L') + print(x) + + +def mul_10_obj(x): + """ + >>> mul_10_obj(0) + 0 + >>> mul_10_obj(10) + 100 + >>> mul_10_obj(-10) + -100 + >>> 10 * (2**14) + 163840 + >>> mul_10_obj(2**14) + 163840 + >>> mul_10_obj(-2**14) + -163840 + >>> print_long(10 * (2**29)) + 5368709120 + >>> print_long(mul_10_obj(2**29)) + 5368709120 + >>> print_long(mul_10_obj(-2**29)) + -5368709120 + >>> print_long(10 * (2**30)) + 10737418240 + >>> print_long(mul_10_obj(2**30)) + 10737418240 + >>> print_long(mul_10_obj(-2**30)) + -10737418240 + >>> print_long(10 * (2**63)) + 92233720368547758080 + >>> print_long(mul_10_obj(2**63)) + 92233720368547758080 + >>> print_long(mul_10_obj(-2**63)) + -92233720368547758080 + >>> print_long(10 * (2**128)) + 3402823669209384634633746074317682114560 + >>> print_long(mul_10_obj(2**128)) + 3402823669209384634633746074317682114560 + >>> print_long(mul_10_obj(-2**128)) + -3402823669209384634633746074317682114560 + """ + result = 10 * x + return result + + +def mul_obj_10(x): + """ + >>> mul_obj_10(0) + 0 + >>> mul_obj_10(10) + 100 + >>> mul_obj_10(-10) + -100 + >>> 10 * (2**14) + 163840 + >>> mul_obj_10(2**14) + 163840 + >>> mul_obj_10(-2**14) + -163840 + >>> print_long(10 * (2**29)) + 5368709120 + >>> print_long(mul_obj_10(2**29)) + 5368709120 + >>> print_long(mul_obj_10(-2**29)) + -5368709120 + >>> print_long(10 * (2**30)) + 10737418240 + >>> print_long(mul_obj_10(2**30)) + 10737418240 + >>> print_long(mul_obj_10(-2**30)) + -10737418240 + >>> print_long(10 * (2**63)) + 92233720368547758080 + >>> print_long(mul_obj_10(2**63)) + 92233720368547758080 + >>> print_long(mul_obj_10(-2**63)) + -92233720368547758080 + >>> print_long(10 * (2**128)) + 3402823669209384634633746074317682114560 + >>> print_long(mul_obj_10(2**128)) + 3402823669209384634633746074317682114560 + >>> print_long(mul_obj_10(-2**128)) + -3402823669209384634633746074317682114560 + """ + result = x * 10 + return result + + +def mul_bigint_obj(x): + """ + >>> mul_bigint_obj(0) + 0 + >>> print_long(mul_bigint_obj(1)) + 536870912 + >>> print_long(mul_bigint_obj(2)) + 1073741824 + >>> print_long(mul_bigint_obj(2**29)) + 288230376151711744 + >>> print_long(mul_bigint_obj(-2**29)) + -288230376151711744 + >>> print_long(mul_bigint_obj(2**30)) + 576460752303423488 + >>> print_long(mul_bigint_obj(-2**30)) + -576460752303423488 + >>> print_long(mul_bigint_obj(2**59)) + 309485009821345068724781056 + >>> print_long(mul_bigint_obj(-2**59)) + -309485009821345068724781056 + """ + result = (2**29) * x + return result + + +def mul_obj_float(x): + """ + >>> mul_obj_float(-0.0) + -0.0 + >>> mul_obj_float(0) + 0.0 + >>> mul_obj_float(1.0) + 2.0 + >>> mul_obj_float(-2.0) + -4.0 + >>> mul_obj_float(-0.5) + -1.0 + """ + result = x * 2.0 + return result + + +def mul_float_obj(x): + """ + >>> mul_float_obj(0) + 0.0 + >>> mul_float_obj(2) + 4.0 + >>> mul_float_obj(-2) + -4.0 + >>> 2.0 * (2**30-1) + 2147483646.0 + >>> mul_float_obj(2**30-1) + 2147483646.0 + >>> mul_float_obj(-(2**30-1)) + -2147483646.0 + >>> mul_float_obj(-0.0) + -0.0 + >>> mul_float_obj(1.0) + 2.0 + >>> mul_float_obj(-2.0) + -4.0 + >>> mul_float_obj(-0.5) + -1.0 + """ + result = 2.0 * x + return result |