Speed up multiplication of Python numbers with small integers (<= 2**30).faster_pymultiply

author: Stefan Behnel <stefan_ml@behnel.de> 2019-01-20 21:05:04 +0100
committer: Stefan Behnel <stefan_ml@behnel.de> 2019-01-22 19:13:35 +0100
commit: 525d348874ee137c97e405e0779cb7ccc8cfb183 (patch)
tree: 63cf8117320424c846593490a5d59d82a92bae22
parent: b48cf655522e632ab0193cd80b615a30cffe0dc3 (diff)
download: cython-faster_pymultiply.tar.gz
5 files changed, 214 insertions, 13 deletions
diff --git a/CHANGES.rst b/CHANGES.rst
index cdbf4234c..8d41b86ff 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -17,6 +17,8 @@ Features added
 * Properties can be defined for external extension types.
   Patch by Matti Picus.  (Github issue #2640)
 
+* Multiplication of Python numbers with small constant integers is faster.
+
 * The attributes ``gen.gi_frame`` and ``coro.cr_frame`` of Cython compiled
   generators and coroutines now return an actual frame object for introspection.
   (Github issue #2306)
diff --git a/Cython/Compiler/Optimize.py b/Cython/Compiler/Optimize.py
index a577facf1..fd7ef75c7 100644
--- a/Cython/Compiler/Optimize.py
+++ b/Cython/Compiler/Optimize.py
@@ -3177,6 +3177,9 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
     def _handle_simple_method_object___sub__(self, node, function, args, is_unbound_method):
         return self._optimise_num_binop('Subtract', node, function, args, is_unbound_method)
 
+    def _handle_simple_method_object___mul__(self, node, function, args, is_unbound_method):
+        return self._optimise_num_binop('Multiply', node, function, args, is_unbound_method)
+
     def _handle_simple_method_object___eq__(self, node, function, args, is_unbound_method):
         return self._optimise_num_binop('Eq', node, function, args, is_unbound_method)
 
diff --git a/Cython/Utility/Optimize.c b/Cython/Utility/Optimize.c
index 6209fe814..db249063c 100644
--- a/Cython/Utility/Optimize.c
+++ b/Cython/Utility/Optimize.c
@@ -806,7 +806,7 @@ static {{c_ret_type}} __Pyx_PyInt_{{'' if ret_type.is_pyobject else 'Bool'}}{{op
 {{py: slot_name = {'TrueDivide': 'true_divide', 'FloorDivide': 'floor_divide'}.get(op, op.lower()) }}
 {{py:
 c_op = {
-    'Add': '+', 'Subtract': '-', 'Remainder': '%', 'TrueDivide': '/', 'FloorDivide': '/',
+    'Add': '+', 'Subtract': '-', 'Multiply': '*', 'Remainder': '%', 'TrueDivide': '/', 'FloorDivide': '/',
     'Or': '|', 'Xor': '^', 'And': '&', 'Rshift': '>>', 'Lshift': '<<',
     'Eq': '==', 'Ne': '!=',
     }[op]
@@ -868,6 +868,19 @@ static {{c_ret_type}} __Pyx_PyInt_{{'' if ret_type.is_pyobject else 'Bool'}}{{op
             if (likely(b < (long) (sizeof(long)*8) && a == (a << b) >> b) || !a) {
                 return PyInt_FromLong(a {{c_op}} b);
             }
+        {{elif c_op == '*'}}
+#ifdef HAVE_LONG_LONG
+            if (sizeof(PY_LONG_LONG) > sizeof(long)) {
+                PY_LONG_LONG result = (PY_LONG_LONG)a {{c_op}} (PY_LONG_LONG)b;
+                return (result >= LONG_MIN && result <= LONG_MAX) ?
+                    PyInt_FromLong((long)result) : PyLong_FromLongLong(result);
+            }
+#endif
+#if CYTHON_USE_TYPE_SLOTS
+            return PyInt_Type.tp_as_number->nb_{{slot_name}}(op1, op2);
+#else
+            return PyNumber_{{op}}(op1, op2);
+#endif
         {{else}}
             // other operations are safe, no overflow
             return PyInt_FromLong(a {{c_op}} b);
@@ -893,13 +906,13 @@ static {{c_ret_type}} __Pyx_PyInt_{{'' if ret_type.is_pyobject else 'Bool'}}{{op
             return PyLong_FromLong(likely(size) ? digits[0] & intval : 0);
         }
         {{endif}}
-        // special cases for 0: + - % / // | ^ & >> <<
+        // special cases for 0: + - * % / // | ^ & >> <<
         if (unlikely(size == 0)) {
             {{if order == 'CObj' and c_op in '+-|^>><<'}}
             // x == x+0 == x-0 == x|0 == x^0 == x>>0 == x<<0
             return __Pyx_NewRef(op1);
-            {{elif order == 'CObj' and c_op in '&'}}
-            // 0 == x&0
+            {{elif order == 'CObj' and c_op in '*&'}}
+            // 0 == x*0 == x&0
             return __Pyx_NewRef(op2);
             {{elif order == 'ObjC' and c_op in '+|^'}}
             // x == 0+x == 0|x == 0^x
@@ -907,8 +920,8 @@ static {{c_ret_type}} __Pyx_PyInt_{{'' if ret_type.is_pyobject else 'Bool'}}{{op
             {{elif order == 'ObjC' and c_op == '-'}}
             // -x == 0-x
             return PyLong_FromLong(-intval);
-            {{elif order == 'ObjC' and (c_op in '%&>><<' or op == 'FloorDivide')}}
-            // 0 == 0%x == 0&x == 0>>x == 0<<x == 0//x
+            {{elif order == 'ObjC' and (c_op in '*%&>><<' or op == 'FloorDivide')}}
+            // 0 == 0*x == 0%x == 0&x == 0>>x == 0<<x == 0//x
             return __Pyx_NewRef(op1);
             {{endif}}
         }
@@ -921,15 +934,15 @@ static {{c_ret_type}} __Pyx_PyInt_{{'' if ret_type.is_pyobject else 'Bool'}}{{op
                 {{for _size in range(2, 5)}}
                 {{for _case in (-_size, _size)}}
                 case {{_case}}:
-                    if (8 * sizeof(long) - 1 > {{_size}} * PyLong_SHIFT{{if op == 'TrueDivide'}} && {{_size-1}} * PyLong_SHIFT < 53{{endif}}) {
+                    if (8 * sizeof(long) - 1 > {{_size}} * PyLong_SHIFT{{if c_op == '*'}}+30{{endif}}{{if op == 'TrueDivide'}} && {{_size-1}} * PyLong_SHIFT < 53{{endif}}) {
                         {{ival}} = {{'-' if _case < 0 else ''}}(long) {{pylong_join(_size, 'digits')}};
                         break;
                     {{if op not in ('Eq', 'Ne', 'TrueDivide')}}
-#ifdef HAVE_LONG_LONG
-                    } else if (8 * sizeof(PY_LONG_LONG) - 1 > {{_size}} * PyLong_SHIFT) {
+                    #ifdef HAVE_LONG_LONG
+                    } else if (8 * sizeof(PY_LONG_LONG) - 1 > {{_size}} * PyLong_SHIFT{{if c_op == '*'}}+30{{endif}}) {
                         ll{{ival}} = {{'-' if _case < 0 else ''}}(PY_LONG_LONG) {{pylong_join(_size, 'digits', 'unsigned PY_LONG_LONG')}};
                         goto long_long;
-#endif
+                    #endif
                     {{endif}}
                     }
                     // if size doesn't fit into a long or PY_LONG_LONG anymore, fall through to default
@@ -958,7 +971,15 @@ static {{c_ret_type}} __Pyx_PyInt_{{'' if ret_type.is_pyobject else 'Bool'}}{{op
                 {{return_false}};
             }
         {{else}}
-            {{if c_op == '%'}}
+            {{if c_op == '*'}}
+                (void)a; (void)b;
+                #ifdef HAVE_LONG_LONG
+                ll{{ival}} = {{ival}};
+                goto long_long;
+                #else
+                return PyLong_Type.tp_as_number->nb_{{slot_name}}(op1, op2);
+                #endif
+            {{elif c_op == '%'}}
                 // see ExprNodes.py :: mod_int_utility_code
                 x = a % b;
                 x += ((x != 0) & ((x ^ b) < 0)) * b;
@@ -1021,7 +1042,7 @@ static {{c_ret_type}} __Pyx_PyInt_{{'' if ret_type.is_pyobject else 'Bool'}}{{op
     }
     #endif
 
-    {{if c_op in '+-' or op in ('TrueDivide', 'Eq', 'Ne')}}
+    {{if c_op in '+-*' or op in ('TrueDivide', 'Eq', 'Ne')}}
     if (PyFloat_CheckExact({{pyval}})) {
         const long {{'a' if order == 'CObj' else 'b'}} = intval;
         double {{ival}} = PyFloat_AS_DOUBLE({{pyval}});
diff --git a/tests/run/consts.pyx b/tests/run/consts.pyx
index 37772eaef..5f70e3ac2 100644
--- a/tests/run/consts.pyx
+++ b/tests/run/consts.pyx
@@ -156,8 +156,17 @@ def multiplied_lists_nonconst_left(x):
     """
     return x * [1,2,3]
 
+
+@cython.test_fail_if_path_exists("//MulNode")
+def multiplied_nonconst_list_const_int(x):
+    """
+    >>> multiplied_nonconst_list_const_int(2)
+    [1, 2, 3, 1, 2, 3]
+    """
+    return [1,x,3] * 2
+
+
 @cython.test_fail_if_path_exists("//MulNode//ListNode")
-@cython.test_assert_path_exists("//MulNode")
 def multiplied_lists_nonconst_expression(x):
     """
     >>> multiplied_lists_nonconst_expression(5) == [1,2,3] * (5 * 2)
diff --git a/tests/run/mulop.pyx b/tests/run/mulop.pyx
new file mode 100644
index 000000000..0fba7dba9
--- /dev/null
+++ b/tests/run/mulop.pyx
@@ -0,0 +1,166 @@
+# mode: run
+# tag: multiply
+
+import sys
+IS_PY2 = sys.version_info[0] < 3
+
+
+def print_long(x):
+    if IS_PY2:
+        x = str(x).rstrip('L')
+    print(x)
+
+
+def mul_10_obj(x):
+    """
+    >>> mul_10_obj(0)
+    0
+    >>> mul_10_obj(10)
+    100
+    >>> mul_10_obj(-10)
+    -100
+    >>> 10 * (2**14)
+    163840
+    >>> mul_10_obj(2**14)
+    163840
+    >>> mul_10_obj(-2**14)
+    -163840
+    >>> print_long(10 * (2**29))
+    5368709120
+    >>> print_long(mul_10_obj(2**29))
+    5368709120
+    >>> print_long(mul_10_obj(-2**29))
+    -5368709120
+    >>> print_long(10 * (2**30))
+    10737418240
+    >>> print_long(mul_10_obj(2**30))
+    10737418240
+    >>> print_long(mul_10_obj(-2**30))
+    -10737418240
+    >>> print_long(10 * (2**63))
+    92233720368547758080
+    >>> print_long(mul_10_obj(2**63))
+    92233720368547758080
+    >>> print_long(mul_10_obj(-2**63))
+    -92233720368547758080
+    >>> print_long(10 * (2**128))
+    3402823669209384634633746074317682114560
+    >>> print_long(mul_10_obj(2**128))
+    3402823669209384634633746074317682114560
+    >>> print_long(mul_10_obj(-2**128))
+    -3402823669209384634633746074317682114560
+    """
+    result = 10 * x
+    return result
+
+
+def mul_obj_10(x):
+    """
+    >>> mul_obj_10(0)
+    0
+    >>> mul_obj_10(10)
+    100
+    >>> mul_obj_10(-10)
+    -100
+    >>> 10 * (2**14)
+    163840
+    >>> mul_obj_10(2**14)
+    163840
+    >>> mul_obj_10(-2**14)
+    -163840
+    >>> print_long(10 * (2**29))
+    5368709120
+    >>> print_long(mul_obj_10(2**29))
+    5368709120
+    >>> print_long(mul_obj_10(-2**29))
+    -5368709120
+    >>> print_long(10 * (2**30))
+    10737418240
+    >>> print_long(mul_obj_10(2**30))
+    10737418240
+    >>> print_long(mul_obj_10(-2**30))
+    -10737418240
+    >>> print_long(10 * (2**63))
+    92233720368547758080
+    >>> print_long(mul_obj_10(2**63))
+    92233720368547758080
+    >>> print_long(mul_obj_10(-2**63))
+    -92233720368547758080
+    >>> print_long(10 * (2**128))
+    3402823669209384634633746074317682114560
+    >>> print_long(mul_obj_10(2**128))
+    3402823669209384634633746074317682114560
+    >>> print_long(mul_obj_10(-2**128))
+    -3402823669209384634633746074317682114560
+    """
+    result = x * 10
+    return result
+
+
+def mul_bigint_obj(x):
+    """
+    >>> mul_bigint_obj(0)
+    0
+    >>> print_long(mul_bigint_obj(1))
+    536870912
+    >>> print_long(mul_bigint_obj(2))
+    1073741824
+    >>> print_long(mul_bigint_obj(2**29))
+    288230376151711744
+    >>> print_long(mul_bigint_obj(-2**29))
+    -288230376151711744
+    >>> print_long(mul_bigint_obj(2**30))
+    576460752303423488
+    >>> print_long(mul_bigint_obj(-2**30))
+    -576460752303423488
+    >>> print_long(mul_bigint_obj(2**59))
+    309485009821345068724781056
+    >>> print_long(mul_bigint_obj(-2**59))
+    -309485009821345068724781056
+    """
+    result = (2**29) * x
+    return result
+
+
+def mul_obj_float(x):
+    """
+    >>> mul_obj_float(-0.0)
+    -0.0
+    >>> mul_obj_float(0)
+    0.0
+    >>> mul_obj_float(1.0)
+    2.0
+    >>> mul_obj_float(-2.0)
+    -4.0
+    >>> mul_obj_float(-0.5)
+    -1.0
+    """
+    result = x * 2.0
+    return result
+
+
+def mul_float_obj(x):
+    """
+    >>> mul_float_obj(0)
+    0.0
+    >>> mul_float_obj(2)
+    4.0
+    >>> mul_float_obj(-2)
+    -4.0
+    >>> 2.0 * (2**30-1)
+    2147483646.0
+    >>> mul_float_obj(2**30-1)
+    2147483646.0
+    >>> mul_float_obj(-(2**30-1))
+    -2147483646.0
+    >>> mul_float_obj(-0.0)
+    -0.0
+    >>> mul_float_obj(1.0)
+    2.0
+    >>> mul_float_obj(-2.0)
+    -4.0
+    >>> mul_float_obj(-0.5)
+    -1.0
+    """
+    result = 2.0 * x
+    return result
author	Stefan Behnel <stefan_ml@behnel.de>	2019-01-20 21:05:04 +0100
committer	Stefan Behnel <stefan_ml@behnel.de>	2019-01-22 19:13:35 +0100
commit	525d348874ee137c97e405e0779cb7ccc8cfb183 (patch)
tree	63cf8117320424c846593490a5d59d82a92bae22
parent	b48cf655522e632ab0193cd80b615a30cffe0dc3 (diff)
download	cython-faster_pymultiply.tar.gz