ENH: add scalarmathmodule.h.src with integer overflow functions

and use them in scalarmathmodule.c instead of the old (disabled) ones.
author: Julian Taylor <jtaylor.debian@googlemail.com> 2013-10-14 21:55:33 +0200
committer: Julian Taylor <jtaylor.debian@googlemail.com> 2013-10-15 00:37:50 +0200
commit: 3f5ef54558e3546134be32edfdc2ed510a9cc6ca (patch)
tree: c8ec10523881eed4b409ee1869589a1a09d4a181 /numpy/core/src
parent: 10ce5b29cf1fd507f1ba8d072724388089774ac4 (diff)
download: numpy-3f5ef54558e3546134be32edfdc2ed510a9cc6ca.tar.gz
5 files changed, 46 insertions, 150 deletions
diff --git a/numpy/core/src/multiarray/common.h b/numpy/core/src/multiarray/common.h
index ffb571b2e..f05698b9e 100644
--- a/numpy/core/src/multiarray/common.h
+++ b/numpy/core/src/multiarray/common.h
@@ -84,30 +84,6 @@ npy_is_aligned(const void * p, const npy_uintp alignment)
     }
 }
 
-/*
- * writes result of a * b into r
- * returns 1 if a * b overflowed else returns 0
- */
-static NPY_INLINE int
-npy_mul_with_overflow_intp(npy_intp * r, npy_intp a, npy_intp b)
-{
-    const npy_intp half_sz = (((npy_intp)1 << (sizeof(a) * 8 / 2)) - 1);
-
-    *r = a * b;
-
-    /*
-     * avoid expensive division on common no overflow case
-     * could be improved via compiler intrinsics e.g. via clang
-     * __builtin_mul_with_overflow, gcc __int128 or cpu overflow flags
-     */
-    if (NPY_UNLIKELY((a | b) >= half_sz) &&
-        a != 0 && b > NPY_MAX_INTP / a) {
-        return 1;
-    }
-
-    return 0;
-}
-
 #include "ucsnarrow.h"
 
 #endif
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index bef3feec1..1b7e1c428 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -25,6 +25,7 @@
 #include "datetime_strings.h"
 #include "array_assign.h"
 #include "mapping.h" /* for array_item_asarray */
+#include "scalarmathmodule.h" /* for npy_mul_with_overflow_intp */
 
 /*
  * Reading from a file or a string.
diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c
index f0ada8618..ea879c226 100644
--- a/numpy/core/src/multiarray/multiarraymodule.c
+++ b/numpy/core/src/multiarray/multiarraymodule.c
@@ -54,6 +54,7 @@ NPY_NO_EXPORT int NPY_NUMUSERTYPES = 0;
 #include "array_assign.h"
 #include "common.h"
 #include "ufunc_override.h"
+#include "scalarmathmodule.h" /* for npy_mul_with_overflow_intp */
 
 /* Only here for API compatibility */
 NPY_NO_EXPORT PyTypeObject PyBigArray_Type;
diff --git a/numpy/core/src/private/scalarmathmodule.h.src b/numpy/core/src/private/scalarmathmodule.h.src
new file mode 100644
index 000000000..48507a54b
--- /dev/null
+++ b/numpy/core/src/private/scalarmathmodule.h.src
@@ -0,0 +1,42 @@
+/*
+ * some overflow checking integer arithmetic
+ */
+#include <numpy/npy_common.h>
+
+#ifndef __NPY_SCALARMATHMODULE_H__
+#define __NPY_SCALARMATHMODULE_H__
+
+/**begin repeat
+ *  #name = int, uint, long, ulong,
+ *          longlong, ulonglong, intp#
+ *  #type = npy_int, npy_uint, npy_long, npy_ulong,
+ *          npy_longlong, npy_ulonglong, npy_intp#
+ *  #MAX = NPY_MAX_INT, NPY_MAX_UINT, NPY_MAX_LONG, NPY_MAX_ULONG,
+ *         NPY_MAX_LONGLONG, NPY_MAX_ULONGLONG, NPY_MAX_INTP#
+ */
+
+/*
+ * writes a * b into r (stored before the check, so also on overflow);
+ * returns 1 if a * b exceeds @MAX@, else 0. negative a or b is not supported.
+ */
+static NPY_INLINE int
+npy_mul_with_overflow_@name@(@type@ * r, @type@ a, @type@ b)
+{
+    const @type@ half_sz = ((@type@)1 << ((sizeof(a) * 8 - 1) / 2));
+
+    *r = a * b;
+    /*
+     * non-negative operands below half_sz cannot overflow (also when signed),
+     * so the expensive division is skipped in the common case. could be improved
+     * via clang __builtin_mul_with_overflow, gcc __int128 or cpu overflow flags
+     */
+    if (NPY_UNLIKELY((a | b) >= half_sz) &&
+        a != 0 && b > @MAX@ / a) {
+        return 1;
+    }
+
+    return 0;
+}
+/**end repeat**/
+
+#endif
diff --git a/numpy/core/src/scalarmathmodule.c.src b/numpy/core/src/scalarmathmodule.c.src
index d789a3dd4..fac8aa399 100644
--- a/numpy/core/src/scalarmathmodule.c.src
+++ b/numpy/core/src/scalarmathmodule.c.src
@@ -16,129 +16,7 @@
 #include "npy_pycompat.h"
 
 #include "numpy/halffloat.h"
-
-/** numarray adapted routines.... **/
-
-/*
- * Note that the C standard requires signed/unsigned integral
- * types of the same rank to have the same width.
- */
-
-#if NPY_SIZEOF_LONGLONG == 64
-
-static int
-ulonglong_overflow(npy_ulonglong a, npy_ulonglong b)
-{
-    npy_ulonglong ah, al, bh, bl, w, x, y, z;
-    unsigned long long mask = 0xFFFFFFFFL;
-
-    ah = (a >> 32);
-    al = (a & mask);
-    bh = (b >> 32);
-    bl = (b & mask);
-
-    /* 128-bit product:  z*2**64 + (x+y)*2**32 + w  */
-    w = al*bl;
-    x = bh*al;
-    y = ah*bl;
-    z = ah*bh;
-
-    /* *c = ((x + y)<<32) + w; */
-    return z || (x >> 32) || (y >> 32) ||
-        (((x & mask) + (y & mask) + (w >> 32)) >> 32);
-}
-
-static int
-slonglong_overflow(npy_longlong a0, npy_longlong b0)
-{
-    npy_ulonglong a, b;
-    npy_ulonglong ah, al, bh, bl, w, x, y, z;
-    long long mask = 0xFFFFFFFFL;
-
-    a = (a0 < 0) ? -a0 : a0;
-    b = (b0 < 0) ? -b0 : b0;
-
-    ah = (a >> 32);
-    al = (a & mask);
-    bh = (b >> 32);
-    bl = (b & mask);
-
-    w = al*bl;
-    x = bh*al;
-    y = ah*bl;
-    z = ah*bh;
-
-    return z || (x >> 31) || (y >> 31) ||
-        (((x & mask) + (y & mask) + (w >> 32)) >> 31);
-}
-
-#elif NPY_SIZEOF_LONGLONG == 128
-
-static int
-ulonglong_overflow(npy_ulonglong a, npy_ulonglong b)
-{
-    npy_ulonglong ah, al, bh, bl, w, x, y, z;
-    unsigned long long mask = 0xFFFFFFFFFFFFFFFFL;
-
-    ah = (a >> 64);
-    al = (a & mask);
-    bh = (b >> 64);
-    bl = (b & mask);
-
-    /* 128-bit product:  z*2**64 + (x+y)*2**32 + w  */
-    w = al*bl;
-    x = bh*al;
-    y = ah*bl;
-    z = ah*bh;
-
-    /* *c = ((x + y)<<32) + w; */
-    return z || (x >> 64) || (y >> 64) ||
-        (((x & mask) + (y & mask) + (w >> 64)) >> 64);
-}
-
-static int
-slonglong_overflow(npy_longlong a0, npy_longlong b0)
-{
-    npy_ulonglong a, b;
-    npy_ulonglong ah, al, bh, bl, w, x, y, z;
-    long long mask = 0xFFFFFFFFFFFFFFFFL;
-
-    a = (a0 < 0) ? -a0 : a0;
-    b = (b0 < 0) ? -b0 : b0;
-
-    ah = (a >> 64);
-    al = (a & mask);
-    bh = (b >> 64);
-    bl = (b & mask);
-
-    w = al*bl;
-    x = bh*al;
-    y = ah*bl;
-    z = ah*bh;
-
-    return z || (x >> 63) || (y >> 63) ||
-        (((x & mask) + (y & mask) + (w >> 64)) >> 63);
-}
-
-#else
-
-static int
-ulonglong_overflow(npy_ulonglong NPY_UNUSED(a), npy_ulonglong NPY_UNUSED(b))
-{
-        return 0;
-}
-
-static int
-slonglong_overflow(npy_longlong NPY_UNUSED(a0), npy_longlong NPY_UNUSED(b0))
-{
-    return 0;
-}
-
-#endif
-
-
-/** end direct numarray code **/
-
+#include "scalarmathmodule.h"
 
 /* Basic operations:
  *
@@ -245,13 +123,11 @@ static void
  * #type = npy_int, npy_uint, npy_long, npy_ulong,
  *         npy_longlong, npy_ulonglong#
  * #SIZE = INT*2, LONG*2, LONGLONG*2#
- * #char = (s, u)*3#
  */
 #if NPY_SIZEOF_LONGLONG == NPY_SIZEOF_@SIZE@
 static void
 @name@_ctype_multiply(@type@ a, @type@ b, @type@ *out) {
-    *out = a * b;
-    if (@char@longlong_overflow(a, b)) {
+    if (npy_mul_with_overflow_@name@(out, a, b)) {
         npy_set_floatstatus_overflow();
     }
     return;
author	Julian Taylor <jtaylor.debian@googlemail.com>	2013-10-14 21:55:33 +0200
committer	Julian Taylor <jtaylor.debian@googlemail.com>	2013-10-15 00:37:50 +0200
commit	3f5ef54558e3546134be32edfdc2ed510a9cc6ca (patch)
tree	c8ec10523881eed4b409ee1869589a1a09d4a181 /numpy/core/src
parent	10ce5b29cf1fd507f1ba8d072724388089774ac4 (diff)
download	numpy-3f5ef54558e3546134be32edfdc2ed510a9cc6ca.tar.gz