diff options
author | Julian Taylor <jtaylor.debian@googlemail.com> | 2013-10-14 21:55:33 +0200 |
---|---|---|
committer | Julian Taylor <jtaylor.debian@googlemail.com> | 2013-10-15 00:37:50 +0200 |
commit | 3f5ef54558e3546134be32edfdc2ed510a9cc6ca (patch) | |
tree | c8ec10523881eed4b409ee1869589a1a09d4a181 /numpy/core/src | |
parent | 10ce5b29cf1fd507f1ba8d072724388089774ac4 (diff) | |
download | numpy-3f5ef54558e3546134be32edfdc2ed510a9cc6ca.tar.gz |
ENH: add scalarmathmodule.h.src with integer overflow functions
and use them in scalarmathmodule.c instead of the old (disabled) ones.
Diffstat (limited to 'numpy/core/src')
-rw-r--r-- | numpy/core/src/multiarray/common.h | 24 | ||||
-rw-r--r-- | numpy/core/src/multiarray/ctors.c | 1 | ||||
-rw-r--r-- | numpy/core/src/multiarray/multiarraymodule.c | 1 | ||||
-rw-r--r-- | numpy/core/src/private/scalarmathmodule.h.src | 42 | ||||
-rw-r--r-- | numpy/core/src/scalarmathmodule.c.src | 128 |
5 files changed, 46 insertions, 150 deletions
diff --git a/numpy/core/src/multiarray/common.h b/numpy/core/src/multiarray/common.h index ffb571b2e..f05698b9e 100644 --- a/numpy/core/src/multiarray/common.h +++ b/numpy/core/src/multiarray/common.h @@ -84,30 +84,6 @@ npy_is_aligned(const void * p, const npy_uintp alignment) } } -/* - * writes result of a * b into r - * returns 1 if a * b overflowed else returns 0 - */ -static NPY_INLINE int -npy_mul_with_overflow_intp(npy_intp * r, npy_intp a, npy_intp b) -{ - const npy_intp half_sz = (((npy_intp)1 << (sizeof(a) * 8 / 2)) - 1); - - *r = a * b; - - /* - * avoid expensive division on common no overflow case - * could be improved via compiler intrinsics e.g. via clang - * __builtin_mul_with_overflow, gcc __int128 or cpu overflow flags - */ - if (NPY_UNLIKELY((a | b) >= half_sz) && - a != 0 && b > NPY_MAX_INTP / a) { - return 1; - } - - return 0; -} - #include "ucsnarrow.h" #endif diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index bef3feec1..1b7e1c428 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -25,6 +25,7 @@ #include "datetime_strings.h" #include "array_assign.h" #include "mapping.h" /* for array_item_asarray */ +#include "scalarmathmodule.h" /* for npy_mul_with_overflow_intp */ /* * Reading from a file or a string. diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index f0ada8618..ea879c226 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -54,6 +54,7 @@ NPY_NO_EXPORT int NPY_NUMUSERTYPES = 0; #include "array_assign.h" #include "common.h" #include "ufunc_override.h" +#include "scalarmathmodule.h" /* for npy_mul_with_overflow_intp */ /* Only here for API compatibility */ NPY_NO_EXPORT PyTypeObject PyBigArray_Type; diff --git a/numpy/core/src/private/scalarmathmodule.h.src b/numpy/core/src/private/scalarmathmodule.h.src new file mode 100644 index 000000000..48507a54b --- /dev/null +++ b/numpy/core/src/private/scalarmathmodule.h.src @@ -0,0 +1,42 @@ +/* + * some overflow checking integer arithmetic + */ +#include <numpy/npy_common.h> + +#ifndef __NPY_SCALARMATHMODULE_H__ +#define __NPY_SCALARMATHMODULE_H__ + +/**begin repeat + * #name = int, uint, long, ulong, + * longlong, ulonglong, intp# + * #type = npy_int, npy_uint, npy_long, npy_ulong, + * npy_longlong, npy_ulonglong, npy_intp# + * #MAX = NPY_MAX_INT, NPY_MAX_UINT, NPY_MAX_LONG, NPY_MAX_ULONG, + * NPY_MAX_LONGLONG, NPY_MAX_ULONGLONG, NPY_MAX_INTP# + */ + +/* + * writes result of a * b into r + * returns 1 if a * b overflowed else returns 0 + */ +static NPY_INLINE int +npy_mul_with_overflow_@name@(@type@ * r, @type@ a, @type@ b) +{ + const @type@ half_sz = (((@type@)1 << (sizeof(a) * 8 / 2)) - 1); + + *r = a * b; + /* + * avoid expensive division on common no overflow case + * could be improved via compiler intrinsics e.g. via clang + * __builtin_mul_with_overflow, gcc __int128 or cpu overflow flags + */ + if (NPY_UNLIKELY((a | b) >= half_sz) && + a != 0 && b > @MAX@ / a) { + return 1; + } + + return 0; +} +/**end repeat**/ + +#endif diff --git a/numpy/core/src/scalarmathmodule.c.src b/numpy/core/src/scalarmathmodule.c.src index d789a3dd4..fac8aa399 100644 --- a/numpy/core/src/scalarmathmodule.c.src +++ b/numpy/core/src/scalarmathmodule.c.src @@ -16,129 +16,7 @@ #include "npy_pycompat.h" #include "numpy/halffloat.h" - -/** numarray adapted routines.... **/ - -/* - * Note that the C standard requires signed/unsigned integral - * types of the same rank to have the same width. - */ - -#if NPY_SIZEOF_LONGLONG == 64 - -static int -ulonglong_overflow(npy_ulonglong a, npy_ulonglong b) -{ - npy_ulonglong ah, al, bh, bl, w, x, y, z; - unsigned long long mask = 0xFFFFFFFFL; - - ah = (a >> 32); - al = (a & mask); - bh = (b >> 32); - bl = (b & mask); - - /* 128-bit product: z*2**64 + (x+y)*2**32 + w */ - w = al*bl; - x = bh*al; - y = ah*bl; - z = ah*bh; - - /* *c = ((x + y)<<32) + w; */ - return z || (x >> 32) || (y >> 32) || - (((x & mask) + (y & mask) + (w >> 32)) >> 32); -} - -static int -slonglong_overflow(npy_longlong a0, npy_longlong b0) -{ - npy_ulonglong a, b; - npy_ulonglong ah, al, bh, bl, w, x, y, z; - long long mask = 0xFFFFFFFFL; - - a = (a0 < 0) ? -a0 : a0; - b = (b0 < 0) ? -b0 : b0; - - ah = (a >> 32); - al = (a & mask); - bh = (b >> 32); - bl = (b & mask); - - w = al*bl; - x = bh*al; - y = ah*bl; - z = ah*bh; - - return z || (x >> 31) || (y >> 31) || - (((x & mask) + (y & mask) + (w >> 32)) >> 31); -} - -#elif NPY_SIZEOF_LONGLONG == 128 - -static int -ulonglong_overflow(npy_ulonglong a, npy_ulonglong b) -{ - npy_ulonglong ah, al, bh, bl, w, x, y, z; - unsigned long long mask = 0xFFFFFFFFFFFFFFFFL; - - ah = (a >> 64); - al = (a & mask); - bh = (b >> 64); - bl = (b & mask); - - /* 128-bit product: z*2**64 + (x+y)*2**32 + w */ - w = al*bl; - x = bh*al; - y = ah*bl; - z = ah*bh; - - /* *c = ((x + y)<<32) + w; */ - return z || (x >> 64) || (y >> 64) || - (((x & mask) + (y & mask) + (w >> 64)) >> 64); -} - -static int -slonglong_overflow(npy_longlong a0, npy_longlong b0) -{ - npy_ulonglong a, b; - npy_ulonglong ah, al, bh, bl, w, x, y, z; - long long mask = 0xFFFFFFFFFFFFFFFFL; - - a = (a0 < 0) ? -a0 : a0; - b = (b0 < 0) ? -b0 : b0; - - ah = (a >> 64); - al = (a & mask); - bh = (b >> 64); - bl = (b & mask); - - w = al*bl; - x = bh*al; - y = ah*bl; - z = ah*bh; - - return z || (x >> 63) || (y >> 63) || - (((x & mask) + (y & mask) + (w >> 64)) >> 63); -} - -#else - -static int -ulonglong_overflow(npy_ulonglong NPY_UNUSED(a), npy_ulonglong NPY_UNUSED(b)) -{ - return 0; -} - -static int -slonglong_overflow(npy_longlong NPY_UNUSED(a0), npy_longlong NPY_UNUSED(b0)) -{ - return 0; -} - -#endif - - -/** end direct numarray code **/ - +#include "scalarmathmodule.h" /* Basic operations: * @@ -245,13 +123,11 @@ static void * #type = npy_int, npy_uint, npy_long, npy_ulong, * npy_longlong, npy_ulonglong# * #SIZE = INT*2, LONG*2, LONGLONG*2# - * #char = (s, u)*3# */ #if NPY_SIZEOF_LONGLONG == NPY_SIZEOF_@SIZE@ static void @name@_ctype_multiply(@type@ a, @type@ b, @type@ *out) { - *out = a * b; - if (@char@longlong_overflow(a, b)) { + if (npy_mul_with_overflow_@name@(out, a, b)) { npy_set_floatstatus_overflow(); } return; |