summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Charles Harris <charlesr.harris@gmail.com>, 2014-11-29 11:19:22 -0500
committer: Charles Harris <charlesr.harris@gmail.com>, 2014-11-29 11:19:22 -0500
commit: 0afa5fc05032cacae38280063544ca5d315d6d1d (patch)
tree: d6e82eac6d0ee3f0f623f6fe58633088a9ac6655
parent: 710be5b4c61aded0d92a057bf488d71af86869f1 (diff)
parent: 668668600d0097d26b41861f9dd3cdbe24cf7472 (diff)
download: numpy-0afa5fc05032cacae38280063544ca5d315d6d1d.tar.gz
Merge pull request #5304 from juliantaylor/mul-overflow
ENH: use gcc intrinsic for overflow checked multiplication
-rw-r--r-- numpy/core/setup_common.py | 1
-rw-r--r-- numpy/core/src/private/templ_common.h.src | 6
2 files changed, 5 insertions, 2 deletions
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py
index e51797c03..0b18bc6c6 100644
--- a/numpy/core/setup_common.py
+++ b/numpy/core/setup_common.py
@@ -118,6 +118,7 @@ OPTIONAL_INTRINSICS = [("__builtin_isnan", '5.'),
("__builtin_bswap32", '5u'),
("__builtin_bswap64", '5u'),
("__builtin_expect", '5, 0'),
+ ("__builtin_mul_overflow", '5, 5, (int*)5'),
("_mm_load_ps", '(float*)0', "xmmintrin.h"), # SSE
("_mm_load_pd", '(double*)0', "emmintrin.h"), # SSE2
]
diff --git a/numpy/core/src/private/templ_common.h.src b/numpy/core/src/private/templ_common.h.src
index 8b71f2c3e..dd6c7bf23 100644
--- a/numpy/core/src/private/templ_common.h.src
+++ b/numpy/core/src/private/templ_common.h.src
@@ -21,13 +21,14 @@
static NPY_INLINE int
npy_mul_with_overflow_@name@(@type@ * r, @type@ a, @type@ b)
{
+#ifdef HAVE___BUILTIN_MUL_OVERFLOW
+ return __builtin_mul_overflow(a, b, r);
+#else
const @type@ half_sz = (((@type@)1 << (sizeof(a) * 8 / 2)) - 1);
*r = a * b;
/*
* avoid expensive division on common no overflow case
- * could be improved via compiler intrinsics e.g. via clang
- * __builtin_mul_with_overflow, gcc __int128 or cpu overflow flags
*/
if (NPY_UNLIKELY((a | b) >= half_sz) &&
a != 0 && b > @MAX@ / a) {
@@ -35,6 +36,7 @@ npy_mul_with_overflow_@name@(@type@ * r, @type@ a, @type@ b)
}
return 0;
+#endif
}
/**end repeat**/