summary refs log tree commit diff
path: root/numpy/core/src
diff options
context:
space:
mode:
author Charles Harris <charlesr.harris@gmail.com> 2013-10-14 10:58:47 -0700
committer Charles Harris <charlesr.harris@gmail.com> 2013-10-14 10:58:47 -0700
commit 681025d3f601dd2ac4ec3b24f728b4e319218c8d (patch)
tree 44a0b4a2a43365c3dc51de441aa2700e5df0ba35 /numpy/core/src
parent 4dbf3fe9ae3c0f06dd129646baa7a4f6e498291d (diff)
parent 19d344bfd4c0b348e6c978a6c90a1da4372d8f82 (diff)
download numpy-681025d3f601dd2ac4ec3b24f728b4e319218c8d.tar.gz
Merge pull request #3820 from juliantaylor/microopt
A couple micro optimizations
Diffstat (limited to 'numpy/core/src')
-rw-r--r-- numpy/core/src/multiarray/common.c 13
-rw-r--r-- numpy/core/src/multiarray/common.h 23
-rw-r--r-- numpy/core/src/multiarray/ctors.c 9
-rw-r--r-- numpy/core/src/multiarray/multiarraymodule.c 5
4 files changed, 33 insertions, 17 deletions
diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c
index 5a7a58b33..4e2d64be3 100644
--- a/numpy/core/src/multiarray/common.c
+++ b/numpy/core/src/multiarray/common.c
@@ -675,7 +675,8 @@ _zerofill(PyArrayObject *ret)
NPY_NO_EXPORT int
_IsAligned(PyArrayObject *ap)
{
- unsigned int i, aligned = 1;
+ unsigned int i;
+ npy_uintp aligned;
const unsigned int alignment = PyArray_DESCR(ap)->alignment;
/* The special casing for STRING and VOID types was removed
@@ -688,24 +689,24 @@ _IsAligned(PyArrayObject *ap)
if (alignment == 1) {
return 1;
}
- aligned = npy_is_aligned(PyArray_DATA(ap), alignment);
+ aligned = (npy_uintp)PyArray_DATA(ap);
for (i = 0; i < PyArray_NDIM(ap); i++) {
#if NPY_RELAXED_STRIDES_CHECKING
+ /* skip dim == 1 as it is not required to have stride 0 */
if (PyArray_DIM(ap, i) > 1) {
/* if shape[i] == 1, the stride is never used */
- aligned &= npy_is_aligned((void*)PyArray_STRIDES(ap)[i],
- alignment);
+ aligned |= (npy_uintp)PyArray_STRIDES(ap)[i];
}
else if (PyArray_DIM(ap, i) == 0) {
/* an array with zero elements is always aligned */
return 1;
}
#else /* not NPY_RELAXED_STRIDES_CHECKING */
- aligned &= npy_is_aligned((void*)PyArray_STRIDES(ap)[i], alignment);
+ aligned |= (npy_uintp)PyArray_STRIDES(ap)[i];
#endif /* not NPY_RELAXED_STRIDES_CHECKING */
}
- return aligned != 0;
+ return npy_is_aligned(aligned, alignment);
}
NPY_NO_EXPORT npy_bool
diff --git a/numpy/core/src/multiarray/common.h b/numpy/core/src/multiarray/common.h
index 9df644210..ffb571b2e 100644
--- a/numpy/core/src/multiarray/common.h
+++ b/numpy/core/src/multiarray/common.h
@@ -84,6 +84,29 @@ npy_is_aligned(const void * p, const npy_uintp alignment)
}
}
+/*
+ * Multiply a by b with overflow detection.
+ *
+ * The product is always stored in *r; when it does not fit in npy_intp the
+ * stored value is the product wrapped modulo 2**N and must not be relied on.
+ * Works for operands of either sign.
+ *
+ * Returns 1 if a * b overflowed, else returns 0.
+ */
+static NPY_INLINE int
+npy_mul_with_overflow_intp(npy_intp * r, npy_intp a, npy_intp b)
+{
+    /*
+     * Two values in [0, half_sz) always have a product <= NPY_MAX_INTP.
+     * The sign bit is excluded from the width, otherwise operands just
+     * below 2**(N/2) would skip the check although their product overflows.
+     */
+    const npy_uintp half_sz = (npy_uintp)1 << ((sizeof(a) * 8 - 1) / 2);
+    const npy_intp min_intp = -NPY_MAX_INTP - 1;
+    int overflow = 0;
+
+    /*
+     * avoid expensive division on common no overflow case
+     * (negative operands have the top bit set, so they take the slow path)
+     * could be improved via compiler intrinsics e.g. via clang
+     * __builtin_mul_with_overflow, gcc __int128 or cpu overflow flags
+     */
+    if (NPY_UNLIKELY(((npy_uintp)a | (npy_uintp)b) >= half_sz)) {
+        if (a > 0) {
+            overflow = (b > 0) ? (a > NPY_MAX_INTP / b)
+                               : (b < min_intp / a);
+        }
+        else {
+            overflow = (b > 0) ? (a < min_intp / b)
+                               : (a != 0 && b < NPY_MAX_INTP / a);
+        }
+    }
+
+    /* multiply as unsigned: wraps around instead of signed-overflow UB */
+    *r = (npy_intp)((npy_uintp)a * (npy_uintp)b);
+
+    return overflow;
+}
#include "ucsnarrow.h"
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index d0b75b47a..bef3feec1 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -888,7 +888,6 @@ PyArray_NewFromDescr_int(PyTypeObject *subtype, PyArray_Descr *descr, int nd,
PyArrayObject_fields *fa;
int i;
size_t sd;
- npy_intp largest;
npy_intp size;
if (descr->subarray) {
@@ -937,7 +936,6 @@ PyArray_NewFromDescr_int(PyTypeObject *subtype, PyArray_Descr *descr, int nd,
}
}
- largest = NPY_MAX_INTP / sd;
for (i = 0; i < nd; i++) {
npy_intp dim = dims[i];
@@ -960,17 +958,14 @@ PyArray_NewFromDescr_int(PyTypeObject *subtype, PyArray_Descr *descr, int nd,
/*
* Care needs to be taken to avoid integer overflow when
* multiplying the dimensions together to get the total size of the
- * array. Hence before each multiplication we first check that the
- * product will not exceed the maximum allowable size.
+ * array.
*/
- if (dim > largest) {
+ if (npy_mul_with_overflow_intp(&size, size, dim)) {
PyErr_SetString(PyExc_ValueError,
"array is too big.");
Py_DECREF(descr);
return NULL;
}
- size *= dim;
- largest /= dim;
}
fa = (PyArrayObject_fields *) subtype->tp_alloc(subtype, 0);
diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c
index 2eccd8153..f0ada8618 100644
--- a/numpy/core/src/multiarray/multiarraymodule.c
+++ b/numpy/core/src/multiarray/multiarraymodule.c
@@ -115,7 +115,6 @@ NPY_NO_EXPORT npy_intp
PyArray_OverflowMultiplyList(npy_intp *l1, int n)
{
npy_intp prod = 1;
- npy_intp imax = NPY_MAX_INTP;
int i;
for (i = 0; i < n; i++) {
@@ -124,11 +123,9 @@ PyArray_OverflowMultiplyList(npy_intp *l1, int n)
if (dim == 0) {
return 0;
}
- if (dim > imax) {
+ if (npy_mul_with_overflow_intp(&prod, prod, dim)) {
return -1;
}
- imax /= dim;
- prod *= dim;
}
return prod;
}