summaryrefslogtreecommitdiff
path: root/numpy
diff options
context:
space:
mode:
authormattip <matti.picus@gmail.com>2023-01-30 19:54:44 +0200
committermattip <matti.picus@gmail.com>2023-01-30 23:25:01 +0200
commiteb21b25093c79ac0bc135aed3d192c06079c5bbf (patch)
treebfb88814504e8025e56cc38ca77f104cd0b082b6 /numpy
parent7910e2bf14b8bef95c3180fbf667458e29426a0e (diff)
downloadnumpy-eb21b25093c79ac0bc135aed3d192c06079c5bbf.tar.gz
ENH: use an indexed loop if possible in ufunc_at
Diffstat (limited to 'numpy')
-rw-r--r--numpy/core/src/umath/dispatching.c4
-rw-r--r--numpy/core/src/umath/loops.c.src6
-rw-r--r--numpy/core/src/umath/loops_arithm_fp.dispatch.c.src2
-rw-r--r--numpy/core/src/umath/loops_arithmetic.dispatch.c.src4
-rw-r--r--numpy/core/src/umath/scalarmath.c.src2
-rw-r--r--numpy/core/src/umath/ufunc_object.c61
6 files changed, 63 insertions, 16 deletions
diff --git a/numpy/core/src/umath/dispatching.c b/numpy/core/src/umath/dispatching.c
index 6d6c481fb..e09568d69 100644
--- a/numpy/core/src/umath/dispatching.c
+++ b/numpy/core/src/umath/dispatching.c
@@ -914,8 +914,8 @@ promote_and_get_ufuncimpl(PyUFuncObject *ufunc,
int nin = ufunc->nin, nargs = ufunc->nargs;
/*
- * Get the actual DTypes we operate with by mixing the operand array
- * ones with the passed signature.
+ * Get the actual DTypes we operate with by setting op_dtypes[i] from
+ * signature[i].
*/
for (int i = 0; i < nargs; i++) {
if (signature[i] != NULL) {
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index d4e246783..cd5a431cd 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -547,7 +547,7 @@ NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ int
char *ip1 = args[0];
npy_intp *indx = (npy_intp *)args[1];
char *value = args[2];
- npy_intp is1 = steps[0], is2 = steps[1], os1 = steps[2];
+ npy_intp is1 = steps[0], is2 = steps[1] / sizeof(npy_intp), os1 = steps[2];
npy_intp n = dimensions[0];
npy_intp i;
@type@ *indexed;
@@ -1559,7 +1559,7 @@ LONGDOUBLE_@kind@_indexed(char **args, npy_intp const *dimensions, npy_intp cons
char *ip1 = args[0];
npy_intp *indx = (npy_intp *)args[1];
char *value = args[2];
- npy_intp is1 = steps[0], is2 = steps[1], os1 = steps[2];
+ npy_intp is1 = steps[0], is2 = steps[1] / sizeof(npy_intp), os1 = steps[2];
npy_intp n = dimensions[0];
npy_intp i;
npy_longdouble *indexed;
@@ -1681,7 +1681,7 @@ HALF_@kind@_indexed(void *NPY_UNUSED(context), char **args, npy_intp const *dime
char *ip1 = args[0];
npy_intp *indx = (npy_intp *)args[1];
char *value = args[2];
- npy_intp is1 = steps[0], is2 = steps[1], os1 = steps[2];
+ npy_intp is1 = steps[0], is2 = steps[1] / sizeof(npy_intp), os1 = steps[2];
npy_intp n = dimensions[0];
npy_intp i;
npy_half *indexed;
diff --git a/numpy/core/src/umath/loops_arithm_fp.dispatch.c.src b/numpy/core/src/umath/loops_arithm_fp.dispatch.c.src
index 1874573ce..d37facef8 100644
--- a/numpy/core/src/umath/loops_arithm_fp.dispatch.c.src
+++ b/numpy/core/src/umath/loops_arithm_fp.dispatch.c.src
@@ -553,7 +553,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(@TYPE@_@kind@_indexed)
char *ip1 = args[0];
npy_intp *indx = (npy_intp *)args[1];
char *value = args[2];
- npy_intp is1 = steps[0], is2 = steps[1], os1 = steps[2];
+ npy_intp is1 = steps[0], is2 = steps[1] / sizeof(npy_intp), os1 = steps[2];
npy_intp n = dimensions[0];
npy_intp i;
@type@ *indexed;
diff --git a/numpy/core/src/umath/loops_arithmetic.dispatch.c.src b/numpy/core/src/umath/loops_arithmetic.dispatch.c.src
index 573590b1f..cef2adb34 100644
--- a/numpy/core/src/umath/loops_arithmetic.dispatch.c.src
+++ b/numpy/core/src/umath/loops_arithmetic.dispatch.c.src
@@ -402,7 +402,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(@TYPE@_divide_indexed)
char *ip1 = args[0];
npy_intp *indx = (npy_intp *)args[1];
char *value = args[2];
- npy_intp is1 = steps[0], is2 = steps[1], os1 = steps[2];
+ npy_intp is1 = steps[0], is2 = steps[1] / sizeof(npy_intp), os1 = steps[2];
npy_intp n = dimensions[0];
npy_intp i;
@type@ *indexed;
@@ -488,7 +488,7 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(@TYPE@_divide_indexed)
char *ip1 = args[0];
npy_intp *indx = (npy_intp *)args[1];
char *value = args[2];
- npy_intp is1 = steps[0], is2 = steps[1], os1 = steps[2];
+ npy_intp is1 = steps[0], is2 = steps[1] / sizeof(npy_intp), os1 = steps[2];
npy_intp n = dimensions[0];
npy_intp i;
@type@ *indexed;
diff --git a/numpy/core/src/umath/scalarmath.c.src b/numpy/core/src/umath/scalarmath.c.src
index 2e6da3cd2..47d42b899 100644
--- a/numpy/core/src/umath/scalarmath.c.src
+++ b/numpy/core/src/umath/scalarmath.c.src
@@ -453,7 +453,7 @@ static inline int
@name@_ctype_divide(@type@ a, @type@ b, @type@ *out)
{
char *args[3] = {(char *)&a, (char *)&b, (char *)out};
- npy_intp steps[3];
+ npy_intp steps[3] = {0, 0, 0};
npy_intp size = 1;
@TYPE@_divide(args, &size, steps, NULL);
return 0;
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index 8739a5095..d088e22de 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -5893,13 +5893,45 @@ new_array_op(PyArrayObject *op_array, char *data)
return (PyArrayObject *)r;
}
+/*
+ * If operands are 1D we can use the indexed loop to do the work
+ * Returns 0 if successful, -1 otherwise
+ */
+static int
+try_trivial_at_loop(PyArrayMethodObject *ufuncimpl, NPY_ARRAYMETHOD_FLAGS flags,
+ PyArrayMapIterObject *iter,
+ PyArrayObject *op1_array, PyArrayObject *op2_array,
+ PyArrayMethod_Context *context)
+{
+ if (PyArray_NDIM(op1_array) != 1) {
+ return -1;
+ }
+ /* check that nop == 3 */
+ if (ufuncimpl->nin >1 && PyArray_NDIM(op2_array) != 1) {
+ return -1;
+ }
+ if (iter->numiter > 1) {
+ return -1;
+ }
+ char *args[3];
+ npy_intp dimensions = iter->size;
+ npy_intp steps[3];
+ args[0] = (char *) iter->baseoffset;
+ args[1] = (char *) iter->outer_ptrs[0];
+ args[2] = (char *)PyArray_DATA(op2_array);
+ steps[0] = iter->fancy_strides[0];
+ steps[1] = iter->outer_strides[0];
+ steps[2] = PyArray_STRIDE(op2_array, 0);
+ ufuncimpl->contiguous_indexed_loop(context, args, &dimensions, steps, NULL);
+ return 0;
+}
+
static int
ufunc_at__fast_iter(PyUFuncObject *ufunc, NPY_ARRAYMETHOD_FLAGS flags,
PyArrayMapIterObject *iter, PyArrayIterObject *iter2,
PyArrayObject *op1_array, PyArrayObject *op2_array,
PyArrayMethod_StridedLoop *strided_loop,
PyArrayMethod_Context *context,
- npy_intp strides[3],
NpyAuxData *auxdata
)
{
@@ -5921,6 +5953,7 @@ ufunc_at__fast_iter(PyUFuncObject *ufunc, NPY_ARRAYMETHOD_FLAGS flags,
NPY_BEGIN_THREADS;
}
+ npy_intp strides[3] = {0, 0, 0};
/*
* Iterate over first and second operands and call ufunc
* for each pair of inputs
@@ -5972,7 +6005,6 @@ ufunc_at__slow_iter(PyUFuncObject *ufunc, NPY_ARRAYMETHOD_FLAGS flags,
PyArray_Descr *operation_descrs[3],
PyArrayMethod_StridedLoop *strided_loop,
PyArrayMethod_Context *context,
- npy_intp strides[3],
NpyAuxData *auxdata
)
{
@@ -6070,6 +6102,7 @@ ufunc_at__slow_iter(PyUFuncObject *ufunc, NPY_ARRAYMETHOD_FLAGS flags,
npy_clear_floatstatus_barrier((char*)&iter);
}
+ npy_intp strides[3] = {0, 0, 0};
if (!needs_api) {
NPY_BEGIN_THREADS;
}
@@ -6305,7 +6338,7 @@ ufunc_at(PyUFuncObject *ufunc, PyObject *args)
}
}
- /*
+ /*
* Create a map iterator (there is no multi-mapiter, so we need at least 2
* iterators: one for the mapping, and another for the second operand)
*/
@@ -6383,11 +6416,25 @@ ufunc_at(PyUFuncObject *ufunc, PyObject *args)
}
}
if (fast_path == 1) {
- res = ufunc_at__fast_iter(ufunc, flags, iter, iter2, op1_array, op2_array,
- strided_loop, &context, strides, auxdata);
+ /*
+ * Try to use trivial loop (1d, no casting, aligned) if
+ * - the matching info has a indexed loop
+ * - all operands are 1d
+ */
+ if (ufuncimpl->contiguous_indexed_loop != NULL) {
+ res = try_trivial_at_loop(ufuncimpl, flags, iter, op1_array,
+ op2_array, &context);
+
+ }
+ if (res == -1) {
+ /* Couldn't use the fastest path, use the faster path */
+ res = ufunc_at__fast_iter(ufunc, flags, iter, iter2, op1_array,
+ op2_array, strided_loop, &context, auxdata);
+ }
} else {
- res = ufunc_at__slow_iter(ufunc, flags, iter, iter2, op1_array, op2_array,
- operation_descrs, strided_loop, &context, strides, auxdata);
+ res = ufunc_at__slow_iter(ufunc, flags, iter, iter2, op1_array,
+ op2_array, operation_descrs, strided_loop, &context,
+ auxdata);
}
fail: