summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNathaniel J. Smith <njs@pobox.com>2016-03-17 18:47:27 +0000
committerNathaniel J. Smith <njs@pobox.com>2016-03-17 18:47:27 +0000
commit4d0875aa31355066c7acf2b8de1bbb366fc46d18 (patch)
tree738d41ca44c4e3d3f0e722e34ac84aa10403f725
parentfdfc4807cbe2de71f74053c45ae0de383332c4e1 (diff)
parent19655d1d80f299bd76982d485b74aa8b4b0018d6 (diff)
downloadnumpy-4d0875aa31355066c7acf2b8de1bbb366fc46d18.tar.gz
Merge pull request #7198 from seberg/unlock_gil_gufuncs
ENH: Unlock the GIL for gufuncs
-rw-r--r--doc/release/1.12.0-notes.rst5
-rw-r--r--numpy/core/src/umath/ufunc_object.c19
2 files changed, 24 insertions, 0 deletions
diff --git a/doc/release/1.12.0-notes.rst b/doc/release/1.12.0-notes.rst
index 35ce3ae4a..f55573156 100644
--- a/doc/release/1.12.0-notes.rst
+++ b/doc/release/1.12.0-notes.rst
@@ -147,6 +147,11 @@ masked out when the reduce method was used. The new identity is -1, which
should work properly on two's complement machines as all bits will be set to
one.
+Generalized Ufuncs will now unlock the GIL
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Generalized Ufuncs, including most of the linalg module, will now unlock
+the Python global interpreter lock.
+
Changes
=======
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index c4a2b8560..6eb0aae55 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -1951,6 +1951,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
npy_intp iter_shape[NPY_MAXARGS];
NpyIter *iter = NULL;
npy_uint32 iter_flags;
+ npy_intp total_problem_size;
/* These parameters come from extobj= or from a TLS global */
int buffersize = 0, errormask = 0;
@@ -2346,6 +2347,16 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
}
}
+ total_problem_size = NpyIter_GetIterSize(iter);
+ if (total_problem_size < 0) {
+ /*
+ * Only used for threading. If negative (this means that it is
+ * larger than ssize_t before axes removal), assume that the actual
+ * problem is large enough to be threaded usefully.
+ */
+ total_problem_size = 1000;
+ }
+
/* Remove all the core output dimensions from the iterator */
for (i = broadcast_ndim; i < iter_ndim; ++i) {
if (NpyIter_RemoveAxis(iter, broadcast_ndim) != NPY_SUCCEED) {
@@ -2387,6 +2398,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
NpyIter_IterNextFunc *iternext;
char **dataptr;
npy_intp *count_ptr;
+ NPY_BEGIN_THREADS_DEF;
/* Get the variables needed for the loop */
iternext = NpyIter_GetIterNext(iter, NULL);
@@ -2397,10 +2409,17 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
dataptr = NpyIter_GetDataPtrArray(iter);
count_ptr = NpyIter_GetInnerLoopSizePtr(iter);
+ if (!needs_api && !NpyIter_IterationNeedsAPI(iter)) {
+ NPY_BEGIN_THREADS_THRESHOLDED(total_problem_size);
+ }
do {
inner_dimensions[0] = *count_ptr;
innerloop(dataptr, inner_dimensions, inner_strides, innerloopdata);
} while (iternext(iter));
+
+ if (!needs_api && !NpyIter_IterationNeedsAPI(iter)) {
+ NPY_END_THREADS;
+ }
} else {
/**
* For each output operand, check if it has non-zero size,