diff options
author | Nathaniel J. Smith <njs@pobox.com> | 2016-03-17 18:47:27 +0000 |
---|---|---|
committer | Nathaniel J. Smith <njs@pobox.com> | 2016-03-17 18:47:27 +0000 |
commit | 4d0875aa31355066c7acf2b8de1bbb366fc46d18 (patch) | |
tree | 738d41ca44c4e3d3f0e722e34ac84aa10403f725 | |
parent | fdfc4807cbe2de71f74053c45ae0de383332c4e1 (diff) | |
parent | 19655d1d80f299bd76982d485b74aa8b4b0018d6 (diff) | |
download | numpy-4d0875aa31355066c7acf2b8de1bbb366fc46d18.tar.gz |
Merge pull request #7198 from seberg/unlock_gil_gufuncs
ENH: Unlock the GIL for gufuncs
-rw-r--r-- | doc/release/1.12.0-notes.rst | 5 | ||||
-rw-r--r-- | numpy/core/src/umath/ufunc_object.c | 19 |
2 files changed, 24 insertions, 0 deletions
diff --git a/doc/release/1.12.0-notes.rst b/doc/release/1.12.0-notes.rst index 35ce3ae4a..f55573156 100644 --- a/doc/release/1.12.0-notes.rst +++ b/doc/release/1.12.0-notes.rst @@ -147,6 +147,11 @@ masked out when the reduce method was used. The new identity is -1, which should work properly on twos complement machines as all bits will be set to one. +Generalized Ufuncs will now unlock the GIL +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Generalized Ufuncs, including most of the linalg module, will now unlock +the Python global interpreter lock. + Changes ======= diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index c4a2b8560..6eb0aae55 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -1951,6 +1951,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc, npy_intp iter_shape[NPY_MAXARGS]; NpyIter *iter = NULL; npy_uint32 iter_flags; + npy_intp total_problem_size; /* These parameters come from extobj= or from a TLS global */ int buffersize = 0, errormask = 0; @@ -2346,6 +2347,16 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc, } } + total_problem_size = NpyIter_GetIterSize(iter); + if (total_problem_size < 0) { + /* + * Only used for threading, if negative (this means that it is + * larger than ssize_t before axes removal) assume that the actual + * problem is large enough to be threaded usefully.
+ */ + total_problem_size = 1000; + } + /* Remove all the core output dimensions from the iterator */ for (i = broadcast_ndim; i < iter_ndim; ++i) { if (NpyIter_RemoveAxis(iter, broadcast_ndim) != NPY_SUCCEED) { @@ -2387,6 +2398,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc, NpyIter_IterNextFunc *iternext; char **dataptr; npy_intp *count_ptr; + NPY_BEGIN_THREADS_DEF; /* Get the variables needed for the loop */ iternext = NpyIter_GetIterNext(iter, NULL); @@ -2397,10 +2409,17 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc, dataptr = NpyIter_GetDataPtrArray(iter); count_ptr = NpyIter_GetInnerLoopSizePtr(iter); + if (!needs_api && !NpyIter_IterationNeedsAPI(iter)) { + NPY_BEGIN_THREADS_THRESHOLDED(total_problem_size); + } do { inner_dimensions[0] = *count_ptr; innerloop(dataptr, inner_dimensions, inner_strides, innerloopdata); } while (iternext(iter)); + + if (!needs_api && !NpyIter_IterationNeedsAPI(iter)) { + NPY_END_THREADS; + } } else { /** * For each output operand, check if it has non-zero size, |