diff options
author | Raul Cota <rcota@hotmail.com> | 2013-01-22 11:32:26 -0700 |
---|---|---|
committer | Raul Cota <rcota@hotmail.com> | 2013-01-22 11:32:26 -0700 |
commit | cc70e5a0db562c96a4813f95990be72357591465 (patch) | |
tree | 4b6742e7f09b523af6d874ccc4a759081efaa822 /numpy/core/src/scalarmathmodule.c.src | |
parent | ce289ab57cd5249c41c6166c478b9d054fca4ddf (diff) | |
download | numpy-cc70e5a0db562c96a4813f95990be72357591465.tar.gz |
Avoid conversion to NumPy Scalar
After profiling, I noticed that a bottleneck for NumPy scalar operations
occurs when extracting the underlying C value from a Python float:
the code first converts the Python scalar into its matching NumPy
scalar (e.g. PyFloat -> float64) and only then extracts the C value from
the NumPy scalar.
For some types, it is a lot faster to just extract the value directly
from the Python scalar.
I only did this for PyFloat in this modified code, but the code is laid out
such that it can easily be extended to other types such as integers. I
did not do them because I was unsure whether there were special scenarios to
handle across OSes and/or between 32- and 64-bit platforms. The speed
ratios for the different operations are listed below (old time / new time
with modifications). In other words, the bigger the number, the bigger
the speedup we get.
Tested with Python 2.6 on Windows
RATIO TEST
1.1 Array * Array
1.1 PyFloat * Array
1.1 Float64 * Array
1.0 PyFloat + Array
1.3 Float64 + Array
1.1 PyFloat * PyFloat
1.0 Float64 * Float64
4.0 PyFloat * Float64
2.9 PyFloat * vector1[1]
3.9 PyFloat + Float64
9.8 PyFloat < Float64
9.9 PyFloat < Float64
1.0 Create array from list
1.0 Assign PyFloat to all
1.0 Assign Float64 to all
4.2 Float64 * pyFloat * pyFloat * pyFloat * pyFloat
1.0 pyFloat * pyFloat * pyFloat * pyFloat * pyFloat
1.0 Float64 * Float64 * Float64 * Float64 * Float64
1.0 Float64 ** 2
1.0 pyFloat ** 2
Diffstat (limited to 'numpy/core/src/scalarmathmodule.c.src')
-rw-r--r-- | numpy/core/src/scalarmathmodule.c.src | 65 |
1 files changed, 61 insertions, 4 deletions
diff --git a/numpy/core/src/scalarmathmodule.c.src b/numpy/core/src/scalarmathmodule.c.src index 57c610b9e..6fc3f4541 100644 --- a/numpy/core/src/scalarmathmodule.c.src +++ b/numpy/core/src/scalarmathmodule.c.src @@ -654,19 +654,19 @@ static void /**begin repeat * #name = byte, ubyte, short, ushort, int, uint, * long, ulong, longlong, ulonglong, - * half, float, double, longdouble, + * half, float, longdouble, * cfloat, cdouble, clongdouble# * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, * npy_long, npy_ulong, npy_longlong, npy_ulonglong, - * npy_half, npy_float, npy_double, npy_longdouble, + * npy_half, npy_float, npy_longdouble, * npy_cfloat, npy_cdouble, npy_clongdouble# * #Name = Byte, UByte, Short, UShort, Int, UInt, * Long, ULong, LongLong, ULongLong, - * Half, Float, Double, LongDouble, + * Half, Float, LongDouble, * CFloat, CDouble, CLongDouble# * #TYPE = NPY_BYTE, NPY_UBYTE, NPY_SHORT, NPY_USHORT, NPY_INT, NPY_UINT, * NPY_LONG, NPY_ULONG, NPY_LONGLONG, NPY_ULONGLONG, - * NPY_HALF, NPY_FLOAT, NPY_DOUBLE, NPY_LONGDOUBLE, + * NPY_HALF, NPY_FLOAT, NPY_LONGDOUBLE, * NPY_CFLOAT, NPY_CDOUBLE, NPY_CLONGDOUBLE# */ @@ -711,6 +711,63 @@ _@name@_convert_to_ctype(PyObject *a, @type@ *arg1) /**end repeat**/ +/* Same as above but added exact checks against known python types for speed */ + +/**begin repeat + * #name = double# + * #type = npy_double# + * #Name = Double# + * #TYPE = NPY_DOUBLE# + * #PYCHECKEXACT = PyFloat_CheckExact# + * #PYEXTRACTCTYPE = PyFloat_AS_DOUBLE# + */ + +static int +_@name@_convert_to_ctype(PyObject *a, @type@ *arg1) +{ + PyObject *temp; + + if (@PYCHECKEXACT@(a)){ + *arg1 = @PYEXTRACTCTYPE@(a); + return 0; + } + + if (PyArray_IsScalar(a, @Name@)) { + *arg1 = PyArrayScalar_VAL(a, @Name@); + return 0; + } + else if (PyArray_IsScalar(a, Generic)) { + PyArray_Descr *descr1; + + if (!PyArray_IsScalar(a, Number)) { + return -1; + } + descr1 = PyArray_DescrFromTypeObject((PyObject *)Py_TYPE(a)); + if 
(PyArray_CanCastSafely(descr1->type_num, @TYPE@)) { + PyArray_CastScalarDirect(a, descr1, arg1, @TYPE@); + Py_DECREF(descr1); + return 0; + } + else { + Py_DECREF(descr1); + return -1; + } + } + else if (PyArray_GetPriority(a, NPY_PRIORITY) > NPY_PRIORITY) { + return -2; + } + else if ((temp = PyArray_ScalarFromObject(a)) != NULL) { + int retval = _@name@_convert_to_ctype(temp, arg1); + + Py_DECREF(temp); + return retval; + } + return -2; +} + +/**end repeat**/ + + /**begin repeat * #name = byte, ubyte, short, ushort, int, uint, * long, ulong, longlong, ulonglong, |