summaryrefslogtreecommitdiff
path: root/numpy/core/src/scalarmathmodule.c.src
diff options
context:
space:
mode:
authorRaul Cota <rcota@hotmail.com>2013-01-22 11:32:26 -0700
committerRaul Cota <rcota@hotmail.com>2013-01-22 11:32:26 -0700
commitcc70e5a0db562c96a4813f95990be72357591465 (patch)
tree4b6742e7f09b523af6d874ccc4a759081efaa822 /numpy/core/src/scalarmathmodule.c.src
parentce289ab57cd5249c41c6166c478b9d054fca4ddf (diff)
downloadnumpy-cc70e5a0db562c96a4813f95990be72357591465.tar.gz
Avoid conversion to NumPy Scalar
After profiling, I noticed that a bottleneck for NumPy scalar operations occurs when extracting the underlying C value from a Python float: the Python scalar is first converted into its matching NumPy scalar (e.g. PyFloat -> float64), and the C value is then extracted from that NumPy scalar. For some types, it is a lot faster to extract the value directly from the Python scalar. I only did this for PyFloat in this change, but the code is laid out so that it can easily be extended to other types such as integers. I did not do them because I was unsure whether there are special scenarios to handle across operating systems and/or between 32- and 64-bit platforms.

The speed ratios for different operations are listed below (old time / new time with the modifications). In other words, the bigger the number, the bigger the speedup.

Tested with Python 2.6 on Windows.

RATIO TEST
1.1 Array * Array
1.1 PyFloat * Array
1.1 Float64 * Array
1.0 PyFloat + Array
1.3 Float64 + Array
1.1 PyFloat * PyFloat
1.0 Float64 * Float64
4.0 PyFloat * Float64
2.9 PyFloat * vector1[1]
3.9 PyFloat + Float64
9.8 PyFloat < Float64
9.9 PyFloat < Float64
1.0 Create array from list
1.0 Assign PyFloat to all
1.0 Assign Float64 to all
4.2 Float64 * pyFloat * pyFloat * pyFloat * pyFloat
1.0 pyFloat * pyFloat * pyFloat * pyFloat * pyFloat
1.0 Float64 * Float64 * Float64 * Float64 * Float64
1.0 Float64 ** 2
1.0 pyFloat ** 2
Diffstat (limited to 'numpy/core/src/scalarmathmodule.c.src')
-rw-r--r--numpy/core/src/scalarmathmodule.c.src65
1 files changed, 61 insertions, 4 deletions
diff --git a/numpy/core/src/scalarmathmodule.c.src b/numpy/core/src/scalarmathmodule.c.src
index 57c610b9e..6fc3f4541 100644
--- a/numpy/core/src/scalarmathmodule.c.src
+++ b/numpy/core/src/scalarmathmodule.c.src
@@ -654,19 +654,19 @@ static void
/**begin repeat
* #name = byte, ubyte, short, ushort, int, uint,
* long, ulong, longlong, ulonglong,
- * half, float, double, longdouble,
+ * half, float, longdouble,
* cfloat, cdouble, clongdouble#
* #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint,
* npy_long, npy_ulong, npy_longlong, npy_ulonglong,
- * npy_half, npy_float, npy_double, npy_longdouble,
+ * npy_half, npy_float, npy_longdouble,
* npy_cfloat, npy_cdouble, npy_clongdouble#
* #Name = Byte, UByte, Short, UShort, Int, UInt,
* Long, ULong, LongLong, ULongLong,
- * Half, Float, Double, LongDouble,
+ * Half, Float, LongDouble,
* CFloat, CDouble, CLongDouble#
* #TYPE = NPY_BYTE, NPY_UBYTE, NPY_SHORT, NPY_USHORT, NPY_INT, NPY_UINT,
* NPY_LONG, NPY_ULONG, NPY_LONGLONG, NPY_ULONGLONG,
- * NPY_HALF, NPY_FLOAT, NPY_DOUBLE, NPY_LONGDOUBLE,
+ * NPY_HALF, NPY_FLOAT, NPY_LONGDOUBLE,
* NPY_CFLOAT, NPY_CDOUBLE, NPY_CLONGDOUBLE#
*/
@@ -711,6 +711,63 @@ _@name@_convert_to_ctype(PyObject *a, @type@ *arg1)
/**end repeat**/
+/* Same as above, but with exact-type checks against known Python types added for speed. */
+
+/**begin repeat
+ * #name = double#
+ * #type = npy_double#
+ * #Name = Double#
+ * #TYPE = NPY_DOUBLE#
+ * #PYCHECKEXACT = PyFloat_CheckExact#
+ * #PYEXTRACTCTYPE = PyFloat_AS_DOUBLE#
+ */
+/*
+ * Extract the C value of `a` into *arg1.
+ *
+ * The conversions are attempted in this order:
+ *   1. the exact Python type check supplied by the template
+ *      (PyFloat_CheckExact for the double instantiation),
+ *   2. a NumPy scalar of exactly the target type,
+ *   3. any other NumPy Number scalar that can be cast safely to the
+ *      target type,
+ *   4. a retry on the object returned by PyArray_ScalarFromObject(a).
+ *
+ * Returns:
+ *    0  *arg1 was filled in;
+ *   -1  `a` is a NumPy scalar that is not a Number, or it cannot be cast
+ *       safely to the target type;
+ *   -2  the priority of `a` is above NPY_PRIORITY, or
+ *       PyArray_ScalarFromObject(a) returned NULL.
+ * The result of the retry in step 4 is returned unchanged.
+ */
+static int
+_@name@_convert_to_ctype(PyObject *a, @type@ *arg1)
+{
+ PyObject *temp;
+ /* Fast path: read the C value straight from the exact Python type, without building a NumPy scalar first. */
+ if (@PYCHECKEXACT@(a)){
+ *arg1 = @PYEXTRACTCTYPE@(a);
+ return 0;
+ }
+ /* NumPy scalar of exactly the target type: copy its stored value. */
+ if (PyArray_IsScalar(a, @Name@)) {
+ *arg1 = PyArrayScalar_VAL(a, @Name@);
+ return 0;
+ }
+ else if (PyArray_IsScalar(a, Generic)) { /* some other NumPy scalar */
+ PyArray_Descr *descr1;
+
+ if (!PyArray_IsScalar(a, Number)) { /* non-numeric NumPy scalars are rejected */
+ return -1;
+ }
+ descr1 = PyArray_DescrFromTypeObject((PyObject *)Py_TYPE(a)); /* NOTE(review): descr1 is dereferenced below without a NULL check -- confirm this call cannot fail here */
+ if (PyArray_CanCastSafely(descr1->type_num, @TYPE@)) {
+ PyArray_CastScalarDirect(a, descr1, arg1, @TYPE@); /* writes the cast value into *arg1 */
+ Py_DECREF(descr1); /* the descriptor reference is dropped on both branches */
+ return 0;
+ }
+ else {
+ Py_DECREF(descr1);
+ return -1;
+ }
+ }
+ else if (PyArray_GetPriority(a, NPY_PRIORITY) > NPY_PRIORITY) { /* `a` reports a priority above the default */
+ return -2;
+ }
+ else if ((temp = PyArray_ScalarFromObject(a)) != NULL) { /* got a scalar object for `a`: retry the conversion on it */
+ int retval = _@name@_convert_to_ctype(temp, arg1);
+ /* temp is no longer needed once the retry has finished. */
+ Py_DECREF(temp);
+ return retval;
+ }
+ return -2;
+}
+
+/**end repeat**/
+
+
/**begin repeat
* #name = byte, ubyte, short, ushort, int, uint,
* long, ulong, longlong, ulonglong,