diff options
author | Charles Harris <charlesr.harris@gmail.com> | 2014-08-17 15:10:08 -0600 |
---|---|---|
committer | Julian Taylor <jtaylor.debian@googlemail.com> | 2014-09-04 21:45:48 +0200 |
commit | 93feabba42a6c18a66224dae170ecee08e0f10f9 (patch) | |
tree | 01d327659e1f20c1a29fcbb8b55a194662228cb9 | |
parent | fee3c926fbd48f477121e95b86ede32fcdb30ea9 (diff) | |
download | numpy-93feabba42a6c18a66224dae170ecee08e0f10f9.tar.gz |
ENH: Move dotblas_innerproduct down into multiarray.
Move the dotblas_innerproduct function from the _dotblas.c file to a new
cblasfuncs.c file in multiarray and rename it cblas_innerproduct.
Modify it so that it can called directly from PyArray_InnerProduct and
do so. Fix up numeric.py and the tests to reflect these changes.
-rw-r--r-- | numpy/core/blasdot/_dotblas.c | 289 | ||||
-rw-r--r-- | numpy/core/numeric.py | 5 | ||||
-rw-r--r-- | numpy/core/src/multiarray/cblasfuncs.c | 83 | ||||
-rw-r--r-- | numpy/core/src/multiarray/cblasfuncs.h | 3 | ||||
-rw-r--r-- | numpy/core/src/multiarray/multiarraymodule.c | 12 | ||||
-rw-r--r-- | numpy/core/tests/test_blasdot.py | 1 |
6 files changed, 48 insertions, 345 deletions
diff --git a/numpy/core/blasdot/_dotblas.c b/numpy/core/blasdot/_dotblas.c index 4fc645358..7d7526626 100644 --- a/numpy/core/blasdot/_dotblas.c +++ b/numpy/core/blasdot/_dotblas.c @@ -9,7 +9,6 @@ #include "numpy/arrayobject.h" #include "npy_config.h" #include "npy_pycompat.h" -#include "ufunc_override.h" #ifndef CBLAS_HEADER #define CBLAS_HEADER "cblas.h" #endif @@ -49,75 +48,6 @@ static const float oneF[2] = {1.0, 0.0}, zeroF[2] = {0.0, 0.0}; /* - * Helper: dispatch to appropriate cblas_?gemm for typenum. - */ -static void -gemm(int typenum, enum CBLAS_ORDER order, - enum CBLAS_TRANSPOSE transA, enum CBLAS_TRANSPOSE transB, - int m, int n, int k, - PyArrayObject *A, int lda, PyArrayObject *B, int ldb, PyArrayObject *R) -{ - const void *Adata = PyArray_DATA(A), *Bdata = PyArray_DATA(B); - void *Rdata = PyArray_DATA(R); - - int ldc = PyArray_DIM(R, 1) > 1 ? PyArray_DIM(R, 1) : 1; - - switch (typenum) { - case NPY_DOUBLE: - cblas_dgemm(order, transA, transB, m, n, k, 1., - Adata, lda, Bdata, ldb, 0., Rdata, ldc); - break; - case NPY_FLOAT: - cblas_sgemm(order, transA, transB, m, n, k, 1.f, - Adata, lda, Bdata, ldb, 0.f, Rdata, ldc); - break; - case NPY_CDOUBLE: - cblas_zgemm(order, transA, transB, m, n, k, oneD, - Adata, lda, Bdata, ldb, zeroD, Rdata, ldc); - break; - case NPY_CFLOAT: - cblas_cgemm(order, transA, transB, m, n, k, oneF, - Adata, lda, Bdata, ldb, zeroF, Rdata, ldc); - break; - } -} - - -/* - * Helper: dispatch to appropriate cblas_?gemv for typenum. - */ -static void -gemv(int typenum, enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, - PyArrayObject *A, int lda, PyArrayObject *X, int incX, - PyArrayObject *R) -{ - const void *Adata = PyArray_DATA(A), *Xdata = PyArray_DATA(X); - void *Rdata = PyArray_DATA(R); - - int m = PyArray_DIM(A, 0), n = PyArray_DIM(A, 1); - - switch (typenum) { - case NPY_DOUBLE: - cblas_dgemv(order, trans, m, n, 1., Adata, lda, Xdata, incX, - 0., Rdata, 1); - break; - case NPY_FLOAT: - cblas_sgemv(order, trans, m, n, 1.f, Adata, lda, Xdata, incX, - 0.f, Rdata, 1); - break; - case NPY_CDOUBLE: - cblas_zgemv(order, trans, m, n, oneD, Adata, lda, Xdata, incX, - zeroD, Rdata, 1); - break; - case NPY_CFLOAT: - cblas_cgemv(order, trans, m, n, oneF, Adata, lda, Xdata, incX, - zeroF, Rdata, 1); - break; - } -} - - -/* * Initialize oldFunctions table. */ static void @@ -148,222 +78,6 @@ init_oldFunctions(void) typedef enum {_scalar, _column, _row, _matrix} MatrixShape; -static MatrixShape -_select_matrix_shape(PyArrayObject *array) -{ - switch (PyArray_NDIM(array)) { - case 0: - return _scalar; - case 1: - if (PyArray_DIM(array, 0) > 1) - return _column; - return _scalar; - case 2: - if (PyArray_DIM(array, 0) > 1) { - if (PyArray_DIM(array, 1) == 1) - return _column; - else - return _matrix; - } - if (PyArray_DIM(array, 1) == 1) - return _scalar; - return _row; - } - return _matrix; -} - - -/* - * This also makes sure that the data segment is aligned with - * an itemsize address as well by returning one if not true. - */ -static int -_bad_strides(PyArrayObject *ap) -{ - int itemsize = PyArray_ITEMSIZE(ap); - int i, N=PyArray_NDIM(ap); - npy_intp *strides = PyArray_STRIDES(ap); - - if (((npy_intp)(PyArray_DATA(ap)) % itemsize) != 0) { - return 1; - } - for (i = 0; i < N; i++) { - if ((strides[i] < 0) || (strides[i] % itemsize) != 0) { - return 1; - } - } - - return 0; -} - -/* - * innerproduct(a,b) - * - * Returns the inner product of a and b for arrays of - * floating point types. Like the generic NumPy equivalent the product - * sum is over the last dimension of a and b. - * NB: The first argument is not conjugated. - */ - -static PyObject * -dotblas_innerproduct(PyObject *NPY_UNUSED(dummy), PyObject *args) -{ - PyObject *op1, *op2; - PyArrayObject *ap1, *ap2, *ret; - int j, l, lda, ldb; - int typenum, nd; - npy_intp dimensions[NPY_MAXDIMS]; - PyTypeObject *subtype; - double prior1, prior2; - - if (!PyArg_ParseTuple(args, "OO", &op1, &op2)) { - return NULL; - } - - /* - * Inner product using the BLAS. The product sum is taken along the last - * dimensions of the two arrays. - * Only speeds things up for float double and complex types. - */ - - - typenum = PyArray_ObjectType(op1, 0); - typenum = PyArray_ObjectType(op2, typenum); - - /* This function doesn't handle other types */ - if ((typenum != NPY_DOUBLE && typenum != NPY_CDOUBLE && - typenum != NPY_FLOAT && typenum != NPY_CFLOAT)) { - return PyArray_Return((PyArrayObject *)PyArray_InnerProduct(op1, op2)); - } - - ret = NULL; - ap1 = (PyArrayObject *)PyArray_ContiguousFromObject(op1, typenum, 0, 0); - if (ap1 == NULL) { - return NULL; - } - ap2 = (PyArrayObject *)PyArray_ContiguousFromObject(op2, typenum, 0, 0); - if (ap2 == NULL) { - goto fail; - } - - if ((PyArray_NDIM(ap1) > 2) || (PyArray_NDIM(ap2) > 2)) { - /* This function doesn't handle dimensions greater than 2 */ - ret = (PyArrayObject *)PyArray_InnerProduct((PyObject *)ap1, - (PyObject *)ap2); - Py_DECREF(ap1); - Py_DECREF(ap2); - return PyArray_Return(ret); - } - - if (PyArray_NDIM(ap1) == 0 || PyArray_NDIM(ap2) == 0) { - /* One of ap1 or ap2 is a scalar */ - if (PyArray_NDIM(ap1) == 0) { - /* Make ap2 the scalar */ - PyArrayObject *t = ap1; - ap1 = ap2; - ap2 = t; - } - for (l = 1, j = 0; j < PyArray_NDIM(ap1); j++) { - dimensions[j] = PyArray_DIM(ap1, j); - l *= dimensions[j]; - } - nd = PyArray_NDIM(ap1); - } - else { - /* - * (PyArray_NDIM(ap1) <= 2 && PyArray_NDIM(ap2) <= 2) - * Both ap1 and ap2 are vectors or matrices - */ - l = PyArray_DIM(ap1, PyArray_NDIM(ap1)-1); - - if (PyArray_DIM(ap2, PyArray_NDIM(ap2)-1) != l) { - PyErr_SetString(PyExc_ValueError, "matrices are not aligned"); - goto fail; - } - nd = PyArray_NDIM(ap1)+PyArray_NDIM(ap2)-2; - - if (nd == 1) - dimensions[0] = (PyArray_NDIM(ap1) == 2) ? PyArray_DIM(ap1, 0) : PyArray_DIM(ap2, 0); - else if (nd == 2) { - dimensions[0] = PyArray_DIM(ap1, 0); - dimensions[1] = PyArray_DIM(ap2, 0); - } - } - - /* Choose which subtype to return */ - prior2 = PyArray_GetPriority((PyObject *)ap2, 0.0); - prior1 = PyArray_GetPriority((PyObject *)ap1, 0.0); - subtype = (prior2 > prior1 ? Py_TYPE(ap2) : Py_TYPE(ap1)); - - ret = (PyArrayObject *)PyArray_New(subtype, nd, dimensions, - typenum, NULL, NULL, 0, 0, - (PyObject *)\ - (prior2 > prior1 ? ap2 : ap1)); - - if (ret == NULL) { - goto fail; - } - NPY_BEGIN_ALLOW_THREADS - memset(PyArray_DATA(ret), 0, PyArray_NBYTES(ret)); - - if (PyArray_NDIM(ap2) == 0) { - /* Multiplication by a scalar -- Level 1 BLAS */ - if (typenum == NPY_DOUBLE) { - cblas_daxpy(l, *((double *)PyArray_DATA(ap2)), (double *)PyArray_DATA(ap1), 1, - (double *)PyArray_DATA(ret), 1); - } - else if (typenum == NPY_CDOUBLE) { - cblas_zaxpy(l, (double *)PyArray_DATA(ap2), (double *)PyArray_DATA(ap1), 1, - (double *)PyArray_DATA(ret), 1); - } - else if (typenum == NPY_FLOAT) { - cblas_saxpy(l, *((float *)PyArray_DATA(ap2)), (float *)PyArray_DATA(ap1), 1, - (float *)PyArray_DATA(ret), 1); - } - else if (typenum == NPY_CFLOAT) { - cblas_caxpy(l, (float *)PyArray_DATA(ap2), (float *)PyArray_DATA(ap1), 1, - (float *)PyArray_DATA(ret), 1); - } - } - else if (PyArray_NDIM(ap1) == 1 && PyArray_NDIM(ap2) == 1) { - /* Dot product between two vectors -- Level 1 BLAS */ - blas_dot(typenum, l, PyArray_DATA(ap1), PyArray_ITEMSIZE(ap1), - PyArray_DATA(ap2), PyArray_ITEMSIZE(ap2), PyArray_DATA(ret)); - } - else if (PyArray_NDIM(ap1) == 2 && PyArray_NDIM(ap2) == 1) { - /* Matrix-vector multiplication -- Level 2 BLAS */ - lda = (PyArray_DIM(ap1, 1) > 1 ? PyArray_DIM(ap1, 1) : 1); - gemv(typenum, CblasRowMajor, CblasNoTrans, ap1, lda, ap2, 1, ret); - } - else if (PyArray_NDIM(ap1) == 1 && PyArray_NDIM(ap2) == 2) { - /* Vector matrix multiplication -- Level 2 BLAS */ - lda = (PyArray_DIM(ap2, 1) > 1 ? PyArray_DIM(ap2, 1) : 1); - gemv(typenum, CblasRowMajor, CblasNoTrans, ap2, lda, ap1, 1, ret); - } - else { - /* - * (PyArray_NDIM(ap1) == 2 && PyArray_NDIM(ap2) == 2) - * Matrix matrix multiplication -- Level 3 BLAS - */ - lda = (PyArray_DIM(ap1, 1) > 1 ? PyArray_DIM(ap1, 1) : 1); - ldb = (PyArray_DIM(ap2, 1) > 1 ? PyArray_DIM(ap2, 1) : 1); - gemm(typenum, CblasRowMajor, CblasNoTrans, CblasTrans, - PyArray_DIM(ap1, 0), PyArray_DIM(ap2, 0), PyArray_DIM(ap1, 1), - ap1, lda, ap2, ldb, ret); - } - NPY_END_ALLOW_THREADS - Py_DECREF(ap1); - Py_DECREF(ap2); - return PyArray_Return(ret); - - fail: - Py_XDECREF(ap1); - Py_XDECREF(ap2); - Py_XDECREF(ret); - return NULL; -} - - /* * vdot(a,b) * @@ -473,9 +187,6 @@ static PyObject *dotblas_vdot(PyObject *NPY_UNUSED(dummy), PyObject *args) { } static struct PyMethodDef dotblas_module_methods[] = { - {"inner", - (PyCFunction)dotblas_innerproduct, - 1, NULL}, {"vdot", (PyCFunction)dotblas_vdot, 1, NULL}, diff --git a/numpy/core/numeric.py b/numpy/core/numeric.py index af53ac417..8db3705f7 100644 --- a/numpy/core/numeric.py +++ b/numpy/core/numeric.py @@ -4,7 +4,7 @@ import os import sys import warnings import collections -from . import multiarray +from numpy.core import multiarray from . import umath from .umath import (invert, sin, UFUNC_BUFSIZE_DEFAULT, ERR_IGNORE, ERR_WARN, ERR_RAISE, ERR_CALL, ERR_PRINT, ERR_LOG, @@ -1084,7 +1084,7 @@ try: os.environ['openblas_main_free'] = '1' if 'gotoblas_main_free' not in os.environ: os.environ['gotoblas_main_free'] = '1' - from ._dotblas import vdot, inner + from ._dotblas import vdot except ImportError: # docstrings are in add_newdocs.py inner = multiarray.inner @@ -1096,6 +1096,7 @@ finally: del envbak dot = multiarray.dot +inner = multiarray.inner def alterdot(): warnings.warn("alterdot no longer does anything.", DeprecationWarning) diff --git a/numpy/core/src/multiarray/cblasfuncs.c b/numpy/core/src/multiarray/cblasfuncs.c index 3de9d0838..36504c3f9 100644 --- a/numpy/core/src/multiarray/cblasfuncs.c +++ b/numpy/core/src/multiarray/cblasfuncs.c @@ -665,7 +665,6 @@ fail: return NULL; } -#if 0 /* * innerproduct(a,b) @@ -674,57 +673,24 @@ fail: * floating point types. Like the generic NumPy equivalent the product * sum is over the last dimension of a and b. * NB: The first argument is not conjugated. + * + * This is for use by PyArray_InnerProduct. It is assumed on entry that the + * arrays ap1 and ap2 have a common data type given by typenum that is + * float, double, cfloat, or cdouble and have dimension <= 2, and have the + * contiguous flag set. The * __numpy_ufunc__ nonsense is also assumed to + * have been taken care of. */ NPY_NO_EXPORT PyObject * -cblas_innerproduct(PyObject *NPY_UNUSED(dummy), PyObject *args) +cblas_innerproduct(int typenum, PyArrayObject *ap1, PyArrayObject *ap2) { - PyObject *op1, *op2; - PyArrayObject *ap1, *ap2, *ret; int j, l, lda, ldb; - int typenum, nd; + int nd; + double prior1, prior2; + PyArrayObject *ret; npy_intp dimensions[NPY_MAXDIMS]; PyTypeObject *subtype; - double prior1, prior2; - - if (!PyArg_ParseTuple(args, "OO", &op1, &op2)) { - return NULL; - } - /* - * Inner product using the BLAS. The product sum is taken along the last - * dimensions of the two arrays. - * Only speeds things up for float double and complex types. - */ - - - typenum = PyArray_ObjectType(op1, 0); - typenum = PyArray_ObjectType(op2, typenum); - - /* This function doesn't handle other types */ - if ((typenum != NPY_DOUBLE && typenum != NPY_CDOUBLE && - typenum != NPY_FLOAT && typenum != NPY_CFLOAT)) { - return PyArray_Return((PyArrayObject *)PyArray_InnerProduct(op1, op2)); - } - - ret = NULL; - ap1 = (PyArrayObject *)PyArray_ContiguousFromObject(op1, typenum, 0, 0); - if (ap1 == NULL) { - return NULL; - } - ap2 = (PyArrayObject *)PyArray_ContiguousFromObject(op2, typenum, 0, 0); - if (ap2 == NULL) { - goto fail; - } - - if ((PyArray_NDIM(ap1) > 2) || (PyArray_NDIM(ap2) > 2)) { - /* This function doesn't handle dimensions greater than 2 */ - ret = (PyArrayObject *)PyArray_InnerProduct((PyObject *)ap1, - (PyObject *)ap2); - Py_DECREF(ap1); - Py_DECREF(ap2); - return PyArray_Return(ret); - } if (PyArray_NDIM(ap1) == 0 || PyArray_NDIM(ap2) == 0) { /* One of ap1 or ap2 is a scalar */ @@ -748,7 +714,8 @@ cblas_innerproduct(PyObject *NPY_UNUSED(dummy), PyObject *args) l = PyArray_DIM(ap1, PyArray_NDIM(ap1)-1); if (PyArray_DIM(ap2, PyArray_NDIM(ap2)-1) != l) { - PyErr_SetString(PyExc_ValueError, "matrices are not aligned"); + PyErr_SetString(PyExc_ValueError, + "matrices are not aligned"); goto fail; } nd = PyArray_NDIM(ap1)+PyArray_NDIM(ap2)-2; @@ -769,7 +736,8 @@ cblas_innerproduct(PyObject *NPY_UNUSED(dummy), PyObject *args) ret = (PyArrayObject *)PyArray_New(subtype, nd, dimensions, typenum, NULL, NULL, 0, 0, - (PyObject *)(prior2 > prior1 ? ap2 : ap1)); + (PyObject *) + (prior2 > prior1 ? ap2 : ap1)); if (ret == NULL) { goto fail; @@ -780,26 +748,36 @@ cblas_innerproduct(PyObject *NPY_UNUSED(dummy), PyObject *args) if (PyArray_NDIM(ap2) == 0) { /* Multiplication by a scalar -- Level 1 BLAS */ if (typenum == NPY_DOUBLE) { - cblas_daxpy(l, *((double *)PyArray_DATA(ap2)), (double *)PyArray_DATA(ap1), 1, + cblas_daxpy(l, + *((double *)PyArray_DATA(ap2)), + (double *)PyArray_DATA(ap1), 1, (double *)PyArray_DATA(ret), 1); } else if (typenum == NPY_CDOUBLE) { - cblas_zaxpy(l, (double *)PyArray_DATA(ap2), (double *)PyArray_DATA(ap1), 1, + cblas_zaxpy(l, + (double *)PyArray_DATA(ap2), + (double *)PyArray_DATA(ap1), 1, (double *)PyArray_DATA(ret), 1); } else if (typenum == NPY_FLOAT) { - cblas_saxpy(l, *((float *)PyArray_DATA(ap2)), (float *)PyArray_DATA(ap1), 1, + cblas_saxpy(l, + *((float *)PyArray_DATA(ap2)), + (float *)PyArray_DATA(ap1), 1, (float *)PyArray_DATA(ret), 1); } else if (typenum == NPY_CFLOAT) { - cblas_caxpy(l, (float *)PyArray_DATA(ap2), (float *)PyArray_DATA(ap1), 1, + cblas_caxpy(l, + (float *)PyArray_DATA(ap2), + (float *)PyArray_DATA(ap1), 1, (float *)PyArray_DATA(ret), 1); } } else if (PyArray_NDIM(ap1) == 1 && PyArray_NDIM(ap2) == 1) { /* Dot product between two vectors -- Level 1 BLAS */ - blas_dot(typenum, l, PyArray_DATA(ap1), PyArray_ITEMSIZE(ap1), - PyArray_DATA(ap2), PyArray_ITEMSIZE(ap2), PyArray_DATA(ret)); + blas_dot(typenum, l, + PyArray_DATA(ap1), PyArray_ITEMSIZE(ap1), + PyArray_DATA(ap2), PyArray_ITEMSIZE(ap2), + PyArray_DATA(ret)); } else if (PyArray_NDIM(ap1) == 2 && PyArray_NDIM(ap2) == 1) { /* Matrix-vector multiplication -- Level 2 BLAS */ @@ -834,6 +812,7 @@ cblas_innerproduct(PyObject *NPY_UNUSED(dummy), PyObject *args) return NULL; } +#if 0 /* * vdot(a,b) diff --git a/numpy/core/src/multiarray/cblasfuncs.h b/numpy/core/src/multiarray/cblasfuncs.h index 66ce4ca5b..d3ec08db6 100644 --- a/numpy/core/src/multiarray/cblasfuncs.h +++ b/numpy/core/src/multiarray/cblasfuncs.h @@ -4,4 +4,7 @@ NPY_NO_EXPORT PyObject * cblas_matrixproduct(int, PyArrayObject *, PyArrayObject *, PyArrayObject *); +NPY_NO_EXPORT PyObject * +cblas_innerproduct(int, PyArrayObject *, PyArrayObject *); + #endif diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index 8b8c1b8a7..909559851 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -836,8 +836,18 @@ PyArray_InnerProduct(PyObject *op1, PyObject *op2) ap2 = (PyArrayObject *)PyArray_FromAny(op2, typec, 0, 0, NPY_ARRAY_ALIGNED, NULL); if (ap2 == NULL) { - goto fail; + Py_DECREF(ap1); + return NULL; } + +#if defined(HAVE_CBLAS) + if (PyArray_NDIM(ap1) <= 2 && PyArray_NDIM(ap2) <= 2 && + (NPY_DOUBLE == typenum || NPY_CDOUBLE == typenum || + NPY_FLOAT == typenum || NPY_CFLOAT == typenum)) { + return cblas_innerproduct(typenum, ap1, ap2); + } +#endif + if (PyArray_NDIM(ap1) == 0 || PyArray_NDIM(ap2) == 0) { ret = (PyArray_NDIM(ap1) == 0 ? ap1 : ap2); ret = (PyArrayObject *)Py_TYPE(ret)->tp_as_number->nb_multiply( diff --git a/numpy/core/tests/test_blasdot.py b/numpy/core/tests/test_blasdot.py index e37ca711f..9a89396cc 100644 --- a/numpy/core/tests/test_blasdot.py +++ b/numpy/core/tests/test_blasdot.py @@ -28,7 +28,6 @@ except ImportError: def test_blasdot_used(): from numpy.core import vdot, inner assert_(vdot is _dotblas.vdot, "vdot") - assert_(inner is _dotblas.inner, "inner") def test_dot_2args(): |