summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCharles Harris <charlesr.harris@gmail.com>2014-08-17 15:10:08 -0600
committerJulian Taylor <jtaylor.debian@googlemail.com>2014-09-04 21:45:48 +0200
commit93feabba42a6c18a66224dae170ecee08e0f10f9 (patch)
tree01d327659e1f20c1a29fcbb8b55a194662228cb9
parentfee3c926fbd48f477121e95b86ede32fcdb30ea9 (diff)
downloadnumpy-93feabba42a6c18a66224dae170ecee08e0f10f9.tar.gz
ENH: Move dotblas_innerproduct down into multiarray.
Move the dotblas_innerproduct function from the _dotblas.c file to a new cblasfuncs.c file in multiarray and rename it cblas_innerproduct. Modify it so that it can called directly from PyArray_InnerProduct and do so. Fix up numeric.py and the tests to reflect these changes.
-rw-r--r--numpy/core/blasdot/_dotblas.c289
-rw-r--r--numpy/core/numeric.py5
-rw-r--r--numpy/core/src/multiarray/cblasfuncs.c83
-rw-r--r--numpy/core/src/multiarray/cblasfuncs.h3
-rw-r--r--numpy/core/src/multiarray/multiarraymodule.c12
-rw-r--r--numpy/core/tests/test_blasdot.py1
6 files changed, 48 insertions, 345 deletions
diff --git a/numpy/core/blasdot/_dotblas.c b/numpy/core/blasdot/_dotblas.c
index 4fc645358..7d7526626 100644
--- a/numpy/core/blasdot/_dotblas.c
+++ b/numpy/core/blasdot/_dotblas.c
@@ -9,7 +9,6 @@
#include "numpy/arrayobject.h"
#include "npy_config.h"
#include "npy_pycompat.h"
-#include "ufunc_override.h"
#ifndef CBLAS_HEADER
#define CBLAS_HEADER "cblas.h"
#endif
@@ -49,75 +48,6 @@ static const float oneF[2] = {1.0, 0.0}, zeroF[2] = {0.0, 0.0};
/*
- * Helper: dispatch to appropriate cblas_?gemm for typenum.
- */
-static void
-gemm(int typenum, enum CBLAS_ORDER order,
- enum CBLAS_TRANSPOSE transA, enum CBLAS_TRANSPOSE transB,
- int m, int n, int k,
- PyArrayObject *A, int lda, PyArrayObject *B, int ldb, PyArrayObject *R)
-{
- const void *Adata = PyArray_DATA(A), *Bdata = PyArray_DATA(B);
- void *Rdata = PyArray_DATA(R);
-
- int ldc = PyArray_DIM(R, 1) > 1 ? PyArray_DIM(R, 1) : 1;
-
- switch (typenum) {
- case NPY_DOUBLE:
- cblas_dgemm(order, transA, transB, m, n, k, 1.,
- Adata, lda, Bdata, ldb, 0., Rdata, ldc);
- break;
- case NPY_FLOAT:
- cblas_sgemm(order, transA, transB, m, n, k, 1.f,
- Adata, lda, Bdata, ldb, 0.f, Rdata, ldc);
- break;
- case NPY_CDOUBLE:
- cblas_zgemm(order, transA, transB, m, n, k, oneD,
- Adata, lda, Bdata, ldb, zeroD, Rdata, ldc);
- break;
- case NPY_CFLOAT:
- cblas_cgemm(order, transA, transB, m, n, k, oneF,
- Adata, lda, Bdata, ldb, zeroF, Rdata, ldc);
- break;
- }
-}
-
-
-/*
- * Helper: dispatch to appropriate cblas_?gemv for typenum.
- */
-static void
-gemv(int typenum, enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans,
- PyArrayObject *A, int lda, PyArrayObject *X, int incX,
- PyArrayObject *R)
-{
- const void *Adata = PyArray_DATA(A), *Xdata = PyArray_DATA(X);
- void *Rdata = PyArray_DATA(R);
-
- int m = PyArray_DIM(A, 0), n = PyArray_DIM(A, 1);
-
- switch (typenum) {
- case NPY_DOUBLE:
- cblas_dgemv(order, trans, m, n, 1., Adata, lda, Xdata, incX,
- 0., Rdata, 1);
- break;
- case NPY_FLOAT:
- cblas_sgemv(order, trans, m, n, 1.f, Adata, lda, Xdata, incX,
- 0.f, Rdata, 1);
- break;
- case NPY_CDOUBLE:
- cblas_zgemv(order, trans, m, n, oneD, Adata, lda, Xdata, incX,
- zeroD, Rdata, 1);
- break;
- case NPY_CFLOAT:
- cblas_cgemv(order, trans, m, n, oneF, Adata, lda, Xdata, incX,
- zeroF, Rdata, 1);
- break;
- }
-}
-
-
-/*
* Initialize oldFunctions table.
*/
static void
@@ -148,222 +78,6 @@ init_oldFunctions(void)
typedef enum {_scalar, _column, _row, _matrix} MatrixShape;
-static MatrixShape
-_select_matrix_shape(PyArrayObject *array)
-{
- switch (PyArray_NDIM(array)) {
- case 0:
- return _scalar;
- case 1:
- if (PyArray_DIM(array, 0) > 1)
- return _column;
- return _scalar;
- case 2:
- if (PyArray_DIM(array, 0) > 1) {
- if (PyArray_DIM(array, 1) == 1)
- return _column;
- else
- return _matrix;
- }
- if (PyArray_DIM(array, 1) == 1)
- return _scalar;
- return _row;
- }
- return _matrix;
-}
-
-
-/*
- * This also makes sure that the data segment is aligned with
- * an itemsize address as well by returning one if not true.
- */
-static int
-_bad_strides(PyArrayObject *ap)
-{
- int itemsize = PyArray_ITEMSIZE(ap);
- int i, N=PyArray_NDIM(ap);
- npy_intp *strides = PyArray_STRIDES(ap);
-
- if (((npy_intp)(PyArray_DATA(ap)) % itemsize) != 0) {
- return 1;
- }
- for (i = 0; i < N; i++) {
- if ((strides[i] < 0) || (strides[i] % itemsize) != 0) {
- return 1;
- }
- }
-
- return 0;
-}
-
-/*
- * innerproduct(a,b)
- *
- * Returns the inner product of a and b for arrays of
- * floating point types. Like the generic NumPy equivalent the product
- * sum is over the last dimension of a and b.
- * NB: The first argument is not conjugated.
- */
-
-static PyObject *
-dotblas_innerproduct(PyObject *NPY_UNUSED(dummy), PyObject *args)
-{
- PyObject *op1, *op2;
- PyArrayObject *ap1, *ap2, *ret;
- int j, l, lda, ldb;
- int typenum, nd;
- npy_intp dimensions[NPY_MAXDIMS];
- PyTypeObject *subtype;
- double prior1, prior2;
-
- if (!PyArg_ParseTuple(args, "OO", &op1, &op2)) {
- return NULL;
- }
-
- /*
- * Inner product using the BLAS. The product sum is taken along the last
- * dimensions of the two arrays.
- * Only speeds things up for float double and complex types.
- */
-
-
- typenum = PyArray_ObjectType(op1, 0);
- typenum = PyArray_ObjectType(op2, typenum);
-
- /* This function doesn't handle other types */
- if ((typenum != NPY_DOUBLE && typenum != NPY_CDOUBLE &&
- typenum != NPY_FLOAT && typenum != NPY_CFLOAT)) {
- return PyArray_Return((PyArrayObject *)PyArray_InnerProduct(op1, op2));
- }
-
- ret = NULL;
- ap1 = (PyArrayObject *)PyArray_ContiguousFromObject(op1, typenum, 0, 0);
- if (ap1 == NULL) {
- return NULL;
- }
- ap2 = (PyArrayObject *)PyArray_ContiguousFromObject(op2, typenum, 0, 0);
- if (ap2 == NULL) {
- goto fail;
- }
-
- if ((PyArray_NDIM(ap1) > 2) || (PyArray_NDIM(ap2) > 2)) {
- /* This function doesn't handle dimensions greater than 2 */
- ret = (PyArrayObject *)PyArray_InnerProduct((PyObject *)ap1,
- (PyObject *)ap2);
- Py_DECREF(ap1);
- Py_DECREF(ap2);
- return PyArray_Return(ret);
- }
-
- if (PyArray_NDIM(ap1) == 0 || PyArray_NDIM(ap2) == 0) {
- /* One of ap1 or ap2 is a scalar */
- if (PyArray_NDIM(ap1) == 0) {
- /* Make ap2 the scalar */
- PyArrayObject *t = ap1;
- ap1 = ap2;
- ap2 = t;
- }
- for (l = 1, j = 0; j < PyArray_NDIM(ap1); j++) {
- dimensions[j] = PyArray_DIM(ap1, j);
- l *= dimensions[j];
- }
- nd = PyArray_NDIM(ap1);
- }
- else {
- /*
- * (PyArray_NDIM(ap1) <= 2 && PyArray_NDIM(ap2) <= 2)
- * Both ap1 and ap2 are vectors or matrices
- */
- l = PyArray_DIM(ap1, PyArray_NDIM(ap1)-1);
-
- if (PyArray_DIM(ap2, PyArray_NDIM(ap2)-1) != l) {
- PyErr_SetString(PyExc_ValueError, "matrices are not aligned");
- goto fail;
- }
- nd = PyArray_NDIM(ap1)+PyArray_NDIM(ap2)-2;
-
- if (nd == 1)
- dimensions[0] = (PyArray_NDIM(ap1) == 2) ? PyArray_DIM(ap1, 0) : PyArray_DIM(ap2, 0);
- else if (nd == 2) {
- dimensions[0] = PyArray_DIM(ap1, 0);
- dimensions[1] = PyArray_DIM(ap2, 0);
- }
- }
-
- /* Choose which subtype to return */
- prior2 = PyArray_GetPriority((PyObject *)ap2, 0.0);
- prior1 = PyArray_GetPriority((PyObject *)ap1, 0.0);
- subtype = (prior2 > prior1 ? Py_TYPE(ap2) : Py_TYPE(ap1));
-
- ret = (PyArrayObject *)PyArray_New(subtype, nd, dimensions,
- typenum, NULL, NULL, 0, 0,
- (PyObject *)\
- (prior2 > prior1 ? ap2 : ap1));
-
- if (ret == NULL) {
- goto fail;
- }
- NPY_BEGIN_ALLOW_THREADS
- memset(PyArray_DATA(ret), 0, PyArray_NBYTES(ret));
-
- if (PyArray_NDIM(ap2) == 0) {
- /* Multiplication by a scalar -- Level 1 BLAS */
- if (typenum == NPY_DOUBLE) {
- cblas_daxpy(l, *((double *)PyArray_DATA(ap2)), (double *)PyArray_DATA(ap1), 1,
- (double *)PyArray_DATA(ret), 1);
- }
- else if (typenum == NPY_CDOUBLE) {
- cblas_zaxpy(l, (double *)PyArray_DATA(ap2), (double *)PyArray_DATA(ap1), 1,
- (double *)PyArray_DATA(ret), 1);
- }
- else if (typenum == NPY_FLOAT) {
- cblas_saxpy(l, *((float *)PyArray_DATA(ap2)), (float *)PyArray_DATA(ap1), 1,
- (float *)PyArray_DATA(ret), 1);
- }
- else if (typenum == NPY_CFLOAT) {
- cblas_caxpy(l, (float *)PyArray_DATA(ap2), (float *)PyArray_DATA(ap1), 1,
- (float *)PyArray_DATA(ret), 1);
- }
- }
- else if (PyArray_NDIM(ap1) == 1 && PyArray_NDIM(ap2) == 1) {
- /* Dot product between two vectors -- Level 1 BLAS */
- blas_dot(typenum, l, PyArray_DATA(ap1), PyArray_ITEMSIZE(ap1),
- PyArray_DATA(ap2), PyArray_ITEMSIZE(ap2), PyArray_DATA(ret));
- }
- else if (PyArray_NDIM(ap1) == 2 && PyArray_NDIM(ap2) == 1) {
- /* Matrix-vector multiplication -- Level 2 BLAS */
- lda = (PyArray_DIM(ap1, 1) > 1 ? PyArray_DIM(ap1, 1) : 1);
- gemv(typenum, CblasRowMajor, CblasNoTrans, ap1, lda, ap2, 1, ret);
- }
- else if (PyArray_NDIM(ap1) == 1 && PyArray_NDIM(ap2) == 2) {
- /* Vector matrix multiplication -- Level 2 BLAS */
- lda = (PyArray_DIM(ap2, 1) > 1 ? PyArray_DIM(ap2, 1) : 1);
- gemv(typenum, CblasRowMajor, CblasNoTrans, ap2, lda, ap1, 1, ret);
- }
- else {
- /*
- * (PyArray_NDIM(ap1) == 2 && PyArray_NDIM(ap2) == 2)
- * Matrix matrix multiplication -- Level 3 BLAS
- */
- lda = (PyArray_DIM(ap1, 1) > 1 ? PyArray_DIM(ap1, 1) : 1);
- ldb = (PyArray_DIM(ap2, 1) > 1 ? PyArray_DIM(ap2, 1) : 1);
- gemm(typenum, CblasRowMajor, CblasNoTrans, CblasTrans,
- PyArray_DIM(ap1, 0), PyArray_DIM(ap2, 0), PyArray_DIM(ap1, 1),
- ap1, lda, ap2, ldb, ret);
- }
- NPY_END_ALLOW_THREADS
- Py_DECREF(ap1);
- Py_DECREF(ap2);
- return PyArray_Return(ret);
-
- fail:
- Py_XDECREF(ap1);
- Py_XDECREF(ap2);
- Py_XDECREF(ret);
- return NULL;
-}
-
-
/*
* vdot(a,b)
*
@@ -473,9 +187,6 @@ static PyObject *dotblas_vdot(PyObject *NPY_UNUSED(dummy), PyObject *args) {
}
static struct PyMethodDef dotblas_module_methods[] = {
- {"inner",
- (PyCFunction)dotblas_innerproduct,
- 1, NULL},
{"vdot",
(PyCFunction)dotblas_vdot,
1, NULL},
diff --git a/numpy/core/numeric.py b/numpy/core/numeric.py
index af53ac417..8db3705f7 100644
--- a/numpy/core/numeric.py
+++ b/numpy/core/numeric.py
@@ -4,7 +4,7 @@ import os
import sys
import warnings
import collections
-from . import multiarray
+from numpy.core import multiarray
from . import umath
from .umath import (invert, sin, UFUNC_BUFSIZE_DEFAULT, ERR_IGNORE,
ERR_WARN, ERR_RAISE, ERR_CALL, ERR_PRINT, ERR_LOG,
@@ -1084,7 +1084,7 @@ try:
os.environ['openblas_main_free'] = '1'
if 'gotoblas_main_free' not in os.environ:
os.environ['gotoblas_main_free'] = '1'
- from ._dotblas import vdot, inner
+ from ._dotblas import vdot
except ImportError:
# docstrings are in add_newdocs.py
inner = multiarray.inner
@@ -1096,6 +1096,7 @@ finally:
del envbak
dot = multiarray.dot
+inner = multiarray.inner
def alterdot():
warnings.warn("alterdot no longer does anything.", DeprecationWarning)
diff --git a/numpy/core/src/multiarray/cblasfuncs.c b/numpy/core/src/multiarray/cblasfuncs.c
index 3de9d0838..36504c3f9 100644
--- a/numpy/core/src/multiarray/cblasfuncs.c
+++ b/numpy/core/src/multiarray/cblasfuncs.c
@@ -665,7 +665,6 @@ fail:
return NULL;
}
-#if 0
/*
* innerproduct(a,b)
@@ -674,57 +673,24 @@ fail:
* floating point types. Like the generic NumPy equivalent the product
* sum is over the last dimension of a and b.
* NB: The first argument is not conjugated.
+ *
+ * This is for use by PyArray_InnerProduct. It is assumed on entry that the
+ * arrays ap1 and ap2 have a common data type given by typenum that is
+ * float, double, cfloat, or cdouble and have dimension <= 2, and have the
+ * contiguous flag set. The * __numpy_ufunc__ nonsense is also assumed to
+ * have been taken care of.
*/
NPY_NO_EXPORT PyObject *
-cblas_innerproduct(PyObject *NPY_UNUSED(dummy), PyObject *args)
+cblas_innerproduct(int typenum, PyArrayObject *ap1, PyArrayObject *ap2)
{
- PyObject *op1, *op2;
- PyArrayObject *ap1, *ap2, *ret;
int j, l, lda, ldb;
- int typenum, nd;
+ int nd;
+ double prior1, prior2;
+ PyArrayObject *ret;
npy_intp dimensions[NPY_MAXDIMS];
PyTypeObject *subtype;
- double prior1, prior2;
-
- if (!PyArg_ParseTuple(args, "OO", &op1, &op2)) {
- return NULL;
- }
- /*
- * Inner product using the BLAS. The product sum is taken along the last
- * dimensions of the two arrays.
- * Only speeds things up for float double and complex types.
- */
-
-
- typenum = PyArray_ObjectType(op1, 0);
- typenum = PyArray_ObjectType(op2, typenum);
-
- /* This function doesn't handle other types */
- if ((typenum != NPY_DOUBLE && typenum != NPY_CDOUBLE &&
- typenum != NPY_FLOAT && typenum != NPY_CFLOAT)) {
- return PyArray_Return((PyArrayObject *)PyArray_InnerProduct(op1, op2));
- }
-
- ret = NULL;
- ap1 = (PyArrayObject *)PyArray_ContiguousFromObject(op1, typenum, 0, 0);
- if (ap1 == NULL) {
- return NULL;
- }
- ap2 = (PyArrayObject *)PyArray_ContiguousFromObject(op2, typenum, 0, 0);
- if (ap2 == NULL) {
- goto fail;
- }
-
- if ((PyArray_NDIM(ap1) > 2) || (PyArray_NDIM(ap2) > 2)) {
- /* This function doesn't handle dimensions greater than 2 */
- ret = (PyArrayObject *)PyArray_InnerProduct((PyObject *)ap1,
- (PyObject *)ap2);
- Py_DECREF(ap1);
- Py_DECREF(ap2);
- return PyArray_Return(ret);
- }
if (PyArray_NDIM(ap1) == 0 || PyArray_NDIM(ap2) == 0) {
/* One of ap1 or ap2 is a scalar */
@@ -748,7 +714,8 @@ cblas_innerproduct(PyObject *NPY_UNUSED(dummy), PyObject *args)
l = PyArray_DIM(ap1, PyArray_NDIM(ap1)-1);
if (PyArray_DIM(ap2, PyArray_NDIM(ap2)-1) != l) {
- PyErr_SetString(PyExc_ValueError, "matrices are not aligned");
+ PyErr_SetString(PyExc_ValueError,
+ "matrices are not aligned");
goto fail;
}
nd = PyArray_NDIM(ap1)+PyArray_NDIM(ap2)-2;
@@ -769,7 +736,8 @@ cblas_innerproduct(PyObject *NPY_UNUSED(dummy), PyObject *args)
ret = (PyArrayObject *)PyArray_New(subtype, nd, dimensions,
typenum, NULL, NULL, 0, 0,
- (PyObject *)(prior2 > prior1 ? ap2 : ap1));
+ (PyObject *)
+ (prior2 > prior1 ? ap2 : ap1));
if (ret == NULL) {
goto fail;
@@ -780,26 +748,36 @@ cblas_innerproduct(PyObject *NPY_UNUSED(dummy), PyObject *args)
if (PyArray_NDIM(ap2) == 0) {
/* Multiplication by a scalar -- Level 1 BLAS */
if (typenum == NPY_DOUBLE) {
- cblas_daxpy(l, *((double *)PyArray_DATA(ap2)), (double *)PyArray_DATA(ap1), 1,
+ cblas_daxpy(l,
+ *((double *)PyArray_DATA(ap2)),
+ (double *)PyArray_DATA(ap1), 1,
(double *)PyArray_DATA(ret), 1);
}
else if (typenum == NPY_CDOUBLE) {
- cblas_zaxpy(l, (double *)PyArray_DATA(ap2), (double *)PyArray_DATA(ap1), 1,
+ cblas_zaxpy(l,
+ (double *)PyArray_DATA(ap2),
+ (double *)PyArray_DATA(ap1), 1,
(double *)PyArray_DATA(ret), 1);
}
else if (typenum == NPY_FLOAT) {
- cblas_saxpy(l, *((float *)PyArray_DATA(ap2)), (float *)PyArray_DATA(ap1), 1,
+ cblas_saxpy(l,
+ *((float *)PyArray_DATA(ap2)),
+ (float *)PyArray_DATA(ap1), 1,
(float *)PyArray_DATA(ret), 1);
}
else if (typenum == NPY_CFLOAT) {
- cblas_caxpy(l, (float *)PyArray_DATA(ap2), (float *)PyArray_DATA(ap1), 1,
+ cblas_caxpy(l,
+ (float *)PyArray_DATA(ap2),
+ (float *)PyArray_DATA(ap1), 1,
(float *)PyArray_DATA(ret), 1);
}
}
else if (PyArray_NDIM(ap1) == 1 && PyArray_NDIM(ap2) == 1) {
/* Dot product between two vectors -- Level 1 BLAS */
- blas_dot(typenum, l, PyArray_DATA(ap1), PyArray_ITEMSIZE(ap1),
- PyArray_DATA(ap2), PyArray_ITEMSIZE(ap2), PyArray_DATA(ret));
+ blas_dot(typenum, l,
+ PyArray_DATA(ap1), PyArray_ITEMSIZE(ap1),
+ PyArray_DATA(ap2), PyArray_ITEMSIZE(ap2),
+ PyArray_DATA(ret));
}
else if (PyArray_NDIM(ap1) == 2 && PyArray_NDIM(ap2) == 1) {
/* Matrix-vector multiplication -- Level 2 BLAS */
@@ -834,6 +812,7 @@ cblas_innerproduct(PyObject *NPY_UNUSED(dummy), PyObject *args)
return NULL;
}
+#if 0
/*
* vdot(a,b)
diff --git a/numpy/core/src/multiarray/cblasfuncs.h b/numpy/core/src/multiarray/cblasfuncs.h
index 66ce4ca5b..d3ec08db6 100644
--- a/numpy/core/src/multiarray/cblasfuncs.h
+++ b/numpy/core/src/multiarray/cblasfuncs.h
@@ -4,4 +4,7 @@
NPY_NO_EXPORT PyObject *
cblas_matrixproduct(int, PyArrayObject *, PyArrayObject *, PyArrayObject *);
+NPY_NO_EXPORT PyObject *
+cblas_innerproduct(int, PyArrayObject *, PyArrayObject *);
+
#endif
diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c
index 8b8c1b8a7..909559851 100644
--- a/numpy/core/src/multiarray/multiarraymodule.c
+++ b/numpy/core/src/multiarray/multiarraymodule.c
@@ -836,8 +836,18 @@ PyArray_InnerProduct(PyObject *op1, PyObject *op2)
ap2 = (PyArrayObject *)PyArray_FromAny(op2, typec, 0, 0,
NPY_ARRAY_ALIGNED, NULL);
if (ap2 == NULL) {
- goto fail;
+ Py_DECREF(ap1);
+ return NULL;
}
+
+#if defined(HAVE_CBLAS)
+ if (PyArray_NDIM(ap1) <= 2 && PyArray_NDIM(ap2) <= 2 &&
+ (NPY_DOUBLE == typenum || NPY_CDOUBLE == typenum ||
+ NPY_FLOAT == typenum || NPY_CFLOAT == typenum)) {
+ return cblas_innerproduct(typenum, ap1, ap2);
+ }
+#endif
+
if (PyArray_NDIM(ap1) == 0 || PyArray_NDIM(ap2) == 0) {
ret = (PyArray_NDIM(ap1) == 0 ? ap1 : ap2);
ret = (PyArrayObject *)Py_TYPE(ret)->tp_as_number->nb_multiply(
diff --git a/numpy/core/tests/test_blasdot.py b/numpy/core/tests/test_blasdot.py
index e37ca711f..9a89396cc 100644
--- a/numpy/core/tests/test_blasdot.py
+++ b/numpy/core/tests/test_blasdot.py
@@ -28,7 +28,6 @@ except ImportError:
def test_blasdot_used():
from numpy.core import vdot, inner
assert_(vdot is _dotblas.vdot, "vdot")
- assert_(inner is _dotblas.inner, "inner")
def test_dot_2args():