summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMark Wiebe <mwwiebe@gmail.com>2011-08-17 19:18:56 -0700
committerCharles Harris <charlesr.harris@gmail.com>2011-08-27 07:26:56 -0600
commitbfda229ec93d37b1ee2cdd8b9443ec4e34536bbf (patch)
tree1315cce82b32c73eb9390e3fa46d8a8e0ddecebe
parentd9b3f90de3213ece9a78b77088fdec17910e81d9 (diff)
downloadnumpy-bfda229ec93d37b1ee2cdd8b9443ec4e34536bbf.tar.gz
ENH: missingdata: Create count_reduce_items function
This function either cheaply returns the product of the sizes of all the reduction axes, or counts the number of items which will be used in a reduction operation when skipna is True. Its purpose is to make it easy to do functions like np.mean and np.std.
-rw-r--r--doc/source/reference/routines.sort.rst1
-rw-r--r--numpy/add_newdocs.py57
-rw-r--r--numpy/core/code_generators/numpy_api.py2
-rw-r--r--numpy/core/numeric.py7
-rw-r--r--numpy/core/src/multiarray/item_selection.c2
-rw-r--r--numpy/core/src/multiarray/multiarraymodule.c42
-rw-r--r--numpy/core/src/multiarray/reduction.c144
-rw-r--r--numpy/core/src/multiarray/reduction.h22
-rw-r--r--numpy/core/tests/test_maskna.py74
9 files changed, 339 insertions, 12 deletions
diff --git a/doc/source/reference/routines.sort.rst b/doc/source/reference/routines.sort.rst
index c10252c69..517ea5897 100644
--- a/doc/source/reference/routines.sort.rst
+++ b/doc/source/reference/routines.sort.rst
@@ -37,3 +37,4 @@ Counting
:toctree: generated/
count_nonzero
+ count_reduce_items
diff --git a/numpy/add_newdocs.py b/numpy/add_newdocs.py
index 82f59c6b0..8596b9c9c 100644
--- a/numpy/add_newdocs.py
+++ b/numpy/add_newdocs.py
@@ -915,6 +915,56 @@ add_newdoc('numpy.core.multiarray', 'count_nonzero',
5
""")
+add_newdoc('numpy.core.multiarray', 'count_reduce_items',
+ """
+ count_reduce_items(arr, axis=None, skipna=False)
+
+ Counts the number of items a reduction with the same `axis`
+ and `skipna` parameter values would use. The purpose of this
+ function is for the creation of reduction operations
+ which use the item count, such as :func:`mean`.
+
+ When `skipna` is False or `arr` doesn't have an NA mask,
+ the result is simply the product of the reduction axis
+ sizes, returned as a single scalar.
+
+ Parameters
+ ----------
+ arr : array_like
+ The array for which to count the reduce items.
+ axis : None or int or tuple of ints, optional
+ Axis or axes along which a reduction is performed.
+ The default (`axis` = None) is perform a reduction over all
+ the dimensions of the input array.
+ skipna : bool, optional
+ If this is set to True, any NA elements in the array are not
+ counted. The only time this function does any actual counting
+ instead of a cheap multiply of a few sizes is when `skipna` is
+ true and `arr` has an NA mask.
+
+ Returns
+ -------
+ count : intp or array of intp
+ Number of items that would be used in a reduction with the
+ same `axis` and `skipna` parameter values.
+
+ Examples
+ --------
+ >>> a = np.array([[1,np.NA,1], [1,1,np.NA]])
+
+ >>> np.count_reduce_items(a)
+ 6
+ >>> np.count_reduce_items(a, skipna=True)
+ 4
+ >>> np.sum(a, skipna=True)
+ 4
+
+ >>> np.count_reduce_items(a, axis=0, skipna=True)
+ array([2, 1, 1])
+ >>> np.sum(a, axis=0, skipna=True)
+ array([2, 1, 1])
+ """)
+
add_newdoc('numpy.core.multiarray','set_typeDict',
"""set_typeDict(dict)
@@ -5280,7 +5330,7 @@ add_newdoc('numpy.core', 'ufunc', ('types',
add_newdoc('numpy.core', 'ufunc', ('reduce',
"""
- reduce(a, axis=0, dtype=None, out=None)
+ reduce(a, axis=0, dtype=None, out=None, skipna=False)
Reduces `a`'s dimension by one, by applying ufunc along one axis.
@@ -5325,6 +5375,11 @@ add_newdoc('numpy.core', 'ufunc', ('reduce',
out : ndarray, optional
A location into which the result is stored. If not provided, a
freshly-allocated array is returned.
+ skipna : bool, optional
+ If this is set to True, the reduction is done as if any NA elements
+ were not counted in the array. The default, False, causes the
+ NA values to propagate, so if any element in a set of elements
+ being reduced is NA, the result will be NA.
Returns
-------
diff --git a/numpy/core/code_generators/numpy_api.py b/numpy/core/code_generators/numpy_api.py
index f963fa783..f2470a58f 100644
--- a/numpy/core/code_generators/numpy_api.py
+++ b/numpy/core/code_generators/numpy_api.py
@@ -334,7 +334,7 @@ multiarray_funcs_api = {
'PyArray_AssignArray': 295,
'PyArray_CreateReduceResult': 296,
'PyArray_InitializeReduceResult': 297,
- 'PyArray_RemoveAxesInPlace': 298
+ 'PyArray_RemoveAxesInPlace': 298,
}
ufunc_types_api = {
diff --git a/numpy/core/numeric.py b/numpy/core/numeric.py
index 9b7c20d76..00184505d 100644
--- a/numpy/core/numeric.py
+++ b/numpy/core/numeric.py
@@ -1,7 +1,7 @@
__all__ = ['newaxis', 'ndarray', 'flatiter', 'nditer', 'nested_iters', 'ufunc',
- 'arange', 'array', 'zeros', 'count_nonzero', 'empty', 'broadcast',
- 'dtype', 'fromstring', 'fromfile', 'frombuffer',
- 'int_asbuffer', 'where', 'argwhere', 'copyto',
+ 'arange', 'array', 'zeros', 'count_nonzero', 'count_reduce_items',
+ 'empty', 'broadcast', 'dtype', 'fromstring', 'fromfile',
+ 'frombuffer', 'int_asbuffer', 'where', 'argwhere', 'copyto',
'concatenate', 'fastCopyAndTranspose', 'lexsort', 'set_numeric_ops',
'can_cast', 'promote_types', 'min_scalar_type', 'result_type',
'asarray', 'asanyarray', 'ascontiguousarray', 'asfortranarray',
@@ -144,6 +144,7 @@ arange = multiarray.arange
array = multiarray.array
zeros = multiarray.zeros
count_nonzero = multiarray.count_nonzero
+count_reduce_items = multiarray.count_reduce_items
empty = multiarray.empty
empty_like = multiarray.empty_like
fromstring = multiarray.fromstring
diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c
index 88b81f43d..5257aec8e 100644
--- a/numpy/core/src/multiarray/item_selection.c
+++ b/numpy/core/src/multiarray/item_selection.c
@@ -2009,7 +2009,7 @@ PyArray_ReduceCountNonzero(PyArrayObject *arr, PyArrayObject *out,
&assign_reduce_unit_zero,
&reduce_count_nonzero_inner_loop,
&reduce_count_nonzero_masked_inner_loop,
- nonzero, "count_nonzero");
+ nonzero, 0, "count_nonzero");
Py_DECREF(dtype);
if (out == NULL && result != NULL) {
return PyArray_Return(result);
diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c
index e3cb3709e..e21369e6f 100644
--- a/numpy/core/src/multiarray/multiarraymodule.c
+++ b/numpy/core/src/multiarray/multiarraymodule.c
@@ -54,6 +54,7 @@ NPY_NO_EXPORT int NPY_NUMUSERTYPES = 0;
#include "ctors.h"
#include "na_singleton.h"
#include "na_mask.h"
+#include "reduction.h"
/* Only here for API compatibility */
NPY_NO_EXPORT PyTypeObject PyBigArray_Type;
@@ -2211,6 +2212,44 @@ array_count_nonzero(PyObject *NPY_UNUSED(self), PyObject *args, PyObject *kwds)
}
static PyObject *
+array_count_reduce_items(PyObject *NPY_UNUSED(self), PyObject *args, PyObject *kwds)
+{
+ static char *kwlist[] = {"arr", "axis", "skipna", NULL};
+
+ PyObject *array_in, *axis_in = NULL;
+ PyObject *ret = NULL;
+ PyArrayObject *array;
+ npy_bool axis_flags[NPY_MAXDIMS];
+ int skipna = 0;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwds,
+ "O|Oi:count_reduce_items", kwlist,
+ &array_in,
+ &axis_in,
+ &skipna)) {
+ return NULL;
+ }
+
+ array = (PyArrayObject *)PyArray_FromAny(array_in, NULL,
+ 0, 0, NPY_ARRAY_ALLOWNA, NULL);
+ if (array == NULL) {
+ return NULL;
+ }
+
+ if (PyArray_ConvertMultiAxis(axis_in, PyArray_NDIM(array),
+ axis_flags) != NPY_SUCCEED) {
+ Py_DECREF(array);
+ return NULL;
+ }
+
+ ret = PyArray_CountReduceItems(array, axis_flags, skipna);
+
+ Py_DECREF(array);
+
+ return ret;
+}
+
+static PyObject *
array_fromstring(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *keywds)
{
char *data;
@@ -3756,6 +3795,9 @@ static struct PyMethodDef array_module_methods[] = {
{"count_nonzero",
(PyCFunction)array_count_nonzero,
METH_VARARGS|METH_KEYWORDS, NULL},
+ {"count_reduce_items",
+ (PyCFunction)array_count_reduce_items,
+ METH_VARARGS|METH_KEYWORDS, NULL},
{"empty",
(PyCFunction)array_empty,
METH_VARARGS|METH_KEYWORDS, NULL},
diff --git a/numpy/core/src/multiarray/reduction.c b/numpy/core/src/multiarray/reduction.c
index 30fb33bed..1866e3cba 100644
--- a/numpy/core/src/multiarray/reduction.c
+++ b/numpy/core/src/multiarray/reduction.c
@@ -17,6 +17,7 @@
#include "npy_config.h"
#include "numpy/npy_3kcompat.h"
+#include "lowlevel_strided_loops.h"
#include "reduction.h"
/*
@@ -418,6 +419,7 @@ PyArray_InitializeReduceResult(
* inner_loop : The inner loop which does the reduction.
* masked_inner_loop: The inner loop which does the reduction with a mask.
* data : Data which is passed to assign_unit and the inner loop.
+ * buffersize : Buffer size for the iterator. For the default, pass in 0.
* funcname : The name of the reduction function, for error messages.
*/
NPY_NO_EXPORT PyArrayObject *
@@ -428,7 +430,7 @@ PyArray_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
PyArray_AssignReduceUnitFunc *assign_unit,
PyArray_ReduceInnerLoopFunc *inner_loop,
PyArray_ReduceInnerLoopFunc *masked_inner_loop,
- void *data, const char *funcname)
+ void *data, npy_intp buffersize, const char *funcname)
{
int use_maskna;
PyArrayObject *result = NULL, *op_view = NULL;
@@ -549,10 +551,11 @@ PyArray_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
op_flags[1] |= NPY_ITER_IGNORE_MASKNA;
}
- iter = NpyIter_MultiNew(2, op, flags,
+ iter = NpyIter_AdvancedNew(2, op, flags,
NPY_KEEPORDER, NPY_SAME_KIND_CASTING,
op_flags,
- op_dtypes);
+ op_dtypes,
+ 0, NULL, NULL, buffersize);
if (iter == NULL) {
Py_DECREF(result);
Py_DECREF(op_dtypes[0]);
@@ -633,3 +636,138 @@ fail:
return NULL;
}
+
+/*
+ * This function counts the number of elements that a reduction
+ * will see along the reduction directions, given the provided options.
+ *
+ * If the reduction operand has no NA mask or 'skipna' is false, this
+ * is simply the prod`uct of all the reduction axis sizes. A NumPy
+ * scalar is returned in this case.
+ *
+ * If the reduction operand has an NA mask and 'skipna' is true, this
+ * counts the number of elements which are not NA along the reduction
+ * dimensions, and returns an array with the counts.
+ */
+NPY_NO_EXPORT PyObject *
+PyArray_CountReduceItems(PyArrayObject *operand,
+ npy_bool *axis_flags, int skipna)
+{
+ int idim, ndim = PyArray_NDIM(operand);
+
+ /* The product of the reduction dimensions in this case */
+ if (!skipna || !PyArray_HASMASKNA(operand)) {
+ npy_intp count = 1, *shape = PyArray_SHAPE(operand);
+ PyArray_Descr *dtype;
+ PyObject *ret;
+
+ for (idim = 0; idim < ndim; ++idim) {
+ if (axis_flags[idim]) {
+ count *= shape[idim];
+ }
+ }
+
+ dtype = PyArray_DescrFromType(NPY_INTP);
+ if (dtype == NULL) {
+ return NULL;
+ }
+ ret = PyArray_Scalar(&count, dtype, NULL);
+ Py_DECREF(dtype);
+ return ret;
+ }
+ /* Otherwise we need to do a count based on the NA mask */
+ else {
+ npy_intp *strides;
+ PyArrayObject *result;
+ PyArray_Descr *result_dtype;
+
+ npy_intp i, coord[NPY_MAXDIMS];
+ npy_intp shape_it[NPY_MAXDIMS];
+ npy_intp operand_strides_it[NPY_MAXDIMS];
+ npy_intp result_strides_it[NPY_MAXDIMS];
+ char *operand_data = NULL, *result_data = NULL;
+
+ /*
+ * To support field-NA, we would create a result type
+ * with an INTP matching each field, then separately count
+ * the available elements per-field.
+ */
+ if (PyArray_HASFIELDS(operand)) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "field-NA isn't implemented yet");
+ return NULL;
+ }
+
+ /*
+ * TODO: The loop below is specialized for NPY_BOOL masks,
+ * will need another version for NPY_MASK masks.
+ */
+ if (PyArray_MASKNA_DTYPE(operand)->type_num != NPY_BOOL) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "multi-NA isn't implemented yet");
+ return NULL;
+ }
+
+ /* Allocate an array for the reduction counting */
+ result_dtype = PyArray_DescrFromType(NPY_INTP);
+ if (result_dtype == NULL) {
+ return NULL;
+ }
+ result = PyArray_CreateReduceResult(operand, NULL,
+ result_dtype, axis_flags, 0,
+ "count_reduce_items");
+ if (result == NULL) {
+ return NULL;
+ }
+
+ /* Initialize result to all zeros */
+ if (PyArray_AssignZero(result, NULL, 0, NULL) < 0) {
+ Py_DECREF(result);
+ return NULL;
+ }
+
+ /*
+ * Set all the reduction strides to 0 in result so
+ * we can use them for raw iteration
+ */
+ strides = PyArray_STRIDES(result);
+ for (idim = 0; idim < ndim; ++idim) {
+ if (axis_flags[idim]) {
+ strides[idim] = 0;
+ }
+ }
+
+ /*
+ * Sort axes based on 'operand', which has more non-zero strides,
+ * by making it the first operand here
+ */
+ if (PyArray_PrepareTwoRawArrayIter(ndim, PyArray_SHAPE(operand),
+ PyArray_MASKNA_DATA(operand), PyArray_MASKNA_STRIDES(operand),
+ PyArray_DATA(result), PyArray_STRIDES(result),
+ &ndim, shape_it,
+ &operand_data, operand_strides_it,
+ &result_data, result_strides_it) < 0) {
+ Py_DECREF(result);
+ return NULL;
+ }
+
+ /*
+ * NOTE: The following only works for NPY_BOOL masks.
+ */
+ NPY_RAW_ITER_START(idim, ndim, coord, shape_it) {
+ char *operand_d = operand_data, *result_d = result_data;
+ for (i = 0; i < shape_it[0]; ++i) {
+ *(npy_intp *)result_d += *operand_d;
+
+ operand_d += operand_strides_it[0];
+ result_d += result_strides_it[0];
+ }
+ } NPY_RAW_ITER_TWO_NEXT(idim, ndim, coord, shape_it,
+ operand_data, operand_strides_it,
+ result_data, result_strides_it);
+
+ /* Remove the reduction axes and return the result */
+ PyArray_RemoveAxesInPlace(result, axis_flags);
+ return PyArray_Return(result);
+ }
+}
diff --git a/numpy/core/src/multiarray/reduction.h b/numpy/core/src/multiarray/reduction.h
index b2334f896..849db256b 100644
--- a/numpy/core/src/multiarray/reduction.h
+++ b/numpy/core/src/multiarray/reduction.h
@@ -100,8 +100,7 @@ typedef void (PyArray_ReduceInnerLoopFunc)(NpyIter *iter,
* inner_loop : The inner loop which does the reduction.
* masked_inner_loop: The inner loop which does the reduction with a mask.
* data : Data which is passed to assign_unit and the inner loop.
- * needs_api : Should be 1 if the inner loop might call a Python API
- * function like PyErr_SetString.
+ * buffersize : Buffer size for the iterator. For the default, pass in 0.
* funcname : The name of the reduction function, for error messages.
*/
NPY_NO_EXPORT PyArrayObject *
@@ -112,6 +111,23 @@ PyArray_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
PyArray_AssignReduceUnitFunc *assign_unit,
PyArray_ReduceInnerLoopFunc *inner_loop,
PyArray_ReduceInnerLoopFunc *masked_inner_loop,
- void *data, const char *funcname);
+ void *data, npy_intp buffersize, const char *funcname);
+
+/*
+ * This function counts the number of elements that a reduction
+ * will see along the reduction directions, given the provided options.
+ *
+ * If the reduction operand has no NA mask or 'skipna' is false, this
+ * is simply the prod`uct of all the reduction axis sizes. A NumPy
+ * scalar is returned in this case.
+ *
+ * If the reduction operand has an NA mask and 'skipna' is true, this
+ * counts the number of elements which are not NA along the reduction
+ * dimensions, and returns an array with the counts.
+ */
+NPY_NO_EXPORT PyObject *
+PyArray_CountReduceItems(PyArrayObject *operand,
+ npy_bool *axis_flags, int skipna);
+
#endif
diff --git a/numpy/core/tests/test_maskna.py b/numpy/core/tests/test_maskna.py
index bee959a6d..f946e8073 100644
--- a/numpy/core/tests/test_maskna.py
+++ b/numpy/core/tests/test_maskna.py
@@ -792,6 +792,80 @@ def check_ufunc_max_1D(max_func):
a[...] = np.NA
assert_raises(ValueError, max_func, a, skipna=True)
+def test_count_reduce_items():
+ # np.count_reduce_items
+
+ # When skipna is False, it should always return the
+ # product of the reduction axes as a NumPy intp scalar
+ a = np.zeros((2,3,4))
+
+ res = np.count_reduce_items(a)
+ assert_equal(res, 24)
+ assert_equal(type(res), np.intp)
+
+ res = np.count_reduce_items(a, axis=0)
+ assert_equal(res, 2)
+ assert_equal(type(res), np.intp)
+
+ res = np.count_reduce_items(a, axis=(1,2))
+ assert_equal(res, 12)
+ assert_equal(type(res), np.intp)
+
+ # This still holds if 'a' has an NA mask and some NA values
+ a = np.zeros((2,3,4), maskna=True)
+ a[1,2,2] = np.NA
+ a[0,1,2] = np.NA
+ a[1,0,3] = np.NA
+
+ res = np.count_reduce_items(a)
+ assert_equal(res, 24)
+ assert_equal(type(res), np.intp)
+
+ res = np.count_reduce_items(a, axis=0)
+ assert_equal(res, 2)
+ assert_equal(type(res), np.intp)
+
+ res = np.count_reduce_items(a, axis=(1,2))
+ assert_equal(res, 12)
+ assert_equal(type(res), np.intp)
+
+ # If skipna is True, but the array has no NA mask, the result
+ # should still be the product of the reduction axes
+ a = np.zeros((2,3,4))
+
+ res = np.count_reduce_items(a, skipna=True)
+ assert_equal(res, 24)
+ assert_equal(type(res), np.intp)
+
+ res = np.count_reduce_items(a, axis=0, skipna=True)
+ assert_equal(res, 2)
+ assert_equal(type(res), np.intp)
+
+ res = np.count_reduce_items(a, axis=(1,2), skipna=True)
+ assert_equal(res, 12)
+ assert_equal(type(res), np.intp)
+
+ # Finally, when skipna is True AND the array has an NA mask,
+ # we get an array of counts
+ a = np.zeros((2,3,4), maskna=True)
+ a[1,2,2] = np.NA
+ a[0,1,2] = np.NA
+ a[1,0,3] = np.NA
+
+ # When doing a full reduction, should still get the scalar
+ res = np.count_reduce_items(a, skipna=True)
+ assert_equal(res, 21)
+ assert_equal(res.dtype, np.dtype(np.intp))
+
+ res = np.count_reduce_items(a, axis=0, skipna=True)
+ assert_equal(res, [[2,2,2,1], [2,2,1,2], [2,2,1,2]])
+ assert_equal(res.dtype, np.dtype(np.intp))
+
+ res = np.count_reduce_items(a, axis=(1,2), skipna=True)
+ assert_equal(res, [11,10])
+ assert_equal(res.dtype, np.dtype(np.intp))
+
+
def test_array_maskna_clip_method():
# ndarray.clip
a = np.array([2, np.NA, 10, 4, np.NA, 7], maskna=True)