diff options
author | Mark Wiebe <mwwiebe@gmail.com> | 2011-08-17 19:18:56 -0700 |
---|---|---|
committer | Charles Harris <charlesr.harris@gmail.com> | 2011-08-27 07:26:56 -0600 |
commit | bfda229ec93d37b1ee2cdd8b9443ec4e34536bbf (patch) | |
tree | 1315cce82b32c73eb9390e3fa46d8a8e0ddecebe | |
parent | d9b3f90de3213ece9a78b77088fdec17910e81d9 (diff) | |
download | numpy-bfda229ec93d37b1ee2cdd8b9443ec4e34536bbf.tar.gz |
ENH: missingdata: Create count_reduce_items function
This function either cheaply returns the product of the sizes of
all the reduction axes, or counts the number of items which will
be used in a reduction operation when skipna is True. Its purpose
is to make it easy to do functions like np.mean and np.std.
-rw-r--r-- | doc/source/reference/routines.sort.rst | 1 | ||||
-rw-r--r-- | numpy/add_newdocs.py | 57 | ||||
-rw-r--r-- | numpy/core/code_generators/numpy_api.py | 2 | ||||
-rw-r--r-- | numpy/core/numeric.py | 7 | ||||
-rw-r--r-- | numpy/core/src/multiarray/item_selection.c | 2 | ||||
-rw-r--r-- | numpy/core/src/multiarray/multiarraymodule.c | 42 | ||||
-rw-r--r-- | numpy/core/src/multiarray/reduction.c | 144 | ||||
-rw-r--r-- | numpy/core/src/multiarray/reduction.h | 22 | ||||
-rw-r--r-- | numpy/core/tests/test_maskna.py | 74 |
9 files changed, 339 insertions, 12 deletions
diff --git a/doc/source/reference/routines.sort.rst b/doc/source/reference/routines.sort.rst index c10252c69..517ea5897 100644 --- a/doc/source/reference/routines.sort.rst +++ b/doc/source/reference/routines.sort.rst @@ -37,3 +37,4 @@ Counting :toctree: generated/ count_nonzero + count_reduce_items diff --git a/numpy/add_newdocs.py b/numpy/add_newdocs.py index 82f59c6b0..8596b9c9c 100644 --- a/numpy/add_newdocs.py +++ b/numpy/add_newdocs.py @@ -915,6 +915,56 @@ add_newdoc('numpy.core.multiarray', 'count_nonzero', 5 """) +add_newdoc('numpy.core.multiarray', 'count_reduce_items', + """ + count_reduce_items(arr, axis=None, skipna=False) + + Counts the number of items a reduction with the same `axis` + and `skipna` parameter values would use. The purpose of this + function is for the creation of reduction operations + which use the item count, such as :func:`mean`. + + When `skipna` is False or `arr` doesn't have an NA mask, + the result is simply the product of the reduction axis + sizes, returned as a single scalar. + + Parameters + ---------- + arr : array_like + The array for which to count the reduce items. + axis : None or int or tuple of ints, optional + Axis or axes along which a reduction is performed. + The default (`axis` = None) is perform a reduction over all + the dimensions of the input array. + skipna : bool, optional + If this is set to True, any NA elements in the array are not + counted. The only time this function does any actual counting + instead of a cheap multiply of a few sizes is when `skipna` is + true and `arr` has an NA mask. + + Returns + ------- + count : intp or array of intp + Number of items that would be used in a reduction with the + same `axis` and `skipna` parameter values. + + Examples + -------- + >>> a = np.array([[1,np.NA,1], [1,1,np.NA]]) + + >>> np.count_reduce_items(a) + 6 + >>> np.count_reduce_items(a, skipna=True) + 4 + >>> np.sum(a, skipna=True) + 4 + + >>> np.count_reduce_items(a, axis=0, skipna=True) + array([2, 1, 1]) + >>> np.sum(a, axis=0, skipna=True) + array([2, 1, 1]) + """) + add_newdoc('numpy.core.multiarray','set_typeDict', """set_typeDict(dict) @@ -5280,7 +5330,7 @@ add_newdoc('numpy.core', 'ufunc', ('types', add_newdoc('numpy.core', 'ufunc', ('reduce', """ - reduce(a, axis=0, dtype=None, out=None) + reduce(a, axis=0, dtype=None, out=None, skipna=False) Reduces `a`'s dimension by one, by applying ufunc along one axis. @@ -5325,6 +5375,11 @@ add_newdoc('numpy.core', 'ufunc', ('reduce', out : ndarray, optional A location into which the result is stored. If not provided, a freshly-allocated array is returned. + skipna : bool, optional + If this is set to True, the reduction is done as if any NA elements + were not counted in the array. The default, False, causes the + NA values to propagate, so if any element in a set of elements + being reduced is NA, the result will be NA. Returns ------- diff --git a/numpy/core/code_generators/numpy_api.py b/numpy/core/code_generators/numpy_api.py index f963fa783..f2470a58f 100644 --- a/numpy/core/code_generators/numpy_api.py +++ b/numpy/core/code_generators/numpy_api.py @@ -334,7 +334,7 @@ multiarray_funcs_api = { 'PyArray_AssignArray': 295, 'PyArray_CreateReduceResult': 296, 'PyArray_InitializeReduceResult': 297, - 'PyArray_RemoveAxesInPlace': 298 + 'PyArray_RemoveAxesInPlace': 298, } ufunc_types_api = { diff --git a/numpy/core/numeric.py b/numpy/core/numeric.py index 9b7c20d76..00184505d 100644 --- a/numpy/core/numeric.py +++ b/numpy/core/numeric.py @@ -1,7 +1,7 @@ __all__ = ['newaxis', 'ndarray', 'flatiter', 'nditer', 'nested_iters', 'ufunc', - 'arange', 'array', 'zeros', 'count_nonzero', 'empty', 'broadcast', - 'dtype', 'fromstring', 'fromfile', 'frombuffer', - 'int_asbuffer', 'where', 'argwhere', 'copyto', + 'arange', 'array', 'zeros', 'count_nonzero', 'count_reduce_items', + 'empty', 'broadcast', 'dtype', 'fromstring', 'fromfile', + 'frombuffer', 'int_asbuffer', 'where', 'argwhere', 'copyto', 'concatenate', 'fastCopyAndTranspose', 'lexsort', 'set_numeric_ops', 'can_cast', 'promote_types', 'min_scalar_type', 'result_type', 'asarray', 'asanyarray', 'ascontiguousarray', 'asfortranarray', @@ -144,6 +144,7 @@ arange = multiarray.arange array = multiarray.array zeros = multiarray.zeros count_nonzero = multiarray.count_nonzero +count_reduce_items = multiarray.count_reduce_items empty = multiarray.empty empty_like = multiarray.empty_like fromstring = multiarray.fromstring diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c index 88b81f43d..5257aec8e 100644 --- a/numpy/core/src/multiarray/item_selection.c +++ b/numpy/core/src/multiarray/item_selection.c @@ -2009,7 +2009,7 @@ PyArray_ReduceCountNonzero(PyArrayObject *arr, PyArrayObject *out, &assign_reduce_unit_zero, &reduce_count_nonzero_inner_loop, &reduce_count_nonzero_masked_inner_loop, - nonzero, "count_nonzero"); + nonzero, 0, "count_nonzero"); Py_DECREF(dtype); if (out == NULL && result != NULL) { return PyArray_Return(result); diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index e3cb3709e..e21369e6f 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -54,6 +54,7 @@ NPY_NO_EXPORT int NPY_NUMUSERTYPES = 0; #include "ctors.h" #include "na_singleton.h" #include "na_mask.h" +#include "reduction.h" /* Only here for API compatibility */ NPY_NO_EXPORT PyTypeObject PyBigArray_Type; @@ -2211,6 +2212,44 @@ array_count_nonzero(PyObject *NPY_UNUSED(self), PyObject *args, PyObject *kwds) } static PyObject * +array_count_reduce_items(PyObject *NPY_UNUSED(self), PyObject *args, PyObject *kwds) +{ + static char *kwlist[] = {"arr", "axis", "skipna", NULL}; + + PyObject *array_in, *axis_in = NULL; + PyObject *ret = NULL; + PyArrayObject *array; + npy_bool axis_flags[NPY_MAXDIMS]; + int skipna = 0; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, + "O|Oi:count_reduce_items", kwlist, + &array_in, + &axis_in, + &skipna)) { + return NULL; + } + + array = (PyArrayObject *)PyArray_FromAny(array_in, NULL, + 0, 0, NPY_ARRAY_ALLOWNA, NULL); + if (array == NULL) { + return NULL; + } + + if (PyArray_ConvertMultiAxis(axis_in, PyArray_NDIM(array), + axis_flags) != NPY_SUCCEED) { + Py_DECREF(array); + return NULL; + } + + ret = PyArray_CountReduceItems(array, axis_flags, skipna); + + Py_DECREF(array); + + return ret; +} + +static PyObject * array_fromstring(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *keywds) { char *data; @@ -3756,6 +3795,9 @@ static struct PyMethodDef array_module_methods[] = { {"count_nonzero", (PyCFunction)array_count_nonzero, METH_VARARGS|METH_KEYWORDS, NULL}, + {"count_reduce_items", + (PyCFunction)array_count_reduce_items, + METH_VARARGS|METH_KEYWORDS, NULL}, {"empty", (PyCFunction)array_empty, METH_VARARGS|METH_KEYWORDS, NULL}, diff --git a/numpy/core/src/multiarray/reduction.c b/numpy/core/src/multiarray/reduction.c index 30fb33bed..1866e3cba 100644 --- a/numpy/core/src/multiarray/reduction.c +++ b/numpy/core/src/multiarray/reduction.c @@ -17,6 +17,7 @@ #include "npy_config.h" #include "numpy/npy_3kcompat.h" +#include "lowlevel_strided_loops.h" #include "reduction.h" /* @@ -418,6 +419,7 @@ PyArray_InitializeReduceResult( * inner_loop : The inner loop which does the reduction. * masked_inner_loop: The inner loop which does the reduction with a mask. * data : Data which is passed to assign_unit and the inner loop. + * buffersize : Buffer size for the iterator. For the default, pass in 0. * funcname : The name of the reduction function, for error messages. */ NPY_NO_EXPORT PyArrayObject * @@ -428,7 +430,7 @@ PyArray_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out, PyArray_AssignReduceUnitFunc *assign_unit, PyArray_ReduceInnerLoopFunc *inner_loop, PyArray_ReduceInnerLoopFunc *masked_inner_loop, - void *data, const char *funcname) + void *data, npy_intp buffersize, const char *funcname) { int use_maskna; PyArrayObject *result = NULL, *op_view = NULL; @@ -549,10 +551,11 @@ PyArray_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out, op_flags[1] |= NPY_ITER_IGNORE_MASKNA; } - iter = NpyIter_MultiNew(2, op, flags, + iter = NpyIter_AdvancedNew(2, op, flags, NPY_KEEPORDER, NPY_SAME_KIND_CASTING, op_flags, - op_dtypes); + op_dtypes, + 0, NULL, NULL, buffersize); if (iter == NULL) { Py_DECREF(result); Py_DECREF(op_dtypes[0]); @@ -633,3 +636,138 @@ fail: return NULL; } + +/* + * This function counts the number of elements that a reduction + * will see along the reduction directions, given the provided options. + * + * If the reduction operand has no NA mask or 'skipna' is false, this + * is simply the prod`uct of all the reduction axis sizes. A NumPy + * scalar is returned in this case. + * + * If the reduction operand has an NA mask and 'skipna' is true, this + * counts the number of elements which are not NA along the reduction + * dimensions, and returns an array with the counts. + */ +NPY_NO_EXPORT PyObject * +PyArray_CountReduceItems(PyArrayObject *operand, + npy_bool *axis_flags, int skipna) +{ + int idim, ndim = PyArray_NDIM(operand); + + /* The product of the reduction dimensions in this case */ + if (!skipna || !PyArray_HASMASKNA(operand)) { + npy_intp count = 1, *shape = PyArray_SHAPE(operand); + PyArray_Descr *dtype; + PyObject *ret; + + for (idim = 0; idim < ndim; ++idim) { + if (axis_flags[idim]) { + count *= shape[idim]; + } + } + + dtype = PyArray_DescrFromType(NPY_INTP); + if (dtype == NULL) { + return NULL; + } + ret = PyArray_Scalar(&count, dtype, NULL); + Py_DECREF(dtype); + return ret; + } + /* Otherwise we need to do a count based on the NA mask */ + else { + npy_intp *strides; + PyArrayObject *result; + PyArray_Descr *result_dtype; + + npy_intp i, coord[NPY_MAXDIMS]; + npy_intp shape_it[NPY_MAXDIMS]; + npy_intp operand_strides_it[NPY_MAXDIMS]; + npy_intp result_strides_it[NPY_MAXDIMS]; + char *operand_data = NULL, *result_data = NULL; + + /* + * To support field-NA, we would create a result type + * with an INTP matching each field, then separately count + * the available elements per-field. + */ + if (PyArray_HASFIELDS(operand)) { + PyErr_SetString(PyExc_RuntimeError, + "field-NA isn't implemented yet"); + return NULL; + } + + /* + * TODO: The loop below is specialized for NPY_BOOL masks, + * will need another version for NPY_MASK masks. + */ + if (PyArray_MASKNA_DTYPE(operand)->type_num != NPY_BOOL) { + PyErr_SetString(PyExc_RuntimeError, + "multi-NA isn't implemented yet"); + return NULL; + } + + /* Allocate an array for the reduction counting */ + result_dtype = PyArray_DescrFromType(NPY_INTP); + if (result_dtype == NULL) { + return NULL; + } + result = PyArray_CreateReduceResult(operand, NULL, + result_dtype, axis_flags, 0, + "count_reduce_items"); + if (result == NULL) { + return NULL; + } + + /* Initialize result to all zeros */ + if (PyArray_AssignZero(result, NULL, 0, NULL) < 0) { + Py_DECREF(result); + return NULL; + } + + /* + * Set all the reduction strides to 0 in result so + * we can use them for raw iteration + */ + strides = PyArray_STRIDES(result); + for (idim = 0; idim < ndim; ++idim) { + if (axis_flags[idim]) { + strides[idim] = 0; + } + } + + /* + * Sort axes based on 'operand', which has more non-zero strides, + * by making it the first operand here + */ + if (PyArray_PrepareTwoRawArrayIter(ndim, PyArray_SHAPE(operand), + PyArray_MASKNA_DATA(operand), PyArray_MASKNA_STRIDES(operand), + PyArray_DATA(result), PyArray_STRIDES(result), + &ndim, shape_it, + &operand_data, operand_strides_it, + &result_data, result_strides_it) < 0) { + Py_DECREF(result); + return NULL; + } + + /* + * NOTE: The following only works for NPY_BOOL masks. + */ + NPY_RAW_ITER_START(idim, ndim, coord, shape_it) { + char *operand_d = operand_data, *result_d = result_data; + for (i = 0; i < shape_it[0]; ++i) { + *(npy_intp *)result_d += *operand_d; + + operand_d += operand_strides_it[0]; + result_d += result_strides_it[0]; + } + } NPY_RAW_ITER_TWO_NEXT(idim, ndim, coord, shape_it, + operand_data, operand_strides_it, + result_data, result_strides_it); + + /* Remove the reduction axes and return the result */ + PyArray_RemoveAxesInPlace(result, axis_flags); + return PyArray_Return(result); + } +} diff --git a/numpy/core/src/multiarray/reduction.h b/numpy/core/src/multiarray/reduction.h index b2334f896..849db256b 100644 --- a/numpy/core/src/multiarray/reduction.h +++ b/numpy/core/src/multiarray/reduction.h @@ -100,8 +100,7 @@ typedef void (PyArray_ReduceInnerLoopFunc)(NpyIter *iter, * inner_loop : The inner loop which does the reduction. * masked_inner_loop: The inner loop which does the reduction with a mask. * data : Data which is passed to assign_unit and the inner loop. - * needs_api : Should be 1 if the inner loop might call a Python API - * function like PyErr_SetString. + * buffersize : Buffer size for the iterator. For the default, pass in 0. * funcname : The name of the reduction function, for error messages. */ NPY_NO_EXPORT PyArrayObject * @@ -112,6 +111,23 @@ PyArray_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out, PyArray_AssignReduceUnitFunc *assign_unit, PyArray_ReduceInnerLoopFunc *inner_loop, PyArray_ReduceInnerLoopFunc *masked_inner_loop, - void *data, const char *funcname); + void *data, npy_intp buffersize, const char *funcname); + +/* + * This function counts the number of elements that a reduction + * will see along the reduction directions, given the provided options. + * + * If the reduction operand has no NA mask or 'skipna' is false, this + * is simply the prod`uct of all the reduction axis sizes. A NumPy + * scalar is returned in this case. + * + * If the reduction operand has an NA mask and 'skipna' is true, this + * counts the number of elements which are not NA along the reduction + * dimensions, and returns an array with the counts. + */ +NPY_NO_EXPORT PyObject * +PyArray_CountReduceItems(PyArrayObject *operand, + npy_bool *axis_flags, int skipna); + #endif diff --git a/numpy/core/tests/test_maskna.py b/numpy/core/tests/test_maskna.py index bee959a6d..f946e8073 100644 --- a/numpy/core/tests/test_maskna.py +++ b/numpy/core/tests/test_maskna.py @@ -792,6 +792,80 @@ def check_ufunc_max_1D(max_func): a[...] = np.NA assert_raises(ValueError, max_func, a, skipna=True) +def test_count_reduce_items(): + # np.count_reduce_items + + # When skipna is False, it should always return the + # product of the reduction axes as a NumPy intp scalar + a = np.zeros((2,3,4)) + + res = np.count_reduce_items(a) + assert_equal(res, 24) + assert_equal(type(res), np.intp) + + res = np.count_reduce_items(a, axis=0) + assert_equal(res, 2) + assert_equal(type(res), np.intp) + + res = np.count_reduce_items(a, axis=(1,2)) + assert_equal(res, 12) + assert_equal(type(res), np.intp) + + # This still holds if 'a' has an NA mask and some NA values + a = np.zeros((2,3,4), maskna=True) + a[1,2,2] = np.NA + a[0,1,2] = np.NA + a[1,0,3] = np.NA + + res = np.count_reduce_items(a) + assert_equal(res, 24) + assert_equal(type(res), np.intp) + + res = np.count_reduce_items(a, axis=0) + assert_equal(res, 2) + assert_equal(type(res), np.intp) + + res = np.count_reduce_items(a, axis=(1,2)) + assert_equal(res, 12) + assert_equal(type(res), np.intp) + + # If skipna is True, but the array has no NA mask, the result + # should still be the product of the reduction axes + a = np.zeros((2,3,4)) + + res = np.count_reduce_items(a, skipna=True) + assert_equal(res, 24) + assert_equal(type(res), np.intp) + + res = np.count_reduce_items(a, axis=0, skipna=True) + assert_equal(res, 2) + assert_equal(type(res), np.intp) + + res = np.count_reduce_items(a, axis=(1,2), skipna=True) + assert_equal(res, 12) + assert_equal(type(res), np.intp) + + # Finally, when skipna is True AND the array has an NA mask, + # we get an array of counts + a = np.zeros((2,3,4), maskna=True) + a[1,2,2] = np.NA + a[0,1,2] = np.NA + a[1,0,3] = np.NA + + # When doing a full reduction, should still get the scalar + res = np.count_reduce_items(a, skipna=True) + assert_equal(res, 21) + assert_equal(res.dtype, np.dtype(np.intp)) + + res = np.count_reduce_items(a, axis=0, skipna=True) + assert_equal(res, [[2,2,2,1], [2,2,1,2], [2,2,1,2]]) + assert_equal(res.dtype, np.dtype(np.intp)) + + res = np.count_reduce_items(a, axis=(1,2), skipna=True) + assert_equal(res, [11,10]) + assert_equal(res.dtype, np.dtype(np.intp)) + + def test_array_maskna_clip_method(): # ndarray.clip a = np.array([2, np.NA, 10, 4, np.NA, 7], maskna=True) |