diff options
author | Mark Wiebe <mwiebe@enthought.com> | 2011-08-04 11:01:29 -0500 |
---|---|---|
committer | Charles Harris <charlesr.harris@gmail.com> | 2011-08-27 07:26:51 -0600 |
commit | ecadc64f7ce5a8a683a84893d8aeb326f7a80fb8 (patch) | |
tree | 22779f9ce2f5de06d7570ecbaeda65360d58c6f2 /numpy/core | |
parent | 8fff78d2bfe6faa0919b1c55b3c744e3be8b1e8e (diff) | |
download | numpy-ecadc64f7ce5a8a683a84893d8aeb326f7a80fb8.tar.gz |
ENH: missingdata: Make reduction with skipna=True work better in some cases
In particular, this allows skipna=True reductions for ufuncs without a unit
(identity), as long as the array has just one dimension.
Diffstat (limited to 'numpy/core')
-rw-r--r-- | numpy/core/src/umath/ufunc_object.c | 159 | ||||
-rw-r--r-- | numpy/core/tests/test_maskna.py | 40 |
2 files changed, 150 insertions, 49 deletions
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index d1b76d070..96bf925ca 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -2676,8 +2676,13 @@ allocate_or_conform_reduce_result(PyArrayObject *arr, PyArrayObject *out, */ static PyArrayObject * initialize_reduce_result(int identity, PyArrayObject *result, - npy_bool *axis_flags, PyArrayObject *arr) + npy_bool *axis_flags, PyArrayObject *arr, + int skipna, char *ufunc_name) { + npy_intp *strides, *shape, shape_orig[NPY_MAXDIMS], shape0; + PyArrayObject *arr_view; + int idim, ndim; + if (identity == PyUFunc_One) { if (PyArray_FillWithOne(result) < 0) { return NULL; @@ -2693,17 +2698,69 @@ initialize_reduce_result(int identity, PyArrayObject *result, return arr; } /* + * With skipna=True and where 'arr' has an NA mask, + * need to do some additional fiddling if there's no unit. + */ + else if (skipna && PyArray_HASMASKNA(arr)) { + char *data, *maskna_data; + npy_intp *maskna_strides; + + ndim = PyArray_NDIM(arr); + + /* + * Currently only supporting one dimension in this case. 
+ */ + if (ndim != 1) { + PyErr_SetString(PyExc_ValueError, + "skipna=True with a non-identity reduction " + "and an array with ndim > 1 isn't implemented yet"); + return NULL; + } + + arr_view = (PyArrayObject *)PyArray_View(arr, NULL, &PyArray_Type); + if (arr_view == NULL) { + return NULL; + } + + shape = PyArray_DIMS(arr_view); + shape0 = shape[0]; + data = PyArray_DATA(arr_view); + strides = PyArray_STRIDES(arr_view); + maskna_data = PyArray_MASKNA_DATA(arr_view); + maskna_strides = PyArray_MASKNA_STRIDES(arr_view); + + /* Shrink the array from the start until we find an exposed element */ + while (shape0 > 0 && + !NpyMaskValue_IsExposed((npy_mask)*maskna_data)) { + --shape0; + data += strides[0]; + maskna_data += maskna_strides[0]; + } + + if (shape0 == 0) { + Py_DECREF(arr_view); + PyErr_Format(PyExc_ValueError, + "fully NA array with skipna=True to " + "%s.reduce which has no identity", ufunc_name); + return NULL; + } + + /* With the first element exposed, fall through to the other code */ + shape[0] = shape0; + ((PyArrayObject_fieldaccess *)arr_view)->data = data; + ((PyArrayObject_fieldaccess *)arr_view)->maskna_data = maskna_data; + } + /* * If there is no identity, copy the first element along the * reduction dimensions. */ else { - npy_intp *strides, *shape, *shape_orig; - PyArrayObject *arr_view; - int idim, ndim = PyArray_NDIM(arr); + ndim = PyArray_NDIM(arr); if (PyArray_SIZE(arr) == 0) { - PyErr_SetString(PyExc_ValueError, - "zero-size array to ufunc.reduce without identity"); + PyErr_Format(PyExc_ValueError, + "zero-size array to %s.reduce which has no identity", + ufunc_name); return NULL; } @@ -2719,45 +2776,45 @@ initialize_reduce_result(int identity, PyArrayObject *result, if (arr_view == NULL) { return NULL; } + } - /* - * Adjust the shape to only look at the first element along - * any of the reduction axes. 
- */ - shape = PyArray_DIMS(arr_view); - for (idim = 0; idim < ndim; ++idim) { - if (axis_flags[idim]) { - shape[idim] = 1; - } + /* + * Adjust the shape to only look at the first element along + * any of the reduction axes. + */ + shape = PyArray_DIMS(arr_view); + memcpy(shape_orig, shape, ndim * sizeof(npy_intp)); + for (idim = 0; idim < ndim; ++idim) { + if (axis_flags[idim]) { + shape[idim] = 1; } + } - /* Copy the elements into the result to start */ - if (PyArray_CopyInto(result, arr_view) < 0) { - Py_DECREF(arr_view); - return NULL; - } + /* Copy the elements into the result to start */ + if (PyArray_CopyInto(result, arr_view) < 0) { + Py_DECREF(arr_view); + return NULL; + } - /* Adjust the shape to only look at the remaining elements */ - shape_orig = PyArray_DIMS(arr); - strides = PyArray_STRIDES(arr_view); + /* Adjust the shape to only look at the remaining elements */ + strides = PyArray_STRIDES(arr_view); + for (idim = 0; idim < ndim; ++idim) { + if (axis_flags[idim]) { + shape[idim] = shape_orig[idim] - 1; + ((PyArrayObject_fieldaccess *)arr_view)->data += strides[idim]; + } + } + if (PyArray_HASMASKNA(arr_view)) { + strides = PyArray_MASKNA_STRIDES(arr_view); for (idim = 0; idim < ndim; ++idim) { if (axis_flags[idim]) { - shape[idim] = shape_orig[idim] - 1; - ((PyArrayObject_fieldaccess *)arr_view)->data += strides[idim]; - } - } - if (PyArray_HASMASKNA(arr_view)) { - strides = PyArray_MASKNA_STRIDES(arr_view); - for (idim = 0; idim < ndim; ++idim) { - if (axis_flags[idim]) { - ((PyArrayObject_fieldaccess *)arr_view)->maskna_data += - strides[idim]; - } + ((PyArrayObject_fieldaccess *)arr_view)->maskna_data += + strides[idim]; } } - - return arr_view; } + + return arr_view; } /* @@ -2971,17 +3028,6 @@ PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out, } else { /* - * If there's no identity, validate that there is no - * reduction which is all NA values. 
- */ - if (self->identity == PyUFunc_None) { - PyErr_SetString(PyExc_ValueError, - "skipna=True together with a non-identity reduction " - "isn't implemented yet"); - goto fail; - } - - /* * If the result has a mask (i.e. from the out= parameter), * Set it to all exposed. */ @@ -2990,6 +3036,23 @@ PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out, goto fail; } } + + /* Special case a one-value input */ + if (PyArray_SIZE(arr) == 1) { + if (NpyMaskValue_IsExposed( + (npy_mask)*PyArray_MASKNA_DATA(arr))) { + /* Copy the element into the result */ + if (PyArray_CopyInto(result, arr) < 0) { + goto finish; + } + } + else { + PyErr_Format(PyExc_ValueError, + "fully NA array with skipna=True to " + "%s.reduce which has no identity", ufunc_name); + goto fail; + } + } } } @@ -2999,7 +3062,7 @@ PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out, * all the elements to reduce into 'result'. */ arr_view = initialize_reduce_result(self->identity, result, - axis_flags, arr); + axis_flags, arr, skipna, ufunc_name); if (arr_view == NULL) { goto fail; } diff --git a/numpy/core/tests/test_maskna.py b/numpy/core/tests/test_maskna.py index 5ad6f0990..61fb7918e 100644 --- a/numpy/core/tests/test_maskna.py +++ b/numpy/core/tests/test_maskna.py @@ -520,7 +520,7 @@ def test_ufunc_1D(): assert_(c.flags.maskna) #assert_equal(c, [0,2,4]) -def test_ufunc_reduce_1D(): +def test_ufunc_add_reduce_1D(): a = np.arange(3.0, maskna=True) b = np.array(0.5) c_orig = np.array(0.5) @@ -529,6 +529,13 @@ def test_ufunc_reduce_1D(): # Since 'a' has no NA values, this should work np.add.reduce(a, out=b) assert_equal(b, 3.0) + np.add.reduce(a, skipna=True, out=b) + assert_equal(b, 3.0) + + ret = np.add.reduce(a) + assert_equal(ret, 3.0) + ret = np.add.reduce(a, skipna=True) + assert_equal(ret, 3.0) # With an NA value, the reduce should throw with the non-NA output param a[1] = np.NA @@ -567,5 +574,36 @@ def test_ufunc_reduce_1D(): assert_(not np.isna(c)) 
assert_equal(c, 2.0) +def test_ufunc_maximum_reduce_1D(): + a_orig = np.array([0, 3, 2, 10, -1, 5, 7, -2]) + a = a_orig.view(maskna=True) + + # Straightforward reduce with no NAs + b = np.maximum.reduce(a) + assert_equal(b, 10) + + # Set the biggest value to NA + a[3] = np.NA + b = np.maximum.reduce(a) + assert_(np.isna(b)) + + # Skip the NA + b = np.maximum.reduce(a, skipna=True) + assert_(not b.flags.maskna) + assert_(not np.isna(b)) + assert_equal(b, 7) + + # Set the first value to NA + a[0] = np.NA + b = np.maximum.reduce(a, skipna=True) + assert_(not b.flags.maskna) + assert_(not np.isna(b)) + assert_equal(b, 7) + + # Set all the values to NA - should raise the same error as + # for an empty array + a[...] = np.NA + assert_raises(ValueError, np.maximum.reduce, a, skipna=True) + if __name__ == "__main__": run_module_suite() |