summary | refs | log | tree | commit | diff
path: root/numpy/core
diff options
context:
space:
mode:
author: Mark Wiebe <mwiebe@enthought.com>, 2011-08-04 11:01:29 -0500
committer: Charles Harris <charlesr.harris@gmail.com>, 2011-08-27 07:26:51 -0600
commit: ecadc64f7ce5a8a683a84893d8aeb326f7a80fb8 (patch)
tree: 22779f9ce2f5de06d7570ecbaeda65360d58c6f2 /numpy/core
parent: 8fff78d2bfe6faa0919b1c55b3c744e3be8b1e8e (diff)
download: numpy-ecadc64f7ce5a8a683a84893d8aeb326f7a80fb8.tar.gz
ENH: missingdata: Make reduction with skipna=True work better in some cases
In particular, this allows skipna=True reductions for ufuncs without an identity (unit), such as maximum, when the array has just one dimension.
Diffstat (limited to 'numpy/core')
-rw-r--r-- numpy/core/src/umath/ufunc_object.c 159
-rw-r--r-- numpy/core/tests/test_maskna.py 40
2 files changed, 150 insertions, 49 deletions
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index d1b76d070..96bf925ca 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -2676,8 +2676,13 @@ allocate_or_conform_reduce_result(PyArrayObject *arr, PyArrayObject *out,
*/
static PyArrayObject *
initialize_reduce_result(int identity, PyArrayObject *result,
- npy_bool *axis_flags, PyArrayObject *arr)
+ npy_bool *axis_flags, PyArrayObject *arr,
+ int skipna, char *ufunc_name)
{
+ npy_intp *strides, *shape, shape_orig[NPY_MAXDIMS], shape0;
+ PyArrayObject *arr_view;
+ int idim, ndim;
+
if (identity == PyUFunc_One) {
if (PyArray_FillWithOne(result) < 0) {
return NULL;
@@ -2693,17 +2698,69 @@ initialize_reduce_result(int identity, PyArrayObject *result,
return arr;
}
/*
+ * With skipna=True and where 'arr' has an NA mask,
+ * need to do some additional fiddling if there's no unit.
+ */
+ else if (skipna && PyArray_HASMASKNA(arr)) {
+ char *data, *maskna_data;
+ npy_intp *maskna_strides;
+
+ ndim = PyArray_NDIM(arr);
+
+ /*
+ * Currently only supporting one dimension in this case.
+ */
+ if (ndim != 1) {
+ PyErr_SetString(PyExc_ValueError,
+ "skipna=True with a non-identity reduction "
+ "and an array with ndim > 1 isn't implemented yet");
+ return NULL;
+ }
+
+ arr_view = (PyArrayObject *)PyArray_View(arr, NULL, &PyArray_Type);
+ if (arr_view == NULL) {
+ return NULL;
+ }
+
+ shape = PyArray_DIMS(arr_view);
+ shape0 = shape[0];
+ data = PyArray_DATA(arr_view);
+ strides = PyArray_STRIDES(arr_view);
+ maskna_data = PyArray_MASKNA_DATA(arr_view);
+ maskna_strides = PyArray_MASKNA_STRIDES(arr_view);
+
+ /* Shrink the array from the start until we find an exposed element */
+ while (shape0 > 0 &&
+ !NpyMaskValue_IsExposed((npy_mask)*maskna_data)) {
+ --shape0;
+ data += strides[0];
+ maskna_data += maskna_strides[0];
+ }
+
+ if (shape0 == 0) {
+ Py_DECREF(arr_view);
+ PyErr_Format(PyExc_ValueError,
+ "fully NA array with skipna=True to "
+ "%s.reduce which has no identity", ufunc_name);
+ return NULL;
+ }
+
+ /* With the first element exposed, fall through to the other code */
+ shape[0] = shape0;
+ ((PyArrayObject_fieldaccess *)arr_view)->data = data;
+ ((PyArrayObject_fieldaccess *)arr_view)->maskna_data = maskna_data;
+ }
+ /*
* If there is no identity, copy the first element along the
* reduction dimensions.
*/
else {
- npy_intp *strides, *shape, *shape_orig;
- PyArrayObject *arr_view;
- int idim, ndim = PyArray_NDIM(arr);
+ ndim = PyArray_NDIM(arr);
if (PyArray_SIZE(arr) == 0) {
- PyErr_SetString(PyExc_ValueError,
- "zero-size array to ufunc.reduce without identity");
+ PyErr_Format(PyExc_ValueError,
+ "zero-size array to %s.reduce which has no identity",
+ ufunc_name);
return NULL;
}
@@ -2719,45 +2776,45 @@ initialize_reduce_result(int identity, PyArrayObject *result,
if (arr_view == NULL) {
return NULL;
}
+ }
- /*
- * Adjust the shape to only look at the first element along
- * any of the reduction axes.
- */
- shape = PyArray_DIMS(arr_view);
- for (idim = 0; idim < ndim; ++idim) {
- if (axis_flags[idim]) {
- shape[idim] = 1;
- }
+ /*
+ * Adjust the shape to only look at the first element along
+ * any of the reduction axes.
+ */
+ shape = PyArray_DIMS(arr_view);
+ memcpy(shape_orig, shape, ndim * sizeof(npy_intp));
+ for (idim = 0; idim < ndim; ++idim) {
+ if (axis_flags[idim]) {
+ shape[idim] = 1;
}
+ }
- /* Copy the elements into the result to start */
- if (PyArray_CopyInto(result, arr_view) < 0) {
- Py_DECREF(arr_view);
- return NULL;
- }
+ /* Copy the elements into the result to start */
+ if (PyArray_CopyInto(result, arr_view) < 0) {
+ Py_DECREF(arr_view);
+ return NULL;
+ }
- /* Adjust the shape to only look at the remaining elements */
- shape_orig = PyArray_DIMS(arr);
- strides = PyArray_STRIDES(arr_view);
+ /* Adjust the shape to only look at the remaining elements */
+ strides = PyArray_STRIDES(arr_view);
+ for (idim = 0; idim < ndim; ++idim) {
+ if (axis_flags[idim]) {
+ shape[idim] = shape_orig[idim] - 1;
+ ((PyArrayObject_fieldaccess *)arr_view)->data += strides[idim];
+ }
+ }
+ if (PyArray_HASMASKNA(arr_view)) {
+ strides = PyArray_MASKNA_STRIDES(arr_view);
for (idim = 0; idim < ndim; ++idim) {
if (axis_flags[idim]) {
- shape[idim] = shape_orig[idim] - 1;
- ((PyArrayObject_fieldaccess *)arr_view)->data += strides[idim];
- }
- }
- if (PyArray_HASMASKNA(arr_view)) {
- strides = PyArray_MASKNA_STRIDES(arr_view);
- for (idim = 0; idim < ndim; ++idim) {
- if (axis_flags[idim]) {
- ((PyArrayObject_fieldaccess *)arr_view)->maskna_data +=
- strides[idim];
- }
+ ((PyArrayObject_fieldaccess *)arr_view)->maskna_data +=
+ strides[idim];
}
}
-
- return arr_view;
}
+
+ return arr_view;
}
/*
@@ -2971,17 +3028,6 @@ PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
}
else {
/*
- * If there's no identity, validate that there is no
- * reduction which is all NA values.
- */
- if (self->identity == PyUFunc_None) {
- PyErr_SetString(PyExc_ValueError,
- "skipna=True together with a non-identity reduction "
- "isn't implemented yet");
- goto fail;
- }
-
- /*
* If the result has a mask (i.e. from the out= parameter),
* Set it to all exposed.
*/
@@ -2990,6 +3036,23 @@ PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
goto fail;
}
}
+
+ /* Special case a one-value input */
+ if (PyArray_SIZE(arr) == 1) {
+ if (NpyMaskValue_IsExposed(
+ (npy_mask)*PyArray_MASKNA_DATA(arr))) {
+ /* Copy the element into the result */
+ if (PyArray_CopyInto(result, arr) < 0) {
+ goto finish;
+ }
+ }
+ else {
+ PyErr_Format(PyExc_ValueError,
+ "fully NA array with skipna=True to "
+ "%s.reduce which has no identity", ufunc_name);
+ goto fail;
+ }
+ }
}
}
@@ -2999,7 +3062,7 @@ PyUFunc_Reduce(PyUFuncObject *self, PyArrayObject *arr, PyArrayObject *out,
* all the elements to reduce into 'result'.
*/
arr_view = initialize_reduce_result(self->identity, result,
- axis_flags, arr);
+ axis_flags, arr, skipna, ufunc_name);
if (arr_view == NULL) {
goto fail;
}
diff --git a/numpy/core/tests/test_maskna.py b/numpy/core/tests/test_maskna.py
index 5ad6f0990..61fb7918e 100644
--- a/numpy/core/tests/test_maskna.py
+++ b/numpy/core/tests/test_maskna.py
@@ -520,7 +520,7 @@ def test_ufunc_1D():
assert_(c.flags.maskna)
#assert_equal(c, [0,2,4])
-def test_ufunc_reduce_1D():
+def test_ufunc_add_reduce_1D():
a = np.arange(3.0, maskna=True)
b = np.array(0.5)
c_orig = np.array(0.5)
@@ -529,6 +529,13 @@ def test_ufunc_reduce_1D():
# Since 'a' has no NA values, this should work
np.add.reduce(a, out=b)
assert_equal(b, 3.0)
+ np.add.reduce(a, skipna=True, out=b)
+ assert_equal(b, 3.0)
+
+ ret = np.add.reduce(a)
+ assert_equal(ret, 3.0)
+ ret = np.add.reduce(a, skipna=True)
+ assert_equal(ret, 3.0)
# With an NA value, the reduce should throw with the non-NA output param
a[1] = np.NA
@@ -567,5 +574,36 @@ def test_ufunc_reduce_1D():
assert_(not np.isna(c))
assert_equal(c, 2.0)
+def test_ufunc_maximum_reduce_1D():
+ a_orig = np.array([0, 3, 2, 10, -1, 5, 7, -2])
+ a = a_orig.view(maskna=True)
+
+ # Straightforward reduce with no NAs
+ b = np.maximum.reduce(a)
+ assert_equal(b, 10)
+
+ # Set the biggest value to NA
+ a[3] = np.NA
+ b = np.maximum.reduce(a)
+ assert_(np.isna(b))
+
+ # Skip the NA
+ b = np.maximum.reduce(a, skipna=True)
+ assert_(not b.flags.maskna)
+ assert_(not np.isna(b))
+ assert_equal(b, 7)
+
+ # Set the first value to NA
+ a[0] = np.NA
+ b = np.maximum.reduce(a, skipna=True)
+ assert_(not b.flags.maskna)
+ assert_(not np.isna(b))
+ assert_equal(b, 7)
+
+ # Set all the values to NA - should raise the same error as
+ # for an empty array
+ a[...] = np.NA
+ assert_raises(ValueError, np.maximum.reduce, a, skipna=True)
+
if __name__ == "__main__":
run_module_suite()