diff options
author | Mark Wiebe <mwiebe@enthought.com> | 2011-08-03 16:29:11 -0500 |
---|---|---|
committer | Charles Harris <charlesr.harris@gmail.com> | 2011-08-27 07:26:51 -0600 |
commit | fb2f8d2111f57b2affa49ec9d22cadf45f12de24 (patch) | |
tree | 1133a8a1700202746a93ec40ae7e668b6b82506e /numpy/core | |
parent | de1feee7ff45f7575e1465ee0b637aa31bba8b9a (diff) | |
download | numpy-fb2f8d2111f57b2affa49ec9d22cadf45f12de24.tar.gz |
ENH: missingdata: Fix an iterator MASKNA bug, fill in more missing stuff
Diffstat (limited to 'numpy/core')
-rw-r--r-- | numpy/core/src/multiarray/ctors.c | 225 |
-rw-r--r-- | numpy/core/src/multiarray/mapping.c | 2 |
-rw-r--r-- | numpy/core/src/multiarray/methods.c | 3 |
-rw-r--r-- | numpy/core/src/multiarray/na_mask.c | 4 |
-rw-r--r-- | numpy/core/src/multiarray/na_singleton.c | 13 |
-rw-r--r-- | numpy/core/src/multiarray/na_singleton.h | 3 |
-rw-r--r-- | numpy/core/src/multiarray/nditer_constr.c | 28 |
-rw-r--r-- | numpy/core/src/multiarray/shape.c | 10 |
-rw-r--r-- | numpy/core/tests/test_maskna.py | 51 |
9 files changed, 274 insertions, 65 deletions
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index 997ba861e..e121345b1 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -2820,23 +2820,28 @@ PyArray_EnsureAnyArray(PyObject *op) /* TODO: Put the order parameter in PyArray_CopyAnyInto and remove this */ NPY_NO_EXPORT int -PyArray_CopyAsFlat(PyArrayObject *dst, PyArrayObject *src, - NPY_ORDER order) +PyArray_CopyAsFlat(PyArrayObject *dst, PyArrayObject *src, NPY_ORDER order) { PyArray_StridedTransferFn *stransfer = NULL; + PyArray_MaskedStridedTransferFn *maskedstransfer = NULL; NpyAuxData *transferdata = NULL; + PyArray_StridedTransferFn *maskna_stransfer = NULL; + NpyAuxData *maskna_transferdata = NULL; NpyIter *dst_iter, *src_iter; NpyIter_IterNextFunc *dst_iternext, *src_iternext; char **dst_dataptr, **src_dataptr; npy_intp dst_stride, src_stride; + npy_intp maskna_src_stride = 0, maskna_dst_stride = 0; npy_intp *dst_countptr, *src_countptr; + npy_uint32 baseflags; char *dst_data, *src_data; + char *maskna_dst_data = NULL, *maskna_src_data = NULL; npy_intp dst_count, src_count, count; - npy_intp src_itemsize; + npy_intp src_itemsize, maskna_src_itemsize = 0; npy_intp dst_size, src_size; - int needs_api; + int needs_api, use_maskna = 0; NPY_BEGIN_THREADS_DEF; @@ -2871,26 +2876,54 @@ PyArray_CopyAsFlat(PyArrayObject *dst, PyArrayObject *src, return 0; } + baseflags = NPY_ITER_EXTERNAL_LOOP | + NPY_ITER_DONT_NEGATE_STRIDES | + NPY_ITER_REFS_OK; + + /* + * If 'src' has a mask, and 'dst' doesn't, need to validate that + * 'src' has everything exposed. Otherwise, the mask needs to + * be copied as well. 
+ */ + if (PyArray_HASMASKNA(src)) { + if (PyArray_HASMASKNA(dst)) { + use_maskna = 1; + baseflags |= NPY_ITER_USE_MASKNA; + } + else { + if (PyArray_ContainsNA(src)) { + PyErr_SetString(PyExc_ValueError, + "Cannot assign NA value to an array which " + "does not support NAs"); + return -1; + } + baseflags |= NPY_ITER_IGNORE_MASKNA; + } + } + /* + * If 'dst' has a mask but 'src' doesn't, set all of 'dst' + * to be exposed, then proceed without worrying about the mask. + */ + else if (PyArray_HASMASKNA(dst)) { + if (PyArray_AssignMaskNA(dst, 1) < 0) { + return -1; + } + baseflags |= NPY_ITER_IGNORE_MASKNA; + } /* * This copy is based on matching C-order traversals of src and dst. * By using two iterators, we can find maximal sub-chunks that * can be processed at once. */ - dst_iter = NpyIter_New(dst, NPY_ITER_WRITEONLY| - NPY_ITER_EXTERNAL_LOOP| - NPY_ITER_DONT_NEGATE_STRIDES| - NPY_ITER_REFS_OK, + dst_iter = NpyIter_New(dst, NPY_ITER_WRITEONLY | baseflags, order, NPY_NO_CASTING, NULL); if (dst_iter == NULL) { return -1; } - src_iter = NpyIter_New(src, NPY_ITER_READONLY| - NPY_ITER_EXTERNAL_LOOP| - NPY_ITER_DONT_NEGATE_STRIDES| - NPY_ITER_REFS_OK, + src_iter = NpyIter_New(src, NPY_ITER_READONLY | baseflags, order, NPY_NO_CASTING, NULL); @@ -2903,14 +2936,21 @@ PyArray_CopyAsFlat(PyArrayObject *dst, PyArrayObject *src, dst_iternext = NpyIter_GetIterNext(dst_iter, NULL); dst_dataptr = NpyIter_GetDataPtrArray(dst_iter); /* Since buffering is disabled, we can cache the stride */ - dst_stride = *NpyIter_GetInnerStrideArray(dst_iter); + dst_stride = NpyIter_GetInnerStrideArray(dst_iter)[0]; dst_countptr = NpyIter_GetInnerLoopSizePtr(dst_iter); src_iternext = NpyIter_GetIterNext(src_iter, NULL); src_dataptr = NpyIter_GetDataPtrArray(src_iter); /* Since buffering is disabled, we can cache the stride */ - src_stride = *NpyIter_GetInnerStrideArray(src_iter); + src_stride = NpyIter_GetInnerStrideArray(src_iter)[0]; src_countptr = NpyIter_GetInnerLoopSizePtr(src_iter); + 
src_itemsize = PyArray_DESCR(src)->elsize; + + if (use_maskna) { + maskna_src_stride = NpyIter_GetInnerStrideArray(src_iter)[1]; + maskna_dst_stride = NpyIter_GetInnerStrideArray(dst_iter)[1]; + maskna_src_itemsize = PyArray_MASKNA_DTYPE(src)->elsize; + } if (dst_iternext == NULL || src_iternext == NULL) { NpyIter_Deallocate(dst_iter); @@ -2918,8 +2958,6 @@ PyArray_CopyAsFlat(PyArrayObject *dst, PyArrayObject *src, return -1; } - src_itemsize = PyArray_DESCR(src)->elsize; - needs_api = NpyIter_IterationNeedsAPI(dst_iter) || NpyIter_IterationNeedsAPI(src_iter); @@ -2929,18 +2967,49 @@ PyArray_CopyAsFlat(PyArrayObject *dst, PyArrayObject *src, * we can pass them to this function to take advantage of * contiguous strides, etc. */ - if (PyArray_GetDTypeTransferFunction( - PyArray_ISALIGNED(src) && PyArray_ISALIGNED(dst), - src_stride, dst_stride, - PyArray_DESCR(src), PyArray_DESCR(dst), - 0, - &stransfer, &transferdata, - &needs_api) != NPY_SUCCEED) { - NpyIter_Deallocate(dst_iter); - NpyIter_Deallocate(src_iter); - return -1; + if (!use_maskna) { + if (PyArray_GetDTypeTransferFunction( + PyArray_ISALIGNED(src) && PyArray_ISALIGNED(dst), + src_stride, dst_stride, + PyArray_DESCR(src), PyArray_DESCR(dst), + 0, + &stransfer, &transferdata, + &needs_api) != NPY_SUCCEED) { + NpyIter_Deallocate(dst_iter); + NpyIter_Deallocate(src_iter); + return -1; + } } + else { + if (PyArray_GetMaskedDTypeTransferFunction( + PyArray_ISALIGNED(src) && PyArray_ISALIGNED(dst), + src_stride, + dst_stride, + maskna_src_stride, + PyArray_DESCR(src), + PyArray_DESCR(dst), + PyArray_MASKNA_DTYPE(src), + 0, + &maskedstransfer, &transferdata, + &needs_api) != NPY_SUCCEED) { + NpyIter_Deallocate(dst_iter); + NpyIter_Deallocate(src_iter); + return -1; + } + /* Also need a transfer function for the mask itself */ + if (PyArray_GetDTypeTransferFunction(1, + maskna_src_stride, maskna_dst_stride, + PyArray_MASKNA_DTYPE(src), PyArray_MASKNA_DTYPE(dst), + 0, + &maskna_stransfer, &maskna_transferdata, + 
&needs_api) != NPY_SUCCEED) { + NPY_AUXDATA_FREE(transferdata); + NpyIter_Deallocate(dst_iter); + NpyIter_Deallocate(src_iter); + return -1; + } + } if (!needs_api) { NPY_BEGIN_THREADS; @@ -2948,43 +3017,90 @@ PyArray_CopyAsFlat(PyArrayObject *dst, PyArrayObject *src, dst_count = *dst_countptr; src_count = *src_countptr; - dst_data = *dst_dataptr; - src_data = *src_dataptr; + dst_data = dst_dataptr[0]; + src_data = src_dataptr[0]; /* * The tests did not trigger this code, so added a new function * ndarray.setasflat to the Python exposure in order to test it. */ - for(;;) { - /* Transfer the biggest amount that fits both */ - count = (src_count < dst_count) ? src_count : dst_count; - stransfer(dst_data, dst_stride, - src_data, src_stride, - count, src_itemsize, transferdata); + if (!use_maskna) { + for(;;) { + /* Transfer the biggest amount that fits both */ + count = (src_count < dst_count) ? src_count : dst_count; + stransfer(dst_data, dst_stride, + src_data, src_stride, + count, src_itemsize, transferdata); + + /* If we exhausted the dst block, refresh it */ + if (dst_count == count) { + if (!dst_iternext(dst_iter)) { + break; + } + dst_count = *dst_countptr; + dst_data = dst_dataptr[0]; + } + else { + dst_count -= count; + dst_data += count*dst_stride; + } - /* If we exhausted the dst block, refresh it */ - if (dst_count == count) { - if (!dst_iternext(dst_iter)) { - break; + /* If we exhausted the src block, refresh it */ + if (src_count == count) { + if (!src_iternext(src_iter)) { + break; + } + src_count = *src_countptr; + src_data = src_dataptr[0]; + } + else { + src_count -= count; + src_data += count*src_stride; } - dst_count = *dst_countptr; - dst_data = *dst_dataptr; - } - else { - dst_count -= count; - dst_data += count*dst_stride; } + } + else { + maskna_src_data = src_dataptr[1]; + maskna_dst_data = dst_dataptr[1]; + for(;;) { + /* Transfer the biggest amount that fits both */ + count = (src_count < dst_count) ? 
src_count : dst_count; + maskedstransfer(dst_data, dst_stride, + src_data, src_stride, + (npy_mask *)maskna_src_data, maskna_src_stride, + count, src_itemsize, transferdata); + maskna_stransfer(maskna_dst_data, maskna_dst_stride, + maskna_src_data, maskna_src_stride, + count, maskna_src_itemsize, maskna_transferdata); + + /* If we exhausted the dst block, refresh it */ + if (dst_count == count) { + if (!dst_iternext(dst_iter)) { + break; + } + dst_count = *dst_countptr; + dst_data = dst_dataptr[0]; + maskna_dst_data = dst_dataptr[1]; + } + else { + dst_count -= count; + dst_data += count*dst_stride; + maskna_dst_data += count*maskna_dst_stride; + } - /* If we exhausted the src block, refresh it */ - if (src_count == count) { - if (!src_iternext(src_iter)) { - break; + /* If we exhausted the src block, refresh it */ + if (src_count == count) { + if (!src_iternext(src_iter)) { + break; + } + src_count = *src_countptr; + src_data = src_dataptr[0]; + maskna_src_data = src_dataptr[1]; + } + else { + src_count -= count; + src_data += count*src_stride; + maskna_src_data += count*maskna_src_stride; } - src_count = *src_countptr; - src_data = *src_dataptr; - } - else { - src_count -= count; - src_data += count*src_stride; } } @@ -2993,6 +3109,7 @@ PyArray_CopyAsFlat(PyArrayObject *dst, PyArrayObject *src, } NPY_AUXDATA_FREE(transferdata); + NPY_AUXDATA_FREE(maskna_transferdata); NpyIter_Deallocate(dst_iter); NpyIter_Deallocate(src_iter); diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c index 91802ee34..30f00576b 100644 --- a/numpy/core/src/multiarray/mapping.c +++ b/numpy/core/src/multiarray/mapping.c @@ -1758,7 +1758,7 @@ array_subscript_nice(PyArrayObject *self, PyObject *op) } else { return (PyObject *)NpyNA_FromDTypeAndMaskValue( - PyArray_DESCR(self), (npy_mask)*maskna_item); + PyArray_DESCR(self), (npy_mask)*maskna_item, 0); } } } diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c index 
74937379d..7f850f4dd 100644 --- a/numpy/core/src/multiarray/methods.c +++ b/numpy/core/src/multiarray/methods.c @@ -772,7 +772,8 @@ array_setasflat(PyArrayObject *self, PyObject *args) return NULL; } - arr = (PyArrayObject *)PyArray_FromAny(arr_in, NULL, 0, 0, 0, NULL); + arr = (PyArrayObject *)PyArray_FromAny(arr_in, NULL, + 0, 0, NPY_ARRAY_ALLOWNA, NULL); if (arr == NULL) { return NULL; } diff --git a/numpy/core/src/multiarray/na_mask.c b/numpy/core/src/multiarray/na_mask.c index 70b6e7838..e1644590f 100644 --- a/numpy/core/src/multiarray/na_mask.c +++ b/numpy/core/src/multiarray/na_mask.c @@ -126,7 +126,9 @@ fill_raw_byte_array(int ndim, npy_intp *shape, /*NUMPY_API * * Assigns the mask value to all the NA mask elements of - * the array. + * the array. This routine is intended to be used to mask + * all the elements of an array, or if you will also be assigning + * values to everything at the same time, to unmask all the elements. * * Returns 0 on success, -1 on failure. */ diff --git a/numpy/core/src/multiarray/na_singleton.c b/numpy/core/src/multiarray/na_singleton.c index dd59b5c69..2b4002202 100644 --- a/numpy/core/src/multiarray/na_singleton.c +++ b/numpy/core/src/multiarray/na_singleton.c @@ -439,7 +439,8 @@ NpyNA_FromObject(PyObject *obj, int suppress_error) * if 'maskvalue' represents an exposed mask. 
*/ NPY_NO_EXPORT NpyNA * -NpyNA_FromDTypeAndMaskValue(PyArray_Descr *dtype, npy_mask maskvalue) +NpyNA_FromDTypeAndMaskValue(PyArray_Descr *dtype, npy_mask maskvalue, + int multina) { NpyNA_fields *fna; @@ -462,7 +463,15 @@ NpyNA_FromDTypeAndMaskValue(PyArray_Descr *dtype, npy_mask maskvalue) fna->dtype = dtype; Py_XINCREF(fna->dtype); - fna->payload = NpyMaskValue_GetPayload(maskvalue); + if (multina) { + fna->payload = NpyMaskValue_GetPayload(maskvalue); + } + else if (NpyMaskValue_GetPayload(maskvalue) != 0) { + PyErr_SetString(PyExc_ValueError, + "Cannot convert mask value into NA without enabling multi-NA"); + Py_DECREF(fna); + return NULL; + } return (NpyNA *)fna; } diff --git a/numpy/core/src/multiarray/na_singleton.h b/numpy/core/src/multiarray/na_singleton.h index 069c457b6..0e3399c89 100644 --- a/numpy/core/src/multiarray/na_singleton.h +++ b/numpy/core/src/multiarray/na_singleton.h @@ -54,7 +54,8 @@ NpyNA_FromObject(PyObject *obj, int suppress_error); * if 'maskvalue' represents an exposed mask. */ NPY_NO_EXPORT NpyNA * -NpyNA_FromDTypeAndMaskValue(PyArray_Descr *dtype, npy_mask maskvalue); +NpyNA_FromDTypeAndMaskValue(PyArray_Descr *dtype, npy_mask maskvalue, + int multina); /* * Returns a mask value corresponding to the NA. 
diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c index f20e200e4..10a9d091a 100644 --- a/numpy/core/src/multiarray/nditer_constr.c +++ b/numpy/core/src/multiarray/nditer_constr.c @@ -3190,6 +3190,7 @@ npyiter_fill_maskna_axisdata(NpyIter *iter, int **op_axes) int idim, ndim = NIT_NDIM(iter); int iop, iop_maskna, nop = NIT_NOP(iter); int first_maskna_op = NIT_FIRST_MASKNA_OP(iter); + npy_int8 *perm; char *op_itflags = NIT_OPITFLAGS(iter); npy_int8 *maskna_indices = NIT_MASKNA_INDICES(iter); @@ -3201,6 +3202,7 @@ npyiter_fill_maskna_axisdata(NpyIter *iter, int **op_axes) axisdata = NIT_AXISDATA(iter); sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop); + perm = NIT_PERM(iter); if (itflags & NPY_ITFLAG_BUFFER) { bufferdata = NIT_BUFFERDATA(iter); @@ -3231,6 +3233,16 @@ npyiter_fill_maskna_axisdata(NpyIter *iter, int **op_axes) /* Process the maskna operands, filling in the axisdata */ for (idim = 0; idim < ndim; ++idim) { npy_intp *strides = NAD_STRIDES(axisdata); + npy_int8 p; + int idim_permuted; + + p = perm[idim]; + if (p < 0) { + idim_permuted = 1-p; + } + else { + idim_permuted = p; + } for (iop = first_maskna_op; iop < nop; ++iop) { /* @@ -3253,17 +3265,27 @@ npyiter_fill_maskna_axisdata(NpyIter *iter, int **op_axes) int i; if (op_axes == NULL || op_axes[iop_maskna] == NULL) { - i = PyArray_NDIM(op_cur) - idim - 1; + i = PyArray_NDIM(op_cur) - idim_permuted - 1; } else { - i = op_axes[iop_maskna][ndim-idim-1]; + i = op_axes[iop_maskna][ndim-idim_permuted-1]; } strides[iop] = PyArray_MASKNA_STRIDES(op_cur)[i]; + /* Reverse the axis if necessary */ + if (p < 0) { + op_dataptr[iop] += (NAD_SHAPE(axisdata)-1) * strides[iop]; + strides[iop] = -strides[iop]; + } } } - /* Initialize the mask data pointers */ + NIT_ADVANCE_AXISDATA(axisdata, 1); + } + + /* Initialize the mask data pointers */ + axisdata = NIT_AXISDATA(iter); + for (idim = 0; idim < ndim; ++idim) { memcpy(NAD_PTRS(axisdata) + first_maskna_op, 
op_dataptr + first_maskna_op, NPY_SIZEOF_INTP*(nop - first_maskna_op)); diff --git a/numpy/core/src/multiarray/shape.c b/numpy/core/src/multiarray/shape.c index 9a3c29ad9..2fe57a5a3 100644 --- a/numpy/core/src/multiarray/shape.c +++ b/numpy/core/src/multiarray/shape.c @@ -974,7 +974,6 @@ PyArray_Ravel(PyArrayObject *a, NPY_ORDER order) } return (PyObject *)ret; } - } return PyArray_Flatten(a, order); @@ -1001,10 +1000,17 @@ PyArray_Flatten(PyArrayObject *a, NPY_ORDER order) NULL, NULL, 0, (PyObject *)a); - if (ret == NULL) { return NULL; } + + if (PyArray_HASMASKNA(a)) { + if (PyArray_AllocateMaskNA(ret, 1, 0, 1) < 0) { + Py_DECREF(ret); + return NULL; + } + } + if (PyArray_CopyAsFlat(ret, a, order) < 0) { Py_DECREF(ret); return NULL; diff --git a/numpy/core/tests/test_maskna.py b/numpy/core/tests/test_maskna.py index d1d79c86a..1207760d1 100644 --- a/numpy/core/tests/test_maskna.py +++ b/numpy/core/tests/test_maskna.py @@ -79,6 +79,17 @@ def test_isna(): assert_equal(np.isna(np.NA(dtype='f4')), True) assert_equal(np.isna(np.NA(12,dtype='f4')), True) +def test_array_maskna_payload(): + # Single numbered index + a = np.zeros((2,), maskna=True) + a[0] = np.NA + assert_equal(a[0].payload, None) + + # Tuple index + a = np.zeros((2,3), maskna=True) + a[1,1] = np.NA + assert_equal(a[1,1].payload, None) + def test_array_maskna_isna_1D(): a = np.arange(10) @@ -266,12 +277,50 @@ def test_array_maskna_array_function_1D(): assert_(c.flags.ownmaskna) assert_(not (c is b_view)) +def test_array_maskna_setasflat(): + # Copy from a C to a F array with some NAs + a_orig = np.empty((2,3), order='C') + b_orig = np.empty((3,2), order='F') + a = a_orig.view(maskna=True) + b = b_orig.view(maskna=True) + a[...] = 1 + a[0,1] = np.NA + a[1,2] = np.NA + b[...] 
= 2 + b.setasflat(a) + assert_equal(np.isna(a), [[0,1,0],[0,0,1]]) + assert_equal(b_orig, [[1,2],[1,1],[1,2]]) + assert_equal(np.isna(b), [[0,1],[0,0],[0,1]]) + +def test_array_maskna_ravel(): + # From a C array + a = np.zeros((2,3), maskna=True, order='C') + a[0,1] = np.NA + a[1,2] = np.NA + + # Ravel in C order returns a view + b = np.ravel(a) + assert_(b.base is a) + assert_equal(b.shape, (6,)) + assert_(b.flags.maskna) + assert_(not b.flags.ownmaskna) + assert_equal(np.isna(b), [0,1,0,0,0,1]) + + # Ravel in F order returns a copy + b = np.ravel(a, order='F') + assert_(b.base is None) + assert_equal(b.shape, (6,)) + assert_(b.flags.maskna) + assert_(b.flags.ownmaskna) + assert_equal(np.isna(b), [0,0,1,0,0,1]) + def test_array_maskna_reshape(): # Simple reshape 1D -> 2D a = np.arange(6, maskna=True) a[1] = np.NA a[5] = np.NA + # Reshape from 1D to C order b = a.reshape(2,3) assert_(b.base is a) assert_equal(b.shape, (2,3)) @@ -279,7 +328,9 @@ def test_array_maskna_reshape(): assert_(not b.flags.ownmaskna) assert_equal(np.isna(b), [[0,1,0],[0,0,1]]) + # Reshape from 1D to F order b = a.reshape(2,3,order='F') + assert_(b.base is a) assert_equal(b.shape, (2,3)) assert_(b.flags.maskna) assert_(not b.flags.ownmaskna) |