diff options
author | Mark Wiebe <mwiebe@enthought.com> | 2011-06-20 19:09:29 -0500 |
---|---|---|
committer | Mark Wiebe <mwiebe@enthought.com> | 2011-06-20 19:09:29 -0500 |
commit | 46c91afd4eddc3d692c5280c2a22ad0783ef8219 (patch) | |
tree | 959e64deb287a3cbdc75b81f38884257907a0420 | |
parent | 9f99d86361baaf17aaf0b6d42f3835223c52c56e (diff) | |
download | numpy-46c91afd4eddc3d692c5280c2a22ad0783ef8219.tar.gz |
ENH: datetime-strings: Support casting to/from unicode arrays
-rw-r--r-- | numpy/core/src/multiarray/convert_datatype.c | 54 | ||||
-rw-r--r-- | numpy/core/src/multiarray/convert_datatype.h | 2 | ||||
-rw-r--r-- | numpy/core/src/multiarray/ctors.c | 2 | ||||
-rw-r--r-- | numpy/core/src/multiarray/datetime.c | 6 | ||||
-rw-r--r-- | numpy/core/src/multiarray/dtype_transfer.c | 168 | ||||
-rw-r--r-- | numpy/core/src/multiarray/methods.c | 6 | ||||
-rw-r--r-- | numpy/core/src/multiarray/nditer.c.src | 2 | ||||
-rw-r--r-- | numpy/core/tests/test_datetime.py | 16 |
8 files changed, 213 insertions, 43 deletions
diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c index 68029f17d..36121767a 100644 --- a/numpy/core/src/multiarray/convert_datatype.c +++ b/numpy/core/src/multiarray/convert_datatype.c @@ -34,7 +34,7 @@ PyArray_CastToType(PyArrayObject *arr, PyArray_Descr *dtype, int fortran) PyObject *out; /* If the requested dtype is flexible, adapt it */ - PyArray_AdaptFlexibleType((PyObject *)arr, PyArray_DESCR(arr), &dtype); + PyArray_AdaptFlexibleDType((PyObject *)arr, PyArray_DESCR(arr), &dtype); if (dtype == NULL) { return NULL; } @@ -123,14 +123,29 @@ PyArray_GetCastFunc(PyArray_Descr *descr, int type_num) * a new dtype that has been adapted based on the values in data_dtype * and data_obj. If the flex_dtype is not flexible, it leaves it as is. * + * Usually, if data_obj is not an array, dtype should be the result + * given by the PyArray_GetArrayParamsFromObject function. + * + * The data_obj may be NULL if just a dtype is known for the source. + * + * If *flex_dtype is NULL, returns immediately, without setting an + * exception. This basically assumes an error was already set previously. + * * The current flexible dtypes include NPY_STRING, NPY_UNICODE, NPY_VOID, * and NPY_DATETIME with generic units. 
*/ NPY_NO_EXPORT void -PyArray_AdaptFlexibleType(PyObject *data_obj, PyArray_Descr *data_dtype, +PyArray_AdaptFlexibleDType(PyObject *data_obj, PyArray_Descr *data_dtype, PyArray_Descr **flex_dtype) { PyArray_DatetimeMetaData *meta; + int flex_type_num; + + if (*flex_dtype == NULL) { + return; + } + + flex_type_num = (*flex_dtype)->type_num; /* Flexible types with expandable size */ if ((*flex_dtype)->elsize == 0) { @@ -140,8 +155,8 @@ PyArray_AdaptFlexibleType(PyObject *data_obj, PyArray_Descr *data_dtype, return; } - if (data_dtype->type_num == (*flex_dtype)->type_num || - (*flex_dtype)->type_num == NPY_VOID) { + if (data_dtype->type_num == flex_type_num || + flex_type_num == NPY_VOID) { (*flex_dtype)->elsize = data_dtype->elsize; } else { @@ -217,17 +232,17 @@ PyArray_AdaptFlexibleType(PyObject *data_obj, PyArray_Descr *data_dtype, break; } - if ((*flex_dtype)->type_num == NPY_STRING) { + if (flex_type_num == NPY_STRING) { (*flex_dtype)->elsize = size; } - else if ((*flex_dtype)->type_num == NPY_UNICODE) { + else if (flex_type_num == NPY_UNICODE) { (*flex_dtype)->elsize = size * 4; } } } /* Flexible type with generic time unit that adapts */ - else if ((*flex_dtype)->type_num == NPY_DATETIME || - (*flex_dtype)->type_num == NPY_TIMEDELTA) { + else if (flex_type_num == NPY_DATETIME || + flex_type_num == NPY_TIMEDELTA) { meta = get_datetime_metadata_from_dtype(*flex_dtype); if (meta == NULL) { Py_DECREF(*flex_dtype); @@ -236,11 +251,24 @@ PyArray_AdaptFlexibleType(PyObject *data_obj, PyArray_Descr *data_dtype, } if (meta->base == NPY_FR_GENERIC) { - /* Detect the unit from the input's data */ - PyArray_Descr *dtype = find_object_datetime_type(data_obj, - (*flex_dtype)->type_num); - Py_DECREF(*flex_dtype); - *flex_dtype = dtype; + if (data_dtype->type_num == NPY_DATETIME || + data_dtype->type_num == NPY_TIMEDELTA) { + meta = get_datetime_metadata_from_dtype(data_dtype); + if (meta == NULL) { + Py_DECREF(*flex_dtype); + *flex_dtype = NULL; + return; + } + + 
Py_DECREF(*flex_dtype); + *flex_dtype = create_datetime_dtype(flex_type_num, meta); + } + else if (data_obj != NULL) { + /* Detect the unit from the input's data */ + Py_DECREF(*flex_dtype); + *flex_dtype = find_object_datetime_type(data_obj, + flex_type_num); + } } } } diff --git a/numpy/core/src/multiarray/convert_datatype.h b/numpy/core/src/multiarray/convert_datatype.h index 5e0f31f50..71001b1c4 100644 --- a/numpy/core/src/multiarray/convert_datatype.h +++ b/numpy/core/src/multiarray/convert_datatype.h @@ -22,7 +22,7 @@ PyArray_ValidType(int type); * and NPY_DATETIME with generic units. */ NPY_NO_EXPORT void -PyArray_AdaptFlexibleType(PyObject *data_obj, PyArray_Descr *data_dtype, +PyArray_AdaptFlexibleDType(PyObject *data_obj, PyArray_Descr *data_dtype, PyArray_Descr **flex_dtype); #endif diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index cdcf6b77a..2c68e026b 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -1660,7 +1660,7 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth, /* If the requested dtype is flexible, adapt it */ if (newtype != NULL) { - PyArray_AdaptFlexibleType(op, + PyArray_AdaptFlexibleDType(op, (dtype == NULL) ? 
PyArray_DESCR(arr) : dtype, &newtype); } diff --git a/numpy/core/src/multiarray/datetime.c b/numpy/core/src/multiarray/datetime.c index 2d2370b08..cdf99546f 100644 --- a/numpy/core/src/multiarray/datetime.c +++ b/numpy/core/src/multiarray/datetime.c @@ -3574,8 +3574,10 @@ find_string_array_datetime64_type(PyObject *obj, } /* Use unsafe casting to allow unicode -> ascii string */ - iter = NpyIter_New((PyArrayObject *)obj, NPY_ITER_READONLY| - NPY_ITER_EXTERNAL_LOOP, + iter = NpyIter_New((PyArrayObject *)obj, + NPY_ITER_READONLY| + NPY_ITER_EXTERNAL_LOOP| + NPY_ITER_BUFFERED, NPY_KEEPORDER, NPY_UNSAFE_CASTING, string_dtype); Py_DECREF(string_dtype); diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c index e3431e450..445133ebf 100644 --- a/numpy/core/src/multiarray/dtype_transfer.c +++ b/numpy/core/src/multiarray/dtype_transfer.c @@ -20,6 +20,7 @@ #include "numpy/npy_3kcompat.h" +#include "convert_datatype.h" #include "_datetime.h" #include "datetime_strings.h" @@ -331,7 +332,8 @@ _strided_to_strided_contig_align_wrap(char *dst, npy_intp dst_stride, PyArray_StridedTransferFn *wrapped = d->wrapped, *tobuffer = d->tobuffer, *frombuffer = d->frombuffer; - npy_intp dst_itemsize = d->dst_itemsize; + npy_intp inner_src_itemsize = d->src_itemsize, + dst_itemsize = d->dst_itemsize; void *wrappeddata = d->wrappeddata, *todata = d->todata, *fromdata = d->fromdata; @@ -339,12 +341,12 @@ _strided_to_strided_contig_align_wrap(char *dst, npy_intp dst_stride, for(;;) { if (N > NPY_LOWLEVEL_BUFFER_BLOCKSIZE) { - tobuffer(bufferin, src_itemsize, src, src_stride, + tobuffer(bufferin, inner_src_itemsize, src, src_stride, NPY_LOWLEVEL_BUFFER_BLOCKSIZE, src_itemsize, todata); - wrapped(bufferout, dst_itemsize, bufferin, src_itemsize, + wrapped(bufferout, dst_itemsize, bufferin, inner_src_itemsize, NPY_LOWLEVEL_BUFFER_BLOCKSIZE, - src_itemsize, wrappeddata); + inner_src_itemsize, wrappeddata); frombuffer(dst, dst_stride, bufferout, 
dst_itemsize, NPY_LOWLEVEL_BUFFER_BLOCKSIZE, dst_itemsize, fromdata); @@ -353,10 +355,10 @@ _strided_to_strided_contig_align_wrap(char *dst, npy_intp dst_stride, dst += NPY_LOWLEVEL_BUFFER_BLOCKSIZE*dst_stride; } else { - tobuffer(bufferin, src_itemsize, src, src_stride, N, + tobuffer(bufferin, inner_src_itemsize, src, src_stride, N, src_itemsize, todata); - wrapped(bufferout, dst_itemsize, bufferin, src_itemsize, N, - src_itemsize, wrappeddata); + wrapped(bufferout, dst_itemsize, bufferin, inner_src_itemsize, N, + inner_src_itemsize, wrappeddata); frombuffer(dst, dst_stride, bufferout, dst_itemsize, N, dst_itemsize, fromdata); return; @@ -374,7 +376,8 @@ _strided_to_strided_contig_align_wrap_init_dest(char *dst, npy_intp dst_stride, PyArray_StridedTransferFn *wrapped = d->wrapped, *tobuffer = d->tobuffer, *frombuffer = d->frombuffer; - npy_intp dst_itemsize = d->dst_itemsize; + npy_intp inner_src_itemsize = d->src_itemsize, + dst_itemsize = d->dst_itemsize; void *wrappeddata = d->wrappeddata, *todata = d->todata, *fromdata = d->fromdata; @@ -382,13 +385,13 @@ _strided_to_strided_contig_align_wrap_init_dest(char *dst, npy_intp dst_stride, for(;;) { if (N > NPY_LOWLEVEL_BUFFER_BLOCKSIZE) { - tobuffer(bufferin, src_itemsize, src, src_stride, + tobuffer(bufferin, inner_src_itemsize, src, src_stride, NPY_LOWLEVEL_BUFFER_BLOCKSIZE, src_itemsize, todata); memset(bufferout, 0, dst_itemsize*NPY_LOWLEVEL_BUFFER_BLOCKSIZE); - wrapped(bufferout, dst_itemsize, bufferin, src_itemsize, + wrapped(bufferout, dst_itemsize, bufferin, inner_src_itemsize, NPY_LOWLEVEL_BUFFER_BLOCKSIZE, - src_itemsize, wrappeddata); + inner_src_itemsize, wrappeddata); frombuffer(dst, dst_stride, bufferout, dst_itemsize, NPY_LOWLEVEL_BUFFER_BLOCKSIZE, dst_itemsize, fromdata); @@ -397,11 +400,11 @@ _strided_to_strided_contig_align_wrap_init_dest(char *dst, npy_intp dst_stride, dst += NPY_LOWLEVEL_BUFFER_BLOCKSIZE*dst_stride; } else { - tobuffer(bufferin, src_itemsize, src, src_stride, N, + 
tobuffer(bufferin, inner_src_itemsize, src, src_stride, N, src_itemsize, todata); memset(bufferout, 0, dst_itemsize*N); - wrapped(bufferout, dst_itemsize, bufferin, src_itemsize, N, - src_itemsize, wrappeddata); + wrapped(bufferout, dst_itemsize, bufferin, inner_src_itemsize, N, + inner_src_itemsize, wrappeddata); frombuffer(dst, dst_stride, bufferout, dst_itemsize, N, dst_itemsize, fromdata); return; @@ -1069,9 +1072,69 @@ get_datetime_to_unicode_transfer_function(int aligned, npy_intp src_stride, npy_intp dst_stride, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, PyArray_StridedTransferFn **out_stransfer, - void **out_transferdata) + void **out_transferdata, + int *out_needs_api) { - + void *castdata = NULL, *todata = NULL, *fromdata = NULL; + PyArray_StridedTransferFn *caststransfer, *tobuffer, *frombuffer; + PyArray_Descr *str_dtype; + + /* Get an ASCII string data type, adapted to match the UNICODE one */ + str_dtype = PyArray_DescrFromType(NPY_STRING); + PyArray_AdaptFlexibleDType(NULL, dst_dtype, &str_dtype); + if (str_dtype == NULL) { + return NPY_FAIL; + } + + /* Get the copy/swap operation to dst */ + if (PyArray_GetDTypeCopySwapFn(aligned, + src_stride, src_dtype->elsize, + src_dtype, + &tobuffer, &todata) != NPY_SUCCEED) { + Py_DECREF(str_dtype); + return NPY_FAIL; + } + + /* Get the NBO datetime to string aligned contig function */ + if (get_nbo_datetime_to_string_transfer_function(1, + src_dtype->elsize, str_dtype->elsize, + src_dtype, str_dtype, + &caststransfer, &castdata) != NPY_SUCCEED) { + Py_DECREF(str_dtype); + PyArray_FreeStridedTransferData(todata); + return NPY_FAIL; + } + + /* Get the cast operation to dst */ + if (PyArray_GetDTypeTransferFunction(aligned, + str_dtype->elsize, dst_stride, + str_dtype, dst_dtype, + 0, + &frombuffer, &fromdata, + out_needs_api) != NPY_SUCCEED) { + Py_DECREF(str_dtype); + PyArray_FreeStridedTransferData(todata); + PyArray_FreeStridedTransferData(castdata); + return NPY_FAIL; + } + + /* Wrap it all up in a 
new transfer function + data */ + if (wrap_aligned_contig_transfer_function( + src_dtype->elsize, str_dtype->elsize, + tobuffer, todata, + frombuffer, fromdata, + caststransfer, castdata, + PyDataType_FLAGCHK(str_dtype, NPY_NEEDS_INIT), + out_stransfer, out_transferdata) != NPY_SUCCEED) { + PyArray_FreeStridedTransferData(castdata); + PyArray_FreeStridedTransferData(todata); + PyArray_FreeStridedTransferData(fromdata); + return NPY_FAIL; + } + + Py_DECREF(str_dtype); + + return NPY_SUCCEED; } static int @@ -1131,8 +1194,70 @@ get_unicode_to_datetime_transfer_function(int aligned, npy_intp src_stride, npy_intp dst_stride, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, PyArray_StridedTransferFn **out_stransfer, - void **out_transferdata) + void **out_transferdata, + int *out_needs_api) { + void *castdata = NULL, *todata = NULL, *fromdata = NULL; + PyArray_StridedTransferFn *caststransfer, *tobuffer, *frombuffer; + PyArray_Descr *str_dtype; + + /* Get an ASCII string data type, adapted to match the UNICODE one */ + str_dtype = PyArray_DescrFromType(NPY_STRING); + PyArray_AdaptFlexibleDType(NULL, src_dtype, &str_dtype); + if (str_dtype == NULL) { + return NPY_FAIL; + } + + /* Get the cast operation from src */ + if (PyArray_GetDTypeTransferFunction(aligned, + src_stride, str_dtype->elsize, + src_dtype, str_dtype, + 0, + &tobuffer, &todata, + out_needs_api) != NPY_SUCCEED) { + Py_DECREF(str_dtype); + return NPY_FAIL; + } + + /* Get the string to NBO datetime aligned contig function */ + if (get_nbo_string_to_datetime_transfer_function(1, + str_dtype->elsize, dst_dtype->elsize, + str_dtype, dst_dtype, + &caststransfer, &castdata) != NPY_SUCCEED) { + Py_DECREF(str_dtype); + PyArray_FreeStridedTransferData(todata); + return NPY_FAIL; + } + + /* Get the copy/swap operation to dst */ + if (PyArray_GetDTypeCopySwapFn(aligned, + dst_dtype->elsize, dst_stride, + dst_dtype, + &frombuffer, &fromdata) != NPY_SUCCEED) { + Py_DECREF(str_dtype); + 
PyArray_FreeStridedTransferData(todata); + PyArray_FreeStridedTransferData(castdata); + return NPY_FAIL; + } + + /* Wrap it all up in a new transfer function + data */ + if (wrap_aligned_contig_transfer_function( + str_dtype->elsize, dst_dtype->elsize, + tobuffer, todata, + frombuffer, fromdata, + caststransfer, castdata, + PyDataType_FLAGCHK(dst_dtype, NPY_NEEDS_INIT), + out_stransfer, out_transferdata) != NPY_SUCCEED) { + Py_DECREF(str_dtype); + PyArray_FreeStridedTransferData(castdata); + PyArray_FreeStridedTransferData(todata); + PyArray_FreeStridedTransferData(fromdata); + return NPY_FAIL; + } + + Py_DECREF(str_dtype); + + return NPY_SUCCEED; } static int @@ -1195,12 +1320,12 @@ get_nbo_cast_transfer_function(int aligned, out_stransfer, out_transferdata); case NPY_UNICODE: - *out_needs_api = 1; return get_datetime_to_unicode_transfer_function( aligned, src_stride, dst_stride, src_dtype, dst_dtype, - out_stransfer, out_transferdata); + out_stransfer, out_transferdata, + out_needs_api); } } else if (dst_dtype->type_num == NPY_DATETIME) { @@ -1215,12 +1340,12 @@ get_nbo_cast_transfer_function(int aligned, out_stransfer, out_transferdata); case NPY_UNICODE: - *out_needs_api = 1; return get_unicode_to_datetime_transfer_function( aligned, src_stride, dst_stride, src_dtype, dst_dtype, - out_stransfer, out_transferdata); + out_stransfer, out_transferdata, + out_needs_api); } } } @@ -3328,6 +3453,7 @@ PyArray_GetDTypeTransferFunction(int aligned, PyTypeNum_ISNUMBER(dst_type_num) && PyArray_ISNBO(src_dtype->byteorder) && PyArray_ISNBO(dst_dtype->byteorder)) { + if (PyArray_EquivTypenums(src_type_num, dst_type_num)) { *out_stransfer = PyArray_GetStridedCopyFn(aligned, src_stride, dst_stride, diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c index af53df435..2cd4487ac 100644 --- a/numpy/core/src/multiarray/methods.c +++ b/numpy/core/src/multiarray/methods.c @@ -833,19 +833,19 @@ array_astype(PyArrayObject *self, PyObject *args, 
PyObject *kwds) PyArrayObject *ret; /* If the requested dtype is flexible, adapt it */ - PyArray_AdaptFlexibleType((PyObject *)self, PyArray_DESCR(self), + PyArray_AdaptFlexibleDType((PyObject *)self, PyArray_DESCR(self), &dtype); if (dtype == NULL) { return NULL; } - + /* This steals the reference to dtype, so no DECREF of dtype */ ret = (PyArrayObject *)PyArray_NewLikeArray( self, order, dtype, subok); - if (ret == NULL) { return NULL; } + if (PyArray_CopyInto(ret, self) < 0) { Py_DECREF(ret); return NULL; diff --git a/numpy/core/src/multiarray/nditer.c.src b/numpy/core/src/multiarray/nditer.c.src index d8de7b5c5..dfc0baa6f 100644 --- a/numpy/core/src/multiarray/nditer.c.src +++ b/numpy/core/src/multiarray/nditer.c.src @@ -3146,7 +3146,7 @@ npyiter_prepare_one_operand(PyArrayObject **op, /* We just have a borrowed reference to op_request_dtype */ Py_INCREF(op_request_dtype); /* If the requested dtype is flexible, adapt it */ - PyArray_AdaptFlexibleType((PyObject *)(*op), PyArray_DESCR(*op), + PyArray_AdaptFlexibleDType((PyObject *)(*op), PyArray_DESCR(*op), &op_request_dtype); if (op_request_dtype == NULL) { return 0; diff --git a/numpy/core/tests/test_datetime.py b/numpy/core/tests/test_datetime.py index a6dca9714..f97592370 100644 --- a/numpy/core/tests/test_datetime.py +++ b/numpy/core/tests/test_datetime.py @@ -446,7 +446,7 @@ class TestDateTime(TestCase): def test_datetime_string_conversion(self): a = ['2011-03-16', '1920-01-01', '2013-05-19'] - str_a = np.array(a, dtype='S0') + str_a = np.array(a, dtype='S') dt_a = np.array(a, dtype='M') str_b = np.empty_like(str_a) dt_b = np.empty_like(dt_a) @@ -461,6 +461,20 @@ class TestDateTime(TestCase): str_b[...] = dt_a assert_equal(str_a, str_b) + # Convert the 'S' to 'U' + str_a = str_a.astype('U') + str_b = str_b.astype('U') + + # Unicode to datetime + assert_equal(dt_a, str_a.astype('M')) + assert_equal(dt_a.dtype, str_a.astype('M').dtype) + dt_b[...] 
= str_a + assert_equal(dt_a, dt_b) + # Datetime to unicode + assert_equal(str_a, dt_a.astype('U')) + str_b[...] = dt_a + assert_equal(str_a, str_b) + def test_pickle(self): # Check that pickle roundtripping works dt = np.dtype('M8[7D]') |