summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Mark Wiebe <mwiebe@enthought.com> 2011-06-20 19:09:29 -0500
committer Mark Wiebe <mwiebe@enthought.com> 2011-06-20 19:09:29 -0500
commit 46c91afd4eddc3d692c5280c2a22ad0783ef8219 (patch)
tree 959e64deb287a3cbdc75b81f38884257907a0420
parent 9f99d86361baaf17aaf0b6d42f3835223c52c56e (diff)
download numpy-46c91afd4eddc3d692c5280c2a22ad0783ef8219.tar.gz
ENH: datetime-strings: Support casting to/from unicode arrays
-rw-r--r-- numpy/core/src/multiarray/convert_datatype.c 54
-rw-r--r-- numpy/core/src/multiarray/convert_datatype.h 2
-rw-r--r-- numpy/core/src/multiarray/ctors.c 2
-rw-r--r-- numpy/core/src/multiarray/datetime.c 6
-rw-r--r-- numpy/core/src/multiarray/dtype_transfer.c 168
-rw-r--r-- numpy/core/src/multiarray/methods.c 6
-rw-r--r-- numpy/core/src/multiarray/nditer.c.src 2
-rw-r--r-- numpy/core/tests/test_datetime.py 16
8 files changed, 213 insertions, 43 deletions
diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c
index 68029f17d..36121767a 100644
--- a/numpy/core/src/multiarray/convert_datatype.c
+++ b/numpy/core/src/multiarray/convert_datatype.c
@@ -34,7 +34,7 @@ PyArray_CastToType(PyArrayObject *arr, PyArray_Descr *dtype, int fortran)
PyObject *out;
/* If the requested dtype is flexible, adapt it */
- PyArray_AdaptFlexibleType((PyObject *)arr, PyArray_DESCR(arr), &dtype);
+ PyArray_AdaptFlexibleDType((PyObject *)arr, PyArray_DESCR(arr), &dtype);
if (dtype == NULL) {
return NULL;
}
@@ -123,14 +123,29 @@ PyArray_GetCastFunc(PyArray_Descr *descr, int type_num)
* a new dtype that has been adapted based on the values in data_dtype
* and data_obj. If the flex_dtype is not flexible, it leaves it as is.
*
+ * Usually, if data_obj is not an array, data_dtype should be the result
+ * given by the PyArray_GetArrayParamsFromObject function.
+ *
+ * The data_obj may be NULL if just a dtype is known for the source.
+ *
+ * If *flex_dtype is NULL, returns immediately, without setting an
+ * exception. This basically assumes an error was already set previously.
+ *
* The current flexible dtypes include NPY_STRING, NPY_UNICODE, NPY_VOID,
* and NPY_DATETIME with generic units.
*/
NPY_NO_EXPORT void
-PyArray_AdaptFlexibleType(PyObject *data_obj, PyArray_Descr *data_dtype,
+PyArray_AdaptFlexibleDType(PyObject *data_obj, PyArray_Descr *data_dtype,
PyArray_Descr **flex_dtype)
{
PyArray_DatetimeMetaData *meta;
+ int flex_type_num;
+
+ if (*flex_dtype == NULL) {
+ return;
+ }
+
+ flex_type_num = (*flex_dtype)->type_num;
/* Flexible types with expandable size */
if ((*flex_dtype)->elsize == 0) {
@@ -140,8 +155,8 @@ PyArray_AdaptFlexibleType(PyObject *data_obj, PyArray_Descr *data_dtype,
return;
}
- if (data_dtype->type_num == (*flex_dtype)->type_num ||
- (*flex_dtype)->type_num == NPY_VOID) {
+ if (data_dtype->type_num == flex_type_num ||
+ flex_type_num == NPY_VOID) {
(*flex_dtype)->elsize = data_dtype->elsize;
}
else {
@@ -217,17 +232,17 @@ PyArray_AdaptFlexibleType(PyObject *data_obj, PyArray_Descr *data_dtype,
break;
}
- if ((*flex_dtype)->type_num == NPY_STRING) {
+ if (flex_type_num == NPY_STRING) {
(*flex_dtype)->elsize = size;
}
- else if ((*flex_dtype)->type_num == NPY_UNICODE) {
+ else if (flex_type_num == NPY_UNICODE) {
(*flex_dtype)->elsize = size * 4;
}
}
}
/* Flexible type with generic time unit that adapts */
- else if ((*flex_dtype)->type_num == NPY_DATETIME ||
- (*flex_dtype)->type_num == NPY_TIMEDELTA) {
+ else if (flex_type_num == NPY_DATETIME ||
+ flex_type_num == NPY_TIMEDELTA) {
meta = get_datetime_metadata_from_dtype(*flex_dtype);
if (meta == NULL) {
Py_DECREF(*flex_dtype);
@@ -236,11 +251,24 @@ PyArray_AdaptFlexibleType(PyObject *data_obj, PyArray_Descr *data_dtype,
}
if (meta->base == NPY_FR_GENERIC) {
- /* Detect the unit from the input's data */
- PyArray_Descr *dtype = find_object_datetime_type(data_obj,
- (*flex_dtype)->type_num);
- Py_DECREF(*flex_dtype);
- *flex_dtype = dtype;
+ if (data_dtype->type_num == NPY_DATETIME ||
+ data_dtype->type_num == NPY_TIMEDELTA) {
+ meta = get_datetime_metadata_from_dtype(data_dtype);
+ if (meta == NULL) {
+ Py_DECREF(*flex_dtype);
+ *flex_dtype = NULL;
+ return;
+ }
+
+ Py_DECREF(*flex_dtype);
+ *flex_dtype = create_datetime_dtype(flex_type_num, meta);
+ }
+ else if (data_obj != NULL) {
+ /* Detect the unit from the input's data */
+ Py_DECREF(*flex_dtype);
+ *flex_dtype = find_object_datetime_type(data_obj,
+ flex_type_num);
+ }
}
}
}
diff --git a/numpy/core/src/multiarray/convert_datatype.h b/numpy/core/src/multiarray/convert_datatype.h
index 5e0f31f50..71001b1c4 100644
--- a/numpy/core/src/multiarray/convert_datatype.h
+++ b/numpy/core/src/multiarray/convert_datatype.h
@@ -22,7 +22,7 @@ PyArray_ValidType(int type);
* and NPY_DATETIME with generic units.
*/
NPY_NO_EXPORT void
-PyArray_AdaptFlexibleType(PyObject *data_obj, PyArray_Descr *data_dtype,
+PyArray_AdaptFlexibleDType(PyObject *data_obj, PyArray_Descr *data_dtype,
PyArray_Descr **flex_dtype);
#endif
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index cdcf6b77a..2c68e026b 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -1660,7 +1660,7 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth,
/* If the requested dtype is flexible, adapt it */
if (newtype != NULL) {
- PyArray_AdaptFlexibleType(op,
+ PyArray_AdaptFlexibleDType(op,
(dtype == NULL) ? PyArray_DESCR(arr) : dtype,
&newtype);
}
diff --git a/numpy/core/src/multiarray/datetime.c b/numpy/core/src/multiarray/datetime.c
index 2d2370b08..cdf99546f 100644
--- a/numpy/core/src/multiarray/datetime.c
+++ b/numpy/core/src/multiarray/datetime.c
@@ -3574,8 +3574,10 @@ find_string_array_datetime64_type(PyObject *obj,
}
/* Use unsafe casting to allow unicode -> ascii string */
- iter = NpyIter_New((PyArrayObject *)obj, NPY_ITER_READONLY|
- NPY_ITER_EXTERNAL_LOOP,
+ iter = NpyIter_New((PyArrayObject *)obj,
+ NPY_ITER_READONLY|
+ NPY_ITER_EXTERNAL_LOOP|
+ NPY_ITER_BUFFERED,
NPY_KEEPORDER, NPY_UNSAFE_CASTING,
string_dtype);
Py_DECREF(string_dtype);
diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c
index e3431e450..445133ebf 100644
--- a/numpy/core/src/multiarray/dtype_transfer.c
+++ b/numpy/core/src/multiarray/dtype_transfer.c
@@ -20,6 +20,7 @@
#include "numpy/npy_3kcompat.h"
+#include "convert_datatype.h"
#include "_datetime.h"
#include "datetime_strings.h"
@@ -331,7 +332,8 @@ _strided_to_strided_contig_align_wrap(char *dst, npy_intp dst_stride,
PyArray_StridedTransferFn *wrapped = d->wrapped,
*tobuffer = d->tobuffer,
*frombuffer = d->frombuffer;
- npy_intp dst_itemsize = d->dst_itemsize;
+ npy_intp inner_src_itemsize = d->src_itemsize,
+ dst_itemsize = d->dst_itemsize;
void *wrappeddata = d->wrappeddata,
*todata = d->todata,
*fromdata = d->fromdata;
@@ -339,12 +341,12 @@ _strided_to_strided_contig_align_wrap(char *dst, npy_intp dst_stride,
for(;;) {
if (N > NPY_LOWLEVEL_BUFFER_BLOCKSIZE) {
- tobuffer(bufferin, src_itemsize, src, src_stride,
+ tobuffer(bufferin, inner_src_itemsize, src, src_stride,
NPY_LOWLEVEL_BUFFER_BLOCKSIZE,
src_itemsize, todata);
- wrapped(bufferout, dst_itemsize, bufferin, src_itemsize,
+ wrapped(bufferout, dst_itemsize, bufferin, inner_src_itemsize,
NPY_LOWLEVEL_BUFFER_BLOCKSIZE,
- src_itemsize, wrappeddata);
+ inner_src_itemsize, wrappeddata);
frombuffer(dst, dst_stride, bufferout, dst_itemsize,
NPY_LOWLEVEL_BUFFER_BLOCKSIZE,
dst_itemsize, fromdata);
@@ -353,10 +355,10 @@ _strided_to_strided_contig_align_wrap(char *dst, npy_intp dst_stride,
dst += NPY_LOWLEVEL_BUFFER_BLOCKSIZE*dst_stride;
}
else {
- tobuffer(bufferin, src_itemsize, src, src_stride, N,
+ tobuffer(bufferin, inner_src_itemsize, src, src_stride, N,
src_itemsize, todata);
- wrapped(bufferout, dst_itemsize, bufferin, src_itemsize, N,
- src_itemsize, wrappeddata);
+ wrapped(bufferout, dst_itemsize, bufferin, inner_src_itemsize, N,
+ inner_src_itemsize, wrappeddata);
frombuffer(dst, dst_stride, bufferout, dst_itemsize, N,
dst_itemsize, fromdata);
return;
@@ -374,7 +376,8 @@ _strided_to_strided_contig_align_wrap_init_dest(char *dst, npy_intp dst_stride,
PyArray_StridedTransferFn *wrapped = d->wrapped,
*tobuffer = d->tobuffer,
*frombuffer = d->frombuffer;
- npy_intp dst_itemsize = d->dst_itemsize;
+ npy_intp inner_src_itemsize = d->src_itemsize,
+ dst_itemsize = d->dst_itemsize;
void *wrappeddata = d->wrappeddata,
*todata = d->todata,
*fromdata = d->fromdata;
@@ -382,13 +385,13 @@ _strided_to_strided_contig_align_wrap_init_dest(char *dst, npy_intp dst_stride,
for(;;) {
if (N > NPY_LOWLEVEL_BUFFER_BLOCKSIZE) {
- tobuffer(bufferin, src_itemsize, src, src_stride,
+ tobuffer(bufferin, inner_src_itemsize, src, src_stride,
NPY_LOWLEVEL_BUFFER_BLOCKSIZE,
src_itemsize, todata);
memset(bufferout, 0, dst_itemsize*NPY_LOWLEVEL_BUFFER_BLOCKSIZE);
- wrapped(bufferout, dst_itemsize, bufferin, src_itemsize,
+ wrapped(bufferout, dst_itemsize, bufferin, inner_src_itemsize,
NPY_LOWLEVEL_BUFFER_BLOCKSIZE,
- src_itemsize, wrappeddata);
+ inner_src_itemsize, wrappeddata);
frombuffer(dst, dst_stride, bufferout, dst_itemsize,
NPY_LOWLEVEL_BUFFER_BLOCKSIZE,
dst_itemsize, fromdata);
@@ -397,11 +400,11 @@ _strided_to_strided_contig_align_wrap_init_dest(char *dst, npy_intp dst_stride,
dst += NPY_LOWLEVEL_BUFFER_BLOCKSIZE*dst_stride;
}
else {
- tobuffer(bufferin, src_itemsize, src, src_stride, N,
+ tobuffer(bufferin, inner_src_itemsize, src, src_stride, N,
src_itemsize, todata);
memset(bufferout, 0, dst_itemsize*N);
- wrapped(bufferout, dst_itemsize, bufferin, src_itemsize, N,
- src_itemsize, wrappeddata);
+ wrapped(bufferout, dst_itemsize, bufferin, inner_src_itemsize, N,
+ inner_src_itemsize, wrappeddata);
frombuffer(dst, dst_stride, bufferout, dst_itemsize, N,
dst_itemsize, fromdata);
return;
@@ -1069,9 +1072,69 @@ get_datetime_to_unicode_transfer_function(int aligned,
npy_intp src_stride, npy_intp dst_stride,
PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype,
PyArray_StridedTransferFn **out_stransfer,
- void **out_transferdata)
+ void **out_transferdata,
+ int *out_needs_api)
{
-
+ void *castdata = NULL, *todata = NULL, *fromdata = NULL;
+ PyArray_StridedTransferFn *caststransfer, *tobuffer, *frombuffer;
+ PyArray_Descr *str_dtype;
+
+ /* Get an ASCII string data type, adapted to match the UNICODE one */
+ str_dtype = PyArray_DescrFromType(NPY_STRING);
+ PyArray_AdaptFlexibleDType(NULL, dst_dtype, &str_dtype);
+ if (str_dtype == NULL) {
+ return NPY_FAIL;
+ }
+
+ /* Get the copy/swap operation from src */
+ if (PyArray_GetDTypeCopySwapFn(aligned,
+ src_stride, src_dtype->elsize,
+ src_dtype,
+ &tobuffer, &todata) != NPY_SUCCEED) {
+ Py_DECREF(str_dtype);
+ return NPY_FAIL;
+ }
+
+ /* Get the NBO datetime to string aligned contig function */
+ if (get_nbo_datetime_to_string_transfer_function(1,
+ src_dtype->elsize, str_dtype->elsize,
+ src_dtype, str_dtype,
+ &caststransfer, &castdata) != NPY_SUCCEED) {
+ Py_DECREF(str_dtype);
+ PyArray_FreeStridedTransferData(todata);
+ return NPY_FAIL;
+ }
+
+ /* Get the cast operation to dst */
+ if (PyArray_GetDTypeTransferFunction(aligned,
+ str_dtype->elsize, dst_stride,
+ str_dtype, dst_dtype,
+ 0,
+ &frombuffer, &fromdata,
+ out_needs_api) != NPY_SUCCEED) {
+ Py_DECREF(str_dtype);
+ PyArray_FreeStridedTransferData(todata);
+ PyArray_FreeStridedTransferData(castdata);
+ return NPY_FAIL;
+ }
+
+ /* Wrap it all up in a new transfer function + data */
+ if (wrap_aligned_contig_transfer_function(
+ src_dtype->elsize, str_dtype->elsize,
+ tobuffer, todata,
+ frombuffer, fromdata,
+ caststransfer, castdata,
+ PyDataType_FLAGCHK(str_dtype, NPY_NEEDS_INIT),
+ out_stransfer, out_transferdata) != NPY_SUCCEED) {
+ PyArray_FreeStridedTransferData(castdata);
+ PyArray_FreeStridedTransferData(todata);
+ PyArray_FreeStridedTransferData(fromdata);
+ return NPY_FAIL;
+ }
+
+ Py_DECREF(str_dtype);
+
+ return NPY_SUCCEED;
}
static int
@@ -1131,8 +1194,70 @@ get_unicode_to_datetime_transfer_function(int aligned,
npy_intp src_stride, npy_intp dst_stride,
PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype,
PyArray_StridedTransferFn **out_stransfer,
- void **out_transferdata)
+ void **out_transferdata,
+ int *out_needs_api)
{
+ void *castdata = NULL, *todata = NULL, *fromdata = NULL;
+ PyArray_StridedTransferFn *caststransfer, *tobuffer, *frombuffer;
+ PyArray_Descr *str_dtype;
+
+ /* Get an ASCII string data type, adapted to match the UNICODE one */
+ str_dtype = PyArray_DescrFromType(NPY_STRING);
+ PyArray_AdaptFlexibleDType(NULL, src_dtype, &str_dtype);
+ if (str_dtype == NULL) {
+ return NPY_FAIL;
+ }
+
+ /* Get the cast operation from src */
+ if (PyArray_GetDTypeTransferFunction(aligned,
+ src_stride, str_dtype->elsize,
+ src_dtype, str_dtype,
+ 0,
+ &tobuffer, &todata,
+ out_needs_api) != NPY_SUCCEED) {
+ Py_DECREF(str_dtype);
+ return NPY_FAIL;
+ }
+
+ /* Get the string to NBO datetime aligned contig function */
+ if (get_nbo_string_to_datetime_transfer_function(1,
+ str_dtype->elsize, dst_dtype->elsize,
+ str_dtype, dst_dtype,
+ &caststransfer, &castdata) != NPY_SUCCEED) {
+ Py_DECREF(str_dtype);
+ PyArray_FreeStridedTransferData(todata);
+ return NPY_FAIL;
+ }
+
+ /* Get the copy/swap operation to dst */
+ if (PyArray_GetDTypeCopySwapFn(aligned,
+ dst_dtype->elsize, dst_stride,
+ dst_dtype,
+ &frombuffer, &fromdata) != NPY_SUCCEED) {
+ Py_DECREF(str_dtype);
+ PyArray_FreeStridedTransferData(todata);
+ PyArray_FreeStridedTransferData(castdata);
+ return NPY_FAIL;
+ }
+
+ /* Wrap it all up in a new transfer function + data */
+ if (wrap_aligned_contig_transfer_function(
+ str_dtype->elsize, dst_dtype->elsize,
+ tobuffer, todata,
+ frombuffer, fromdata,
+ caststransfer, castdata,
+ PyDataType_FLAGCHK(dst_dtype, NPY_NEEDS_INIT),
+ out_stransfer, out_transferdata) != NPY_SUCCEED) {
+ Py_DECREF(str_dtype);
+ PyArray_FreeStridedTransferData(castdata);
+ PyArray_FreeStridedTransferData(todata);
+ PyArray_FreeStridedTransferData(fromdata);
+ return NPY_FAIL;
+ }
+
+ Py_DECREF(str_dtype);
+
+ return NPY_SUCCEED;
}
static int
@@ -1195,12 +1320,12 @@ get_nbo_cast_transfer_function(int aligned,
out_stransfer, out_transferdata);
case NPY_UNICODE:
- *out_needs_api = 1;
return get_datetime_to_unicode_transfer_function(
aligned,
src_stride, dst_stride,
src_dtype, dst_dtype,
- out_stransfer, out_transferdata);
+ out_stransfer, out_transferdata,
+ out_needs_api);
}
}
else if (dst_dtype->type_num == NPY_DATETIME) {
@@ -1215,12 +1340,12 @@ get_nbo_cast_transfer_function(int aligned,
out_stransfer, out_transferdata);
case NPY_UNICODE:
- *out_needs_api = 1;
return get_unicode_to_datetime_transfer_function(
aligned,
src_stride, dst_stride,
src_dtype, dst_dtype,
- out_stransfer, out_transferdata);
+ out_stransfer, out_transferdata,
+ out_needs_api);
}
}
}
@@ -3328,6 +3453,7 @@ PyArray_GetDTypeTransferFunction(int aligned,
PyTypeNum_ISNUMBER(dst_type_num) &&
PyArray_ISNBO(src_dtype->byteorder) &&
PyArray_ISNBO(dst_dtype->byteorder)) {
+
if (PyArray_EquivTypenums(src_type_num, dst_type_num)) {
*out_stransfer = PyArray_GetStridedCopyFn(aligned,
src_stride, dst_stride,
diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c
index af53df435..2cd4487ac 100644
--- a/numpy/core/src/multiarray/methods.c
+++ b/numpy/core/src/multiarray/methods.c
@@ -833,19 +833,19 @@ array_astype(PyArrayObject *self, PyObject *args, PyObject *kwds)
PyArrayObject *ret;
/* If the requested dtype is flexible, adapt it */
- PyArray_AdaptFlexibleType((PyObject *)self, PyArray_DESCR(self),
+ PyArray_AdaptFlexibleDType((PyObject *)self, PyArray_DESCR(self),
&dtype);
if (dtype == NULL) {
return NULL;
}
-
+
/* This steals the reference to dtype, so no DECREF of dtype */
ret = (PyArrayObject *)PyArray_NewLikeArray(
self, order, dtype, subok);
-
if (ret == NULL) {
return NULL;
}
+
if (PyArray_CopyInto(ret, self) < 0) {
Py_DECREF(ret);
return NULL;
diff --git a/numpy/core/src/multiarray/nditer.c.src b/numpy/core/src/multiarray/nditer.c.src
index d8de7b5c5..dfc0baa6f 100644
--- a/numpy/core/src/multiarray/nditer.c.src
+++ b/numpy/core/src/multiarray/nditer.c.src
@@ -3146,7 +3146,7 @@ npyiter_prepare_one_operand(PyArrayObject **op,
/* We just have a borrowed reference to op_request_dtype */
Py_INCREF(op_request_dtype);
/* If the requested dtype is flexible, adapt it */
- PyArray_AdaptFlexibleType((PyObject *)(*op), PyArray_DESCR(*op),
+ PyArray_AdaptFlexibleDType((PyObject *)(*op), PyArray_DESCR(*op),
&op_request_dtype);
if (op_request_dtype == NULL) {
return 0;
diff --git a/numpy/core/tests/test_datetime.py b/numpy/core/tests/test_datetime.py
index a6dca9714..f97592370 100644
--- a/numpy/core/tests/test_datetime.py
+++ b/numpy/core/tests/test_datetime.py
@@ -446,7 +446,7 @@ class TestDateTime(TestCase):
def test_datetime_string_conversion(self):
a = ['2011-03-16', '1920-01-01', '2013-05-19']
- str_a = np.array(a, dtype='S0')
+ str_a = np.array(a, dtype='S')
dt_a = np.array(a, dtype='M')
str_b = np.empty_like(str_a)
dt_b = np.empty_like(dt_a)
@@ -461,6 +461,20 @@ class TestDateTime(TestCase):
str_b[...] = dt_a
assert_equal(str_a, str_b)
+ # Convert the 'S' to 'U'
+ str_a = str_a.astype('U')
+ str_b = str_b.astype('U')
+
+ # Unicode to datetime
+ assert_equal(dt_a, str_a.astype('M'))
+ assert_equal(dt_a.dtype, str_a.astype('M').dtype)
+ dt_b[...] = str_a
+ assert_equal(dt_a, dt_b)
+ # Datetime to unicode
+ assert_equal(str_a, dt_a.astype('U'))
+ str_b[...] = dt_a
+ assert_equal(str_a, str_b)
+
def test_pickle(self):
# Check that pickle roundtripping works
dt = np.dtype('M8[7D]')