diff options
author | Jay Bourque <jay.bourque@continuum.io> | 2013-12-20 12:08:53 -0600 |
---|---|---|
committer | Jay Bourque <jay.bourque@continuum.io> | 2014-02-25 14:53:28 -0600 |
commit | 1f9d4d2613d7c8bccf7e16720e8d0fa87e74e34a (patch) | |
tree | 76f7d671d3a593849127f74d53b6beed6ae3911c /numpy | |
parent | 56eb28ed29573d644696743804decf3a8d3260fc (diff) | |
download | numpy-1f9d4d2613d7c8bccf7e16720e8d0fa87e74e34a.tar.gz |
BUG: Fix promote_types, can_cast, and astype issues
- promote_types does not return the correct string size for integer and string arguments. Fix so that integer and string types are promoted to a string type that is long enough to hold the integer type safely cast to a string.
- can_cast incorrectly returns True for certain integer and string types. Fix so that can_cast only returns True if the string type is long enough to hold the integer type safely cast to a string.
- Calling astype to convert an integer to a string should fail if the string type is not long enough to hold the integer converted to a string and the casting argument is set to "safe".
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/add_newdocs.py | 28 | ||||
-rw-r--r-- | numpy/core/src/multiarray/convert_datatype.c | 203 | ||||
-rw-r--r-- | numpy/core/tests/test_api.py | 6 | ||||
-rw-r--r-- | numpy/core/tests/test_numeric.py | 91 | ||||
-rw-r--r-- | numpy/core/tests/test_shape_base.py | 2 |
5 files changed, 285 insertions, 45 deletions
diff --git a/numpy/add_newdocs.py b/numpy/add_newdocs.py index be343f79d..6934cadcc 100644 --- a/numpy/add_newdocs.py +++ b/numpy/add_newdocs.py @@ -1598,6 +1598,14 @@ add_newdoc('numpy.core.multiarray', 'can_cast', out : bool True if cast can occur according to the casting rule. + Notes + ----- + Starting in NumPy 1.9, can_cast function now returns False in 'safe' + casting mode for integer/float dtype and string dtype if the string dtype + length is not long enough to store the max integer/float value converted + to a string. Previously can_cast in 'safe' mode returned True for + integer/float dtype and a string dtype of any length. + See also -------- dtype, result_type @@ -1618,7 +1626,7 @@ add_newdoc('numpy.core.multiarray', 'can_cast', >>> np.can_cast('i8', 'f4') False >>> np.can_cast('i4', 'S4') - True + False Casting scalars @@ -1693,6 +1701,11 @@ add_newdoc('numpy.core.multiarray', 'promote_types', Notes ----- .. versionadded:: 1.6.0 + Starting in NumPy 1.9, promote_types function now returns a valid string + length when given an integer or float dtype as one argument and a string + dtype as another argument. Previously it always returned the input string + dtype, even if it wasn't long enough to store the max integer/float value + converted to a string. See Also -------- @@ -1709,10 +1722,8 @@ add_newdoc('numpy.core.multiarray', 'promote_types', >>> np.promote_types('>i8', '<c8') dtype('complex128') - >>> np.promote_types('i1', 'S8') - Traceback (most recent call last): - File "<stdin>", line 1, in <module> - TypeError: invalid type promotion + >>> np.promote_types('i4', 'S8') + dtype('S11') """) @@ -3126,6 +3137,13 @@ add_newdoc('numpy.core.multiarray', 'ndarray', ('astype', is a new array of the same shape as the input array, with dtype, order given by `dtype`, `order`. 
+ Notes + ----- + Starting in NumPy 1.9, astype method now returns an error if the string + dtype to cast to is not long enough in 'safe' casting mode to hold the max + value of integer/float array that is being casted. Previously the casting + was allowed even if the result was truncated. + Raises ------ ComplexWarning diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c index 0f6c11092..b58d8e9fb 100644 --- a/numpy/core/src/multiarray/convert_datatype.c +++ b/numpy/core/src/multiarray/convert_datatype.c @@ -20,6 +20,18 @@ #include "_datetime.h" #include "datetime_strings.h" + +/* + * Required length of string when converting from unsigned integer type. + * Array index is integer size in bytes. + * - 3 chars needed for cast to max value of 255 or 127 + * - 5 chars needed for cast to max value of 65535 or 32767 + * - 10 chars needed for cast to max value of 4294967295 or 2147483647 + * - 20 chars needed for cast to max value of 18446744073709551615 + * or 9223372036854775807 + */ +NPY_NO_EXPORT npy_intp REQUIRED_STR_LEN[] = {0, 3, 5, 10, 10, 20, 20, 20, 20}; + /*NUMPY_API * For backward compatibility * @@ -166,7 +178,7 @@ PyArray_AdaptFlexibleDType(PyObject *data_obj, PyArray_Descr *data_dtype, flex_type_num == NPY_VOID) { (*flex_dtype)->elsize = data_dtype->elsize; } - else { + else if (flex_type_num == NPY_STRING || flex_type_num == NPY_UNICODE) { npy_intp size = 8; /* @@ -176,37 +188,35 @@ PyArray_AdaptFlexibleDType(PyObject *data_obj, PyArray_Descr *data_dtype, */ switch (data_dtype->type_num) { case NPY_BOOL: - size = 8; - break; case NPY_UBYTE: - size = 8; - break; case NPY_BYTE: - size = 8; - break; case NPY_USHORT: - size = 8; - break; case NPY_SHORT: - size = 8; - break; case NPY_UINT: - size = 16; - break; case NPY_INT: - size = 16; - break; case NPY_ULONG: - size = 24; - break; case NPY_LONG: - size = 24; - break; case NPY_ULONGLONG: - size = 24; - break; case NPY_LONGLONG: - size = 24; + if 
(data_dtype->kind == 'b') { + /* 5 chars needed for cast to 'True' or 'False' */ + size = 5; + } + else if (data_dtype->elsize > 8 || + data_dtype->elsize < 0) { + /* + * Element size should never be greater than 8 or + * less than 0 for integer type, but just in case... + */ + break; + } + else if (data_dtype->kind == 'u') { + size = REQUIRED_STR_LEN[data_dtype->elsize]; + } + else if (data_dtype->kind == 'i') { + /* Add character for sign symbol */ + size = REQUIRED_STR_LEN[data_dtype->elsize] + 1; + } break; case NPY_HALF: case NPY_FLOAT: @@ -307,6 +317,16 @@ PyArray_AdaptFlexibleDType(PyObject *data_obj, PyArray_Descr *data_dtype, (*flex_dtype)->elsize = size * 4; } } + else { + /* + * We should never get here, but just in case someone adds + * a new flex dtype... + */ + PyErr_SetString(PyExc_TypeError, + "don't know how to adapt flex dtype"); + *flex_dtype = NULL; + return; + } } /* Flexible type with generic time unit that adapts */ else if (flex_type_num == NPY_DATETIME || @@ -491,10 +511,53 @@ PyArray_CanCastTo(PyArray_Descr *from, PyArray_Descr *to) NPY_SAFE_CASTING); } /* - * TODO: If to_type_num is STRING or unicode + * If to_type_num is STRING or unicode * see if the length is long enough to hold the * stringified value of the object. */ + else if (to_type_num == NPY_STRING || to_type_num == NPY_UNICODE) { + /* + * Boolean value cast to string type is 5 characters max + * for string 'False'. + */ + int char_size = 1; + if (to_type_num == NPY_UNICODE) { + char_size = 4; + } + + ret = 0; + if (to->elsize == 0) { + ret = 1; + } + /* + * Need at least 5 characters to convert from boolean + * to 'True' or 'False'. 
+ */ + else if (from->kind == 'b' && to->elsize >= 5 * char_size) { + ret = 1; + } + else if (from->kind == 'u') { + /* Guard against unexpected integer size */ + if (from->elsize > 8 || from->elsize < 0) { + ret = 0; + } + else if (to->elsize >= + REQUIRED_STR_LEN[from->elsize] * char_size) { + ret = 1; + } + } + else if (from->kind == 'i') { + /* Guard against unexpected integer size */ + if (from->elsize > 8 || from->elsize < 0) { + ret = 0; + } + /* Extra character needed for sign */ + else if (to->elsize >= + (REQUIRED_STR_LEN[from->elsize] + 1) * char_size) { + ret = 1; + } + } + } } return ret; } @@ -1019,12 +1082,25 @@ PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2) switch (type_num1) { /* BOOL can convert to anything except datetime/void */ case NPY_BOOL: - if (type_num2 != NPY_DATETIME && type_num2 != NPY_VOID) { + if (type_num2 == NPY_STRING || type_num2 == NPY_UNICODE) { + int char_size = 1; + if (type_num2 == NPY_UNICODE) { + char_size = 4; + } + if (type2->elsize < 5 * char_size) { + PyArray_Descr *ret = NULL; + PyArray_Descr *temp = PyArray_DescrNew(type2); + ret = ensure_dtype_nbo(temp); + ret->elsize = 5 * char_size; + Py_DECREF(temp); + return ret; + } return ensure_dtype_nbo(type2); } - else { - break; + else if (type_num2 != NPY_DATETIME && type_num2 != NPY_VOID) { + return ensure_dtype_nbo(type2); } + break; /* For strings and unicodes, take the larger size */ case NPY_STRING: if (type_num2 == NPY_STRING) { @@ -1050,8 +1126,20 @@ PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2) } /* Allow NUMBER -> STRING */ else if (PyTypeNum_ISNUMBER(type_num2)) { - return ensure_dtype_nbo(type1); + PyArray_Descr *ret = NULL; + PyArray_Descr *temp = PyArray_DescrNew(type1); + temp->elsize = 0; + PyArray_AdaptFlexibleDType(NULL, type2, &temp); + if (temp->elsize > type1->elsize) { + ret = ensure_dtype_nbo(temp); + } + else { + ret = ensure_dtype_nbo(type1); + } + Py_DECREF(temp); + return ret; } + break; case NPY_UNICODE: if 
(type_num2 == NPY_UNICODE) { if (type1->elsize > type2->elsize) { @@ -1076,7 +1164,18 @@ PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2) } /* Allow NUMBER -> UNICODE */ else if (PyTypeNum_ISNUMBER(type_num2)) { - return ensure_dtype_nbo(type1); + PyArray_Descr *ret = NULL; + PyArray_Descr *temp = PyArray_DescrNew(type1); + temp->elsize = 0; + PyArray_AdaptFlexibleDType(NULL, type2, &temp); + if (temp->elsize > type1->elsize) { + ret = ensure_dtype_nbo(temp); + } + else { + ret = ensure_dtype_nbo(type1); + } + Py_DECREF(temp); + return ret; } break; case NPY_DATETIME: @@ -1090,22 +1189,58 @@ PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2) switch (type_num2) { /* BOOL can convert to almost anything */ case NPY_BOOL: - if (type_num1 != NPY_DATETIME && type_num1 != NPY_TIMEDELTA && + if (type_num2 == NPY_STRING || type_num2 == NPY_UNICODE) { + int char_size = 1; + if (type_num2 == NPY_UNICODE) { + char_size = 4; + } + if (type2->elsize < 5 * char_size) { + PyArray_Descr *ret = NULL; + PyArray_Descr *temp = PyArray_DescrNew(type2); + ret = ensure_dtype_nbo(temp); + ret->elsize = 5 * char_size; + Py_DECREF(temp); + return ret; + } + return ensure_dtype_nbo(type2); + } + else if (type_num1 != NPY_DATETIME && type_num1 != NPY_TIMEDELTA && type_num1 != NPY_VOID) { return ensure_dtype_nbo(type1); } - else { - break; - } + break; case NPY_STRING: /* Allow NUMBER -> STRING */ if (PyTypeNum_ISNUMBER(type_num1)) { - return ensure_dtype_nbo(type2); + PyArray_Descr *ret = NULL; + PyArray_Descr *temp = PyArray_DescrNew(type2); + temp->elsize = 0; + PyArray_AdaptFlexibleDType(NULL, type1, &temp); + if (temp->elsize > type2->elsize) { + ret = ensure_dtype_nbo(temp); + } + else { + ret = ensure_dtype_nbo(type2); + } + Py_DECREF(temp); + return ret; } + break; case NPY_UNICODE: /* Allow NUMBER -> UNICODE */ if (PyTypeNum_ISNUMBER(type_num1)) { - return ensure_dtype_nbo(type2); + PyArray_Descr *ret = NULL; + PyArray_Descr *temp = 
PyArray_DescrNew(type2); + temp->elsize = 0; + PyArray_AdaptFlexibleDType(NULL, type1, &temp); + if (temp->elsize > type2->elsize) { + ret = ensure_dtype_nbo(temp); + } + else { + ret = ensure_dtype_nbo(type2); + } + Py_DECREF(temp); + return ret; } break; case NPY_TIMEDELTA: diff --git a/numpy/core/tests/test_api.py b/numpy/core/tests/test_api.py index a1a3f896c..2fd6463c7 100644 --- a/numpy/core/tests/test_api.py +++ b/numpy/core/tests/test_api.py @@ -278,6 +278,12 @@ def test_array_astype(): b = np.ndarray(buffer=a, dtype='uint32', shape=2) assert_(b.size == 2) + a = np.array([1000], dtype='i4') + assert_raises(TypeError, a.astype, 'S1', casting='safe') + + a = np.array(1000, dtype='i4') + assert_raises(TypeError, a.astype, 'U1', casting='safe') + def test_copyto_fromscalar(): a = np.arange(6, dtype='f4').reshape(2, 3) diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py index 12a39a522..2a698d1c2 100644 --- a/numpy/core/tests/test_numeric.py +++ b/numpy/core/tests/test_numeric.py @@ -686,10 +686,10 @@ class TestTypes(TestCase): assert_equal(np.promote_types('<i8', '<i8'), np.dtype('i8')) assert_equal(np.promote_types('>i8', '>i8'), np.dtype('i8')) - assert_equal(np.promote_types('>i8', '>U16'), np.dtype('U16')) - assert_equal(np.promote_types('<i8', '<U16'), np.dtype('U16')) - assert_equal(np.promote_types('>U16', '>i8'), np.dtype('U16')) - assert_equal(np.promote_types('<U16', '<i8'), np.dtype('U16')) + assert_equal(np.promote_types('>i8', '>U16'), np.dtype('U21')) + assert_equal(np.promote_types('<i8', '<U16'), np.dtype('U21')) + assert_equal(np.promote_types('>U16', '>i8'), np.dtype('U21')) + assert_equal(np.promote_types('<U16', '<i8'), np.dtype('U21')) assert_equal(np.promote_types('<S5', '<U8'), np.dtype('U8')) assert_equal(np.promote_types('>S5', '>U8'), np.dtype('U8')) @@ -703,6 +703,39 @@ class TestTypes(TestCase): assert_equal(np.promote_types('<m8', '<m8'), np.dtype('m8')) assert_equal(np.promote_types('>m8', '>m8'), 
np.dtype('m8')) + def test_promote_types_strings(self): + assert_equal(np.promote_types('bool', 'S'), np.dtype('S5')) + assert_equal(np.promote_types('b', 'S'), np.dtype('S4')) + assert_equal(np.promote_types('u1', 'S'), np.dtype('S3')) + assert_equal(np.promote_types('u2', 'S'), np.dtype('S5')) + assert_equal(np.promote_types('u4', 'S'), np.dtype('S10')) + assert_equal(np.promote_types('u8', 'S'), np.dtype('S20')) + assert_equal(np.promote_types('i1', 'S'), np.dtype('S4')) + assert_equal(np.promote_types('i2', 'S'), np.dtype('S6')) + assert_equal(np.promote_types('i4', 'S'), np.dtype('S11')) + assert_equal(np.promote_types('i8', 'S'), np.dtype('S21')) + assert_equal(np.promote_types('bool', 'U'), np.dtype('U5')) + assert_equal(np.promote_types('b', 'U'), np.dtype('U4')) + assert_equal(np.promote_types('u1', 'U'), np.dtype('U3')) + assert_equal(np.promote_types('u2', 'U'), np.dtype('U5')) + assert_equal(np.promote_types('u4', 'U'), np.dtype('U10')) + assert_equal(np.promote_types('u8', 'U'), np.dtype('U20')) + assert_equal(np.promote_types('i1', 'U'), np.dtype('U4')) + assert_equal(np.promote_types('i2', 'U'), np.dtype('U6')) + assert_equal(np.promote_types('i4', 'U'), np.dtype('U11')) + assert_equal(np.promote_types('i8', 'U'), np.dtype('U21')) + assert_equal(np.promote_types('bool', 'S1'), np.dtype('S5')) + assert_equal(np.promote_types('bool', 'S30'), np.dtype('S30')) + assert_equal(np.promote_types('b', 'S1'), np.dtype('S4')) + assert_equal(np.promote_types('b', 'S30'), np.dtype('S30')) + assert_equal(np.promote_types('u1', 'S1'), np.dtype('S3')) + assert_equal(np.promote_types('u1', 'S30'), np.dtype('S30')) + assert_equal(np.promote_types('u2', 'S1'), np.dtype('S5')) + assert_equal(np.promote_types('u2', 'S30'), np.dtype('S30')) + assert_equal(np.promote_types('u4', 'S1'), np.dtype('S10')) + assert_equal(np.promote_types('u4', 'S30'), np.dtype('S30')) + assert_equal(np.promote_types('u8', 'S1'), np.dtype('S20')) + assert_equal(np.promote_types('u8', 'S30'), 
np.dtype('S30')) def test_can_cast(self): assert_(np.can_cast(np.int32, np.int64)) @@ -711,7 +744,7 @@ class TestTypes(TestCase): assert_(np.can_cast('i8', 'f8')) assert_(not np.can_cast('i8', 'f4')) - assert_(np.can_cast('i4', 'S4')) + assert_(np.can_cast('i4', 'S11')) assert_(np.can_cast('i8', 'i8', 'no')) assert_(not np.can_cast('<i8', '>i8', 'no')) @@ -727,6 +760,54 @@ class TestTypes(TestCase): assert_(np.can_cast('<i8', '>u4', 'unsafe')) + assert_(np.can_cast('bool', 'S5')) + assert_(not np.can_cast('bool', 'S4')) + + assert_(np.can_cast('b', 'S4')) + assert_(not np.can_cast('b', 'S3')) + + assert_(np.can_cast('u1', 'S3')) + assert_(not np.can_cast('u1', 'S2')) + assert_(np.can_cast('u2', 'S5')) + assert_(not np.can_cast('u2', 'S4')) + assert_(np.can_cast('u4', 'S10')) + assert_(not np.can_cast('u4', 'S9')) + assert_(np.can_cast('u8', 'S20')) + assert_(not np.can_cast('u8', 'S19')) + + assert_(np.can_cast('i1', 'S4')) + assert_(not np.can_cast('i1', 'S3')) + assert_(np.can_cast('i2', 'S6')) + assert_(not np.can_cast('i2', 'S5')) + assert_(np.can_cast('i4', 'S11')) + assert_(not np.can_cast('i4', 'S10')) + assert_(np.can_cast('i8', 'S21')) + assert_(not np.can_cast('i8', 'S20')) + + assert_(np.can_cast('bool', 'S5')) + assert_(not np.can_cast('bool', 'S4')) + + assert_(np.can_cast('b', 'U4')) + assert_(not np.can_cast('b', 'U3')) + + assert_(np.can_cast('u1', 'U3')) + assert_(not np.can_cast('u1', 'U2')) + assert_(np.can_cast('u2', 'U5')) + assert_(not np.can_cast('u2', 'U4')) + assert_(np.can_cast('u4', 'U10')) + assert_(not np.can_cast('u4', 'U9')) + assert_(np.can_cast('u8', 'U20')) + assert_(not np.can_cast('u8', 'U19')) + + assert_(np.can_cast('i1', 'U4')) + assert_(not np.can_cast('i1', 'U3')) + assert_(np.can_cast('i2', 'U6')) + assert_(not np.can_cast('i2', 'U5')) + assert_(np.can_cast('i4', 'U11')) + assert_(not np.can_cast('i4', 'U10')) + assert_(np.can_cast('i8', 'U21')) + assert_(not np.can_cast('i8', 'U20')) + assert_raises(TypeError, np.can_cast, 
'i4', None) assert_raises(TypeError, np.can_cast, None, 'i4') diff --git a/numpy/core/tests/test_shape_base.py b/numpy/core/tests/test_shape_base.py index e94e964b2..f1f5311c9 100644 --- a/numpy/core/tests/test_shape_base.py +++ b/numpy/core/tests/test_shape_base.py @@ -183,7 +183,7 @@ def test_concatenate_axis_None(): assert_equal(r.size, a.size + len(b)) assert_equal(r.dtype, a.dtype) r = np.concatenate((a, b, c), axis=None) - d = array(['0', '1', '2', '3', + d = array(['0.0', '1.0', '2.0', '3.0', '0', '1', '2', 'x']) assert_array_equal(r, d) |