diff options
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/add_newdocs.py | 28 |
-rw-r--r-- | numpy/core/src/multiarray/convert_datatype.c | 203 |
-rw-r--r-- | numpy/core/tests/test_api.py | 6 |
-rw-r--r-- | numpy/core/tests/test_numeric.py | 91 |
-rw-r--r-- | numpy/core/tests/test_shape_base.py | 2 |
5 files changed, 285 insertions, 45 deletions
diff --git a/numpy/add_newdocs.py b/numpy/add_newdocs.py index be343f79d..6934cadcc 100644 --- a/numpy/add_newdocs.py +++ b/numpy/add_newdocs.py @@ -1598,6 +1598,14 @@ add_newdoc('numpy.core.multiarray', 'can_cast', out : bool True if cast can occur according to the casting rule. + Notes + ----- + Starting in NumPy 1.9, can_cast function now returns False in 'safe' + casting mode for integer/float dtype and string dtype if the string dtype + length is not long enough to store the max integer/float value converted + to a string. Previously can_cast in 'safe' mode returned True for + integer/float dtype and a string dtype of any length. + See also -------- dtype, result_type @@ -1618,7 +1626,7 @@ add_newdoc('numpy.core.multiarray', 'can_cast', >>> np.can_cast('i8', 'f4') False >>> np.can_cast('i4', 'S4') - True + False Casting scalars @@ -1693,6 +1701,11 @@ add_newdoc('numpy.core.multiarray', 'promote_types', Notes ----- .. versionadded:: 1.6.0 + Starting in NumPy 1.9, promote_types function now returns a valid string + length when given an integer or float dtype as one argument and a string + dtype as another argument. Previously it always returned the input string + dtype, even if it wasn't long enough to store the max integer/float value + converted to a string. See Also -------- @@ -1709,10 +1722,8 @@ add_newdoc('numpy.core.multiarray', 'promote_types', >>> np.promote_types('>i8', '<c8') dtype('complex128') - >>> np.promote_types('i1', 'S8') - Traceback (most recent call last): - File "<stdin>", line 1, in <module> - TypeError: invalid type promotion + >>> np.promote_types('i4', 'S8') + dtype('S11') """) @@ -3126,6 +3137,13 @@ add_newdoc('numpy.core.multiarray', 'ndarray', ('astype', is a new array of the same shape as the input array, with dtype, order given by `dtype`, `order`. 
+ Notes + ----- + Starting in NumPy 1.9, astype method now returns an error if the string + dtype to cast to is not long enough in 'safe' casting mode to hold the max + value of integer/float array that is being casted. Previously the casting + was allowed even if the result was truncated. + Raises ------ ComplexWarning diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c index 0f6c11092..b58d8e9fb 100644 --- a/numpy/core/src/multiarray/convert_datatype.c +++ b/numpy/core/src/multiarray/convert_datatype.c @@ -20,6 +20,18 @@ #include "_datetime.h" #include "datetime_strings.h" + +/* + * Required length of string when converting from unsigned integer type. + * Array index is integer size in bytes. + * - 3 chars needed for cast to max value of 255 or 127 + * - 5 chars needed for cast to max value of 65535 or 32767 + * - 10 chars needed for cast to max value of 4294967295 or 2147483647 + * - 20 chars needed for cast to max value of 18446744073709551615 + * or 9223372036854775807 + */ +NPY_NO_EXPORT npy_intp REQUIRED_STR_LEN[] = {0, 3, 5, 10, 10, 20, 20, 20, 20}; + /*NUMPY_API * For backward compatibility * @@ -166,7 +178,7 @@ PyArray_AdaptFlexibleDType(PyObject *data_obj, PyArray_Descr *data_dtype, flex_type_num == NPY_VOID) { (*flex_dtype)->elsize = data_dtype->elsize; } - else { + else if (flex_type_num == NPY_STRING || flex_type_num == NPY_UNICODE) { npy_intp size = 8; /* @@ -176,37 +188,35 @@ PyArray_AdaptFlexibleDType(PyObject *data_obj, PyArray_Descr *data_dtype, */ switch (data_dtype->type_num) { case NPY_BOOL: - size = 8; - break; case NPY_UBYTE: - size = 8; - break; case NPY_BYTE: - size = 8; - break; case NPY_USHORT: - size = 8; - break; case NPY_SHORT: - size = 8; - break; case NPY_UINT: - size = 16; - break; case NPY_INT: - size = 16; - break; case NPY_ULONG: - size = 24; - break; case NPY_LONG: - size = 24; - break; case NPY_ULONGLONG: - size = 24; - break; case NPY_LONGLONG: - size = 24; + if 
(data_dtype->kind == 'b') { + /* 5 chars needed for cast to 'True' or 'False' */ + size = 5; + } + else if (data_dtype->elsize > 8 || + data_dtype->elsize < 0) { + /* + * Element size should never be greater than 8 or + * less than 0 for integer type, but just in case... + */ + break; + } + else if (data_dtype->kind == 'u') { + size = REQUIRED_STR_LEN[data_dtype->elsize]; + } + else if (data_dtype->kind == 'i') { + /* Add character for sign symbol */ + size = REQUIRED_STR_LEN[data_dtype->elsize] + 1; + } break; case NPY_HALF: case NPY_FLOAT: @@ -307,6 +317,16 @@ PyArray_AdaptFlexibleDType(PyObject *data_obj, PyArray_Descr *data_dtype, (*flex_dtype)->elsize = size * 4; } } + else { + /* + * We should never get here, but just in case someone adds + * a new flex dtype... + */ + PyErr_SetString(PyExc_TypeError, + "don't know how to adapt flex dtype"); + *flex_dtype = NULL; + return; + } } /* Flexible type with generic time unit that adapts */ else if (flex_type_num == NPY_DATETIME || @@ -491,10 +511,53 @@ PyArray_CanCastTo(PyArray_Descr *from, PyArray_Descr *to) NPY_SAFE_CASTING); } /* - * TODO: If to_type_num is STRING or unicode + * If to_type_num is STRING or unicode * see if the length is long enough to hold the * stringified value of the object. */ + else if (to_type_num == NPY_STRING || to_type_num == NPY_UNICODE) { + /* + * Boolean value cast to string type is 5 characters max + * for string 'False'. + */ + int char_size = 1; + if (to_type_num == NPY_UNICODE) { + char_size = 4; + } + + ret = 0; + if (to->elsize == 0) { + ret = 1; + } + /* + * Need at least 5 characters to convert from boolean + * to 'True' or 'False'. 
+ */ + else if (from->kind == 'b' && to->elsize >= 5 * char_size) { + ret = 1; + } + else if (from->kind == 'u') { + /* Guard against unexpected integer size */ + if (from->elsize > 8 || from->elsize < 0) { + ret = 0; + } + else if (to->elsize >= + REQUIRED_STR_LEN[from->elsize] * char_size) { + ret = 1; + } + } + else if (from->kind == 'i') { + /* Guard against unexpected integer size */ + if (from->elsize > 8 || from->elsize < 0) { + ret = 0; + } + /* Extra character needed for sign */ + else if (to->elsize >= + (REQUIRED_STR_LEN[from->elsize] + 1) * char_size) { + ret = 1; + } + } + } } return ret; } @@ -1019,12 +1082,25 @@ PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2) switch (type_num1) { /* BOOL can convert to anything except datetime/void */ case NPY_BOOL: - if (type_num2 != NPY_DATETIME && type_num2 != NPY_VOID) { + if (type_num2 == NPY_STRING || type_num2 == NPY_UNICODE) { + int char_size = 1; + if (type_num2 == NPY_UNICODE) { + char_size = 4; + } + if (type2->elsize < 5 * char_size) { + PyArray_Descr *ret = NULL; + PyArray_Descr *temp = PyArray_DescrNew(type2); + ret = ensure_dtype_nbo(temp); + ret->elsize = 5 * char_size; + Py_DECREF(temp); + return ret; + } return ensure_dtype_nbo(type2); } - else { - break; + else if (type_num2 != NPY_DATETIME && type_num2 != NPY_VOID) { + return ensure_dtype_nbo(type2); } + break; /* For strings and unicodes, take the larger size */ case NPY_STRING: if (type_num2 == NPY_STRING) { @@ -1050,8 +1126,20 @@ PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2) } /* Allow NUMBER -> STRING */ else if (PyTypeNum_ISNUMBER(type_num2)) { - return ensure_dtype_nbo(type1); + PyArray_Descr *ret = NULL; + PyArray_Descr *temp = PyArray_DescrNew(type1); + temp->elsize = 0; + PyArray_AdaptFlexibleDType(NULL, type2, &temp); + if (temp->elsize > type1->elsize) { + ret = ensure_dtype_nbo(temp); + } + else { + ret = ensure_dtype_nbo(type1); + } + Py_DECREF(temp); + return ret; } + break; case NPY_UNICODE: if 
(type_num2 == NPY_UNICODE) { if (type1->elsize > type2->elsize) { @@ -1076,7 +1164,18 @@ PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2) } /* Allow NUMBER -> UNICODE */ else if (PyTypeNum_ISNUMBER(type_num2)) { - return ensure_dtype_nbo(type1); + PyArray_Descr *ret = NULL; + PyArray_Descr *temp = PyArray_DescrNew(type1); + temp->elsize = 0; + PyArray_AdaptFlexibleDType(NULL, type2, &temp); + if (temp->elsize > type1->elsize) { + ret = ensure_dtype_nbo(temp); + } + else { + ret = ensure_dtype_nbo(type1); + } + Py_DECREF(temp); + return ret; } break; case NPY_DATETIME: @@ -1090,22 +1189,58 @@ PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2) switch (type_num2) { /* BOOL can convert to almost anything */ case NPY_BOOL: - if (type_num1 != NPY_DATETIME && type_num1 != NPY_TIMEDELTA && + if (type_num2 == NPY_STRING || type_num2 == NPY_UNICODE) { + int char_size = 1; + if (type_num2 == NPY_UNICODE) { + char_size = 4; + } + if (type2->elsize < 5 * char_size) { + PyArray_Descr *ret = NULL; + PyArray_Descr *temp = PyArray_DescrNew(type2); + ret = ensure_dtype_nbo(temp); + ret->elsize = 5 * char_size; + Py_DECREF(temp); + return ret; + } + return ensure_dtype_nbo(type2); + } + else if (type_num1 != NPY_DATETIME && type_num1 != NPY_TIMEDELTA && type_num1 != NPY_VOID) { return ensure_dtype_nbo(type1); } - else { - break; - } + break; case NPY_STRING: /* Allow NUMBER -> STRING */ if (PyTypeNum_ISNUMBER(type_num1)) { - return ensure_dtype_nbo(type2); + PyArray_Descr *ret = NULL; + PyArray_Descr *temp = PyArray_DescrNew(type2); + temp->elsize = 0; + PyArray_AdaptFlexibleDType(NULL, type1, &temp); + if (temp->elsize > type2->elsize) { + ret = ensure_dtype_nbo(temp); + } + else { + ret = ensure_dtype_nbo(type2); + } + Py_DECREF(temp); + return ret; } + break; case NPY_UNICODE: /* Allow NUMBER -> UNICODE */ if (PyTypeNum_ISNUMBER(type_num1)) { - return ensure_dtype_nbo(type2); + PyArray_Descr *ret = NULL; + PyArray_Descr *temp = 
PyArray_DescrNew(type2); + temp->elsize = 0; + PyArray_AdaptFlexibleDType(NULL, type1, &temp); + if (temp->elsize > type2->elsize) { + ret = ensure_dtype_nbo(temp); + } + else { + ret = ensure_dtype_nbo(type2); + } + Py_DECREF(temp); + return ret; } break; case NPY_TIMEDELTA: diff --git a/numpy/core/tests/test_api.py b/numpy/core/tests/test_api.py index a1a3f896c..2fd6463c7 100644 --- a/numpy/core/tests/test_api.py +++ b/numpy/core/tests/test_api.py @@ -278,6 +278,12 @@ def test_array_astype(): b = np.ndarray(buffer=a, dtype='uint32', shape=2) assert_(b.size == 2) + a = np.array([1000], dtype='i4') + assert_raises(TypeError, a.astype, 'S1', casting='safe') + + a = np.array(1000, dtype='i4') + assert_raises(TypeError, a.astype, 'U1', casting='safe') + def test_copyto_fromscalar(): a = np.arange(6, dtype='f4').reshape(2, 3) diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py index 12a39a522..2a698d1c2 100644 --- a/numpy/core/tests/test_numeric.py +++ b/numpy/core/tests/test_numeric.py @@ -686,10 +686,10 @@ class TestTypes(TestCase): assert_equal(np.promote_types('<i8', '<i8'), np.dtype('i8')) assert_equal(np.promote_types('>i8', '>i8'), np.dtype('i8')) - assert_equal(np.promote_types('>i8', '>U16'), np.dtype('U16')) - assert_equal(np.promote_types('<i8', '<U16'), np.dtype('U16')) - assert_equal(np.promote_types('>U16', '>i8'), np.dtype('U16')) - assert_equal(np.promote_types('<U16', '<i8'), np.dtype('U16')) + assert_equal(np.promote_types('>i8', '>U16'), np.dtype('U21')) + assert_equal(np.promote_types('<i8', '<U16'), np.dtype('U21')) + assert_equal(np.promote_types('>U16', '>i8'), np.dtype('U21')) + assert_equal(np.promote_types('<U16', '<i8'), np.dtype('U21')) assert_equal(np.promote_types('<S5', '<U8'), np.dtype('U8')) assert_equal(np.promote_types('>S5', '>U8'), np.dtype('U8')) @@ -703,6 +703,39 @@ class TestTypes(TestCase): assert_equal(np.promote_types('<m8', '<m8'), np.dtype('m8')) assert_equal(np.promote_types('>m8', '>m8'), 
np.dtype('m8')) + def test_promote_types_strings(self): + assert_equal(np.promote_types('bool', 'S'), np.dtype('S5')) + assert_equal(np.promote_types('b', 'S'), np.dtype('S4')) + assert_equal(np.promote_types('u1', 'S'), np.dtype('S3')) + assert_equal(np.promote_types('u2', 'S'), np.dtype('S5')) + assert_equal(np.promote_types('u4', 'S'), np.dtype('S10')) + assert_equal(np.promote_types('u8', 'S'), np.dtype('S20')) + assert_equal(np.promote_types('i1', 'S'), np.dtype('S4')) + assert_equal(np.promote_types('i2', 'S'), np.dtype('S6')) + assert_equal(np.promote_types('i4', 'S'), np.dtype('S11')) + assert_equal(np.promote_types('i8', 'S'), np.dtype('S21')) + assert_equal(np.promote_types('bool', 'U'), np.dtype('U5')) + assert_equal(np.promote_types('b', 'U'), np.dtype('U4')) + assert_equal(np.promote_types('u1', 'U'), np.dtype('U3')) + assert_equal(np.promote_types('u2', 'U'), np.dtype('U5')) + assert_equal(np.promote_types('u4', 'U'), np.dtype('U10')) + assert_equal(np.promote_types('u8', 'U'), np.dtype('U20')) + assert_equal(np.promote_types('i1', 'U'), np.dtype('U4')) + assert_equal(np.promote_types('i2', 'U'), np.dtype('U6')) + assert_equal(np.promote_types('i4', 'U'), np.dtype('U11')) + assert_equal(np.promote_types('i8', 'U'), np.dtype('U21')) + assert_equal(np.promote_types('bool', 'S1'), np.dtype('S5')) + assert_equal(np.promote_types('bool', 'S30'), np.dtype('S30')) + assert_equal(np.promote_types('b', 'S1'), np.dtype('S4')) + assert_equal(np.promote_types('b', 'S30'), np.dtype('S30')) + assert_equal(np.promote_types('u1', 'S1'), np.dtype('S3')) + assert_equal(np.promote_types('u1', 'S30'), np.dtype('S30')) + assert_equal(np.promote_types('u2', 'S1'), np.dtype('S5')) + assert_equal(np.promote_types('u2', 'S30'), np.dtype('S30')) + assert_equal(np.promote_types('u4', 'S1'), np.dtype('S10')) + assert_equal(np.promote_types('u4', 'S30'), np.dtype('S30')) + assert_equal(np.promote_types('u8', 'S1'), np.dtype('S20')) + assert_equal(np.promote_types('u8', 'S30'), 
np.dtype('S30')) def test_can_cast(self): assert_(np.can_cast(np.int32, np.int64)) @@ -711,7 +744,7 @@ class TestTypes(TestCase): assert_(np.can_cast('i8', 'f8')) assert_(not np.can_cast('i8', 'f4')) - assert_(np.can_cast('i4', 'S4')) + assert_(np.can_cast('i4', 'S11')) assert_(np.can_cast('i8', 'i8', 'no')) assert_(not np.can_cast('<i8', '>i8', 'no')) @@ -727,6 +760,54 @@ class TestTypes(TestCase): assert_(np.can_cast('<i8', '>u4', 'unsafe')) + assert_(np.can_cast('bool', 'S5')) + assert_(not np.can_cast('bool', 'S4')) + + assert_(np.can_cast('b', 'S4')) + assert_(not np.can_cast('b', 'S3')) + + assert_(np.can_cast('u1', 'S3')) + assert_(not np.can_cast('u1', 'S2')) + assert_(np.can_cast('u2', 'S5')) + assert_(not np.can_cast('u2', 'S4')) + assert_(np.can_cast('u4', 'S10')) + assert_(not np.can_cast('u4', 'S9')) + assert_(np.can_cast('u8', 'S20')) + assert_(not np.can_cast('u8', 'S19')) + + assert_(np.can_cast('i1', 'S4')) + assert_(not np.can_cast('i1', 'S3')) + assert_(np.can_cast('i2', 'S6')) + assert_(not np.can_cast('i2', 'S5')) + assert_(np.can_cast('i4', 'S11')) + assert_(not np.can_cast('i4', 'S10')) + assert_(np.can_cast('i8', 'S21')) + assert_(not np.can_cast('i8', 'S20')) + + assert_(np.can_cast('bool', 'S5')) + assert_(not np.can_cast('bool', 'S4')) + + assert_(np.can_cast('b', 'U4')) + assert_(not np.can_cast('b', 'U3')) + + assert_(np.can_cast('u1', 'U3')) + assert_(not np.can_cast('u1', 'U2')) + assert_(np.can_cast('u2', 'U5')) + assert_(not np.can_cast('u2', 'U4')) + assert_(np.can_cast('u4', 'U10')) + assert_(not np.can_cast('u4', 'U9')) + assert_(np.can_cast('u8', 'U20')) + assert_(not np.can_cast('u8', 'U19')) + + assert_(np.can_cast('i1', 'U4')) + assert_(not np.can_cast('i1', 'U3')) + assert_(np.can_cast('i2', 'U6')) + assert_(not np.can_cast('i2', 'U5')) + assert_(np.can_cast('i4', 'U11')) + assert_(not np.can_cast('i4', 'U10')) + assert_(np.can_cast('i8', 'U21')) + assert_(not np.can_cast('i8', 'U20')) + assert_raises(TypeError, np.can_cast, 
'i4', None) assert_raises(TypeError, np.can_cast, None, 'i4') diff --git a/numpy/core/tests/test_shape_base.py b/numpy/core/tests/test_shape_base.py index e94e964b2..f1f5311c9 100644 --- a/numpy/core/tests/test_shape_base.py +++ b/numpy/core/tests/test_shape_base.py @@ -183,7 +183,7 @@ def test_concatenate_axis_None(): assert_equal(r.size, a.size + len(b)) assert_equal(r.dtype, a.dtype) r = np.concatenate((a, b, c), axis=None) - d = array(['0', '1', '2', '3', + d = array(['0.0', '1.0', '2.0', '3.0', '0', '1', '2', 'x']) assert_array_equal(r, d) |