summary refs log tree commit diff
path: root/numpy
diff options
context:
space:
mode:
Diffstat (limited to 'numpy')
-rw-r--r-- numpy/add_newdocs.py 28
-rw-r--r-- numpy/core/src/multiarray/convert_datatype.c 203
-rw-r--r-- numpy/core/tests/test_api.py 6
-rw-r--r-- numpy/core/tests/test_numeric.py 91
-rw-r--r-- numpy/core/tests/test_shape_base.py 2
5 files changed, 285 insertions, 45 deletions
diff --git a/numpy/add_newdocs.py b/numpy/add_newdocs.py
index be343f79d..6934cadcc 100644
--- a/numpy/add_newdocs.py
+++ b/numpy/add_newdocs.py
@@ -1598,6 +1598,14 @@ add_newdoc('numpy.core.multiarray', 'can_cast',
out : bool
True if cast can occur according to the casting rule.
+ Notes
+ -----
+ Starting in NumPy 1.9, the can_cast function returns False in 'safe'
+ casting mode for an integer/float dtype and a string dtype if the string
+ dtype length is not long enough to store the max integer/float value
+ converted to a string. Previously, can_cast in 'safe' mode returned True
+ for an integer/float dtype and a string dtype of any length.
+
See also
--------
dtype, result_type
@@ -1618,7 +1626,7 @@ add_newdoc('numpy.core.multiarray', 'can_cast',
>>> np.can_cast('i8', 'f4')
False
>>> np.can_cast('i4', 'S4')
- True
+ False
Casting scalars
@@ -1693,6 +1701,11 @@ add_newdoc('numpy.core.multiarray', 'promote_types',
Notes
-----
.. versionadded:: 1.6.0
+ Starting in NumPy 1.9, the promote_types function returns a string dtype
+ of sufficient length when given an integer or float dtype as one argument
+ and a string dtype as another argument. Previously it always returned the
+ input string dtype, even if it wasn't long enough to store the max
+ integer/float value converted to a string.
See Also
--------
@@ -1709,10 +1722,8 @@ add_newdoc('numpy.core.multiarray', 'promote_types',
>>> np.promote_types('>i8', '<c8')
dtype('complex128')
- >>> np.promote_types('i1', 'S8')
- Traceback (most recent call last):
- File "<stdin>", line 1, in <module>
- TypeError: invalid type promotion
+ >>> np.promote_types('i4', 'S8')
+ dtype('S11')
""")
@@ -3126,6 +3137,13 @@ add_newdoc('numpy.core.multiarray', 'ndarray', ('astype',
is a new array of the same shape as the input array, with dtype, order
given by `dtype`, `order`.
+ Notes
+ -----
+ Starting in NumPy 1.9, the astype method raises an error in 'safe'
+ casting mode if the string dtype to cast to is not long enough to hold
+ the max value of the integer/float array that is being cast. Previously
+ the casting was allowed even if the result was truncated.
+
Raises
------
ComplexWarning
diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c
index 0f6c11092..b58d8e9fb 100644
--- a/numpy/core/src/multiarray/convert_datatype.c
+++ b/numpy/core/src/multiarray/convert_datatype.c
@@ -20,6 +20,18 @@
#include "_datetime.h"
#include "datetime_strings.h"
+
+/*
+ * Required length of string when converting from an unsigned integer type
+ * (signed types need one additional character for the sign).
+ * Array index is integer size in bytes.
+ * - 3 chars needed for cast to max value of 255 or 127
+ * - 5 chars needed for cast to max value of 65535 or 32767
+ * - 10 chars needed for cast to max value of 4294967295 or 2147483647
+ * - 20 chars needed for cast to max value of 18446744073709551615
+ * or 9223372036854775807
+ */
+NPY_NO_EXPORT npy_intp REQUIRED_STR_LEN[] = {0, 3, 5, 10, 10, 20, 20, 20, 20};
+
/*NUMPY_API
* For backward compatibility
*
@@ -166,7 +178,7 @@ PyArray_AdaptFlexibleDType(PyObject *data_obj, PyArray_Descr *data_dtype,
flex_type_num == NPY_VOID) {
(*flex_dtype)->elsize = data_dtype->elsize;
}
- else {
+ else if (flex_type_num == NPY_STRING || flex_type_num == NPY_UNICODE) {
npy_intp size = 8;
/*
@@ -176,37 +188,35 @@ PyArray_AdaptFlexibleDType(PyObject *data_obj, PyArray_Descr *data_dtype,
*/
switch (data_dtype->type_num) {
case NPY_BOOL:
- size = 8;
- break;
case NPY_UBYTE:
- size = 8;
- break;
case NPY_BYTE:
- size = 8;
- break;
case NPY_USHORT:
- size = 8;
- break;
case NPY_SHORT:
- size = 8;
- break;
case NPY_UINT:
- size = 16;
- break;
case NPY_INT:
- size = 16;
- break;
case NPY_ULONG:
- size = 24;
- break;
case NPY_LONG:
- size = 24;
- break;
case NPY_ULONGLONG:
- size = 24;
- break;
case NPY_LONGLONG:
- size = 24;
+ if (data_dtype->kind == 'b') {
+ /* 5 chars needed for cast to 'True' or 'False' */
+ size = 5;
+ }
+ else if (data_dtype->elsize > 8 ||
+ data_dtype->elsize < 0) {
+ /*
+ * Element size should never be greater than 8 or
+ * less than 0 for integer type, but just in case...
+ */
+ break;
+ }
+ else if (data_dtype->kind == 'u') {
+ size = REQUIRED_STR_LEN[data_dtype->elsize];
+ }
+ else if (data_dtype->kind == 'i') {
+ /* Add character for sign symbol */
+ size = REQUIRED_STR_LEN[data_dtype->elsize] + 1;
+ }
break;
case NPY_HALF:
case NPY_FLOAT:
@@ -307,6 +317,16 @@ PyArray_AdaptFlexibleDType(PyObject *data_obj, PyArray_Descr *data_dtype,
(*flex_dtype)->elsize = size * 4;
}
}
+ else {
+ /*
+ * We should never get here, but just in case someone adds
+ * a new flex dtype...
+ */
+ PyErr_SetString(PyExc_TypeError,
+ "don't know how to adapt flex dtype");
+ *flex_dtype = NULL;
+ return;
+ }
}
/* Flexible type with generic time unit that adapts */
else if (flex_type_num == NPY_DATETIME ||
@@ -491,10 +511,53 @@ PyArray_CanCastTo(PyArray_Descr *from, PyArray_Descr *to)
NPY_SAFE_CASTING);
}
/*
- * TODO: If to_type_num is STRING or unicode
+ * If to_type_num is STRING or unicode
* see if the length is long enough to hold the
* stringified value of the object.
*/
+ else if (to_type_num == NPY_STRING || to_type_num == NPY_UNICODE) {
+ /*
+ * Boolean value cast to string type is 5 characters max
+ * for string 'False'.
+ */
+ int char_size = 1;
+ if (to_type_num == NPY_UNICODE) {
+ char_size = 4;
+ }
+
+ ret = 0;
+ if (to->elsize == 0) {
+ ret = 1;
+ }
+ /*
+ * Need at least 5 characters to convert from boolean
+ * to 'True' or 'False'.
+ */
+ else if (from->kind == 'b' && to->elsize >= 5 * char_size) {
+ ret = 1;
+ }
+ else if (from->kind == 'u') {
+ /* Guard against unexpected integer size */
+ if (from->elsize > 8 || from->elsize < 0) {
+ ret = 0;
+ }
+ else if (to->elsize >=
+ REQUIRED_STR_LEN[from->elsize] * char_size) {
+ ret = 1;
+ }
+ }
+ else if (from->kind == 'i') {
+ /* Guard against unexpected integer size */
+ if (from->elsize > 8 || from->elsize < 0) {
+ ret = 0;
+ }
+ /* Extra character needed for sign */
+ else if (to->elsize >=
+ (REQUIRED_STR_LEN[from->elsize] + 1) * char_size) {
+ ret = 1;
+ }
+ }
+ }
}
return ret;
}
@@ -1019,12 +1082,25 @@ PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2)
switch (type_num1) {
/* BOOL can convert to anything except datetime/void */
case NPY_BOOL:
- if (type_num2 != NPY_DATETIME && type_num2 != NPY_VOID) {
+ if (type_num2 == NPY_STRING || type_num2 == NPY_UNICODE) {
+ int char_size = 1;
+ if (type_num2 == NPY_UNICODE) {
+ char_size = 4;
+ }
+ if (type2->elsize < 5 * char_size) {
+ PyArray_Descr *ret = NULL;
+ PyArray_Descr *temp = PyArray_DescrNew(type2);
+ ret = ensure_dtype_nbo(temp);
+ ret->elsize = 5 * char_size;
+ Py_DECREF(temp);
+ return ret;
+ }
return ensure_dtype_nbo(type2);
}
- else {
- break;
+ else if (type_num2 != NPY_DATETIME && type_num2 != NPY_VOID) {
+ return ensure_dtype_nbo(type2);
}
+ break;
/* For strings and unicodes, take the larger size */
case NPY_STRING:
if (type_num2 == NPY_STRING) {
@@ -1050,8 +1126,20 @@ PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2)
}
/* Allow NUMBER -> STRING */
else if (PyTypeNum_ISNUMBER(type_num2)) {
- return ensure_dtype_nbo(type1);
+ PyArray_Descr *ret = NULL;
+ PyArray_Descr *temp = PyArray_DescrNew(type1);
+ temp->elsize = 0;
+ PyArray_AdaptFlexibleDType(NULL, type2, &temp);
+ if (temp->elsize > type1->elsize) {
+ ret = ensure_dtype_nbo(temp);
+ }
+ else {
+ ret = ensure_dtype_nbo(type1);
+ }
+ Py_DECREF(temp);
+ return ret;
}
+ break;
case NPY_UNICODE:
if (type_num2 == NPY_UNICODE) {
if (type1->elsize > type2->elsize) {
@@ -1076,7 +1164,18 @@ PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2)
}
/* Allow NUMBER -> UNICODE */
else if (PyTypeNum_ISNUMBER(type_num2)) {
- return ensure_dtype_nbo(type1);
+ PyArray_Descr *ret = NULL;
+ PyArray_Descr *temp = PyArray_DescrNew(type1);
+ temp->elsize = 0;
+ PyArray_AdaptFlexibleDType(NULL, type2, &temp);
+ if (temp->elsize > type1->elsize) {
+ ret = ensure_dtype_nbo(temp);
+ }
+ else {
+ ret = ensure_dtype_nbo(type1);
+ }
+ Py_DECREF(temp);
+ return ret;
}
break;
case NPY_DATETIME:
@@ -1090,22 +1189,58 @@ PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2)
switch (type_num2) {
/* BOOL can convert to almost anything */
case NPY_BOOL:
- if (type_num1 != NPY_DATETIME && type_num1 != NPY_TIMEDELTA &&
+ if (type_num2 == NPY_STRING || type_num2 == NPY_UNICODE) {
+ int char_size = 1;
+ if (type_num2 == NPY_UNICODE) {
+ char_size = 4;
+ }
+ if (type2->elsize < 5 * char_size) {
+ PyArray_Descr *ret = NULL;
+ PyArray_Descr *temp = PyArray_DescrNew(type2);
+ ret = ensure_dtype_nbo(temp);
+ ret->elsize = 5 * char_size;
+ Py_DECREF(temp);
+ return ret;
+ }
+ return ensure_dtype_nbo(type2);
+ }
+ else if (type_num1 != NPY_DATETIME && type_num1 != NPY_TIMEDELTA &&
type_num1 != NPY_VOID) {
return ensure_dtype_nbo(type1);
}
- else {
- break;
- }
+ break;
case NPY_STRING:
/* Allow NUMBER -> STRING */
if (PyTypeNum_ISNUMBER(type_num1)) {
- return ensure_dtype_nbo(type2);
+ PyArray_Descr *ret = NULL;
+ PyArray_Descr *temp = PyArray_DescrNew(type2);
+ temp->elsize = 0;
+ PyArray_AdaptFlexibleDType(NULL, type1, &temp);
+ if (temp->elsize > type2->elsize) {
+ ret = ensure_dtype_nbo(temp);
+ }
+ else {
+ ret = ensure_dtype_nbo(type2);
+ }
+ Py_DECREF(temp);
+ return ret;
}
+ break;
case NPY_UNICODE:
/* Allow NUMBER -> UNICODE */
if (PyTypeNum_ISNUMBER(type_num1)) {
- return ensure_dtype_nbo(type2);
+ PyArray_Descr *ret = NULL;
+ PyArray_Descr *temp = PyArray_DescrNew(type2);
+ temp->elsize = 0;
+ PyArray_AdaptFlexibleDType(NULL, type1, &temp);
+ if (temp->elsize > type2->elsize) {
+ ret = ensure_dtype_nbo(temp);
+ }
+ else {
+ ret = ensure_dtype_nbo(type2);
+ }
+ Py_DECREF(temp);
+ return ret;
}
break;
case NPY_TIMEDELTA:
diff --git a/numpy/core/tests/test_api.py b/numpy/core/tests/test_api.py
index a1a3f896c..2fd6463c7 100644
--- a/numpy/core/tests/test_api.py
+++ b/numpy/core/tests/test_api.py
@@ -278,6 +278,12 @@ def test_array_astype():
b = np.ndarray(buffer=a, dtype='uint32', shape=2)
assert_(b.size == 2)
+ a = np.array([1000], dtype='i4')
+ assert_raises(TypeError, a.astype, 'S1', casting='safe')
+
+ a = np.array(1000, dtype='i4')
+ assert_raises(TypeError, a.astype, 'U1', casting='safe')
+
def test_copyto_fromscalar():
a = np.arange(6, dtype='f4').reshape(2, 3)
diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py
index 12a39a522..2a698d1c2 100644
--- a/numpy/core/tests/test_numeric.py
+++ b/numpy/core/tests/test_numeric.py
@@ -686,10 +686,10 @@ class TestTypes(TestCase):
assert_equal(np.promote_types('<i8', '<i8'), np.dtype('i8'))
assert_equal(np.promote_types('>i8', '>i8'), np.dtype('i8'))
- assert_equal(np.promote_types('>i8', '>U16'), np.dtype('U16'))
- assert_equal(np.promote_types('<i8', '<U16'), np.dtype('U16'))
- assert_equal(np.promote_types('>U16', '>i8'), np.dtype('U16'))
- assert_equal(np.promote_types('<U16', '<i8'), np.dtype('U16'))
+ assert_equal(np.promote_types('>i8', '>U16'), np.dtype('U21'))
+ assert_equal(np.promote_types('<i8', '<U16'), np.dtype('U21'))
+ assert_equal(np.promote_types('>U16', '>i8'), np.dtype('U21'))
+ assert_equal(np.promote_types('<U16', '<i8'), np.dtype('U21'))
assert_equal(np.promote_types('<S5', '<U8'), np.dtype('U8'))
assert_equal(np.promote_types('>S5', '>U8'), np.dtype('U8'))
@@ -703,6 +703,39 @@ class TestTypes(TestCase):
assert_equal(np.promote_types('<m8', '<m8'), np.dtype('m8'))
assert_equal(np.promote_types('>m8', '>m8'), np.dtype('m8'))
+ def test_promote_types_strings(self):
+ assert_equal(np.promote_types('bool', 'S'), np.dtype('S5'))
+ assert_equal(np.promote_types('b', 'S'), np.dtype('S4'))
+ assert_equal(np.promote_types('u1', 'S'), np.dtype('S3'))
+ assert_equal(np.promote_types('u2', 'S'), np.dtype('S5'))
+ assert_equal(np.promote_types('u4', 'S'), np.dtype('S10'))
+ assert_equal(np.promote_types('u8', 'S'), np.dtype('S20'))
+ assert_equal(np.promote_types('i1', 'S'), np.dtype('S4'))
+ assert_equal(np.promote_types('i2', 'S'), np.dtype('S6'))
+ assert_equal(np.promote_types('i4', 'S'), np.dtype('S11'))
+ assert_equal(np.promote_types('i8', 'S'), np.dtype('S21'))
+ assert_equal(np.promote_types('bool', 'U'), np.dtype('U5'))
+ assert_equal(np.promote_types('b', 'U'), np.dtype('U4'))
+ assert_equal(np.promote_types('u1', 'U'), np.dtype('U3'))
+ assert_equal(np.promote_types('u2', 'U'), np.dtype('U5'))
+ assert_equal(np.promote_types('u4', 'U'), np.dtype('U10'))
+ assert_equal(np.promote_types('u8', 'U'), np.dtype('U20'))
+ assert_equal(np.promote_types('i1', 'U'), np.dtype('U4'))
+ assert_equal(np.promote_types('i2', 'U'), np.dtype('U6'))
+ assert_equal(np.promote_types('i4', 'U'), np.dtype('U11'))
+ assert_equal(np.promote_types('i8', 'U'), np.dtype('U21'))
+ assert_equal(np.promote_types('bool', 'S1'), np.dtype('S5'))
+ assert_equal(np.promote_types('bool', 'S30'), np.dtype('S30'))
+ assert_equal(np.promote_types('b', 'S1'), np.dtype('S4'))
+ assert_equal(np.promote_types('b', 'S30'), np.dtype('S30'))
+ assert_equal(np.promote_types('u1', 'S1'), np.dtype('S3'))
+ assert_equal(np.promote_types('u1', 'S30'), np.dtype('S30'))
+ assert_equal(np.promote_types('u2', 'S1'), np.dtype('S5'))
+ assert_equal(np.promote_types('u2', 'S30'), np.dtype('S30'))
+ assert_equal(np.promote_types('u4', 'S1'), np.dtype('S10'))
+ assert_equal(np.promote_types('u4', 'S30'), np.dtype('S30'))
+ assert_equal(np.promote_types('u8', 'S1'), np.dtype('S20'))
+ assert_equal(np.promote_types('u8', 'S30'), np.dtype('S30'))
def test_can_cast(self):
assert_(np.can_cast(np.int32, np.int64))
@@ -711,7 +744,7 @@ class TestTypes(TestCase):
assert_(np.can_cast('i8', 'f8'))
assert_(not np.can_cast('i8', 'f4'))
- assert_(np.can_cast('i4', 'S4'))
+ assert_(np.can_cast('i4', 'S11'))
assert_(np.can_cast('i8', 'i8', 'no'))
assert_(not np.can_cast('<i8', '>i8', 'no'))
@@ -727,6 +760,54 @@ class TestTypes(TestCase):
assert_(np.can_cast('<i8', '>u4', 'unsafe'))
+ assert_(np.can_cast('bool', 'S5'))
+ assert_(not np.can_cast('bool', 'S4'))
+
+ assert_(np.can_cast('b', 'S4'))
+ assert_(not np.can_cast('b', 'S3'))
+
+ assert_(np.can_cast('u1', 'S3'))
+ assert_(not np.can_cast('u1', 'S2'))
+ assert_(np.can_cast('u2', 'S5'))
+ assert_(not np.can_cast('u2', 'S4'))
+ assert_(np.can_cast('u4', 'S10'))
+ assert_(not np.can_cast('u4', 'S9'))
+ assert_(np.can_cast('u8', 'S20'))
+ assert_(not np.can_cast('u8', 'S19'))
+
+ assert_(np.can_cast('i1', 'S4'))
+ assert_(not np.can_cast('i1', 'S3'))
+ assert_(np.can_cast('i2', 'S6'))
+ assert_(not np.can_cast('i2', 'S5'))
+ assert_(np.can_cast('i4', 'S11'))
+ assert_(not np.can_cast('i4', 'S10'))
+ assert_(np.can_cast('i8', 'S21'))
+ assert_(not np.can_cast('i8', 'S20'))
+
+ assert_(np.can_cast('bool', 'S5'))
+ assert_(not np.can_cast('bool', 'S4'))
+
+ assert_(np.can_cast('b', 'U4'))
+ assert_(not np.can_cast('b', 'U3'))
+
+ assert_(np.can_cast('u1', 'U3'))
+ assert_(not np.can_cast('u1', 'U2'))
+ assert_(np.can_cast('u2', 'U5'))
+ assert_(not np.can_cast('u2', 'U4'))
+ assert_(np.can_cast('u4', 'U10'))
+ assert_(not np.can_cast('u4', 'U9'))
+ assert_(np.can_cast('u8', 'U20'))
+ assert_(not np.can_cast('u8', 'U19'))
+
+ assert_(np.can_cast('i1', 'U4'))
+ assert_(not np.can_cast('i1', 'U3'))
+ assert_(np.can_cast('i2', 'U6'))
+ assert_(not np.can_cast('i2', 'U5'))
+ assert_(np.can_cast('i4', 'U11'))
+ assert_(not np.can_cast('i4', 'U10'))
+ assert_(np.can_cast('i8', 'U21'))
+ assert_(not np.can_cast('i8', 'U20'))
+
assert_raises(TypeError, np.can_cast, 'i4', None)
assert_raises(TypeError, np.can_cast, None, 'i4')
diff --git a/numpy/core/tests/test_shape_base.py b/numpy/core/tests/test_shape_base.py
index e94e964b2..f1f5311c9 100644
--- a/numpy/core/tests/test_shape_base.py
+++ b/numpy/core/tests/test_shape_base.py
@@ -183,7 +183,7 @@ def test_concatenate_axis_None():
assert_equal(r.size, a.size + len(b))
assert_equal(r.dtype, a.dtype)
r = np.concatenate((a, b, c), axis=None)
- d = array(['0', '1', '2', '3',
+ d = array(['0.0', '1.0', '2.0', '3.0',
'0', '1', '2', 'x'])
assert_array_equal(r, d)