diff options
author | Sebastian Berg <sebastian@sipsolutions.net> | 2020-06-19 08:46:05 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-06-19 07:46:05 -0600 |
commit | f253a7e39204142e3cf82d6beeef1ce22f5500cd (patch) | |
tree | e51be0deadd95abe0170066b8c43cdf69acf57cd /numpy | |
parent | e8c78e09ab38b80b586108dfb4151cbe70c7a694 (diff) | |
download | numpy-f253a7e39204142e3cf82d6beeef1ce22f5500cd.tar.gz |
TST: Add new tests for array coercion (#16571)
* ENH: Add traceback-skip to `assert_array_compare`
* TST: Add tests for arraycoercion
These tests have many xfails (some of which may technically not be correct),
which describe in detail what will change when the array-coercion
changes are merged, since all of those xfails are going
to be removed.
* TST: Add test for empty sequences
* TST: Add tests for bad self-mutating sequence inputs to np.array
These should never happen and rightly should lead to undefined
behaviour (and preferably errors), but they exercise some more
tricky code branches and should probably not crash.
* Simplify the "all scalars" logic (hardcode) and fix complex cases
* MAINT: Some cleanup, and xfail pypy
PyPy seems to have issues with int(numpy_complex), maybe because
it gives a warning during conversion (Python does not define it).
So simply mark it as xfail, it should work in my branch.
Also some smaller cleanups.
* TST: Add some further tests, re-add missing rational
* MAINT: Make rationals optional, because they fail some tests currently
The failures will go away, since this must again be related to
implementing int() and float().
* Update numpy/conftest.py
* MAINT: Fix strings and use pytest.param to clean things up
* TST: Add tests for 0-D array-like input to np.array() corner-cases
* TST: Improve parameterization IDs to get better printing at -vv
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/core/tests/test_array_coercion.py | 577 | ||||
-rw-r--r-- | numpy/core/tests/test_indexing.py | 14 | ||||
-rw-r--r-- | numpy/testing/_private/utils.py | 2 |
3 files changed, 593 insertions, 0 deletions
diff --git a/numpy/core/tests/test_array_coercion.py b/numpy/core/tests/test_array_coercion.py new file mode 100644 index 000000000..b8d4b5cdf --- /dev/null +++ b/numpy/core/tests/test_array_coercion.py @@ -0,0 +1,577 @@ +""" +Tests for array coercion, mainly through testing `np.array` results directly. +Note that other such tests exist e.g. in `test_api.py` and many corner-cases +are tested (sometimes indirectly) elsewhere. +""" + +import pytest +from pytest import param + +from itertools import product + +import numpy as np +from numpy.core._rational_tests import rational + +from numpy.testing import ( + assert_array_equal, assert_warns, IS_PYPY) + + +def arraylikes(): + """ + Generator for functions converting an array into various array-likes. + If full is True (default) includes array-likes not capable of handling + all dtypes + """ + # base array: + def ndarray(a): + return a + + yield param(ndarray, id="ndarray") + + # subclass: + class MyArr(np.ndarray): + pass + + def subclass(a): + return a.view(MyArr) + + yield subclass + + # Array-interface + class ArrayDunder: + def __init__(self, a): + self.a = a + + def __array__(self, dtype=None): + return self.a + + yield param(ArrayDunder, id="__array__") + + # memory-view + yield param(memoryview, id="memoryview") + + # Array-interface + class ArrayInterface: + def __init__(self, a): + self.a = a # need to hold on to keep interface valid + self.__array_interface__ = a.__array_interface__ + + yield param(ArrayInterface, id="__array_interface__") + + # Array-Struct + class ArrayStruct: + def __init__(self, a): + self.a = a # need to hold on to keep struct valid + self.__array_struct__ = a.__array_struct__ + + yield param(ArrayStruct, id="__array_struct__") + + +def scalar_instances(times=True, extended_precision=True, user_dtype=True): + # Hard-coded list of scalar instances. 
+ # Floats: + yield param(np.sqrt(np.float16(5)), id="float16") + yield param(np.sqrt(np.float32(5)), id="float32") + yield param(np.sqrt(np.float64(5)), id="float64") + if extended_precision: + yield param(np.sqrt(np.longdouble(5)), id="longdouble") + + # Complex: + yield param(np.sqrt(np.complex64(2+3j)), id="complex64") + yield param(np.sqrt(np.complex128(2+3j)), id="complex128") + if extended_precision: + yield param(np.sqrt(np.longcomplex(2+3j)), id="clongdouble") + + # Bool: + # XFAIL: Bool should be added, but has some bad properties when it + # comes to strings, see also gh-9875 + # yield param(np.bool_(0), id="bool") + + # Integers: + yield param(np.int8(2), id="int8") + yield param(np.int16(2), id="int16") + yield param(np.int32(2), id="int32") + yield param(np.int64(2), id="int64") + + yield param(np.uint8(2), id="uint8") + yield param(np.uint16(2), id="uint16") + yield param(np.uint32(2), id="uint32") + yield param(np.uint64(2), id="uint64") + + # Rational: + if user_dtype: + yield param(rational(1, 2), id="rational") + + # Cannot create a structured void scalar directly: + structured = np.array([(1, 3)], "i,i")[0] + assert isinstance(structured, np.void) + assert structured.dtype == np.dtype("i,i") + yield param(structured, id="structured") + + if times: + # Datetimes and timedelta + yield param(np.timedelta64(2), id="timedelta64[generic]") + yield param(np.timedelta64(23, "s"), id="timedelta64[s]") + yield param(np.timedelta64("NaT", "s"), id="timedelta64[s](NaT)") + + yield param(np.datetime64("NaT"), id="datetime64[generic](NaT)") + yield param(np.datetime64("2020-06-07 12:43", "ms"), id="datetime64[ms]") + + # Strings and unstructured void: + yield param(np.bytes_(b"1234"), id="bytes") + yield param(np.unicode_("2345"), id="unicode") + yield param(np.void(b"4321"), id="unstructured_void") + + +def is_parametric_dtype(dtype): + """Returns True if the the dtype is a parametric legacy dtype (itemsize + is 0, or a datetime without units) + """ + if 
dtype.itemsize == 0: + return True + if issubclass(dtype.type, (np.datetime64, np.timedelta64)): + if dtype.name.endswith("64"): + # Generic time units + return True + return False + + +class TestStringDiscovery: + @pytest.mark.parametrize("obj", + [object(), 1.2, 10**43, None, "string"], + ids=["object", "1.2", "10**43", "None", "string"]) + def test_basic_stringlength(self, obj): + if not isinstance(obj, (str, int)): + pytest.xfail( + "The Single object (first assert) uses a different branch " + "and thus gives a different result (either wrong or longer" + "string than normally discovered).") + + length = len(str(obj)) + expected = np.dtype(f"S{length}") + + assert np.array(obj, dtype="S").dtype == expected + assert np.array([obj], dtype="S").dtype == expected + + # A nested array is also discovered correctly + arr = np.array(obj, dtype="O") + assert np.array(arr, dtype="S").dtype == expected + + @pytest.mark.xfail(reason="Only single array unpacking is supported") + @pytest.mark.parametrize("obj", + [object(), 1.2, 10**43, None, "string"], + ids=["object", "1.2", "10**43", "None", "string"]) + def test_nested_arrays_stringlength(self, obj): + length = len(str(obj)) + expected = np.dtype(f"S{length}") + arr = np.array(obj, dtype="O") + assert np.array([arr, arr], dtype="S").dtype == expected + + @pytest.mark.xfail(reason="Only single array unpacking is supported") + @pytest.mark.parametrize("arraylike", arraylikes()) + def test_unpack_first_level(self, arraylike): + # We unpack exactly one level of array likes + obj = np.array([None]) + obj[0] = np.array(1.2) + # the length of the included item, not of the float dtype + length = len(str(obj[0])) + expected = np.dtype(f"S{length}") + + obj = arraylike(obj) + # casting to string usually calls str(obj) + arr = np.array([obj], dtype="S") + assert arr.shape == (1, 1) + assert arr.dtype == expected + + +class TestScalarDiscovery: + def test_void_special_case(self): + # Void dtypes with structures discover tuples as 
elements + arr = np.array((1, 2, 3), dtype="i,i,i") + assert arr.shape == () + arr = np.array([(1, 2, 3)], dtype="i,i,i") + assert arr.shape == (1,) + + def test_char_special_case(self): + arr = np.array("string", dtype="c") + assert arr.shape == (6,) + assert arr.dtype.char == "c" + arr = np.array(["string"], dtype="c") + assert arr.shape == (1, 6) + assert arr.dtype.char == "c" + + def test_char_special_case_deep(self): + # Check that the character special case errors correctly if the + # array is too deep: + nested = ["string"] # 2 dimensions (due to string being sequence) + for i in range(np.MAXDIMS - 2): + nested = [nested] + + arr = np.array(nested, dtype='c') + assert arr.shape == (1,) * (np.MAXDIMS - 1) + (6,) + with pytest.raises(ValueError): + np.array([nested], dtype="c") + + def test_unknown_object(self): + arr = np.array(object()) + assert arr.shape == () + assert arr.dtype == np.dtype("O") + + @pytest.mark.parametrize("scalar", scalar_instances()) + def test_scalar(self, scalar): + arr = np.array(scalar) + assert arr.shape == () + assert arr.dtype == scalar.dtype + + if type(scalar) is np.bytes_: + pytest.xfail("Nested bytes use len(str(scalar)) currently.") + + arr = np.array([[scalar, scalar]]) + assert arr.shape == (1, 2) + assert arr.dtype == scalar.dtype + + # Additionally to string this test also runs into a corner case + # with datetime promotion (the difference is the promotion order). 
+ @pytest.mark.xfail(reason="Coercion to string is not symmetric") + def test_scalar_promotion(self): + for sc1, sc2 in product(scalar_instances(), scalar_instances()): + sc1, sc2 = sc1.values[0], sc2.values[0] + # test all combinations: + arr = np.array([sc1, sc2]) + assert arr.shape == (2,) + try: + dt1, dt2 = sc1.dtype, sc2.dtype + expected_dtype = np.promote_types(dt1, dt2) + assert arr.dtype == expected_dtype + except TypeError as e: + # Will currently always go to object dtype + assert arr.dtype == np.dtype("O") + + @pytest.mark.parametrize("scalar", scalar_instances()) + def test_scalar_coercion(self, scalar): + # This tests various scalar coercion paths, mainly for the numerical + # types. It includes some paths not directly related to `np.array` + if isinstance(scalar, np.inexact): + # Ensure we have a full-precision number if available + scalar = type(scalar)((scalar * 2)**0.5) + + if is_parametric_dtype(scalar.dtype) or type(scalar) is rational: + # datetime with unit will be named "datetime64[unit]" + # Rational generally fails due to a missing cast. In the future + # object casts should automatically be defined based on `setitem`. 
+ pytest.xfail("0-D object array to a unit-less datetime cast fails") + + # Use casting from object: + arr = np.array(scalar, dtype=object).astype(scalar.dtype) + + # Test various ways to create an array containing this scalar: + arr1 = np.array(scalar).reshape(1) + arr2 = np.array([scalar]) + arr3 = np.empty(1, dtype=scalar.dtype) + arr3[0] = scalar + arr4 = np.empty(1, dtype=scalar.dtype) + arr4[:] = [scalar] + # All of these methods should yield the same results + assert_array_equal(arr, arr1) + assert_array_equal(arr, arr2) + assert_array_equal(arr, arr3) + assert_array_equal(arr, arr4) + + @pytest.mark.xfail(IS_PYPY, reason="`int(np.complex128(3))` fails on PyPy") + @pytest.mark.filterwarnings("ignore::numpy.ComplexWarning") + # After change, can enable times here, and below and it will work, + # Right now times are too complex, so map out some details below. + @pytest.mark.parametrize("cast_to", scalar_instances(times=False)) + def test_scalar_coercion_same_as_cast_and_assignment(self, cast_to): + """ + Test that in most cases: + * `np.array(scalar, dtype=dtype)` + * `np.empty((), dtype=dtype)[()] = scalar` + * `np.array(scalar).astype(dtype)` + should behave the same. The only exceptions are paramteric dtypes + (mainly datetime/timedelta without unit) and void without fields. + """ + dtype = cast_to.dtype # use to parametrize only the target dtype + + # XFAIL: Some extended precision tests fail, because assigning to + # complex256 will use float(float128). Rational fails currently. + for scalar in scalar_instances( + times=False, extended_precision=False, user_dtype=False): + scalar = scalar.values[0] + + if dtype.type == np.void: + if scalar.dtype.fields is not None and dtype.fields is None: + # Here, coercion to "V6" works, but the cast fails. + # Since the types are identical, SETITEM takes care of + # this, but has different rules than the cast. 
+ with pytest.raises(TypeError): + np.array(scalar).astype(dtype) + # XFAIL: np.array(scalar, dtype=dtype) + np.array([scalar], dtype=dtype) + continue + + # The main test, we first try to use casting and if it succeeds + # continue below testing that things are the same, otherwise + # test that the alternative paths at least also fail. + try: + cast = np.array(scalar).astype(dtype) + except (TypeError, ValueError, RuntimeError): + # coercion should also raise (error type may change) + with pytest.raises(Exception): + np.array(scalar, dtype=dtype) + # assignment should also raise + res = np.zeros((), dtype=dtype) + with pytest.raises(Exception): + res[()] = scalar + + return + + # Non error path: + arr = np.array(scalar, dtype=dtype) + assert_array_equal(arr, cast) + # assignment behaves the same + ass = np.zeros((), dtype=dtype) + ass[()] = scalar + assert_array_equal(ass, cast) + + +class TestTimeScalars: + @pytest.mark.parametrize("dtype", [np.int64, np.float32]) + @pytest.mark.parametrize("scalar", + [param(np.timedelta64("NaT", "s"), id="timedelta64[s](NaT)"), + param(np.timedelta64(123, "s"), id="timedelta64[s]"), + param(np.datetime64("NaT", "generic"), id="datetime64[generic](NaT)"), + param(np.datetime64(1, "D"), id="datetime64[D]")],) + @pytest.mark.xfail( + reason="This uses int(scalar) or float(scalar) to assign, which " + "fails. 
However, casting currently does not fail.") + def test_coercion_basic(self, dtype, scalar): + arr = np.array(scalar, dtype=dtype) + cast = np.array(scalar).astype(dtype) + ass = np.ones((), dtype=dtype) + ass[()] = scalar # raises, as would np.array([scalar], dtype=dtype) + + assert_array_equal(arr, cast) + assert_array_equal(cast, cast) + + @pytest.mark.parametrize("dtype", [np.int64, np.float32]) + @pytest.mark.parametrize("scalar", + [param(np.timedelta64(123, "ns"), id="timedelta64[ns]"), + param(np.timedelta64(12, "generic"), id="timedelta64[generic]")]) + def test_coercion_timedelta_convert_to_number(self, dtype, scalar): + # Only "ns" and "generic" timedeltas can be converted to numbers + # so these are slightly special. + arr = np.array(scalar, dtype=dtype) + cast = np.array(scalar).astype(dtype) + ass = np.ones((), dtype=dtype) + ass[()] = scalar # raises, as would np.array([scalar], dtype=dtype) + + assert_array_equal(arr, cast) + assert_array_equal(cast, cast) + + @pytest.mark.parametrize(["val", "unit"], + [param(123, "s", id="[s]"), param(123, "D", id="[D]")]) + @pytest.mark.parametrize("scalar_type", [np.datetime64, np.timedelta64]) + @pytest.mark.xfail(reason="Error not raised for assignment") + def test_coercion_assignment_times(self, scalar_type, val, unit): + scalar = scalar_type(val, unit) + + # The error type is not ideal, fails because string is too short: + with pytest.raises(RuntimeError): + np.array(scalar, dtype="S6") + with pytest.raises(RuntimeError): + cast = np.array(scalar).astype("S6") + ass = np.ones((), dtype="S6") + with pytest.raises(RuntimeError): + ass[()] = scalar + + +class TestNested: + @pytest.mark.xfail(reason="No deprecation warning given.") + def test_nested_simple(self): + initial = [1.2] + nested = initial + for i in range(np.MAXDIMS - 1): + nested = [nested] + + arr = np.array(nested, dtype="float64") + assert arr.shape == (1,) * np.MAXDIMS + with pytest.raises(ValueError): + np.array([nested], dtype="float64") + + # 
We discover object automatically at this time: + with assert_warns(np.VisibleDeprecationWarning): + arr = np.array([nested]) + assert arr.dtype == np.dtype("O") + assert arr.shape == (1,) * np.MAXDIMS + assert arr.item() is initial + + def test_pathological_self_containing(self): + # Test that this also works for two nested sequences + l = [] + l.append(l) + arr = np.array([l, l, l], dtype=object) + assert arr.shape == (3,) + (1,) * (np.MAXDIMS - 1) + + # Also check a ragged case: + arr = np.array([l, [None], l], dtype=object) + assert arr.shape == (3, 1) + + @pytest.mark.xfail( + reason="For arrays and memoryview, this used to not complain " + "and assign to a too small array instead. For other " + "array-likes the error is different because fewer (only " + "MAXDIM-1) dimensions are found, failing the last test.") + @pytest.mark.parametrize("arraylike", arraylikes()) + def test_nested_arraylikes(self, arraylike): + # We try storing an array like into an array, but the array-like + # will have too many dimensions. This means the shape discovery + # decides that the array-like must be treated as an object (a special + # case of ragged discovery). The result will be an array with one + # dimension less than the maximum dimensions, and the array being + # assigned to it (which does work for object or if `float(arraylike)` + # works). + initial = arraylike(np.ones((1, 1))) + #if not isinstance(initial, (np.ndarray, memoryview)): + # pytest.xfail( + # "When coercing to object, these cases currently discover " + # "fewer dimensions than ndarray failing the second part.") + + nested = initial + for i in range(np.MAXDIMS - 1): + nested = [nested] + + with pytest.raises(ValueError): + # It will refuse to assign the array into + np.array(nested, dtype="float64") + + # If this is object, we end up assigning a (1, 1) array into (1,) + # (due to running out of dimensions), this is currently supported but + # a special case which is not ideal. 
+ arr = np.array(nested, dtype=object) + assert arr.shape == (1,) * np.MAXDIMS + assert arr.item() == np.array(initial).item() + + @pytest.mark.parametrize("arraylike", arraylikes()) + def test_uneven_depth_ragged(self, arraylike): + arr = np.arange(4).reshape((2, 2)) + arr = arraylike(arr) + + # Array is ragged in the second dimension already: + out = np.array([arr, [arr]], dtype=object) + assert out.shape == (2,) + assert out[0] is arr + assert type(out[1]) is list + + if not isinstance(arr, (np.ndarray, memoryview)): + pytest.xfail( + "does not raise ValueError below, because it discovers " + "the dimension as (2,) and not (2, 2, 2)") + + # Array is ragged in the third dimension: + with pytest.raises(ValueError): + # This is a broadcast error during assignment, because + # the array shape would be (2, 2, 2) but `arr[0, 0] = arr` fails. + np.array([arr, [arr, arr]], dtype=object) + + def test_empty_sequence(self): + arr = np.array([[], [1], [[1]]], dtype=object) + assert arr.shape == (3,) + + # The empty sequence stops further dimension discovery, so the + # result shape will be (0,) which leads to an error during: + with pytest.raises(ValueError): + np.array([[], np.empty((0, 1))], dtype=object) + + +class TestBadSequences: + # These are tests for bad objects passed into `np.array`, in general + # these have undefined behaviour. In the old code they partially worked + # when now they will fail. We could (and maybe should) create a copy + # of all sequences to be safe against bad-actors. + + def test_growing_list(self): + # List to coerce, `mylist` will append to it during coercion + obj = [] + class mylist(list): + def __len__(self): + obj.append([1, 2]) + return super().__len__() + + obj.append(mylist([1, 2])) + + with pytest.raises(ValueError): # changes to RuntimeError + np.array(obj) + + # Note: We do not test a shrinking list. These do very evil things + # and the only way to fix them would be to copy all sequences. 
+ # (which may be a real option in the future). + + def test_mutated_list(self): + # List to coerce, `mylist` will mutate the first element + obj = [] + class mylist(list): + def __len__(self): + obj[0] = [2, 3] # replace with a different list. + return super().__len__() + + obj.append([2, 3]) + obj.append(mylist([1, 2])) + #with pytest.raises(RuntimeError): # Will error in the future + np.array(obj) + + def test_replace_0d_array(self): + # List to coerce, `mylist` will mutate the first element + obj = [] + class baditem: + def __len__(self): + obj[0][0] = 2 # replace with a different list. + raise ValueError("not actually a sequence!") + + def __getitem__(self): + pass + + # Runs into a corner case in the new code, the `array(2)` is cached + # so replacing it invalidates the cache. + obj.append([np.array(2), baditem()]) + # with pytest.raises(RuntimeError): # Will error in the future + np.array(obj) + + +class TestArrayLikes: + @pytest.mark.parametrize("arraylike", arraylikes()) + def test_0d_object_special_case(self, arraylike): + arr = np.array(0.) + obj = arraylike(arr) + # A single array-like is always converted: + res = np.array(obj, dtype=object) + assert_array_equal(arr, res) + + # But a single 0-D nested array-like never: + res = np.array([obj], dtype=object) + assert res[0] is obj + + def test_0d_generic_special_case(self): + class ArraySubclass(np.ndarray): + def __float__(self): + raise TypeError("e.g. quantities raise on this") + + arr = np.array(0.) + obj = arr.view(ArraySubclass) + res = np.array(obj) + # The subclass is simply cast: + assert_array_equal(arr, res) + + # If the 0-D array-like is included, __float__ is currently + # guaranteed to be used. We may want to change that, quantities + # and masked arrays half make use of this. 
+ with pytest.raises(TypeError): + np.array([obj]) + + # The same holds for memoryview: + obj = memoryview(arr) + res = np.array(obj) + assert_array_equal(arr, res) + with pytest.raises(ValueError): + # The error type does not matter much here. + np.array([obj]) diff --git a/numpy/core/tests/test_indexing.py b/numpy/core/tests/test_indexing.py index 4bb5cb11a..f6e263774 100644 --- a/numpy/core/tests/test_indexing.py +++ b/numpy/core/tests/test_indexing.py @@ -370,6 +370,20 @@ class TestIndexing: a[...] = s assert_((a == 1).all()) + def test_array_like_values(self): + # Similar to the above test, but use a memoryview instead + a = np.zeros((5, 5)) + s = np.arange(25, dtype=np.float64).reshape(5, 5) + + a[[0, 1, 2, 3, 4], :] = memoryview(s) + assert_array_equal(a, s) + + a[:, [0, 1, 2, 3, 4]] = memoryview(s) + assert_array_equal(a, s) + + a[...] = memoryview(s) + assert_array_equal(a, s) + def test_subclass_writeable(self): d = np.rec.array([('NGC1001', 11), ('NGC1002', 1.), ('NGC1003', 1.)], dtype=[('target', 'S20'), ('V_mag', '>f4')]) diff --git a/numpy/testing/_private/utils.py b/numpy/testing/_private/utils.py index ef623255b..3827b7505 100644 --- a/numpy/testing/_private/utils.py +++ b/numpy/testing/_private/utils.py @@ -719,6 +719,8 @@ def assert_array_compare(comparison, x, y, err_msg='', verbose=True, at the same locations. """ + __tracebackhide__ = True # Hide traceback for py.test + x_id = func(x) y_id = func(y) # We include work-arounds here to handle three types of slightly |