3 files changed, 593 insertions, 0 deletions
diff --git a/numpy/core/tests/test_array_coercion.py b/numpy/core/tests/test_array_coercion.py
new file mode 100644
index 000000000..b8d4b5cdf
--- /dev/null
+++ b/numpy/core/tests/test_array_coercion.py
@@ -0,0 +1,577 @@
+"""
+Tests for array coercion, mainly through testing `np.array` results directly.
+Note that other such tests exist e.g. in `test_api.py` and many corner-cases
+are tested (sometimes indirectly) elsewhere.
+"""
+
+import pytest
+from pytest import param
+
+from itertools import product
+
+import numpy as np
+from numpy.core._rational_tests import rational
+
+from numpy.testing import (
+    assert_array_equal, assert_warns, IS_PYPY)
+
+
+def arraylikes():
+    """
+    Generator for functions converting an array into various array-likes.
+    Each converter is wrapped in `pytest.param` with a descriptive id so it
+    can be passed directly to `pytest.mark.parametrize`.
+    """
+    # base array:
+    def ndarray(a):
+        return a
+
+    yield param(ndarray, id="ndarray")
+
+    # subclass:
+    class MyArr(np.ndarray):
+        pass
+
+    def subclass(a):
+        return a.view(MyArr)
+
+    yield param(subclass, id="subclass")
+
+    # __array__ protocol
+    class ArrayDunder:
+        def __init__(self, a):
+            self.a = a
+
+        def __array__(self, dtype=None):
+            return self.a
+
+    yield param(ArrayDunder, id="__array__")
+
+    # memory-view
+    yield param(memoryview, id="memoryview")
+
+    # Array-interface
+    class ArrayInterface:
+        def __init__(self, a):
+            self.a = a  # need to hold on to keep interface valid
+            self.__array_interface__ = a.__array_interface__
+
+    yield param(ArrayInterface, id="__array_interface__")
+
+    # Array-Struct
+    class ArrayStruct:
+        def __init__(self, a):
+            self.a = a  # need to hold on to keep struct valid
+            self.__array_struct__ = a.__array_struct__
+
+    yield param(ArrayStruct, id="__array_struct__")
+
+
+def scalar_instances(times=True, extended_precision=True, user_dtype=True):
+    """Yield hard-coded scalar instances, each wrapped in `pytest.param`.
+
+    `times` toggles the datetime64/timedelta64 scalars, `extended_precision`
+    the longdouble/clongdouble ones and `user_dtype` the rational scalar.
+    """
+    # Floats:
+    for name in ("float16", "float32", "float64"):
+        yield param(np.sqrt(getattr(np, name)(5)), id=name)
+    if extended_precision:
+        yield param(np.sqrt(np.longdouble(5)), id="longdouble")
+
+    # Complex:
+    for name in ("complex64", "complex128"):
+        yield param(np.sqrt(getattr(np, name)(2+3j)), id=name)
+    if extended_precision:
+        yield param(np.sqrt(np.longcomplex(2+3j)), id="clongdouble")
+
+    # Bool:
+    # XFAIL: Bool should be added, but has some bad properties when it
+    # comes to strings, see also gh-9875
+    # yield param(np.bool_(0), id="bool")
+
+    # Integers, signed first and then unsigned:
+    int_names = (
+        "int8", "int16", "int32", "int64",
+        "uint8", "uint16", "uint32", "uint64",
+    )
+    for name in int_names:
+        yield param(getattr(np, name)(2), id=name)
+
+    # Rational:
+    if user_dtype:
+        yield param(rational(1, 2), id="rational")
+
+    # Cannot create a structured void scalar directly:
+    void_scalar = np.array([(1, 3)], "i,i")[0]
+    assert isinstance(void_scalar, np.void)
+    assert void_scalar.dtype == np.dtype("i,i")
+    yield param(void_scalar, id="structured")
+
+    if times:
+        # Datetimes and timedelta
+        yield param(np.timedelta64(2), id="timedelta64[generic]")
+        yield param(np.timedelta64(23, "s"), id="timedelta64[s]")
+        yield param(np.timedelta64("NaT", "s"), id="timedelta64[s](NaT)")
+
+        yield param(np.datetime64("NaT"), id="datetime64[generic](NaT)")
+        yield param(np.datetime64("2020-06-07 12:43", "ms"), id="datetime64[ms]")
+
+    # Strings and unstructured void:
+    yield param(np.bytes_(b"1234"), id="bytes")
+    yield param(np.unicode_("2345"), id="unicode")
+    yield param(np.void(b"4321"), id="unstructured_void")
+
+
+def is_parametric_dtype(dtype):
+    """Return True if `dtype` is a parametric legacy dtype, i.e. its itemsize
+    is 0 or it is a datetime/timedelta without a unit.
+    """
+    if dtype.itemsize == 0:
+        return True
+    is_time = issubclass(dtype.type, (np.datetime64, np.timedelta64))
+    # A time dtype with a unit is named e.g. "datetime64[s]", so only the
+    # generic (unit-less) ones have a name ending in "64".  Other dtypes
+    # (e.g. "float64") must not match, hence the subclass check first.
+    return is_time and dtype.name.endswith("64")
+
+
+class TestStringDiscovery:
+    @pytest.mark.parametrize("obj",
+            [object(), 1.2, 10**43, None, "string"],
+            ids=["object", "1.2", "10**43", "None", "string"])
+    def test_basic_stringlength(self, obj):
+        if not isinstance(obj, (str, int)):
+            pytest.xfail(
+                "The Single object (first assert) uses a different branch "
+                "and thus gives a different result (either wrong or longer "
+                "string than normally discovered).")
+
+        length = len(str(obj))
+        expected = np.dtype(f"S{length}")
+
+        assert np.array(obj, dtype="S").dtype == expected
+        assert np.array([obj], dtype="S").dtype == expected
+
+        # A nested array is also discovered correctly
+        arr = np.array(obj, dtype="O")
+        assert np.array(arr, dtype="S").dtype == expected
+
+    @pytest.mark.xfail(reason="Only single array unpacking is supported")
+    @pytest.mark.parametrize("obj",
+            [object(), 1.2, 10**43, None, "string"],
+            ids=["object", "1.2", "10**43", "None", "string"])
+    def test_nested_arrays_stringlength(self, obj):
+        length = len(str(obj))
+        expected = np.dtype(f"S{length}")
+        arr = np.array(obj, dtype="O")
+        assert np.array([arr, arr], dtype="S").dtype == expected
+
+    @pytest.mark.xfail(reason="Only single array unpacking is supported")
+    @pytest.mark.parametrize("arraylike", arraylikes())
+    def test_unpack_first_level(self, arraylike):
+        # We unpack exactly one level of array likes
+        obj = np.array([None])
+        obj[0] = np.array(1.2)
+        # the length of the included item, not of the float dtype
+        length = len(str(obj[0]))
+        expected = np.dtype(f"S{length}")
+
+        obj = arraylike(obj)
+        # casting to string usually calls str(obj)
+        arr = np.array([obj], dtype="S")
+        assert arr.shape == (1, 1)
+        assert arr.dtype == expected
+
+
+class TestScalarDiscovery:
+    def test_void_special_case(self):
+        # Void dtypes with structures discover tuples as elements
+        arr = np.array((1, 2, 3), dtype="i,i,i")
+        assert arr.shape == ()
+        arr = np.array([(1, 2, 3)], dtype="i,i,i")
+        assert arr.shape == (1,)
+
+    def test_char_special_case(self):
+        arr = np.array("string", dtype="c")
+        assert arr.shape == (6,)
+        assert arr.dtype.char == "c"
+        arr = np.array(["string"], dtype="c")
+        assert arr.shape == (1, 6)
+        assert arr.dtype.char == "c"
+
+    def test_char_special_case_deep(self):
+        # Check that the character special case errors correctly if the
+        # array is too deep:
+        nested = ["string"]  # 2 dimensions (due to string being sequence)
+        for i in range(np.MAXDIMS - 2):
+            nested = [nested]
+
+        arr = np.array(nested, dtype='c')
+        assert arr.shape == (1,) * (np.MAXDIMS - 1) + (6,)
+        with pytest.raises(ValueError):
+            np.array([nested], dtype="c")
+
+    def test_unknown_object(self):
+        arr = np.array(object())
+        assert arr.shape == ()
+        assert arr.dtype == np.dtype("O")
+
+    @pytest.mark.parametrize("scalar", scalar_instances())
+    def test_scalar(self, scalar):
+        arr = np.array(scalar)
+        assert arr.shape == ()
+        assert arr.dtype == scalar.dtype
+
+        if type(scalar) is np.bytes_:
+            pytest.xfail("Nested bytes use len(str(scalar)) currently.")
+
+        arr = np.array([[scalar, scalar]])
+        assert arr.shape == (1, 2)
+        assert arr.dtype == scalar.dtype
+
+    # Additionally to string this test also runs into a corner case
+    # with datetime promotion (the difference is the promotion order).
+    @pytest.mark.xfail(reason="Coercion to string is not symmetric")
+    def test_scalar_promotion(self):
+        for sc1, sc2 in product(scalar_instances(), scalar_instances()):
+            sc1, sc2 = sc1.values[0], sc2.values[0]
+            # test all combinations:
+            arr = np.array([sc1, sc2])
+            assert arr.shape == (2,)
+            try:
+                dt1, dt2 = sc1.dtype, sc2.dtype
+                expected_dtype = np.promote_types(dt1, dt2)
+                assert arr.dtype == expected_dtype
+            except TypeError:
+                # Will currently always go to object dtype
+                assert arr.dtype == np.dtype("O")
+
+    @pytest.mark.parametrize("scalar", scalar_instances())
+    def test_scalar_coercion(self, scalar):
+        # This tests various scalar coercion paths, mainly for the numerical
+        # types.  It includes some paths not directly related to `np.array`
+        if isinstance(scalar, np.inexact):
+            # Ensure we have a full-precision number if available
+            scalar = type(scalar)((scalar * 2)**0.5)
+
+        if is_parametric_dtype(scalar.dtype) or type(scalar) is rational:
+            # datetime with unit will be named "datetime64[unit]"
+            # Rational generally fails due to a missing cast. In the future
+            # object casts should automatically be defined based on `setitem`.
+            pytest.xfail("0-D object array to a unit-less datetime cast fails")
+
+        # Use casting from object:
+        arr = np.array(scalar, dtype=object).astype(scalar.dtype)
+
+        # Test various ways to create an array containing this scalar:
+        arr1 = np.array(scalar).reshape(1)
+        arr2 = np.array([scalar])
+        arr3 = np.empty(1, dtype=scalar.dtype)
+        arr3[0] = scalar
+        arr4 = np.empty(1, dtype=scalar.dtype)
+        arr4[:] = [scalar]
+        # All of these methods should yield the same results
+        assert_array_equal(arr, arr1)
+        assert_array_equal(arr, arr2)
+        assert_array_equal(arr, arr3)
+        assert_array_equal(arr, arr4)
+
+    @pytest.mark.xfail(IS_PYPY, reason="`int(np.complex128(3))` fails on PyPy")
+    @pytest.mark.filterwarnings("ignore::numpy.ComplexWarning")
+    # After change, can enable times here, and below and it will work,
+    # Right now times are too complex, so map out some details below.
+    @pytest.mark.parametrize("cast_to", scalar_instances(times=False))
+    def test_scalar_coercion_same_as_cast_and_assignment(self, cast_to):
+        """
+        Test that in most cases:
+           * `np.array(scalar, dtype=dtype)`
+           * `np.empty((), dtype=dtype)[()] = scalar`
+           * `np.array(scalar).astype(dtype)`
+        should behave the same.  The only exceptions are parametric dtypes
+        (mainly datetime/timedelta without unit) and void without fields.
+        """
+        dtype = cast_to.dtype  # use to parametrize only the target dtype
+
+        # XFAIL: Some extended precision tests fail, because assigning to
+        #        complex256 will use float(float128). Rational fails currently.
+        for scalar in scalar_instances(
+                times=False, extended_precision=False, user_dtype=False):
+            scalar = scalar.values[0]
+
+            if dtype.type == np.void:
+                if scalar.dtype.fields is not None and dtype.fields is None:
+                    # Here, coercion to "V6" works, but the cast fails.
+                    # Since the types are identical, SETITEM takes care of
+                    # this, but has different rules than the cast.
+                    with pytest.raises(TypeError):
+                        np.array(scalar).astype(dtype)
+                    # XFAIL: np.array(scalar, dtype=dtype)
+                    np.array([scalar], dtype=dtype)
+                    continue
+
+            # The main test, we first try to use casting and if it succeeds
+            # continue below testing that things are the same, otherwise
+            # test that the alternative paths at least also fail.
+            try:
+                cast = np.array(scalar).astype(dtype)
+            except (TypeError, ValueError, RuntimeError):
+                # coercion should also raise (error type may change)
+                with pytest.raises(Exception):
+                    np.array(scalar, dtype=dtype)
+                # assignment should also raise
+                res = np.zeros((), dtype=dtype)
+                with pytest.raises(Exception):
+                    res[()] = scalar
+
+                return  # NOTE(review): skips remaining scalars, intended?
+
+            # Non error path:
+            arr = np.array(scalar, dtype=dtype)
+            assert_array_equal(arr, cast)
+            # assignment behaves the same
+            ass = np.zeros((), dtype=dtype)
+            ass[()] = scalar
+            assert_array_equal(ass, cast)
+
+
+class TestTimeScalars:
+    @pytest.mark.parametrize("dtype", [np.int64, np.float32])
+    @pytest.mark.parametrize("scalar",
+            [param(np.timedelta64("NaT", "s"), id="timedelta64[s](NaT)"),
+             param(np.timedelta64(123, "s"), id="timedelta64[s]"),
+             param(np.datetime64("NaT", "generic"), id="datetime64[generic](NaT)"),
+             param(np.datetime64(1, "D"), id="datetime64[D]")],)
+    @pytest.mark.xfail(
+            reason="This uses int(scalar) or float(scalar) to assign, which "
+                   "fails.  However, casting currently does not fail.")
+    def test_coercion_basic(self, dtype, scalar):
+        arr = np.array(scalar, dtype=dtype)
+        cast = np.array(scalar).astype(dtype)
+        ass = np.ones((), dtype=dtype)
+        ass[()] = scalar  # raises, as would np.array([scalar], dtype=dtype)
+
+        assert_array_equal(arr, cast)
+        assert_array_equal(ass, cast)
+
+    @pytest.mark.parametrize("dtype", [np.int64, np.float32])
+    @pytest.mark.parametrize("scalar",
+            [param(np.timedelta64(123, "ns"), id="timedelta64[ns]"),
+             param(np.timedelta64(12, "generic"), id="timedelta64[generic]")])
+    def test_coercion_timedelta_convert_to_number(self, dtype, scalar):
+        # Only "ns" and "generic" timedeltas can be converted to numbers
+        # so these are slightly special.
+        arr = np.array(scalar, dtype=dtype)
+        cast = np.array(scalar).astype(dtype)
+        ass = np.ones((), dtype=dtype)
+        ass[()] = scalar  # must not raise for these units (see above)
+
+        assert_array_equal(arr, cast)
+        assert_array_equal(ass, cast)
+
+    @pytest.mark.parametrize(["val", "unit"],
+            [param(123, "s", id="[s]"), param(123, "D", id="[D]")])
+    @pytest.mark.parametrize("scalar_type", [np.datetime64, np.timedelta64])
+    @pytest.mark.xfail(reason="Error not raised for assignment")
+    def test_coercion_assignment_times(self, scalar_type, val, unit):
+        scalar = scalar_type(val, unit)
+
+        # The error type is not ideal, fails because string is too short:
+        with pytest.raises(RuntimeError):
+            np.array(scalar, dtype="S6")
+        with pytest.raises(RuntimeError):
+            np.array(scalar).astype("S6")
+        ass = np.ones((), dtype="S6")
+        with pytest.raises(RuntimeError):
+            ass[()] = scalar
+
+
+class TestNested:
+    @pytest.mark.xfail(reason="No deprecation warning given.")
+    def test_nested_simple(self):
+        initial = [1.2]
+        nested = initial
+        for i in range(np.MAXDIMS - 1):
+            nested = [nested]
+
+        arr = np.array(nested, dtype="float64")
+        assert arr.shape == (1,) * np.MAXDIMS
+        with pytest.raises(ValueError):
+            np.array([nested], dtype="float64")
+
+        # We discover object automatically at this time:
+        with assert_warns(np.VisibleDeprecationWarning):
+            arr = np.array([nested])
+        assert arr.dtype == np.dtype("O")
+        assert arr.shape == (1,) * np.MAXDIMS
+        assert arr.item() is initial
+
+    def test_pathological_self_containing(self):
+        # Test that this also works for two nested sequences
+        l = []
+        l.append(l)
+        arr = np.array([l, l, l], dtype=object)
+        assert arr.shape == (3,) + (1,) * (np.MAXDIMS - 1)
+
+        # Also check a ragged case:
+        arr = np.array([l, [None], l], dtype=object)
+        assert arr.shape == (3, 1)
+
+    @pytest.mark.xfail(
+            reason="For arrays and memoryview, this used to not complain "
+                   "and assign to a too small array instead. For other "
+                   "array-likes the error is different because fewer (only "
+                   "MAXDIM-1) dimensions are found, failing the last test.")
+    @pytest.mark.parametrize("arraylike", arraylikes())
+    def test_nested_arraylikes(self, arraylike):
+        # We try storing an array like into an array, but the array-like
+        # will have too many dimensions.  This means the shape discovery
+        # decides that the array-like must be treated as an object (a special
+        # case of ragged discovery).  The result will be an array with one
+        # dimension less than the maximum dimensions, and the array being
+        # assigned to it (which does work for object or if `float(arraylike)`
+        # works).
+        initial = arraylike(np.ones((1, 1)))
+        # NOTE: For array-likes other than ndarray and memoryview, coercion
+        #       to object currently discovers fewer dimensions than for an
+        #       ndarray, which fails the second part of this test (this is
+        #       covered by the xfail reason on this test).
+
+        nested = initial
+        for i in range(np.MAXDIMS - 1):
+            nested = [nested]
+
+        with pytest.raises(ValueError):
+            # It will refuse to assign the array into
+            np.array(nested, dtype="float64")
+
+        # If this is object, we end up assigning a (1, 1) array into (1,)
+        # (due to running out of dimensions), this is currently supported but
+        # a special case which is not ideal.
+        arr = np.array(nested, dtype=object)
+        assert arr.shape == (1,) * np.MAXDIMS
+        assert arr.item() == np.array(initial).item()
+
+    @pytest.mark.parametrize("arraylike", arraylikes())
+    def test_uneven_depth_ragged(self, arraylike):
+        arr = np.arange(4).reshape((2, 2))
+        arr = arraylike(arr)
+
+        # Array is ragged in the second dimension already:
+        out = np.array([arr, [arr]], dtype=object)
+        assert out.shape == (2,)
+        assert out[0] is arr
+        assert type(out[1]) is list
+
+        if not isinstance(arr, (np.ndarray, memoryview)):
+            pytest.xfail(
+                "does not raise ValueError below, because it discovers "
+                "the dimension as (2,) and not (2, 2, 2)")
+
+        # Array is ragged in the third dimension:
+        with pytest.raises(ValueError):
+            # This is a broadcast error during assignment, because
+            # the array shape would be (2, 2, 2) but `arr[0, 0] = arr` fails.
+            np.array([arr, [arr, arr]], dtype=object)
+
+    def test_empty_sequence(self):
+        arr = np.array([[], [1], [[1]]], dtype=object)
+        assert arr.shape == (3,)
+
+        # The empty sequence stops further dimension discovery, so the
+        # result shape will be (0,) which leads to an error during:
+        with pytest.raises(ValueError):
+            np.array([[], np.empty((0, 1))], dtype=object)
+
+
+class TestBadSequences:
+    # These are tests for bad objects passed into `np.array`, in general
+    # these have undefined behaviour.  In the old code they partially worked
+    # when now they will fail.  We could (and maybe should) create a copy
+    # of all sequences to be safe against bad-actors.
+
+    def test_growing_list(self):
+        # List to coerce, `mylist` will append to it during coercion
+        obj = []
+        class mylist(list):
+            def __len__(self):
+                obj.append([1, 2])
+                return super().__len__()
+
+        obj.append(mylist([1, 2]))
+
+        with pytest.raises(ValueError):  # changes to RuntimeError
+            np.array(obj)
+
+    # Note: We do not test a shrinking list.  These do very evil things
+    #       and the only way to fix them would be to copy all sequences.
+    #       (which may be a real option in the future).
+
+    def test_mutated_list(self):
+        # List to coerce, `mylist` will mutate the first element
+        obj = []
+        class mylist(list):
+            def __len__(self):
+                obj[0] = [2, 3]  # replace with a different list.
+                return super().__len__()
+
+        obj.append([2, 3])
+        obj.append(mylist([1, 2]))
+        #with pytest.raises(RuntimeError):  # Will error in the future
+        np.array(obj)
+
+    def test_replace_0d_array(self):
+        # List to coerce, `baditem` replaces the 0-D array nested inside it
+        obj = []
+        class baditem:
+            def __len__(self):
+                obj[0][0] = 2  # replace the 0-D array with a Python int
+                raise ValueError("not actually a sequence!")
+
+            def __getitem__(self):
+                pass
+
+        # Runs into a corner case in the new code, the `array(2)` is cached
+        # so replacing it invalidates the cache.
+        obj.append([np.array(2), baditem()])
+        # with pytest.raises(RuntimeError):  # Will error in the future
+        np.array(obj)
+
+
+class TestArrayLikes:
+    @pytest.mark.parametrize("arraylike", arraylikes())
+    def test_0d_object_special_case(self, arraylike):
+        base = np.array(0.)
+        wrapped = arraylike(base)
+        # Passed on its own, the array-like is always converted:
+        coerced = np.array(wrapped, dtype=object)
+        assert_array_equal(base, coerced)
+
+        # Nested inside a list, the 0-D array-like is stored as is:
+        coerced = np.array([wrapped], dtype=object)
+        assert coerced[0] is wrapped
+
+    def test_0d_generic_special_case(self):
+        class ArraySubclass(np.ndarray):
+            def __float__(self):
+                raise TypeError("e.g. quantities raise on this")
+
+        base = np.array(0.)
+        as_subclass = base.view(ArraySubclass)
+        coerced = np.array(as_subclass)
+        # On its own, the subclass instance is simply cast:
+        assert_array_equal(base, coerced)
+
+        # Nested in a list, `__float__` is currently guaranteed to be
+        # called on the 0-D array-like.  We may want to change that, but
+        # quantities and masked arrays half make use of this.
+        with pytest.raises(TypeError):
+            np.array([as_subclass])
+
+        # memoryview follows the same pattern:
+        as_buffer = memoryview(base)
+        coerced = np.array(as_buffer)
+        assert_array_equal(base, coerced)
+        with pytest.raises(ValueError):
+            # Which exception is raised is not important here.
+            np.array([as_buffer])
diff --git a/numpy/core/tests/test_indexing.py b/numpy/core/tests/test_indexing.py
index 4bb5cb11a..f6e263774 100644
--- a/numpy/core/tests/test_indexing.py
+++ b/numpy/core/tests/test_indexing.py
@@ -370,6 +370,20 @@ class TestIndexing:
         a[...] = s
         assert_((a == 1).all())
 
+    def test_array_like_values(self):
+        # Similar to the above test, but use a memoryview instead.  Every index
+        # form (advanced rows, advanced columns, ellipsis) is checked on its own.
+        s = np.arange(25, dtype=np.float64).reshape(5, 5)
+        rows = [0, 1, 2, 3, 4]
+        indices = [(rows, slice(None)), (slice(None), rows), Ellipsis]
+
+        for index in indices:
+            # Start from zeros for every index: reusing the previous result
+            # (which already equals `s`) would let a no-op assignment pass.
+            a = np.zeros((5, 5))
+            a[index] = memoryview(s)
+            assert_array_equal(a, s)
+
     def test_subclass_writeable(self):
         d = np.rec.array([('NGC1001', 11), ('NGC1002', 1.), ('NGC1003', 1.)],
                          dtype=[('target', 'S20'), ('V_mag', '>f4')])
diff --git a/numpy/testing/_private/utils.py b/numpy/testing/_private/utils.py
index ef623255b..3827b7505 100644
--- a/numpy/testing/_private/utils.py
+++ b/numpy/testing/_private/utils.py
@@ -719,6 +719,8 @@ def assert_array_compare(comparison, x, y, err_msg='', verbose=True,
         at the same locations.
 
         """
+        __tracebackhide__ = True  # Hide traceback for py.test
+
         x_id = func(x)
         y_id = func(y)
         # We include work-arounds here to handle three types of slightly