diff options
Diffstat (limited to 'numpy/core')
-rw-r--r-- | numpy/core/_add_newdocs.py | 16 | ||||
-rw-r--r-- | numpy/core/_asarray.pyi | 8 | ||||
-rw-r--r-- | numpy/core/code_generators/generate_umath.py | 6 | ||||
-rw-r--r-- | numpy/core/multiarray.pyi | 101 | ||||
-rw-r--r-- | numpy/core/numeric.pyi | 21 | ||||
-rw-r--r-- | numpy/core/numerictypes.py | 2 | ||||
-rw-r--r-- | numpy/core/numerictypes.pyi | 1 | ||||
-rw-r--r-- | numpy/core/setup.py | 1 | ||||
-rw-r--r-- | numpy/core/src/multiarray/ctors.c | 29 | ||||
-rw-r--r-- | numpy/core/src/multiarray/getset.c | 13 | ||||
-rw-r--r-- | numpy/core/src/multiarray/methods.c | 14 | ||||
-rw-r--r-- | numpy/core/src/umath/loops.c.src | 113 | ||||
-rw-r--r-- | numpy/core/src/umath/loops.h.src | 38 | ||||
-rw-r--r-- | numpy/core/src/umath/loops_minmax.dispatch.c.src | 551 | ||||
-rw-r--r-- | numpy/core/src/umath/simd.inc.src | 188 | ||||
-rw-r--r-- | numpy/core/src/umath/ufunc_object.c | 18 | ||||
-rw-r--r-- | numpy/core/tests/test_deprecations.py | 11 | ||||
-rw-r--r-- | numpy/core/tests/test_multiarray.py | 28 |
18 files changed, 766 insertions, 393 deletions
diff --git a/numpy/core/_add_newdocs.py b/numpy/core/_add_newdocs.py index 11d1810f9..a800143a8 100644 --- a/numpy/core/_add_newdocs.py +++ b/numpy/core/_add_newdocs.py @@ -2265,10 +2265,6 @@ add_newdoc('numpy.core.multiarray', 'ndarray', ('__array_interface__', """Array protocol: Python side.""")) -add_newdoc('numpy.core.multiarray', 'ndarray', ('__array_finalize__', - """None.""")) - - add_newdoc('numpy.core.multiarray', 'ndarray', ('__array_priority__', """Array priority.""")) @@ -2278,12 +2274,12 @@ add_newdoc('numpy.core.multiarray', 'ndarray', ('__array_struct__', add_newdoc('numpy.core.multiarray', 'ndarray', ('__dlpack__', """a.__dlpack__(*, stream=None) - + DLPack Protocol: Part of the Array API.""")) add_newdoc('numpy.core.multiarray', 'ndarray', ('__dlpack_device__', """a.__dlpack_device__() - + DLPack Protocol: Part of the Array API.""")) add_newdoc('numpy.core.multiarray', 'ndarray', ('base', @@ -2811,6 +2807,14 @@ add_newdoc('numpy.core.multiarray', 'ndarray', ('__array__', """)) +add_newdoc('numpy.core.multiarray', 'ndarray', ('__array_finalize__', + """a.__array_finalize__(obj, /) + + Present so subclasses can call super. Does nothing. + + """)) + + add_newdoc('numpy.core.multiarray', 'ndarray', ('__array_prepare__', """a.__array_prepare__(array[, context], /) diff --git a/numpy/core/_asarray.pyi b/numpy/core/_asarray.pyi index 0da2de912..51b794130 100644 --- a/numpy/core/_asarray.pyi +++ b/numpy/core/_asarray.pyi @@ -2,7 +2,7 @@ from collections.abc import Iterable from typing import TypeVar, Union, overload, Literal from numpy import ndarray -from numpy.typing import ArrayLike, DTypeLike +from numpy.typing import DTypeLike, _SupportsArrayFunc _ArrayType = TypeVar("_ArrayType", bound=ndarray) @@ -22,7 +22,7 @@ def require( dtype: None = ..., requirements: None | _Requirements | Iterable[_Requirements] = ..., *, - like: ArrayLike = ... + like: _SupportsArrayFunc = ... ) -> _ArrayType: ... @overload def require( @@ -30,7 +30,7 @@ def require( dtype: DTypeLike = ..., requirements: _E | Iterable[_RequirementsWithE] = ..., *, - like: ArrayLike = ... + like: _SupportsArrayFunc = ... ) -> ndarray: ... @overload def require( @@ -38,5 +38,5 @@ def require( dtype: DTypeLike = ..., requirements: None | _Requirements | Iterable[_Requirements] = ..., *, - like: ArrayLike = ... + like: _SupportsArrayFunc = ... ) -> ndarray: ... diff --git a/numpy/core/code_generators/generate_umath.py b/numpy/core/code_generators/generate_umath.py index 1844ab96a..054150b28 100644 --- a/numpy/core/code_generators/generate_umath.py +++ b/numpy/core/code_generators/generate_umath.py @@ -522,14 +522,14 @@ defdict = { Ufunc(2, 1, ReorderableNone, docstrings.get('numpy.core.umath.maximum'), 'PyUFunc_SimpleUniformOperationTypeResolver', - TD(noobj, simd=[('avx512f', 'fd')]), + TD(noobj, dispatch=[('loops_minmax', ints+'fdg')]), TD(O, f='npy_ObjectMax') ), 'minimum': Ufunc(2, 1, ReorderableNone, docstrings.get('numpy.core.umath.minimum'), 'PyUFunc_SimpleUniformOperationTypeResolver', - TD(noobj, simd=[('avx512f', 'fd')]), + TD(noobj, dispatch=[('loops_minmax', ints+'fdg')]), TD(O, f='npy_ObjectMin') ), 'clip': @@ -543,6 +543,7 @@ defdict = { Ufunc(2, 1, ReorderableNone, docstrings.get('numpy.core.umath.fmax'), 'PyUFunc_SimpleUniformOperationTypeResolver', + TD('fdg', dispatch=[('loops_minmax', 'fdg')]), TD(noobj), TD(O, f='npy_ObjectMax') ), @@ -550,6 +551,7 @@ defdict = { Ufunc(2, 1, ReorderableNone, docstrings.get('numpy.core.umath.fmin'), 'PyUFunc_SimpleUniformOperationTypeResolver', + TD('fdg', dispatch=[('loops_minmax', 'fdg')]), TD(noobj), TD(O, f='npy_ObjectMin') ), diff --git a/numpy/core/multiarray.pyi b/numpy/core/multiarray.pyi index 423aed85e..f2d3622d2 100644 --- a/numpy/core/multiarray.pyi +++ b/numpy/core/multiarray.pyi @@ -61,6 +61,7 @@ from numpy.typing import ( NDArray, ArrayLike, _SupportsArray, + _SupportsArrayFunc, _NestedSequence, _FiniteNestedSequence, _ArrayLikeBool_co, @@ -177,7 +178,7 @@ def array( order: _OrderKACF = ..., subok: L[True], ndmin: int = ..., - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> _ArrayType: ... @overload def array( @@ -188,7 +189,7 @@ def array( order: _OrderKACF = ..., subok: bool = ..., ndmin: int = ..., - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[_SCT]: ... @overload def array( @@ -199,7 +200,7 @@ def array( order: _OrderKACF = ..., subok: bool = ..., ndmin: int = ..., - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[Any]: ... @overload def array( @@ -210,7 +211,7 @@ def array( order: _OrderKACF = ..., subok: bool = ..., ndmin: int = ..., - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[_SCT]: ... @overload def array( @@ -221,7 +222,7 @@ def array( order: _OrderKACF = ..., subok: bool = ..., ndmin: int = ..., - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[Any]: ... @overload @@ -230,7 +231,7 @@ def zeros( dtype: None = ..., order: _OrderCF = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[float64]: ... @overload def zeros( @@ -238,7 +239,7 @@ def zeros( dtype: _DTypeLike[_SCT], order: _OrderCF = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[_SCT]: ... @overload def zeros( @@ -246,7 +247,7 @@ def zeros( dtype: DTypeLike, order: _OrderCF = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[Any]: ... @overload @@ -255,7 +256,7 @@ def empty( dtype: None = ..., order: _OrderCF = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[float64]: ... @overload def empty( @@ -263,7 +264,7 @@ def empty( dtype: _DTypeLike[_SCT], order: _OrderCF = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[_SCT]: ... @overload def empty( @@ -271,7 +272,7 @@ def empty( dtype: DTypeLike, order: _OrderCF = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[Any]: ... @overload @@ -468,7 +469,7 @@ def asarray( dtype: None = ..., order: _OrderKACF = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[_SCT]: ... @overload def asarray( @@ -476,7 +477,7 @@ def asarray( dtype: None = ..., order: _OrderKACF = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[Any]: ... @overload def asarray( @@ -484,7 +485,7 @@ def asarray( dtype: _DTypeLike[_SCT], order: _OrderKACF = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[_SCT]: ... @overload def asarray( @@ -492,7 +493,7 @@ def asarray( dtype: DTypeLike, order: _OrderKACF = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[Any]: ... @overload @@ -501,7 +502,7 @@ def asanyarray( dtype: None = ..., order: _OrderKACF = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> _ArrayType: ... @overload def asanyarray( @@ -509,7 +510,7 @@ def asanyarray( dtype: None = ..., order: _OrderKACF = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[_SCT]: ... @overload def asanyarray( @@ -517,7 +518,7 @@ def asanyarray( dtype: None = ..., order: _OrderKACF = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[Any]: ... @overload def asanyarray( @@ -525,7 +526,7 @@ def asanyarray( dtype: _DTypeLike[_SCT], order: _OrderKACF = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[_SCT]: ... @overload def asanyarray( @@ -533,7 +534,7 @@ def asanyarray( dtype: DTypeLike, order: _OrderKACF = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[Any]: ... @overload @@ -541,28 +542,28 @@ def ascontiguousarray( a: _ArrayLike[_SCT], dtype: None = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[_SCT]: ... @overload def ascontiguousarray( a: object, dtype: None = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[Any]: ... @overload def ascontiguousarray( a: Any, dtype: _DTypeLike[_SCT], *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[_SCT]: ... @overload def ascontiguousarray( a: Any, dtype: DTypeLike, *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[Any]: ... @overload @@ -570,28 +571,28 @@ def asfortranarray( a: _ArrayLike[_SCT], dtype: None = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[_SCT]: ... @overload def asfortranarray( a: object, dtype: None = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[Any]: ... @overload def asfortranarray( a: Any, dtype: _DTypeLike[_SCT], *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[_SCT]: ... @overload def asfortranarray( a: Any, dtype: DTypeLike, *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[Any]: ... # In practice `list[Any]` is list with an int, int and a valid @@ -609,7 +610,7 @@ def fromstring( count: SupportsIndex = ..., *, sep: str, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[float64]: ... @overload def fromstring( @@ -618,7 +619,7 @@ def fromstring( count: SupportsIndex = ..., *, sep: str, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[_SCT]: ... @overload def fromstring( @@ -627,7 +628,7 @@ def fromstring( count: SupportsIndex = ..., *, sep: str, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[Any]: ... def frompyfunc( @@ -646,7 +647,7 @@ def fromfile( sep: str = ..., offset: SupportsIndex = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[float64]: ... @overload def fromfile( @@ -656,7 +657,7 @@ def fromfile( sep: str = ..., offset: SupportsIndex = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[_SCT]: ... @overload def fromfile( @@ -666,7 +667,7 @@ def fromfile( sep: str = ..., offset: SupportsIndex = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[Any]: ... @overload @@ -675,7 +676,7 @@ def fromiter( dtype: _DTypeLike[_SCT], count: SupportsIndex = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[_SCT]: ... @overload def fromiter( @@ -683,7 +684,7 @@ def fromiter( dtype: DTypeLike, count: SupportsIndex = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[Any]: ... @overload @@ -693,7 +694,7 @@ def frombuffer( count: SupportsIndex = ..., offset: SupportsIndex = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[float64]: ... @overload def frombuffer( @@ -702,7 +703,7 @@ def frombuffer( count: SupportsIndex = ..., offset: SupportsIndex = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[_SCT]: ... @overload def frombuffer( @@ -711,7 +712,7 @@ def frombuffer( count: SupportsIndex = ..., offset: SupportsIndex = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[Any]: ... @overload @@ -719,7 +720,7 @@ def arange( # type: ignore[misc] stop: _IntLike_co, /, *, dtype: None = ..., - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[signedinteger[Any]]: ... @overload def arange( # type: ignore[misc] @@ -728,14 +729,14 @@ def arange( # type: ignore[misc] step: _IntLike_co = ..., dtype: None = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[signedinteger[Any]]: ... @overload def arange( # type: ignore[misc] stop: _FloatLike_co, /, *, dtype: None = ..., - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[floating[Any]]: ... @overload def arange( # type: ignore[misc] @@ -744,14 +745,14 @@ def arange( # type: ignore[misc] step: _FloatLike_co = ..., dtype: None = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[floating[Any]]: ... @overload def arange( stop: _TD64Like_co, /, *, dtype: None = ..., - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[timedelta64]: ... @overload def arange( @@ -760,7 +761,7 @@ def arange( step: _TD64Like_co = ..., dtype: None = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[timedelta64]: ... @overload def arange( # both start and stop must always be specified for datetime64 @@ -769,14 +770,14 @@ def arange( # both start and stop must always be specified for datetime64 step: datetime64 = ..., dtype: None = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[datetime64]: ... @overload def arange( stop: Any, /, *, dtype: _DTypeLike[_SCT], - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[_SCT]: ... @overload def arange( @@ -785,14 +786,14 @@ def arange( step: Any = ..., dtype: _DTypeLike[_SCT] = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[_SCT]: ... @overload def arange( stop: Any, /, *, dtype: DTypeLike, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[Any]: ... @overload def arange( @@ -801,7 +802,7 @@ def arange( step: Any = ..., dtype: DTypeLike = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[Any]: ... def datetime_data( diff --git a/numpy/core/numeric.pyi b/numpy/core/numeric.pyi index 8b92abab4..d5e28d24c 100644 --- a/numpy/core/numeric.pyi +++ b/numpy/core/numeric.pyi @@ -37,6 +37,7 @@ from numpy.typing import ( _SupportsDType, _FiniteNestedSequence, _SupportsArray, + _SupportsArrayFunc, _ScalarLike_co, _ArrayLikeBool_co, _ArrayLikeUInt_co, @@ -108,7 +109,7 @@ def ones( dtype: None = ..., order: _OrderCF = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[float64]: ... @overload def ones( @@ -116,7 +117,7 @@ def ones( dtype: _DTypeLike[_SCT], order: _OrderCF = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[_SCT]: ... @overload def ones( @@ -124,7 +125,7 @@ def ones( dtype: DTypeLike, order: _OrderCF = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[Any]: ... @overload @@ -175,7 +176,7 @@ def full( dtype: None = ..., order: _OrderCF = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[Any]: ... @overload def full( @@ -184,7 +185,7 @@ def full( dtype: _DTypeLike[_SCT], order: _OrderCF = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[_SCT]: ... @overload def full( @@ -193,7 +194,7 @@ def full( dtype: DTypeLike, order: _OrderCF = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[Any]: ... @overload @@ -563,7 +564,7 @@ def fromfunction( shape: Sequence[int], *, dtype: DTypeLike = ..., - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., **kwargs: Any, ) -> _T: ... @@ -584,21 +585,21 @@ def identity( n: int, dtype: None = ..., *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[float64]: ... @overload def identity( n: int, dtype: _DTypeLike[_SCT], *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[_SCT]: ... @overload def identity( n: int, dtype: DTypeLike, *, - like: ArrayLike = ..., + like: _SupportsArrayFunc = ..., ) -> NDArray[Any]: ... def allclose( diff --git a/numpy/core/numerictypes.py b/numpy/core/numerictypes.py index 8e5de852b..3d1cb6fd1 100644 --- a/numpy/core/numerictypes.py +++ b/numpy/core/numerictypes.py @@ -516,7 +516,7 @@ def _scalar_type_key(typ): return (dt.kind.lower(), dt.itemsize) -ScalarType = [int, float, complex, int, bool, bytes, str, memoryview] +ScalarType = [int, float, complex, bool, bytes, str, memoryview] ScalarType += sorted(_concrete_types, key=_scalar_type_key) ScalarType = tuple(ScalarType) diff --git a/numpy/core/numerictypes.pyi b/numpy/core/numerictypes.pyi index f8c49b957..09c180a0f 100644 --- a/numpy/core/numerictypes.pyi +++ b/numpy/core/numerictypes.pyi @@ -137,7 +137,6 @@ ScalarType: tuple[ type[int], type[float], type[complex], - type[int], type[bool], type[bytes], type[str], diff --git a/numpy/core/setup.py b/numpy/core/setup.py index a67a4cab6..22cac1e9a 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -999,6 +999,7 @@ def configuration(parent_package='',top_path=None): join('src', 'umath', 'loops_unary_fp.dispatch.c.src'), join('src', 'umath', 'loops_arithm_fp.dispatch.c.src'), join('src', 'umath', 'loops_arithmetic.dispatch.c.src'), + join('src', 'umath', 'loops_minmax.dispatch.c.src'), join('src', 'umath', 'loops_trigonometric.dispatch.c.src'), join('src', 'umath', 'loops_umath_fp.dispatch.c.src'), join('src', 'umath', 'loops_exponent_log.dispatch.c.src'), diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index e60cacc18..25eb91977 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -875,16 +875,39 @@ PyArray_NewFromDescr_int( /* * call the __array_finalize__ method if a subtype was requested. * If obj is NULL use Py_None for the Python callback. + * For speed, we skip if __array_finalize__ is inherited from ndarray + * (since that function does nothing), or, for backward compatibility, + * if it is None. */ if (subtype != &PyArray_Type) { PyObject *res, *func; - - func = PyObject_GetAttr((PyObject *)fa, npy_ma_str_array_finalize); + static PyObject *ndarray_array_finalize = NULL; + /* First time, cache ndarray's __array_finalize__ */ + if (ndarray_array_finalize == NULL) { + ndarray_array_finalize = PyObject_GetAttr( + (PyObject *)&PyArray_Type, npy_ma_str_array_finalize); + } + func = PyObject_GetAttr((PyObject *)subtype, npy_ma_str_array_finalize); if (func == NULL) { goto fail; } + else if (func == ndarray_array_finalize) { + Py_DECREF(func); + } else if (func == Py_None) { Py_DECREF(func); + /* + * 2022-01-08, NumPy 1.23; when deprecation period is over, remove this + * whole stanza so one gets a "NoneType object is not callable" TypeError. + */ + if (DEPRECATE( + "Setting __array_finalize__ = None to indicate no finalization" + "should be done is deprecated. Instead, just inherit from " + "ndarray or, if that is not possible, explicitly set to " + "ndarray.__array_function__; this will raise a TypeError " + "in the future. (Deprecated since NumPy 1.23)") < 0) { + goto fail; + } } else { if (PyCapsule_CheckExact(func)) { @@ -903,7 +926,7 @@ PyArray_NewFromDescr_int( if (obj == NULL) { obj = Py_None; } - res = PyObject_CallFunctionObjArgs(func, obj, NULL); + res = PyObject_CallFunctionObjArgs(func, (PyObject *)fa, obj, NULL); Py_DECREF(func); if (res == NULL) { goto fail; diff --git a/numpy/core/src/multiarray/getset.c b/numpy/core/src/multiarray/getset.c index ac6465acd..a4f972ba4 100644 --- a/numpy/core/src/multiarray/getset.c +++ b/numpy/core/src/multiarray/getset.c @@ -926,15 +926,6 @@ array_transpose_get(PyArrayObject *self, void *NPY_UNUSED(ignored)) return PyArray_Transpose(self, NULL); } -/* If this is None, no function call is made - --- default sub-class behavior -*/ -static PyObject * -array_finalize_get(PyArrayObject *NPY_UNUSED(self), void *NPY_UNUSED(ignored)) -{ - Py_RETURN_NONE; -} - NPY_NO_EXPORT PyGetSetDef array_getsetlist[] = { {"ndim", (getter)array_ndim_get, @@ -1008,10 +999,6 @@ NPY_NO_EXPORT PyGetSetDef array_getsetlist[] = { (getter)array_priority_get, NULL, NULL, NULL}, - {"__array_finalize__", - (getter)array_finalize_get, - NULL, - NULL, NULL}, {NULL, NULL, NULL, NULL, NULL}, /* Sentinel */ }; diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c index 8dd7c112f..33f78dff2 100644 --- a/numpy/core/src/multiarray/methods.c +++ b/numpy/core/src/multiarray/methods.c @@ -859,7 +859,7 @@ array_astype(PyArrayObject *self, * and it's not a subtype if subok is False, then we * can skip the copy. */ - if (forcecopy != NPY_COPY_ALWAYS && + if (forcecopy != NPY_COPY_ALWAYS && (order == NPY_KEEPORDER || (order == NPY_ANYORDER && (PyArray_IS_C_CONTIGUOUS(self) || @@ -881,7 +881,7 @@ array_astype(PyArrayObject *self, Py_DECREF(dtype); return NULL; } - + if (!PyArray_CanCastArrayTo(self, dtype, casting)) { PyErr_Clear(); npy_set_invalid_cast_error( @@ -926,6 +926,13 @@ array_astype(PyArrayObject *self, static PyObject * +array_finalizearray(PyArrayObject *self, PyObject *obj) +{ + Py_RETURN_NONE; +} + + +static PyObject * array_wraparray(PyArrayObject *self, PyObject *args) { PyArrayObject *arr; @@ -2777,6 +2784,9 @@ NPY_NO_EXPORT PyMethodDef array_methods[] = { {"__array_prepare__", (PyCFunction)array_preparearray, METH_VARARGS, NULL}, + {"__array_finalize__", + (PyCFunction)array_finalizearray, + METH_O, NULL}, {"__array_wrap__", (PyCFunction)array_wraparray, METH_VARARGS, NULL}, diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src index aaa694f34..5f054d0a9 100644 --- a/numpy/core/src/umath/loops.c.src +++ b/numpy/core/src/umath/loops.c.src @@ -724,32 +724,6 @@ NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void /**end repeat1**/ -/**begin repeat1 - * #kind = maximum, minimum# - * #OP = >, <# - **/ - -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) -{ - if (IS_BINARY_REDUCE) { - BINARY_REDUCE_LOOP(@type@) { - const @type@ in2 = *(@type@ *)ip2; - io1 = (io1 @OP@ in2) ? io1 : in2; - } - *((@type@ *)iop1) = io1; - } - else { - BINARY_LOOP { - const @type@ in1 = *(@type@ *)ip1; - const @type@ in2 = *(@type@ *)ip2; - *((@type@ *)op1) = (in1 @OP@ in2) ? in1 : in2; - } - } -} - -/**end repeat1**/ - NPY_NO_EXPORT void @TYPE@_power(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { @@ -1684,93 +1658,6 @@ NPY_NO_EXPORT void } } -/**begin repeat1 - * #kind = maximum, minimum# - * #OP = >=, <=# - **/ -NPY_NO_EXPORT void -@TYPE@_@kind@_avx512f(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) -{ - /* */ - if (IS_BINARY_REDUCE) { - if (!run_unary_reduce_simd_@kind@_@TYPE@(args, dimensions, steps)) { - BINARY_REDUCE_LOOP(@type@) { - const @type@ in2 = *(@type@ *)ip2; - /* Order of operations important for MSVC 2015 */ - io1 = (io1 @OP@ in2 || npy_isnan(io1)) ? io1 : in2; - } - *((@type@ *)iop1) = io1; - } - } - else { - if (!run_binary_avx512f_@kind@_@TYPE@(args, dimensions, steps)) { - BINARY_LOOP { - @type@ in1 = *(@type@ *)ip1; - const @type@ in2 = *(@type@ *)ip2; - /* Order of operations important for MSVC 2015 */ - in1 = (in1 @OP@ in2 || npy_isnan(in1)) ? in1 : in2; - *((@type@ *)op1) = in1; - } - } - } - npy_clear_floatstatus_barrier((char*)dimensions); -} - -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) -{ - /* */ - if (IS_BINARY_REDUCE) { - if (!run_unary_reduce_simd_@kind@_@TYPE@(args, dimensions, steps)) { - BINARY_REDUCE_LOOP(@type@) { - const @type@ in2 = *(@type@ *)ip2; - /* Order of operations important for MSVC 2015 */ - io1 = (io1 @OP@ in2 || npy_isnan(io1)) ? io1 : in2; - } - *((@type@ *)iop1) = io1; - } - } - else { - BINARY_LOOP { - @type@ in1 = *(@type@ *)ip1; - const @type@ in2 = *(@type@ *)ip2; - /* Order of operations important for MSVC 2015 */ - in1 = (in1 @OP@ in2 || npy_isnan(in1)) ? in1 : in2; - *((@type@ *)op1) = in1; - } - } - npy_clear_floatstatus_barrier((char*)dimensions); -} -/**end repeat1**/ - -/**begin repeat1 - * #kind = fmax, fmin# - * #OP = >=, <=# - **/ -NPY_NO_EXPORT void -@TYPE@_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) -{ - /* */ - if (IS_BINARY_REDUCE) { - BINARY_REDUCE_LOOP(@type@) { - const @type@ in2 = *(@type@ *)ip2; - /* Order of operations important for MSVC 2015 */ - io1 = (io1 @OP@ in2 || npy_isnan(in2)) ? io1 : in2; - } - *((@type@ *)iop1) = io1; - } - else { - BINARY_LOOP { - const @type@ in1 = *(@type@ *)ip1; - const @type@ in2 = *(@type@ *)ip2; - /* Order of operations important for MSVC 2015 */ - *((@type@ *)op1) = (in1 @OP@ in2 || npy_isnan(in2)) ? in1 : in2; - } - } - npy_clear_floatstatus_barrier((char*)dimensions); -} -/**end repeat1**/ - NPY_NO_EXPORT void @TYPE@_floor_divide(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { diff --git a/numpy/core/src/umath/loops.h.src b/numpy/core/src/umath/loops.h.src index 081ca9957..3eafbdf66 100644 --- a/numpy/core/src/umath/loops.h.src +++ b/numpy/core/src/umath/loops.h.src @@ -22,7 +22,6 @@ #define BOOL_fmax BOOL_maximum #define BOOL_fmin BOOL_minimum - /* ***************************************************************************** ** BOOLEAN LOOPS ** @@ -660,6 +659,43 @@ PyUFunc_OOO_O(char **args, npy_intp const *dimensions, npy_intp const *steps, vo /* ***************************************************************************** + ** MIN/MAX LOOPS ** + ***************************************************************************** + */ + +#ifndef NPY_DISABLE_OPTIMIZATION + #include "loops_minmax.dispatch.h" +#endif + +//---------- Integers ---------- + +/**begin repeat + * #TYPE = BYTE, UBYTE, SHORT, USHORT, INT, UINT, + * LONG, ULONG, LONGLONG, ULONGLONG# + */ +/**begin repeat1 + * #kind = maximum, minimum# + */ + NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_@kind@, + (char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))) +/**end repeat1**/ +/**end repeat**/ + +//---------- Float ---------- + + /**begin repeat + * #TYPE = FLOAT, DOUBLE, LONGDOUBLE# + */ +/**begin repeat1 + * #kind = maximum, minimum, fmax, fmin# + */ + NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_@kind@, + (char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))) +/**end repeat1**/ +/**end repeat**/ + +/* + ***************************************************************************** ** END LOOPS ** ***************************************************************************** */ diff --git a/numpy/core/src/umath/loops_minmax.dispatch.c.src b/numpy/core/src/umath/loops_minmax.dispatch.c.src new file mode 100644 index 000000000..dbd158db9 --- /dev/null +++ b/numpy/core/src/umath/loops_minmax.dispatch.c.src @@ -0,0 +1,551 @@ +/*@targets + ** $maxopt baseline + ** neon asimd + ** sse2 avx2 avx512_skx + ** vsx2 + **/ +#define _UMATHMODULE +#define _MULTIARRAYMODULE +#define NPY_NO_DEPRECATED_API NPY_API_VERSION + +#include "simd/simd.h" +#include "loops_utils.h" +#include "loops.h" +#include "lowlevel_strided_loops.h" +// Provides the various *_LOOP macros +#include "fast_loop_macros.h" + +/******************************************************************************* + ** Scalar intrinsics + ******************************************************************************/ +// signed/unsigned int +#define scalar_max_i(A, B) ((A > B) ? A : B) +#define scalar_min_i(A, B) ((A < B) ? A : B) +// fp, propagates NaNs +#define scalar_max_f(A, B) ((A >= B || npy_isnan(A)) ? A : B) +#define scalar_max_d scalar_max_f +#define scalar_max_l scalar_max_f +#define scalar_min_f(A, B) ((A <= B || npy_isnan(A)) ? A : B) +#define scalar_min_d scalar_min_f +#define scalar_min_l scalar_min_f +// fp, ignores NaNs +#define scalar_maxp_f fmaxf +#define scalar_maxp_d fmax +#define scalar_maxp_l fmaxl +#define scalar_minp_f fminf +#define scalar_minp_d fmin +#define scalar_minp_l fminl + +// special optimization for fp scalars propagates NaNs +// since there're no C99 support for it +#ifndef NPY_DISABLE_OPTIMIZATION +/**begin repeat + * #type = npy_float, npy_double# + * #sfx = f32, f64# + * #c_sfx = f, d# + * #isa_sfx = s, d# + * #sse_type = __m128, __m128d# + */ +/**begin repeat1 + * #op = max, min# + * #neon_instr = fmax, fmin# + */ +#ifdef NPY_HAVE_SSE2 +#undef scalar_@op@_@c_sfx@ +NPY_FINLINE @type@ scalar_@op@_@c_sfx@(@type@ a, @type@ b) { + @sse_type@ va = _mm_set_s@isa_sfx@(a); + @sse_type@ vb = _mm_set_s@isa_sfx@(b); + @sse_type@ rv = _mm_@op@_s@isa_sfx@(va, vb); + // X86 handel second operand + @sse_type@ nn = _mm_cmpord_s@isa_sfx@(va, va); + #ifdef NPY_HAVE_SSE41 + rv = _mm_blendv_p@isa_sfx@(va, rv, nn); + #else + rv = _mm_xor_p@isa_sfx@(va, _mm_and_p@isa_sfx@(_mm_xor_p@isa_sfx@(va, rv), nn)); + #endif + return _mm_cvts@isa_sfx@_@sfx@(rv); +} +#endif // SSE2 +#ifdef __aarch64__ +#undef scalar_@op@_@c_sfx@ +NPY_FINLINE @type@ scalar_@op@_@c_sfx@(@type@ a, @type@ b) { + @type@ result = 0; + __asm( + "@neon_instr@ %@isa_sfx@[result], %@isa_sfx@[a], %@isa_sfx@[b]" + : [result] "=w" (result) + : [a] "w" (a), [b] "w" (b) + ); + return result; +} +#endif // __aarch64__ +/**end repeat1**/ +/**end repeat**/ +#endif // NPY_DISABLE_OPTIMIZATION +// mapping to double if its possible +#if NPY_BITSOF_DOUBLE == NPY_BITSOF_LONGDOUBLE +/**begin repeat + * #op = max, min, maxp, minp# + */ + #undef scalar_@op@_l + #define scalar_@op@_l scalar_@op@_d +/**end repeat**/ +#endif + +/******************************************************************************* + ** extra SIMD intrinsics + ******************************************************************************/ + +#if NPY_SIMD +/**begin repeat + * #sfx = s8, u8, s16, u16, s32, u32, s64, u64# + * #is_64 = 0*6, 1*2# + */ +#if defined(NPY_HAVE_ASIMD) && defined(__aarch64__) + #if !@is_64@ + #define npyv_reduce_min_@sfx@ vminvq_@sfx@ + #define npyv_reduce_max_@sfx@ vmaxvq_@sfx@ + #else + NPY_FINLINE npyv_lanetype_@sfx@ npyv_reduce_min_@sfx@(npyv_@sfx@ v) + { + npyv_lanetype_@sfx@ a = vgetq_lane_@sfx@(v, 0); + npyv_lanetype_@sfx@ b = vgetq_lane_@sfx@(v, 1); + npyv_lanetype_@sfx@ result = (a < b) ? a : b; + return result; + } + NPY_FINLINE npyv_lanetype_@sfx@ npyv_reduce_max_@sfx@(npyv_@sfx@ v) + { + npyv_lanetype_@sfx@ a = vgetq_lane_@sfx@(v, 0); + npyv_lanetype_@sfx@ b = vgetq_lane_@sfx@(v, 1); + npyv_lanetype_@sfx@ result = (a > b) ? a : b; + return result; + } + #endif // !@is_64@ +#else + /**begin repeat1 + * #intrin = min, max# + */ + NPY_FINLINE npyv_lanetype_@sfx@ npyv_reduce_@intrin@_@sfx@(npyv_@sfx@ v) + { + npyv_lanetype_@sfx@ NPY_DECL_ALIGNED(NPY_SIMD_WIDTH) s[npyv_nlanes_@sfx@]; + npyv_storea_@sfx@(s, v); + npyv_lanetype_@sfx@ result = s[0]; + for(int i=1; i<npyv_nlanes_@sfx@; ++i){ + result = scalar_@intrin@_i(result, s[i]); + } + return result; + } + /**end repeat1**/ +#endif +/**end repeat**/ +#endif // NPY_SIMD + +/**begin repeat + * #sfx = f32, f64# + * #bsfx = b32, b64# + * #simd_chk = NPY_SIMD, NPY_SIMD_F64# + * #scalar_sfx = f, d# + */ +#if @simd_chk@ +#if defined(NPY_HAVE_ASIMD) && defined(__aarch64__) + #define npyv_minn_@sfx@ vminq_@sfx@ + #define npyv_maxn_@sfx@ vmaxq_@sfx@ + #define npyv_reduce_minn_@sfx@ vminvq_@sfx@ + #define npyv_reduce_maxn_@sfx@ vmaxvq_@sfx@ + #define npyv_reduce_minp_@sfx@ vminnmvq_@sfx@ + #define npyv_reduce_maxp_@sfx@ vmaxnmvq_@sfx@ +#else + /**begin repeat1 + * #intrin = min, max# + */ + // propagates NaNs + NPY_FINLINE npyv_@sfx@ npyv_@intrin@n_@sfx@(npyv_@sfx@ a, npyv_@sfx@ b) + { + npyv_@sfx@ result = npyv_@intrin@_@sfx@(a, b); + // result = npyv_select_@sfx@(npyv_notnan_@sfx@(b), result, b); + // X86 handle second operand + #ifndef NPY_HAVE_SSE2 + result = npyv_select_@sfx@(npyv_notnan_@sfx@(b), result, b); + #endif + result = npyv_select_@sfx@(npyv_notnan_@sfx@(a), result, a); + return result; + } + /**end repeat1**/ + /**begin repeat1 + * #intrin = minn, maxn, minp, maxp# + * #scalar_intrin = min, max, minp, maxp# + */ + NPY_FINLINE npyv_lanetype_@sfx@ npyv_reduce_@intrin@_@sfx@(npyv_@sfx@ v) + { + npyv_lanetype_@sfx@ NPY_DECL_ALIGNED(NPY_SIMD_WIDTH) s[npyv_nlanes_@sfx@]; + npyv_storea_@sfx@(s, v); + npyv_lanetype_@sfx@ result = s[0]; + for(int i=1; i<npyv_nlanes_@sfx@; ++i){ + result = scalar_@scalar_intrin@_@scalar_sfx@(result, s[i]); + } + return result; + } + /**end repeat1**/ +#endif +#endif // simd_chk +/**end repeat**/ + +/******************************************************************************* + ** Defining the SIMD kernels + ******************************************************************************/ +/**begin repeat + * #sfx = s8, u8, s16, u16, s32, u32, s64, u64, f32, f64# + * #simd_chk = NPY_SIMD*9, NPY_SIMD_F64# + * #is_fp = 0*8, 1, 1# + * #scalar_sfx = i*8, f, d# + */ +/**begin repeat1 + * # intrin = max, min, maxp, minp# + * # fp_only = 0, 0, 1, 1# + */ +#define SCALAR_OP scalar_@intrin@_@scalar_sfx@ +#if @simd_chk@ && (!@fp_only@ || (@is_fp@ && @fp_only@)) + +#if @is_fp@ && !@fp_only@ + #define V_INTRIN npyv_@intrin@n_@sfx@ // propagates NaNs + #define V_REDUCE_INTRIN npyv_reduce_@intrin@n_@sfx@ +#else + #define V_INTRIN npyv_@intrin@_@sfx@ + #define V_REDUCE_INTRIN npyv_reduce_@intrin@_@sfx@ +#endif + +// contiguous input. +static inline void +simd_reduce_c_@intrin@_@sfx@(const npyv_lanetype_@sfx@ *ip, npyv_lanetype_@sfx@ *op1, npy_intp len) +{ + if (len < 1) { + return; + } + const int vstep = npyv_nlanes_@sfx@; + const int wstep = vstep*8; + npyv_@sfx@ acc = npyv_setall_@sfx@(op1[0]); + for (; len >= wstep; len -= wstep, ip += wstep) { + #ifdef NPY_HAVE_SSE2 + NPY_PREFETCH(ip + wstep, 0, 3); + #endif + npyv_@sfx@ v0 = npyv_load_@sfx@(ip + vstep * 0); + npyv_@sfx@ v1 = npyv_load_@sfx@(ip + vstep * 1); + npyv_@sfx@ v2 = npyv_load_@sfx@(ip + vstep * 2); + npyv_@sfx@ v3 = npyv_load_@sfx@(ip + vstep * 3); + + npyv_@sfx@ v4 = npyv_load_@sfx@(ip + vstep * 4); + npyv_@sfx@ v5 = npyv_load_@sfx@(ip + vstep * 5); + npyv_@sfx@ v6 = npyv_load_@sfx@(ip + vstep * 6); + npyv_@sfx@ v7 = npyv_load_@sfx@(ip + vstep * 7); + + npyv_@sfx@ r01 = V_INTRIN(v0, v1); + npyv_@sfx@ r23 = V_INTRIN(v2, v3); + npyv_@sfx@ r45 = V_INTRIN(v4, v5); + npyv_@sfx@ r67 = V_INTRIN(v6, v7); + acc = V_INTRIN(acc, V_INTRIN(V_INTRIN(r01, r23), V_INTRIN(r45, r67))); + } + for (; len >= vstep; len -= vstep, ip += vstep) { + acc = V_INTRIN(acc, npyv_load_@sfx@(ip)); + } + npyv_lanetype_@sfx@ r = V_REDUCE_INTRIN(acc); + // Scalar - finish up any remaining iterations + for (; len > 0; --len, ++ip) { + const npyv_lanetype_@sfx@ in2 = *ip; + r = SCALAR_OP(r, in2); + } + op1[0] = r; +} + +// contiguous inputs and output. +static inline void +simd_binary_ccc_@intrin@_@sfx@(const npyv_lanetype_@sfx@ *ip1, const npyv_lanetype_@sfx@ *ip2, + npyv_lanetype_@sfx@ *op1, npy_intp len) +{ +#if NPY_SIMD_WIDTH == 128 + // Note, 6x unroll was chosen for best results on Apple M1 + const int vectorsPerLoop = 6; +#else + // To avoid memory bandwidth bottleneck + const int vectorsPerLoop = 2; +#endif + const int elemPerVector = npyv_nlanes_@sfx@; + int elemPerLoop = vectorsPerLoop * elemPerVector; + + npy_intp i = 0; + + for (; (i+elemPerLoop) <= len; i += elemPerLoop) { + npyv_@sfx@ v0 = npyv_load_@sfx@(&ip1[i + 0 * elemPerVector]); + npyv_@sfx@ v1 = npyv_load_@sfx@(&ip1[i + 1 * elemPerVector]); + #if NPY_SIMD_WIDTH == 128 + npyv_@sfx@ v2 = npyv_load_@sfx@(&ip1[i + 2 * elemPerVector]); + npyv_@sfx@ v3 = npyv_load_@sfx@(&ip1[i + 3 * elemPerVector]); + npyv_@sfx@ v4 = npyv_load_@sfx@(&ip1[i + 4 * elemPerVector]); + npyv_@sfx@ v5 = npyv_load_@sfx@(&ip1[i + 5 * elemPerVector]); + #endif + npyv_@sfx@ u0 = npyv_load_@sfx@(&ip2[i + 0 * elemPerVector]); + npyv_@sfx@ u1 = npyv_load_@sfx@(&ip2[i + 1 * elemPerVector]); + #if NPY_SIMD_WIDTH == 128 + npyv_@sfx@ u2 = npyv_load_@sfx@(&ip2[i + 2 * elemPerVector]); + npyv_@sfx@ u3 = npyv_load_@sfx@(&ip2[i + 3 * elemPerVector]); + npyv_@sfx@ u4 = npyv_load_@sfx@(&ip2[i + 4 * elemPerVector]); + npyv_@sfx@ u5 = npyv_load_@sfx@(&ip2[i + 5 * elemPerVector]); + #endif + npyv_@sfx@ m0 = V_INTRIN(v0, u0); + npyv_@sfx@ m1 = V_INTRIN(v1, u1); + #if NPY_SIMD_WIDTH == 128 + npyv_@sfx@ m2 = V_INTRIN(v2, u2); + npyv_@sfx@ m3 = V_INTRIN(v3, u3); + npyv_@sfx@ m4 = V_INTRIN(v4, u4); + npyv_@sfx@ m5 = V_INTRIN(v5, u5); + #endif + npyv_store_@sfx@(&op1[i + 0 * elemPerVector], m0); + npyv_store_@sfx@(&op1[i + 1 * elemPerVector], m1); + #if NPY_SIMD_WIDTH == 128 + npyv_store_@sfx@(&op1[i + 2 * elemPerVector], m2); + npyv_store_@sfx@(&op1[i + 3 * elemPerVector], m3); + npyv_store_@sfx@(&op1[i + 4 * elemPerVector], m4); + npyv_store_@sfx@(&op1[i + 5 * elemPerVector], m5); + #endif + } + for (; (i+elemPerVector) <= len; i += elemPerVector) { + npyv_@sfx@ v0 = npyv_load_@sfx@(ip1 + i); + npyv_@sfx@ u0 = npyv_load_@sfx@(ip2 + i); + npyv_@sfx@ m0 = V_INTRIN(v0, u0); + npyv_store_@sfx@(op1 + i, m0); + } + // Scalar - finish up any remaining iterations + for (; i < len; ++i) { + const npyv_lanetype_@sfx@ in1 = ip1[i]; + const npyv_lanetype_@sfx@ in2 = ip2[i]; + op1[i] = SCALAR_OP(in1, in2); + } +} +// non-contiguous for float 32/64-bit memory access +#if @is_fp@ +static inline void +simd_binary_@intrin@_@sfx@(const npyv_lanetype_@sfx@ *ip1, npy_intp sip1, + const npyv_lanetype_@sfx@ *ip2, npy_intp sip2, + npyv_lanetype_@sfx@ *op1, npy_intp sop1, + npy_intp len) +{ + const int vstep = npyv_nlanes_@sfx@; + for (; len >= vstep; len -= vstep, ip1 += sip1*vstep, + ip2 += sip2*vstep, op1 += sop1*vstep + ) { + npyv_@sfx@ a, b; + if (sip1 == 1) { + a = npyv_load_@sfx@(ip1); + } else { + a = npyv_loadn_@sfx@(ip1, sip1); + } + if (sip2 == 1) { + b = npyv_load_@sfx@(ip2); + } else { + b = npyv_loadn_@sfx@(ip2, sip2); + } + npyv_@sfx@ r = V_INTRIN(a, b); + if (sop1 == 1) { + npyv_store_@sfx@(op1, r); + } else { + npyv_storen_@sfx@(op1, sop1, r); + } + } + for (; len > 0; --len, ip1 += sip1, ip2 += sip2, op1 += sop1) { + const npyv_lanetype_@sfx@ a = *ip1; + const npyv_lanetype_@sfx@ b = *ip2; + *op1 = SCALAR_OP(a, b); + } +} +#endif + +#undef V_INTRIN +#undef V_REDUCE_INTRIN + +#endif // simd_chk && (!fp_only || (is_fp && fp_only)) + +#undef SCALAR_OP +/**end repeat1**/ +/**end repeat**/ + +/******************************************************************************* + ** Defining ufunc inner functions + ******************************************************************************/ +/**begin repeat + * #TYPE = UBYTE, USHORT, UINT, ULONG, ULONGLONG, + * BYTE, SHORT, INT, LONG, LONGLONG, + * FLOAT, DOUBLE, LONGDOUBLE# + * + * #BTYPE = BYTE, SHORT, INT, LONG, LONGLONG, + * BYTE, SHORT, INT, LONG, LONGLONG, + * FLOAT, DOUBLE, LONGDOUBLE# + * #type = npy_ubyte, npy_ushort, npy_uint, npy_ulong, npy_ulonglong, + * npy_byte, npy_short, npy_int, npy_long, npy_longlong, + * npy_float, npy_double, npy_longdouble# + * + * #is_fp = 0*10, 1*3# + * #is_unsigned = 1*5, 0*5, 0*3# + * #scalar_sfx = i*10, f, d, l# + */ +#undef TO_SIMD_SFX +#if 0 +/**begin repeat1 + * #len = 8, 16, 32, 64# + */ +#elif NPY_SIMD && NPY_BITSOF_@BTYPE@ == @len@ + #if @is_fp@ + #define TO_SIMD_SFX(X) X##_f@len@ + #if NPY_BITSOF_@BTYPE@ == 64 && !NPY_SIMD_F64 + #undef TO_SIMD_SFX + #endif + #elif @is_unsigned@ + #define TO_SIMD_SFX(X) X##_u@len@ + #else + #define TO_SIMD_SFX(X) X##_s@len@ + #endif +/**end repeat1**/ +#endif + +/**begin repeat1 + * # kind = maximum, minimum, fmax, fmin# + * # intrin = max, min, maxp, minp# + * # fp_only = 0, 0, 1, 1# + */ +#if !@fp_only@ || (@is_fp@ && @fp_only@) +#define SCALAR_OP scalar_@intrin@_@scalar_sfx@ + +NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_@kind@) +(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) +{ + char *ip1 = args[0], *ip2 = args[1], *op1 = args[2]; + npy_intp is1 = steps[0], is2 = steps[1], os1 = steps[2], + len = dimensions[0]; + npy_intp i = 0; +#ifdef TO_SIMD_SFX + #undef STYPE + #define STYPE TO_SIMD_SFX(npyv_lanetype) + if (IS_BINARY_REDUCE) { + // reduce and contiguous + if (is2 == sizeof(@type@)) { + TO_SIMD_SFX(simd_reduce_c_@intrin@)( + (STYPE*)ip2, (STYPE*)op1, len + ); + goto clear_fp; + } + } + else if (!is_mem_overlap(ip1, is1, op1, os1, len) && + !is_mem_overlap(ip2, is2, op1, os1, len) + ) { + // no overlap and operands are binary contiguous + if (IS_BINARY_CONT(@type@, @type@)) { + TO_SIMD_SFX(simd_binary_ccc_@intrin@)( + (STYPE*)ip1, (STYPE*)ip2, (STYPE*)op1, len + ); + goto clear_fp; + } + // unroll scalars faster than non-contiguous vector load/store on Arm + #if !defined(NPY_HAVE_NEON) && @is_fp@ + if (TO_SIMD_SFX(npyv_loadable_stride)(is1/sizeof(STYPE)) && + TO_SIMD_SFX(npyv_loadable_stride)(is2/sizeof(STYPE)) && + TO_SIMD_SFX(npyv_storable_stride)(os1/sizeof(STYPE)) + ) { + TO_SIMD_SFX(simd_binary_@intrin@)( + (STYPE*)ip1, is1/sizeof(STYPE), + (STYPE*)ip2, is2/sizeof(STYPE), + (STYPE*)op1, os1/sizeof(STYPE), len + ); + goto clear_fp; + } + #endif + } +#endif // TO_SIMD_SFX +#ifndef NPY_DISABLE_OPTIMIZATION + // scalar unrolls + if (IS_BINARY_REDUCE) { + // Note, 8x unroll was chosen for best results on Apple M1 + npy_intp elemPerLoop = 8; + if((i+elemPerLoop) <= len){ + @type@ m0 = *((@type@ *)(ip2 + (i + 0) * is2)); + @type@ m1 = *((@type@ *)(ip2 + (i + 1) * is2)); + @type@ m2 = *((@type@ *)(ip2 + (i + 2) * is2)); + @type@ m3 = *((@type@ *)(ip2 + (i + 3) * is2)); + @type@ m4 = *((@type@ *)(ip2 + (i + 4) * is2)); + @type@ m5 = *((@type@ *)(ip2 + (i + 5) * is2)); + @type@ m6 = *((@type@ *)(ip2 + (i + 6) * is2)); + @type@ m7 = *((@type@ *)(ip2 + (i + 7) * is2)); + + i += elemPerLoop; + for(; (i+elemPerLoop)<=len; i+=elemPerLoop){ + @type@ v0 = *((@type@ *)(ip2 + (i + 0) * is2)); + @type@ v1 = *((@type@ *)(ip2 + (i + 1) * is2)); + @type@ v2 = *((@type@ *)(ip2 + (i + 2) * is2)); + @type@ v3 = *((@type@ *)(ip2 + (i + 3) * is2)); + @type@ v4 = *((@type@ *)(ip2 + (i + 4) * is2)); + @type@ v5 = *((@type@ *)(ip2 + (i + 5) * is2)); + @type@ v6 = *((@type@ *)(ip2 + (i + 6) * is2)); + @type@ v7 = *((@type@ *)(ip2 + (i + 7) * is2)); + + m0 = SCALAR_OP(m0, v0); + m1 = SCALAR_OP(m1, v1); + m2 = SCALAR_OP(m2, v2); + m3 = SCALAR_OP(m3, v3); + m4 = SCALAR_OP(m4, v4); + m5 = SCALAR_OP(m5, v5); + m6 = SCALAR_OP(m6, v6); + m7 = SCALAR_OP(m7, v7); + } + + m0 = SCALAR_OP(m0, m1); + m2 = SCALAR_OP(m2, m3); + m4 = SCALAR_OP(m4, m5); + m6 = SCALAR_OP(m6, m7); + + m0 = SCALAR_OP(m0, m2); + m4 = SCALAR_OP(m4, m6); + + m0 = SCALAR_OP(m0, m4); + + *((@type@ *)op1) = SCALAR_OP(*((@type@ *)op1), m0); + } + } else{ + // Note, 4x unroll was chosen for best results on Apple M1 + npy_intp elemPerLoop = 4; + for(; (i+elemPerLoop)<=len; i+=elemPerLoop){ + /* Note, we can't just load all, do all ops, then store all here. + * Sometimes ufuncs are called with `accumulate`, which makes the + * assumption that previous iterations have finished before next + * iteration. For example, the output of iteration 2 depends on the + * result of iteration 1. + */ + + /**begin repeat2 + * #unroll = 0, 1, 2, 3# + */ + @type@ v@unroll@ = *((@type@ *)(ip1 + (i + @unroll@) * is1)); + @type@ u@unroll@ = *((@type@ *)(ip2 + (i + @unroll@) * is2)); + *((@type@ *)(op1 + (i + @unroll@) * os1)) = SCALAR_OP(v@unroll@, u@unroll@); + /**end repeat2**/ + } + } +#endif // NPY_DISABLE_OPTIMIZATION + ip1 += is1 * i; + ip2 += is2 * i; + op1 += os1 * i; + for (; i < len; ++i, ip1 += is1, ip2 += is2, op1 += os1) { + const @type@ in1 = *(@type@ *)ip1; + const @type@ in2 = *(@type@ *)ip2; + *((@type@ *)op1) = SCALAR_OP(in1, in2); + } +#ifdef TO_SIMD_SFX +clear_fp: + npyv_cleanup(); +#endif +#if @is_fp@ + npy_clear_floatstatus_barrier((char*)dimensions); +#endif +} + +#undef SCALAR_OP + +#endif // !fp_only || (is_fp && fp_only) +/**end repeat1**/ +/**end repeat**/ + diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src index 0e2c1ab8b..8b833ee56 100644 --- a/numpy/core/src/umath/simd.inc.src +++ b/numpy/core/src/umath/simd.inc.src @@ -95,38 +95,6 @@ run_unary_avx512f_@func@_@TYPE@(char **args, const npy_intp *dimensions, const n */ /**begin repeat1 - * #func = maximum, minimum# - */ - -#if defined HAVE_ATTRIBUTE_TARGET_AVX512F_WITH_INTRINSICS && defined NPY_HAVE_SSE2_INTRINSICS && @EXISTS@ -static NPY_INLINE NPY_GCC_TARGET_AVX512F void -AVX512F_@func@_@TYPE@(char **args, npy_intp const *dimensions, npy_intp const *steps); -#endif - -static NPY_INLINE int -run_binary_avx512f_@func@_@TYPE@(char **args, npy_intp const *dimensions, npy_intp const *steps) -{ -#if defined HAVE_ATTRIBUTE_TARGET_AVX512F_WITH_INTRINSICS && defined NPY_HAVE_SSE2_INTRINSICS && @EXISTS@ - if (IS_BINARY_SMALL_STEPS_AND_NOMEMOVERLAP) { - AVX512F_@func@_@TYPE@(args, dimensions, steps); - return 1; - } - else - return 0; -#endif - return 0; -} -/**end repeat1**/ - -/**end repeat**/ - -/**begin repeat - * #type = npy_float, npy_double, npy_longdouble# - * #TYPE = FLOAT, DOUBLE, LONGDOUBLE# - * #EXISTS = 1, 1, 0# - */ - -/**begin repeat1 * #func = isnan, isfinite, isinf, signbit# */ @@ -204,9 +172,9 @@ run_unary_@isa@_@func@_@TYPE@(char **args, npy_intp const *dimensions, npy_intp */ /**begin repeat1 - * #func = negative, minimum, maximum# - * #check = IS_BLOCKABLE_UNARY, IS_BLOCKABLE_REDUCE*2 # - * #name = unary, unary_reduce*2# + * #func = negative# + * #check = IS_BLOCKABLE_UNARY# + * #name = unary# */ #if @vector@ && defined NPY_HAVE_SSE2_INTRINSICS @@ -678,55 +646,6 @@ sse2_negative_@TYPE@(@type@ * op, @type@ * ip, const npy_intp n) } /**end repeat1**/ - -/**begin repeat1 - * #kind = maximum, minimum# - * #VOP = max, min# - * #OP = >=, <=# - **/ -/* arguments swapped as unary reduce has the swapped compared to unary */ -static void -sse2_@kind@_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n) -{ - const npy_intp stride = VECTOR_SIZE_BYTES / (npy_intp)sizeof(@type@); - LOOP_BLOCK_ALIGN_VAR(ip, @type@, VECTOR_SIZE_BYTES) { - /* Order of operations important for MSVC 2015 */ - *op = (*op @OP@ ip[i] || npy_isnan(*op)) ? *op : ip[i]; - } - assert(n < stride || npy_is_aligned(&ip[i], VECTOR_SIZE_BYTES)); - if (i + 3 * stride <= n) { - /* load the first elements */ - @vtype@ c1 = @vpre@_load_@vsuf@((@type@*)&ip[i]); - @vtype@ c2 = @vpre@_load_@vsuf@((@type@*)&ip[i + stride]); - i += 2 * stride; - - /* minps/minpd will set invalid flag if nan is encountered */ - npy_clear_floatstatus_barrier((char*)&c1); - LOOP_BLOCKED(@type@, 2 * VECTOR_SIZE_BYTES) { - @vtype@ v1 = @vpre@_load_@vsuf@((@type@*)&ip[i]); - @vtype@ v2 = @vpre@_load_@vsuf@((@type@*)&ip[i + stride]); - c1 = @vpre@_@VOP@_@vsuf@(c1, v1); - c2 = @vpre@_@VOP@_@vsuf@(c2, v2); - } - c1 = @vpre@_@VOP@_@vsuf@(c1, c2); - - if (npy_get_floatstatus_barrier((char*)&c1) & NPY_FPE_INVALID) { - *op = @nan@; - } - else { - @type@ tmp = sse2_horizontal_@VOP@_@vtype@(c1); - /* Order of operations important for MSVC 2015 */ - *op = (*op @OP@ tmp || npy_isnan(*op)) ? *op : tmp; - } - } - LOOP_BLOCKED_END { - /* Order of operations important for MSVC 2015 */ - *op = (*op @OP@ ip[i] || npy_isnan(*op)) ? *op : ip[i]; - } - npy_clear_floatstatus_barrier((char*)op); -} -/**end repeat1**/ - /**end repeat**/ /* bunch of helper functions used in ISA_exp/log_FLOAT*/ @@ -1200,107 +1119,6 @@ AVX512_SKX_@func@_@TYPE@(npy_bool* op, @type@* ip, const npy_intp array_size, co /**end repeat**/ /**begin repeat - * #type = npy_float, npy_double# - * #TYPE = FLOAT, DOUBLE# - * #num_lanes = 16, 8# - * #vsuffix = ps, pd# - * #mask = __mmask16, __mmask8# - * #vtype1 = __m512, __m512d# - * #vtype2 = __m512i, __m256i# - * #scale = 4, 8# - * #vindextype = __m512i, __m256i# - * #vindexsize = 512, 256# - * #vindexload = _mm512_loadu_si512, _mm256_loadu_si256# - * #vtype2_load = _mm512_maskz_loadu_epi32, _mm256_maskz_loadu_epi32# - * #vtype2_gather = _mm512_mask_i32gather_epi32, _mm256_mmask_i32gather_epi32# - * #vtype2_store = _mm512_mask_storeu_epi32, _mm256_mask_storeu_epi32# - * #vtype2_scatter = _mm512_mask_i32scatter_epi32, _mm256_mask_i32scatter_epi32# - * #setzero = _mm512_setzero_epi32, _mm256_setzero_si256# - */ -/**begin repeat1 - * #func = maximum, minimum# - * #vectorf = max, min# - */ - -#if defined HAVE_ATTRIBUTE_TARGET_AVX512F_WITH_INTRINSICS && defined NPY_HAVE_SSE2_INTRINSICS -static NPY_INLINE NPY_GCC_TARGET_AVX512F void -AVX512F_@func@_@TYPE@(char **args, npy_intp const *dimensions, npy_intp const *steps) -{ - const npy_intp stride_ip1 = steps[0]/(npy_intp)sizeof(@type@); - const npy_intp stride_ip2 = steps[1]/(npy_intp)sizeof(@type@); - const npy_intp stride_op = steps[2]/(npy_intp)sizeof(@type@); - const npy_intp array_size = dimensions[0]; - npy_intp num_remaining_elements = array_size; - @type@* ip1 = (@type@*) args[0]; - @type@* ip2 = (@type@*) args[1]; - @type@* op = (@type@*) args[2]; - - @mask@ load_mask = avx512_get_full_load_mask_@vsuffix@(); - - /* - * Note: while generally indices are npy_intp, we ensure that our maximum index - * will fit in an int32 as a precondition for this function via - * IS_BINARY_SMALL_STEPS_AND_NOMEMOVERLAP - */ - - npy_int32 index_ip1[@num_lanes@], index_ip2[@num_lanes@], index_op[@num_lanes@]; - for (npy_int32 ii = 0; ii < @num_lanes@; ii++) { - index_ip1[ii] = ii*stride_ip1; - index_ip2[ii] = ii*stride_ip2; - index_op[ii] = ii*stride_op; - } - @vindextype@ vindex_ip1 = @vindexload@((@vindextype@*)&index_ip1[0]); - @vindextype@ vindex_ip2 = @vindexload@((@vindextype@*)&index_ip2[0]); - @vindextype@ vindex_op = @vindexload@((@vindextype@*)&index_op[0]); - @vtype1@ zeros_f = _mm512_setzero_@vsuffix@(); - - while (num_remaining_elements > 0) { - if (num_remaining_elements < @num_lanes@) { - load_mask = avx512_get_partial_load_mask_@vsuffix@( - num_remaining_elements, @num_lanes@); - } - @vtype1@ x1, x2; - if (stride_ip1 == 1) { - x1 = avx512_masked_load_@vsuffix@(load_mask, ip1); - } - else { - x1 = avx512_masked_gather_@vsuffix@(zeros_f, ip1, vindex_ip1, load_mask); - } - if (stride_ip2 == 1) { - x2 = avx512_masked_load_@vsuffix@(load_mask, ip2); - } - else { - x2 = avx512_masked_gather_@vsuffix@(zeros_f, ip2, vindex_ip2, load_mask); - } - - /* - * when only one of the argument is a nan, the maxps/maxpd instruction - * returns the second argument. The additional blend instruction fixes - * this issue to conform with NumPy behaviour. - */ - @mask@ nan_mask = _mm512_cmp_@vsuffix@_mask(x1, x1, _CMP_NEQ_UQ); - @vtype1@ out = _mm512_@vectorf@_@vsuffix@(x1, x2); - out = _mm512_mask_blend_@vsuffix@(nan_mask, out, x1); - - if (stride_op == 1) { - _mm512_mask_storeu_@vsuffix@(op, load_mask, out); - } - else { - /* scatter! */ - _mm512_mask_i32scatter_@vsuffix@(op, load_mask, vindex_op, out, @scale@); - } - - ip1 += @num_lanes@*stride_ip1; - ip2 += @num_lanes@*stride_ip2; - op += @num_lanes@*stride_op; - num_remaining_elements -= @num_lanes@; - } -} -#endif -/**end repeat1**/ -/**end repeat**/ - -/**begin repeat * #ISA = FMA, AVX512F# * #isa = fma, avx512# * #vsize = 256, 512# diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index 27a2cfcd5..7b0db9e04 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -2720,6 +2720,21 @@ reducelike_promote_and_resolve(PyUFuncObject *ufunc, * is NULL) so we pass `arr` instead in that case. */ PyArrayObject *ops[3] = {out ? out : arr, arr, out}; + + /* + * TODO: This is a dangerous hack, that works by relying on the GIL, it is + * terrible, terrifying, and trusts that nobody does crazy stuff + * in their type-resolvers. + * By mutating the `out` dimension, we ensure that reduce-likes + * live in a future without value-based promotion even when legacy + * promotion has to be used. + */ + npy_bool evil_ndim_mutating_hack = NPY_FALSE; + if (out != NULL && PyArray_NDIM(out) == 0 && PyArray_NDIM(arr) != 0) { + evil_ndim_mutating_hack = NPY_TRUE; + ((PyArrayObject_fields *)out)->nd = 1; + } + /* * TODO: If `out` is not provided, arguably `initial` could define * the first DType (and maybe also the out one), that way @@ -2740,6 +2755,9 @@ reducelike_promote_and_resolve(PyUFuncObject *ufunc, PyArrayMethodObject *ufuncimpl = promote_and_get_ufuncimpl(ufunc, ops, signature, operation_DTypes, NPY_FALSE, NPY_TRUE, NPY_TRUE); + if (evil_ndim_mutating_hack) { + ((PyArrayObject_fields *)out)->nd = 0; + } /* DTypes may currently get filled in fallbacks and XDECREF for error: */ Py_XDECREF(operation_DTypes[0]); Py_XDECREF(operation_DTypes[1]); diff --git a/numpy/core/tests/test_deprecations.py b/numpy/core/tests/test_deprecations.py index 76486f755..d2a69d4cf 100644 --- a/numpy/core/tests/test_deprecations.py +++ b/numpy/core/tests/test_deprecations.py @@ -1239,3 +1239,14 @@ class TestMemEventHook(_DeprecationTestCase): with pytest.warns(DeprecationWarning, match='PyDataMem_SetEventHook is deprecated'): ma_tests.test_pydatamem_seteventhook_end() + + +class TestArrayFinalizeNone(_DeprecationTestCase): + message = "Setting __array_finalize__ = None" + + def test_use_none_is_deprecated(self): + # Deprecated way that ndarray itself showed nothing needs finalizing. + class NoFinalize(np.ndarray): + __array_finalize__ = None + + self.assert_deprecated(lambda: np.array(1).view(NoFinalize)) diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index 2529705d5..708e82910 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -8954,12 +8954,28 @@ class TestArrayFinalize: a = np.array(1).view(SavesBase) assert_(a.saved_base is a.base) - def test_bad_finalize(self): + def test_bad_finalize1(self): class BadAttributeArray(np.ndarray): @property def __array_finalize__(self): raise RuntimeError("boohoo!") + with pytest.raises(TypeError, match="not callable"): + np.arange(10).view(BadAttributeArray) + + def test_bad_finalize2(self): + class BadAttributeArray(np.ndarray): + def __array_finalize__(self): + raise RuntimeError("boohoo!") + + with pytest.raises(TypeError, match="takes 1 positional"): + np.arange(10).view(BadAttributeArray) + + def test_bad_finalize3(self): + class BadAttributeArray(np.ndarray): + def __array_finalize__(self, obj): + raise RuntimeError("boohoo!") + with pytest.raises(RuntimeError, match="boohoo!"): np.arange(10).view(BadAttributeArray) @@ -8997,6 +9013,14 @@ class TestArrayFinalize: break_cycles() assert_(obj_ref() is None, "no references should remain") + def test_can_use_super(self): + class SuperFinalize(np.ndarray): + def __array_finalize__(self, obj): + self.saved_result = super().__array_finalize__(obj) + + a = np.array(1).view(SuperFinalize) + assert_(a.saved_result is None) + def test_orderconverter_with_nonASCII_unicode_ordering(): # gh-7475 @@ -9201,7 +9225,7 @@ class TestViewDtype: # x is non-contiguous x = np.arange(10, dtype='<i4')[::2] with pytest.raises(ValueError, - match='the last axis must be contiguous'): + match='the last axis must be contiguous'): x.view('<i2') expected = [[0, 0], [2, 0], [4, 0], [6, 0], [8, 0]] assert_array_equal(x[:, np.newaxis].view('<i2'), expected) |