summaryrefslogtreecommitdiff
path: root/numpy
diff options
context:
space:
mode:
Diffstat (limited to 'numpy')
-rw-r--r--numpy/__init__.py14
-rw-r--r--numpy/__init__.pyi1
-rw-r--r--numpy/_pytesttester.py4
-rw-r--r--numpy/array_api/linalg.py39
-rw-r--r--numpy/core/_add_newdocs.py18
-rw-r--r--numpy/core/_asarray.py24
-rw-r--r--numpy/core/code_generators/generate_numpy_api.py13
-rw-r--r--numpy/core/code_generators/ufunc_docstrings.py18
-rw-r--r--numpy/core/include/numpy/experimental_dtype_api.h2
-rw-r--r--numpy/core/include/numpy/ndarraytypes.h5
-rw-r--r--numpy/core/overrides.py23
-rw-r--r--numpy/core/setup.py8
-rw-r--r--numpy/core/src/_simd/_simd.dispatch.c.src32
-rw-r--r--numpy/core/src/_simd/_simd_convert.inc6
-rw-r--r--numpy/core/src/_simd/_simd_inc.h.src11
-rw-r--r--numpy/core/src/common/lowlevel_strided_loops.h16
-rw-r--r--numpy/core/src/common/npy_cpu_dispatch.h4
-rw-r--r--numpy/core/src/common/numpyos.h8
-rw-r--r--numpy/core/src/common/simd/avx2/avx2.h2
-rw-r--r--numpy/core/src/common/simd/avx512/avx512.h2
-rw-r--r--numpy/core/src/common/simd/emulate_maskop.h4
-rw-r--r--numpy/core/src/common/simd/intdiv.h20
-rw-r--r--numpy/core/src/common/simd/neon/math.h2
-rw-r--r--numpy/core/src/common/simd/neon/neon.h3
-rw-r--r--numpy/core/src/common/simd/simd.h18
-rw-r--r--numpy/core/src/common/simd/sse/sse.h3
-rw-r--r--numpy/core/src/common/simd/vec/arithmetic.h (renamed from numpy/core/src/common/simd/vsx/arithmetic.h)188
-rw-r--r--numpy/core/src/common/simd/vec/conversion.h228
-rw-r--r--numpy/core/src/common/simd/vec/math.h (renamed from numpy/core/src/common/simd/vsx/math.h)85
-rw-r--r--numpy/core/src/common/simd/vec/memory.h (renamed from numpy/core/src/common/simd/vsx/memory.h)130
-rw-r--r--numpy/core/src/common/simd/vec/misc.h (renamed from numpy/core/src/common/simd/vsx/misc.h)113
-rw-r--r--numpy/core/src/common/simd/vec/operators.h (renamed from numpy/core/src/common/simd/vsx/operators.h)105
-rw-r--r--numpy/core/src/common/simd/vec/reorder.h (renamed from numpy/core/src/common/simd/vsx/reorder.h)44
-rw-r--r--numpy/core/src/common/simd/vec/utils.h84
-rw-r--r--numpy/core/src/common/simd/vec/vec.h (renamed from numpy/core/src/common/simd/vsx/vsx.h)33
-rw-r--r--numpy/core/src/common/simd/vsx/conversion.h146
-rw-r--r--numpy/core/src/common/umathmodule.h6
-rw-r--r--numpy/core/src/multiarray/argfunc.dispatch.c.src6
-rw-r--r--numpy/core/src/multiarray/array_assign_array.c49
-rw-r--r--numpy/core/src/multiarray/array_assign_scalar.c49
-rw-r--r--numpy/core/src/multiarray/array_coercion.c129
-rw-r--r--numpy/core/src/multiarray/array_method.h29
-rw-r--r--numpy/core/src/multiarray/arrayobject.c412
-rw-r--r--numpy/core/src/multiarray/arraytypes.c.src80
-rw-r--r--numpy/core/src/multiarray/common_dtype.h8
-rw-r--r--numpy/core/src/multiarray/convert.c166
-rw-r--r--numpy/core/src/multiarray/convert_datatype.c32
-rw-r--r--numpy/core/src/multiarray/convert_datatype.h12
-rw-r--r--numpy/core/src/multiarray/ctors.c92
-rw-r--r--numpy/core/src/multiarray/dtype_transfer.c115
-rw-r--r--numpy/core/src/multiarray/dtypemeta.c1
-rw-r--r--numpy/core/src/multiarray/dtypemeta.h7
-rw-r--r--numpy/core/src/multiarray/einsum_sumprod.c.src2
-rw-r--r--numpy/core/src/multiarray/experimental_public_dtype_api.c32
-rw-r--r--numpy/core/src/multiarray/iterators.c3
-rw-r--r--numpy/core/src/multiarray/lowlevel_strided_loops.c.src63
-rw-r--r--numpy/core/src/multiarray/mapping.c174
-rw-r--r--numpy/core/src/multiarray/mapping.h2
-rw-r--r--numpy/core/src/multiarray/multiarraymodule.c14
-rw-r--r--numpy/core/src/multiarray/nditer_api.c22
-rw-r--r--numpy/core/src/multiarray/nditer_constr.c24
-rw-r--r--numpy/core/src/multiarray/nditer_impl.h41
-rw-r--r--numpy/core/src/multiarray/textreading/readtext.c4
-rw-r--r--numpy/core/src/multiarray/textreading/rows.c24
-rw-r--r--numpy/core/src/npymath/ieee754.c.src237
-rw-r--r--numpy/core/src/npymath/ieee754.cpp24
-rw-r--r--numpy/core/src/umath/dispatching.c32
-rw-r--r--numpy/core/src/umath/dispatching.h9
-rw-r--r--numpy/core/src/umath/extobj.c27
-rw-r--r--numpy/core/src/umath/loops_arithm_fp.dispatch.c.src5
-rw-r--r--numpy/core/src/umath/loops_arithmetic.dispatch.c.src50
-rw-r--r--numpy/core/src/umath/loops_comparison.dispatch.c.src5
-rw-r--r--numpy/core/src/umath/loops_hyperbolic.dispatch.c.src6
-rw-r--r--numpy/core/src/umath/loops_minmax.dispatch.c.src8
-rw-r--r--numpy/core/src/umath/loops_trigonometric.dispatch.c.src5
-rw-r--r--numpy/core/src/umath/loops_unary_fp.dispatch.c.src9
-rw-r--r--numpy/core/src/umath/scalarmath.c.src37
-rw-r--r--numpy/core/src/umath/string_ufuncs.cpp449
-rw-r--r--numpy/core/src/umath/string_ufuncs.h19
-rw-r--r--numpy/core/src/umath/ufunc_object.c23
-rw-r--r--numpy/core/src/umath/umathmodule.c7
-rw-r--r--numpy/core/tests/test_abc.py24
-rw-r--r--numpy/core/tests/test_array_coercion.py29
-rw-r--r--numpy/core/tests/test_casting_floatingpoint_errors.py153
-rw-r--r--numpy/core/tests/test_deprecations.py2
-rw-r--r--numpy/core/tests/test_dtype.py10
-rw-r--r--numpy/core/tests/test_half.py10
-rw-r--r--numpy/core/tests/test_indexing.py5
-rw-r--r--numpy/core/tests/test_multiarray.py14
-rw-r--r--numpy/core/tests/test_numeric.py4
-rw-r--r--numpy/core/tests/test_overrides.py39
-rw-r--r--numpy/core/tests/test_regression.py20
-rw-r--r--numpy/core/tests/test_scalarmath.py19
-rw-r--r--numpy/core/tests/test_simd.py27
-rw-r--r--numpy/core/tests/test_simd_module.py4
-rw-r--r--numpy/core/tests/test_strings.py85
-rw-r--r--numpy/core/tests/test_ufunc.py47
-rw-r--r--numpy/core/tests/test_umath.py30
-rw-r--r--numpy/core/tests/test_unicode.py9
-rw-r--r--numpy/distutils/ccompiler_opt.py67
-rw-r--r--numpy/distutils/checks/cpu_asimd.c8
-rw-r--r--numpy/distutils/checks/cpu_asimddp.c5
-rw-r--r--numpy/distutils/checks/cpu_asimdfhm.c12
-rw-r--r--numpy/distutils/checks/cpu_asimdhp.c7
-rw-r--r--numpy/distutils/checks/cpu_neon.c10
-rw-r--r--numpy/distutils/checks/cpu_neon_fp16.c6
-rw-r--r--numpy/distutils/checks/cpu_neon_vfpv4.c16
-rw-r--r--numpy/distutils/misc_util.py2
-rw-r--r--numpy/f2py/capi_maps.py2
-rwxr-xr-xnumpy/f2py/rules.py3
-rw-r--r--numpy/f2py/src/fortranobject.h4
-rw-r--r--numpy/f2py/tests/src/f2cmap/.f2py_f2cmap1
-rw-r--r--numpy/f2py/tests/src/f2cmap/isoFortranEnvMap.f909
-rw-r--r--numpy/f2py/tests/test_f2cmap.py15
-rw-r--r--numpy/lib/tests/test_loadtxt.py7
-rw-r--r--numpy/linalg/lapack_lite/f2c.c13
-rw-r--r--numpy/linalg/lapack_lite/f2c.h10
-rw-r--r--numpy/linalg/setup.py10
-rw-r--r--numpy/ma/core.py11
-rw-r--r--numpy/ma/tests/test_core.py17
-rw-r--r--numpy/polynomial/__init__.py6
-rw-r--r--numpy/polynomial/_polybase.py14
-rw-r--r--numpy/polynomial/polynomial.py10
-rw-r--r--numpy/polynomial/polyutils.py41
-rw-r--r--numpy/polynomial/polyutils.pyi1
-rw-r--r--numpy/polynomial/tests/test_printing.py147
-rw-r--r--numpy/random/_generator.pyx5
-rw-r--r--numpy/random/tests/test_generator_mt19937.py6
-rw-r--r--numpy/typing/tests/data/pass/arithmetic.py6
129 files changed, 3270 insertions, 1738 deletions
diff --git a/numpy/__init__.py b/numpy/__init__.py
index aae5c95ac..83487dc97 100644
--- a/numpy/__init__.py
+++ b/numpy/__init__.py
@@ -274,6 +274,7 @@ else:
def __getattr__(attr):
# Warn for expired attributes, and return a dummy function
# that always raises an exception.
+ import warnings
try:
msg = __expired_functions__[attr]
except KeyError:
@@ -312,7 +313,11 @@ else:
"{!r}".format(__name__, attr))
def __dir__():
- return list(globals().keys() | {'Tester', 'testing'})
+ public_symbols = globals().keys() | {'Tester', 'testing'}
+ public_symbols -= {
+ "core", "matrixlib",
+ }
+ return list(public_symbols)
# Pytest testing
from numpy._pytesttester import PytestTester
@@ -358,7 +363,6 @@ else:
except ValueError:
pass
- import sys
if sys.platform == "darwin":
with warnings.catch_warnings(record=True) as w:
_mac_os_check()
@@ -414,6 +418,12 @@ else:
from pathlib import Path
return [str(Path(__file__).with_name("_pyinstaller").resolve())]
+ # Remove symbols imported for internal use
+ del os
+
# get the version using versioneer
from .version import __version__, git_revision as __git_version__
+
+# Remove symbols imported for internal use
+del sys, warnings
diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi
index 2eb4a0634..d6faa9ca3 100644
--- a/numpy/__init__.pyi
+++ b/numpy/__init__.pyi
@@ -203,7 +203,6 @@ from numpy import (
lib as lib,
linalg as linalg,
ma as ma,
- matrixlib as matrixlib,
polynomial as polynomial,
random as random,
testing as testing,
diff --git a/numpy/_pytesttester.py b/numpy/_pytesttester.py
index 8decb9dd7..01ddaaf98 100644
--- a/numpy/_pytesttester.py
+++ b/numpy/_pytesttester.py
@@ -33,7 +33,6 @@ import os
__all__ = ['PytestTester']
-
def _show_numpy_info():
import numpy as np
@@ -44,7 +43,6 @@ def _show_numpy_info():
print("NumPy CPU features: ", (info if info else 'nothing enabled'))
-
class PytestTester:
"""
Pytest test runner.
@@ -167,7 +165,7 @@ class PytestTester:
]
if doctests:
- raise ValueError("Doctests not supported")
+ pytest_args += ["--doctest-modules"]
if extra_argv:
pytest_args += list(extra_argv)
diff --git a/numpy/array_api/linalg.py b/numpy/array_api/linalg.py
index f422e1c27..a4a2f23e4 100644
--- a/numpy/array_api/linalg.py
+++ b/numpy/array_api/linalg.py
@@ -1,8 +1,11 @@
from __future__ import annotations
from ._dtypes import _floating_dtypes, _numeric_dtypes
+from ._manipulation_functions import reshape
from ._array_object import Array
+from ..core.numeric import normalize_axis_tuple
+
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from ._typing import Literal, Optional, Sequence, Tuple, Union
@@ -395,18 +398,38 @@ def vector_norm(x: Array, /, *, axis: Optional[Union[int, Tuple[int, ...]]] = No
if x.dtype not in _floating_dtypes:
raise TypeError('Only floating-point dtypes are allowed in norm')
+ # np.linalg.norm tries to do a matrix norm whenever axis is a 2-tuple or
+ # when axis=None and the input is 2-D, so to force a vector norm, we make
+ # it so the input is 1-D (for axis=None), or reshape so that norm is done
+ # on a single dimension.
a = x._array
if axis is None:
- a = a.flatten()
- axis = 0
+ # Note: np.linalg.norm() doesn't handle 0-D arrays
+ a = a.ravel()
+ _axis = 0
elif isinstance(axis, tuple):
- # Note: The axis argument supports any number of axes, whereas norm()
- # only supports a single axis for vector norm.
- rest = tuple(i for i in range(a.ndim) if i not in axis)
+ # Note: The axis argument supports any number of axes, whereas
+ # np.linalg.norm() only supports a single axis for vector norm.
+ normalized_axis = normalize_axis_tuple(axis, x.ndim)
+ rest = tuple(i for i in range(a.ndim) if i not in normalized_axis)
newshape = axis + rest
- a = np.transpose(a, newshape).reshape((np.prod([a.shape[i] for i in axis]), *[a.shape[i] for i in rest]))
- axis = 0
- return Array._new(np.linalg.norm(a, axis=axis, keepdims=keepdims, ord=ord))
+ a = np.transpose(a, newshape).reshape(
+ (np.prod([a.shape[i] for i in axis], dtype=int), *[a.shape[i] for i in rest]))
+ _axis = 0
+ else:
+ _axis = axis
+
+ res = Array._new(np.linalg.norm(a, axis=_axis, ord=ord))
+
+ if keepdims:
+ # We can't reuse np.linalg.norm(keepdims) because of the reshape hacks
+ # above to avoid matrix norm logic.
+ shape = list(x.shape)
+ _axis = normalize_axis_tuple(range(x.ndim) if axis is None else axis, x.ndim)
+ for i in _axis:
+ shape[i] = 1
+ res = reshape(res, tuple(shape))
+ return res
__all__ = ['cholesky', 'cross', 'det', 'diagonal', 'eigh', 'eigvalsh', 'inv', 'matmul', 'matrix_norm', 'matrix_power', 'matrix_rank', 'matrix_transpose', 'outer', 'pinv', 'qr', 'slogdet', 'solve', 'svd', 'svdvals', 'tensordot', 'trace', 'vecdot', 'vector_norm']
diff --git a/numpy/core/_add_newdocs.py b/numpy/core/_add_newdocs.py
index fb9c30d93..3e8df6d46 100644
--- a/numpy/core/_add_newdocs.py
+++ b/numpy/core/_add_newdocs.py
@@ -3437,6 +3437,24 @@ add_newdoc('numpy.core.multiarray', 'ndarray', ('fill',
>>> a
array([1., 1.])
+ Fill expects a scalar value and always behaves the same as assigning
+ to a single array element. The following is a rare example where this
+ distinction is important:
+
+ >>> a = np.array([None, None], dtype=object)
+ >>> a[0] = np.array(3)
+ >>> a
+ array([array(3), None], dtype=object)
+ >>> a.fill(np.array(3))
+ >>> a
+ array([array(3), array(3)], dtype=object)
+
+ Where other forms of assignments will unpack the array being assigned:
+
+ >>> a[...] = np.array(3)
+ >>> a
+ array([3, 3], dtype=object)
+
"""))
diff --git a/numpy/core/_asarray.py b/numpy/core/_asarray.py
index 89d422e99..cbaab8c3f 100644
--- a/numpy/core/_asarray.py
+++ b/numpy/core/_asarray.py
@@ -14,6 +14,15 @@ from .multiarray import array, asanyarray
__all__ = ["require"]
+POSSIBLE_FLAGS = {
+ 'C': 'C', 'C_CONTIGUOUS': 'C', 'CONTIGUOUS': 'C',
+ 'F': 'F', 'F_CONTIGUOUS': 'F', 'FORTRAN': 'F',
+ 'A': 'A', 'ALIGNED': 'A',
+ 'W': 'W', 'WRITEABLE': 'W',
+ 'O': 'O', 'OWNDATA': 'O',
+ 'E': 'E', 'ENSUREARRAY': 'E'
+}
+
def _require_dispatcher(a, dtype=None, requirements=None, *, like=None):
return (like,)
@@ -36,7 +45,7 @@ def require(a, dtype=None, requirements=None, *, like=None):
The required data-type. If None preserve the current dtype. If your
application requires the data to be in native byteorder, include
a byteorder specification as a part of the dtype specification.
- requirements : str or list of str
+ requirements : str or sequence of str
The requirements list can be any of the following
* 'F_CONTIGUOUS' ('F') - ensure a Fortran-contiguous array
@@ -97,16 +106,10 @@ def require(a, dtype=None, requirements=None, *, like=None):
like=like,
)
- possible_flags = {'C': 'C', 'C_CONTIGUOUS': 'C', 'CONTIGUOUS': 'C',
- 'F': 'F', 'F_CONTIGUOUS': 'F', 'FORTRAN': 'F',
- 'A': 'A', 'ALIGNED': 'A',
- 'W': 'W', 'WRITEABLE': 'W',
- 'O': 'O', 'OWNDATA': 'O',
- 'E': 'E', 'ENSUREARRAY': 'E'}
if not requirements:
return asanyarray(a, dtype=dtype)
- else:
- requirements = {possible_flags[x.upper()] for x in requirements}
+
+ requirements = {POSSIBLE_FLAGS[x.upper()] for x in requirements}
if 'E' in requirements:
requirements.remove('E')
@@ -128,8 +131,7 @@ def require(a, dtype=None, requirements=None, *, like=None):
for prop in requirements:
if not arr.flags[prop]:
- arr = arr.copy(order)
- break
+ return arr.copy(order)
return arr
diff --git a/numpy/core/code_generators/generate_numpy_api.py b/numpy/core/code_generators/generate_numpy_api.py
index 37975966f..a966be57d 100644
--- a/numpy/core/code_generators/generate_numpy_api.py
+++ b/numpy/core/code_generators/generate_numpy_api.py
@@ -89,19 +89,22 @@ _import_array(void)
*/
st = PyArray_GetEndianness();
if (st == NPY_CPU_UNKNOWN_ENDIAN) {
- PyErr_Format(PyExc_RuntimeError, "FATAL: module compiled as unknown endian");
+ PyErr_SetString(PyExc_RuntimeError,
+ "FATAL: module compiled as unknown endian");
return -1;
}
#if NPY_BYTE_ORDER == NPY_BIG_ENDIAN
if (st != NPY_CPU_BIG) {
- PyErr_Format(PyExc_RuntimeError, "FATAL: module compiled as "\
- "big endian, but detected different endianness at runtime");
+ PyErr_SetString(PyExc_RuntimeError,
+ "FATAL: module compiled as big endian, but "
+ "detected different endianness at runtime");
return -1;
}
#elif NPY_BYTE_ORDER == NPY_LITTLE_ENDIAN
if (st != NPY_CPU_LITTLE) {
- PyErr_Format(PyExc_RuntimeError, "FATAL: module compiled as "\
- "little endian, but detected different endianness at runtime");
+ PyErr_SetString(PyExc_RuntimeError,
+ "FATAL: module compiled as little endian, but "
+ "detected different endianness at runtime");
return -1;
}
#endif
diff --git a/numpy/core/code_generators/ufunc_docstrings.py b/numpy/core/code_generators/ufunc_docstrings.py
index 24b707a12..7c020fa2e 100644
--- a/numpy/core/code_generators/ufunc_docstrings.py
+++ b/numpy/core/code_generators/ufunc_docstrings.py
@@ -2011,7 +2011,7 @@ add_newdoc('numpy.core.umath', 'log',
-----
Logarithm is a multivalued function: for each `x` there is an infinite
number of `z` such that `exp(z) = x`. The convention is to return the
- `z` whose imaginary part lies in `[-pi, pi]`.
+ `z` whose imaginary part lies in `(-pi, pi]`.
For real-valued input data types, `log` always returns real output. For
each value that cannot be expressed as a real number or infinity, it
@@ -2021,6 +2021,10 @@ add_newdoc('numpy.core.umath', 'log',
has a branch cut `[-inf, 0]` and is continuous from above on it. `log`
handles the floating-point negative zero as an infinitesimal negative
number, conforming to the C99 standard.
+
+ In the cases where the input has a negative real part and a very small
+ negative complex part (approaching 0), the result is so close to `-pi`
+ that it evaluates to exactly `-pi`.
References
----------
@@ -2061,7 +2065,7 @@ add_newdoc('numpy.core.umath', 'log10',
-----
Logarithm is a multivalued function: for each `x` there is an infinite
number of `z` such that `10**z = x`. The convention is to return the
- `z` whose imaginary part lies in `[-pi, pi]`.
+ `z` whose imaginary part lies in `(-pi, pi]`.
For real-valued input data types, `log10` always returns real output.
For each value that cannot be expressed as a real number or infinity,
@@ -2072,6 +2076,10 @@ add_newdoc('numpy.core.umath', 'log10',
`log10` handles the floating-point negative zero as an infinitesimal
negative number, conforming to the C99 standard.
+ In the cases where the input has a negative real part and a very small
+ negative complex part (approaching 0), the result is so close to `-pi`
+ that it evaluates to exactly `-pi`.
+
References
----------
.. [1] M. Abramowitz and I.A. Stegun, "Handbook of Mathematical Functions",
@@ -2112,7 +2120,7 @@ add_newdoc('numpy.core.umath', 'log2',
Logarithm is a multivalued function: for each `x` there is an infinite
number of `z` such that `2**z = x`. The convention is to return the `z`
- whose imaginary part lies in `[-pi, pi]`.
+ whose imaginary part lies in `(-pi, pi]`.
For real-valued input data types, `log2` always returns real output.
For each value that cannot be expressed as a real number or infinity,
@@ -2123,6 +2131,10 @@ add_newdoc('numpy.core.umath', 'log2',
handles the floating-point negative zero as an infinitesimal negative
number, conforming to the C99 standard.
+ In the cases where the input has a negative real part and a very small
+ negative complex part (approaching 0), the result is so close to `-pi`
+ that it evaluates to exactly `-pi`.
+
Examples
--------
>>> x = np.array([0, 1, 2, 2**4])
diff --git a/numpy/core/include/numpy/experimental_dtype_api.h b/numpy/core/include/numpy/experimental_dtype_api.h
index 1dd6215e6..23e9a8d21 100644
--- a/numpy/core/include/numpy/experimental_dtype_api.h
+++ b/numpy/core/include/numpy/experimental_dtype_api.h
@@ -214,7 +214,7 @@ typedef struct {
} PyArrayMethod_Spec;
-typedef PyObject *_ufunc_addloop_fromspec_func(
+typedef int _ufunc_addloop_fromspec_func(
PyObject *ufunc, PyArrayMethod_Spec *spec);
/*
* The main ufunc registration function. This adds a new implementation/loop
diff --git a/numpy/core/include/numpy/ndarraytypes.h b/numpy/core/include/numpy/ndarraytypes.h
index c295f34bb..97e0f4e2a 100644
--- a/numpy/core/include/numpy/ndarraytypes.h
+++ b/numpy/core/include/numpy/ndarraytypes.h
@@ -1380,7 +1380,10 @@ typedef struct {
int nd_fancy;
npy_intp fancy_dims[NPY_MAXDIMS];
- /* Whether the iterator (any of the iterators) requires API */
+ /*
+ * Whether the iterator (any of the iterators) requires API. This is
+ * unused by NumPy itself; ArrayMethod flags are more precise.
+ */
int needs_api;
/*
diff --git a/numpy/core/overrides.py b/numpy/core/overrides.py
index cb550152e..663436a4c 100644
--- a/numpy/core/overrides.py
+++ b/numpy/core/overrides.py
@@ -2,6 +2,7 @@
import collections
import functools
import os
+import sys
from numpy.core._multiarray_umath import (
add_docstring, implement_array_function, _get_implementing_args)
@@ -176,7 +177,27 @@ def array_function_dispatch(dispatcher, module=None, verify=True,
@functools.wraps(implementation)
def public_api(*args, **kwargs):
- relevant_args = dispatcher(*args, **kwargs)
+ try:
+ relevant_args = dispatcher(*args, **kwargs)
+ except TypeError as exc:
+ # Try to clean up a signature related TypeError. Such an
+ # error will be something like:
+ # dispatcher.__name__() got an unexpected keyword argument
+ #
+ # So replace the dispatcher name in this case. In principle
+ # TypeErrors may be raised from _within_ the dispatcher, so
+ # we check that the traceback contains a string that starts
+ # with the name. (In principle we could also check the
+ # traceback length, as it would be deeper.)
+ msg = exc.args[0]
+ disp_name = dispatcher.__name__
+ if not isinstance(msg, str) or not msg.startswith(disp_name):
+ raise
+
+ # Replace with the correct name and re-raise:
+ new_msg = msg.replace(disp_name, public_api.__name__)
+ raise TypeError(new_msg) from None
+
return implement_array_function(
implementation, public_api, relevant_args, args, kwargs)
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index 7d072c15c..543b6ae39 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -1,9 +1,9 @@
import os
import sys
+import sysconfig
import pickle
import copy
import warnings
-import platform
import textwrap
import glob
from os.path import join
@@ -79,9 +79,8 @@ def can_link_svml():
"""
if NPY_DISABLE_SVML:
return False
- machine = platform.machine()
- system = platform.system()
- return "x86_64" in machine and system == "Linux"
+ platform = sysconfig.get_platform()
+ return "x86_64" in platform and "linux" in platform
def check_svml_submodule(svmlpath):
if not os.path.exists(svmlpath + "/README.md"):
@@ -1081,6 +1080,7 @@ def configuration(parent_package='',top_path=None):
join('src', 'umath', 'scalarmath.c.src'),
join('src', 'umath', 'ufunc_type_resolution.c'),
join('src', 'umath', 'override.c'),
+ join('src', 'umath', 'string_ufuncs.cpp'),
# For testing. Eventually, should use public API and be separate:
join('src', 'umath', '_scaled_float_dtype.c'),
]
diff --git a/numpy/core/src/_simd/_simd.dispatch.c.src b/numpy/core/src/_simd/_simd.dispatch.c.src
index 0f3e4fc8f..997205957 100644
--- a/numpy/core/src/_simd/_simd.dispatch.c.src
+++ b/numpy/core/src/_simd/_simd.dispatch.c.src
@@ -18,7 +18,7 @@
* #esfx = u16,s8, u32, s16, u32, s32, u64, s64, f32, f64#
* #size = 8, 8, 16, 16, 32, 32, 64, 64, 32, 64#
* #expand_sup= 1, 0, 1, 0, 0, 0, 0, 0, 0, 0#
- * #simd_sup = 1, 1, 1, 1, 1, 1, 1, 1, 1, NPY_SIMD_F64#
+ * #simd_sup = 1, 1, 1, 1, 1, 1, 1, 1, NPY_SIMD_F32, NPY_SIMD_F64#
* #fp_only = 0, 0, 0, 0, 0, 0, 0, 0, 1, 1#
* #sat_sup = 1, 1, 1, 1, 0, 0, 0, 0, 0, 0#
* #mul_sup = 1, 1, 1, 1, 1, 1, 0, 0, 1, 1#
@@ -252,7 +252,7 @@ SIMD_IMPL_INTRIN_3(select_@sfx@, v@sfx@, v@bsfx@, v@sfx@, v@sfx@)
/**begin repeat1
* #sfx_to = u8, s8, u16, s16, u32, s32, u64, s64, f32, f64#
- * #simd_sup2 = 1, 1, 1, 1, 1, 1, 1, 1, 1, NPY_SIMD_F64#
+ * #simd_sup2 = 1, 1, 1, 1, 1, 1, 1, 1, NPY_SIMD_F32, NPY_SIMD_F64#
*/
#if @simd_sup2@
SIMD_IMPL_INTRIN_1(reinterpret_@sfx_to@_@sfx@, v@sfx_to@, v@sfx@)
@@ -442,7 +442,9 @@ SIMD_IMPL_INTRIN_0N(cleanup)
* Operators
***************************/
// check special cases
-SIMD_IMPL_INTRIN_1(notnan_f32, vb32, vf32)
+#if NPY_SIMD_F32
+ SIMD_IMPL_INTRIN_1(notnan_f32, vb32, vf32)
+#endif
#if NPY_SIMD_F64
SIMD_IMPL_INTRIN_1(notnan_f64, vb64, vf64)
#endif
@@ -450,7 +452,9 @@ SIMD_IMPL_INTRIN_1(notnan_f32, vb32, vf32)
* Conversions
***************************/
// round to nearest integer (assume even)
-SIMD_IMPL_INTRIN_1(round_s32_f32, vs32, vf32)
+#if NPY_SIMD_F32
+ SIMD_IMPL_INTRIN_1(round_s32_f32, vs32, vf32)
+#endif
#if NPY_SIMD_F64
SIMD_IMPL_INTRIN_2(round_s32_f64, vs32, vf64, vf64)
#endif
@@ -492,10 +496,10 @@ static PyMethodDef simd__intrinsics_methods[] = {
/**begin repeat
* #sfx = u8, s8, u16, s16, u32, s32, u64, s64, f32, f64#
* #bsfx = b8, b8, b16, b16, b32, b32, b64, b64, b32, b64#
- * #esfx = u16,s8, u32, s16, u32, s32, u64, s64, f32, f64#
* #size = 8, 8, 16, 16, 32, 32, 64, 64, 32, 64#
+ * #esfx = u16, s8, u32,s16, u32, s32, u64, s64, f32, f64#
* #expand_sup= 1, 0, 1, 0, 0, 0, 0, 0, 0, 0#
- * #simd_sup = 1, 1, 1, 1, 1, 1, 1, 1, 1, NPY_SIMD_F64#
+ * #simd_sup = 1, 1, 1, 1, 1, 1, 1, 1, NPY_SIMD_F32, NPY_SIMD_F64#
* #fp_only = 0, 0, 0, 0, 0, 0, 0, 0, 1, 1#
* #sat_sup = 1, 1, 1, 1, 0, 0, 0, 0, 0, 0#
* #mul_sup = 1, 1, 1, 1, 1, 1, 0, 0, 1, 1#
@@ -547,7 +551,7 @@ SIMD_INTRIN_DEF(lut16_@sfx@)
***************************/
/**begin repeat1
* #sfx_to = u8, s8, u16, s16, u32, s32, u64, s64, f32, f64#
- * #simd_sup2 = 1, 1, 1, 1, 1, 1, 1, 1, 1, NPY_SIMD_F64#
+ * #simd_sup2 = 1, 1, 1, 1, 1, 1, 1, 1, NPY_SIMD_F32, NPY_SIMD_F64#
*/
#if @simd_sup2@
SIMD_INTRIN_DEF(reinterpret_@sfx_to@_@sfx@)
@@ -698,7 +702,9 @@ SIMD_INTRIN_DEF(cleanup)
* Operators
***************************/
// check special cases
-SIMD_INTRIN_DEF(notnan_f32)
+#if NPY_SIMD_F32
+ SIMD_INTRIN_DEF(notnan_f32)
+#endif
#if NPY_SIMD_F64
SIMD_INTRIN_DEF(notnan_f64)
#endif
@@ -706,7 +712,9 @@ SIMD_INTRIN_DEF(notnan_f32)
* Conversions
***************************/
// round to nearest integer (assume even)
-SIMD_INTRIN_DEF(round_s32_f32)
+#if NPY_SIMD_F32
+ SIMD_INTRIN_DEF(round_s32_f32)
+#endif
#if NPY_SIMD_F64
SIMD_INTRIN_DEF(round_s32_f64)
#endif
@@ -777,12 +785,18 @@ NPY_CPU_DISPATCH_CURFX(simd_create_module)(void)
if (PyModule_AddIntConstant(m, "simd_f64", NPY_SIMD_F64)) {
goto err;
}
+ if (PyModule_AddIntConstant(m, "simd_f32", NPY_SIMD_F32)) {
+ goto err;
+ }
if (PyModule_AddIntConstant(m, "simd_fma3", NPY_SIMD_FMA3)) {
goto err;
}
if (PyModule_AddIntConstant(m, "simd_width", NPY_SIMD_WIDTH)) {
goto err;
}
+ if (PyModule_AddIntConstant(m, "simd_bigendian", NPY_SIMD_BIGENDIAN)) {
+ goto err;
+ }
#if NPY_SIMD
if (PySIMDVectorType_Init(m)) {
goto err;
diff --git a/numpy/core/src/_simd/_simd_convert.inc b/numpy/core/src/_simd/_simd_convert.inc
index 46e044479..58eb90d69 100644
--- a/numpy/core/src/_simd/_simd_convert.inc
+++ b/numpy/core/src/_simd/_simd_convert.inc
@@ -20,6 +20,10 @@ simd_scalar_from_number(PyObject *obj, simd_data_type dtype)
}
} else {
data.u64 = PyLong_AsUnsignedLongLongMask(obj);
+ #if NPY_SIMD_BIGENDIAN
+ int leftb = (sizeof(npyv_lanetype_u64) - info->lane_size) * 8;
+ data.u64 <<= leftb;
+ #endif
}
return data;
}
@@ -36,7 +40,9 @@ simd_scalar_to_number(simd_data data, simd_data_type dtype)
return PyFloat_FromDouble(data.f64);
}
int leftb = (sizeof(npyv_lanetype_u64) - info->lane_size) * 8;
+#if !NPY_SIMD_BIGENDIAN
data.u64 <<= leftb;
+#endif
if (info->is_signed) {
return PyLong_FromLongLong(data.s64 >> leftb);
}
diff --git a/numpy/core/src/_simd/_simd_inc.h.src b/numpy/core/src/_simd/_simd_inc.h.src
index fbdf982c2..887545414 100644
--- a/numpy/core/src/_simd/_simd_inc.h.src
+++ b/numpy/core/src/_simd/_simd_inc.h.src
@@ -27,22 +27,27 @@ typedef union
/**end repeat**/
// vectors
/**begin repeat
- * #sfx = u8, u16, u32, u64, s8, s16, s32, s64, f32, b8, b16, b32, b64#
+ * #sfx = u8, u16, u32, u64, s8, s16, s32, s64, b8, b16, b32, b64#
*/
npyv_@sfx@ v@sfx@;
/**end repeat**/
// multi-vectors x2
/**begin repeat
- * #sfx = u8, u16, u32, u64, s8, s16, s32, s64, f32#
+ * #sfx = u8, u16, u32, u64, s8, s16, s32, s64#
*/
npyv_@sfx@x2 v@sfx@x2;
/**end repeat**/
// multi-vectors x3
/**begin repeat
- * #sfx = u8, u16, u32, u64, s8, s16, s32, s64, f32#
+ * #sfx = u8, u16, u32, u64, s8, s16, s32, s64#
*/
npyv_@sfx@x3 v@sfx@x3;
/**end repeat**/
+#if NPY_SIMD_F32
+ npyv_f32 vf32;
+ npyv_f32x2 vf32x2;
+ npyv_f32x3 vf32x3;
+#endif
#if NPY_SIMD_F64
npyv_f64 vf64;
npyv_f64x2 vf64x2;
diff --git a/numpy/core/src/common/lowlevel_strided_loops.h b/numpy/core/src/common/lowlevel_strided_loops.h
index 118ce9cb1..924a34db5 100644
--- a/numpy/core/src/common/lowlevel_strided_loops.h
+++ b/numpy/core/src/common/lowlevel_strided_loops.h
@@ -196,7 +196,7 @@ PyArray_GetDTypeTransferFunction(int aligned,
PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype,
int move_references,
NPY_cast_info *cast_info,
- int *out_needs_api);
+ NPY_ARRAYMETHOD_FLAGS *out_flags);
NPY_NO_EXPORT int
get_fields_transfer_function(int aligned,
@@ -205,7 +205,7 @@ get_fields_transfer_function(int aligned,
int move_references,
PyArrayMethod_StridedLoop **out_stransfer,
NpyAuxData **out_transferdata,
- int *out_needs_api);
+ NPY_ARRAYMETHOD_FLAGS *out_flags);
NPY_NO_EXPORT int
get_subarray_transfer_function(int aligned,
@@ -214,7 +214,7 @@ get_subarray_transfer_function(int aligned,
int move_references,
PyArrayMethod_StridedLoop **out_stransfer,
NpyAuxData **out_transferdata,
- int *out_needs_api);
+ NPY_ARRAYMETHOD_FLAGS *out_flags);
/*
* This is identical to PyArray_GetDTypeTransferFunction, but returns a
@@ -241,7 +241,7 @@ PyArray_GetMaskedDTypeTransferFunction(int aligned,
PyArray_Descr *mask_dtype,
int move_references,
NPY_cast_info *cast_info,
- int *out_needs_api);
+ NPY_ARRAYMETHOD_FLAGS *out_flags);
/*
* Casts the specified number of elements from 'src' with data type
@@ -336,10 +336,14 @@ mapiter_trivial_set(PyArrayObject *self, PyArrayObject *ind,
PyArrayObject *result);
NPY_NO_EXPORT int
-mapiter_get(PyArrayMapIterObject *mit);
+mapiter_get(
+ PyArrayMapIterObject *mit, NPY_cast_info *cast_info,
+ NPY_ARRAYMETHOD_FLAGS flags, int is_aligned);
NPY_NO_EXPORT int
-mapiter_set(PyArrayMapIterObject *mit);
+mapiter_set(
+ PyArrayMapIterObject *mit, NPY_cast_info *cast_info,
+ NPY_ARRAYMETHOD_FLAGS flags, int is_aligned);
/*
* Prepares shape and strides for a simple raw array iteration.
diff --git a/numpy/core/src/common/npy_cpu_dispatch.h b/numpy/core/src/common/npy_cpu_dispatch.h
index e814cd425..4d5addec8 100644
--- a/numpy/core/src/common/npy_cpu_dispatch.h
+++ b/numpy/core/src/common/npy_cpu_dispatch.h
@@ -22,7 +22,7 @@
* which is explicitly disabling the module ccompiler_opt.
*/
#ifndef NPY_DISABLE_OPTIMIZATION
- #if defined(__powerpc64__) && !defined(__cplusplus) && defined(bool)
+ #if (defined(__s390x__) || defined(__powerpc64__)) && !defined(__cplusplus) && defined(bool)
/**
* "altivec.h" header contains the definitions(bool, vector, pixel),
* usually in c++ we undefine them after including the header.
@@ -34,7 +34,7 @@
typedef bool npy__dispatch_bkbool;
#endif
#include "npy_cpu_dispatch_config.h"
- #ifdef NPY_HAVE_VSX
+ #if defined(NPY_HAVE_VSX) || defined(NPY_HAVE_VX)
#undef bool
#undef vector
#undef pixel
diff --git a/numpy/core/src/common/numpyos.h b/numpy/core/src/common/numpyos.h
index ce49cbea7..6e526af17 100644
--- a/numpy/core/src/common/numpyos.h
+++ b/numpy/core/src/common/numpyos.h
@@ -1,6 +1,10 @@
#ifndef NUMPY_CORE_SRC_COMMON_NPY_NUMPYOS_H_
#define NUMPY_CORE_SRC_COMMON_NPY_NUMPYOS_H_
+#ifdef __cplusplus
+extern "C" {
+#endif
+
NPY_NO_EXPORT char*
NumPyOS_ascii_formatd(char *buffer, size_t buf_size,
const char *format,
@@ -39,4 +43,8 @@ NumPyOS_strtoll(const char *str, char **endptr, int base);
NPY_NO_EXPORT npy_ulonglong
NumPyOS_strtoull(const char *str, char **endptr, int base);
+#ifdef __cplusplus
+}
+#endif
+
#endif /* NUMPY_CORE_SRC_COMMON_NPY_NUMPYOS_H_ */
diff --git a/numpy/core/src/common/simd/avx2/avx2.h b/numpy/core/src/common/simd/avx2/avx2.h
index 02ff536fb..8cb74df2b 100644
--- a/numpy/core/src/common/simd/avx2/avx2.h
+++ b/numpy/core/src/common/simd/avx2/avx2.h
@@ -3,12 +3,14 @@
#endif
#define NPY_SIMD 256
#define NPY_SIMD_WIDTH 32
+#define NPY_SIMD_F32 1
#define NPY_SIMD_F64 1
#ifdef NPY_HAVE_FMA3
#define NPY_SIMD_FMA3 1 // native support
#else
#define NPY_SIMD_FMA3 0 // fast emulated
#endif
+#define NPY_SIMD_BIGENDIAN 0
// Enough limit to allow us to use _mm256_i32gather_*
#define NPY_SIMD_MAXLOAD_STRIDE32 (0x7fffffff / 8)
diff --git a/numpy/core/src/common/simd/avx512/avx512.h b/numpy/core/src/common/simd/avx512/avx512.h
index f38686834..0946e6443 100644
--- a/numpy/core/src/common/simd/avx512/avx512.h
+++ b/numpy/core/src/common/simd/avx512/avx512.h
@@ -3,8 +3,10 @@
#endif
#define NPY_SIMD 512
#define NPY_SIMD_WIDTH 64
+#define NPY_SIMD_F32 1
#define NPY_SIMD_F64 1
#define NPY_SIMD_FMA3 1 // native support
+#define NPY_SIMD_BIGENDIAN 0
// Enough limit to allow us to use _mm512_i32gather_* and _mm512_i32scatter_*
#define NPY_SIMD_MAXLOAD_STRIDE32 (0x7fffffff / 16)
#define NPY_SIMD_MAXSTORE_STRIDE32 (0x7fffffff / 16)
diff --git a/numpy/core/src/common/simd/emulate_maskop.h b/numpy/core/src/common/simd/emulate_maskop.h
index 41e397c2d..2a808a153 100644
--- a/numpy/core/src/common/simd/emulate_maskop.h
+++ b/numpy/core/src/common/simd/emulate_maskop.h
@@ -36,7 +36,9 @@ NPYV_IMPL_EMULATE_MASK_ADDSUB(u32, b32)
NPYV_IMPL_EMULATE_MASK_ADDSUB(s32, b32)
NPYV_IMPL_EMULATE_MASK_ADDSUB(u64, b64)
NPYV_IMPL_EMULATE_MASK_ADDSUB(s64, b64)
-NPYV_IMPL_EMULATE_MASK_ADDSUB(f32, b32)
+#if NPY_SIMD_F32
+ NPYV_IMPL_EMULATE_MASK_ADDSUB(f32, b32)
+#endif
#if NPY_SIMD_F64
NPYV_IMPL_EMULATE_MASK_ADDSUB(f64, b64)
#endif
diff --git a/numpy/core/src/common/simd/intdiv.h b/numpy/core/src/common/simd/intdiv.h
index 8b65b3a76..f5066b59b 100644
--- a/numpy/core/src/common/simd/intdiv.h
+++ b/numpy/core/src/common/simd/intdiv.h
@@ -89,7 +89,9 @@ NPY_FINLINE unsigned npyv__bitscan_revnz_u32(npy_uint32 a)
unsigned long rl;
(void)_BitScanReverse(&rl, (unsigned long)a);
r = (unsigned)rl;
-#elif defined(NPY_HAVE_SSE2) && (defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER))
+
+#elif defined(NPY_HAVE_SSE2) && (defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER)) \
+ && (defined(NPY_CPU_X86) || defined(NPY_CPU_AMD64))
__asm__("bsr %1, %0" : "=r" (r) : "r"(a));
#elif defined(__GNUC__) || defined(__clang__)
r = 31 - __builtin_clz(a); // performs on arm -> clz, ppc -> cntlzw
@@ -206,7 +208,7 @@ NPY_FINLINE npyv_u8x3 npyv_divisor_u8(npy_uint8 d)
divisor.val[0] = npyv_setall_u16(m);
divisor.val[1] = npyv_set_u8(sh1);
divisor.val[2] = npyv_set_u8(sh2);
-#elif defined(NPY_HAVE_VSX2)
+#elif defined(NPY_HAVE_VSX2) || defined(NPY_HAVE_VX)
divisor.val[0] = npyv_setall_u8(m);
divisor.val[1] = npyv_setall_u8(sh1);
divisor.val[2] = npyv_setall_u8(sh2);
@@ -247,7 +249,7 @@ NPY_FINLINE npyv_s8x3 npyv_divisor_s8(npy_int8 d)
npyv_s8x3 divisor;
divisor.val[0] = npyv_setall_s8(m);
divisor.val[2] = npyv_setall_s8(d < 0 ? -1 : 0);
- #ifdef NPY_HAVE_VSX2
+ #if defined(NPY_HAVE_VSX2) || defined(NPY_HAVE_VX)
divisor.val[1] = npyv_setall_s8(sh);
#elif defined(NPY_HAVE_NEON)
divisor.val[1] = npyv_setall_s8(-sh);
@@ -283,7 +285,7 @@ NPY_FINLINE npyv_u16x3 npyv_divisor_u16(npy_uint16 d)
#ifdef NPY_HAVE_SSE2 // SSE/AVX2/AVX512
divisor.val[1] = npyv_set_u16(sh1);
divisor.val[2] = npyv_set_u16(sh2);
-#elif defined(NPY_HAVE_VSX2)
+#elif defined(NPY_HAVE_VSX2) || defined(NPY_HAVE_VX)
divisor.val[1] = npyv_setall_u16(sh1);
divisor.val[2] = npyv_setall_u16(sh2);
#elif defined(NPY_HAVE_NEON)
@@ -315,7 +317,7 @@ NPY_FINLINE npyv_s16x3 npyv_divisor_s16(npy_int16 d)
divisor.val[2] = npyv_setall_s16(d < 0 ? -1 : 0); // sign of divisor
#ifdef NPY_HAVE_SSE2 // SSE/AVX2/AVX512
divisor.val[1] = npyv_set_s16(sh);
-#elif defined(NPY_HAVE_VSX2)
+#elif defined(NPY_HAVE_VSX2) || defined(NPY_HAVE_VX)
divisor.val[1] = npyv_setall_s16(sh);
#elif defined(NPY_HAVE_NEON)
divisor.val[1] = npyv_setall_s16(-sh);
@@ -350,7 +352,7 @@ NPY_FINLINE npyv_u32x3 npyv_divisor_u32(npy_uint32 d)
#ifdef NPY_HAVE_SSE2 // SSE/AVX2/AVX512
divisor.val[1] = npyv_set_u32(sh1);
divisor.val[2] = npyv_set_u32(sh2);
-#elif defined(NPY_HAVE_VSX2)
+#elif defined(NPY_HAVE_VSX2) || defined(NPY_HAVE_VX)
divisor.val[1] = npyv_setall_u32(sh1);
divisor.val[2] = npyv_setall_u32(sh2);
#elif defined(NPY_HAVE_NEON)
@@ -387,7 +389,7 @@ NPY_FINLINE npyv_s32x3 npyv_divisor_s32(npy_int32 d)
divisor.val[2] = npyv_setall_s32(d < 0 ? -1 : 0); // sign of divisor
#ifdef NPY_HAVE_SSE2 // SSE/AVX2/AVX512
divisor.val[1] = npyv_set_s32(sh);
-#elif defined(NPY_HAVE_VSX2)
+#elif defined(NPY_HAVE_VSX2) || defined(NPY_HAVE_VX)
divisor.val[1] = npyv_setall_s32(sh);
#elif defined(NPY_HAVE_NEON)
divisor.val[1] = npyv_setall_s32(-sh);
@@ -400,7 +402,7 @@ NPY_FINLINE npyv_s32x3 npyv_divisor_s32(npy_int32 d)
NPY_FINLINE npyv_u64x3 npyv_divisor_u64(npy_uint64 d)
{
npyv_u64x3 divisor;
-#if defined(NPY_HAVE_VSX2) || defined(NPY_HAVE_NEON)
+#if defined(NPY_HAVE_VSX2) || defined(NPY_HAVE_VX) || defined(NPY_HAVE_NEON)
divisor.val[0] = npyv_setall_u64(d);
#else
npy_uint64 l, l2, sh1, sh2, m;
@@ -435,7 +437,7 @@ NPY_FINLINE npyv_u64x3 npyv_divisor_u64(npy_uint64 d)
NPY_FINLINE npyv_s64x3 npyv_divisor_s64(npy_int64 d)
{
npyv_s64x3 divisor;
-#if defined(NPY_HAVE_VSX2) || defined(NPY_HAVE_NEON)
+#if defined(NPY_HAVE_VSX2) || defined(NPY_HAVE_VX) || defined(NPY_HAVE_NEON)
divisor.val[0] = npyv_setall_s64(d);
divisor.val[1] = npyv_cvt_s64_b64(
npyv_cmpeq_s64(npyv_setall_s64(-1), divisor.val[0])
diff --git a/numpy/core/src/common/simd/neon/math.h b/numpy/core/src/common/simd/neon/math.h
index 4607d6f27..8f4680c8f 100644
--- a/numpy/core/src/common/simd/neon/math.h
+++ b/numpy/core/src/common/simd/neon/math.h
@@ -161,7 +161,7 @@ NPY_FINLINE npyv_f32 npyv_rint_f32(npyv_f32 a)
#else
// ARMv7 NEON only supports fp to int truncate conversion.
// a magic trick of adding 1.5 * 2**23 is used for rounding
- // to nearest even and then substract this magic number to get
+ // to nearest even and then subtract this magic number to get
// the integer.
const npyv_s32 szero = vreinterpretq_s32_f32(vdupq_n_f32(-0.0f));
const npyv_f32 magic = vdupq_n_f32(12582912.0f); // 1.5 * 2**23
diff --git a/numpy/core/src/common/simd/neon/neon.h b/numpy/core/src/common/simd/neon/neon.h
index e6f6a7324..b08071527 100644
--- a/numpy/core/src/common/simd/neon/neon.h
+++ b/numpy/core/src/common/simd/neon/neon.h
@@ -4,7 +4,7 @@
#define NPY_SIMD 128
#define NPY_SIMD_WIDTH 16
-
+#define NPY_SIMD_F32 1
#ifdef __aarch64__
#define NPY_SIMD_F64 1
#else
@@ -15,6 +15,7 @@
#else
#define NPY_SIMD_FMA3 0 // HW emulated
#endif
+#define NPY_SIMD_BIGENDIAN 0
typedef uint8x16_t npyv_u8;
typedef int8x16_t npyv_s8;
diff --git a/numpy/core/src/common/simd/simd.h b/numpy/core/src/common/simd/simd.h
index 08b2a7d00..b1492500f 100644
--- a/numpy/core/src/common/simd/simd.h
+++ b/numpy/core/src/common/simd/simd.h
@@ -34,7 +34,7 @@ typedef double npyv_lanetype_f64;
* They had bad impact on the generated instructions,
* sometimes the compiler deal with them without the respect
* of 32-bit mode which lead to crush due to execute 64-bit
- * instructions and other times generate bad emulated instructions.
+ * instructions and other times generate bad emulated instructions.
*/
#undef _mm512_set1_epi64
#undef _mm256_set1_epi64x
@@ -54,9 +54,9 @@ typedef double npyv_lanetype_f64;
#include "sse/sse.h"
#endif
-// TODO: Add support for VSX(2.06) and BE Mode
-#if defined(NPY_HAVE_VSX2) && defined(__LITTLE_ENDIAN__)
- #include "vsx/vsx.h"
+// TODO: Add support for VSX(2.06) and BE Mode for VSX
+#if defined(NPY_HAVE_VX) || (defined(NPY_HAVE_VSX2) && defined(__LITTLE_ENDIAN__))
+ #include "vec/vec.h"
#endif
#ifdef NPY_HAVE_NEON
@@ -64,10 +64,20 @@ typedef double npyv_lanetype_f64;
#endif
#ifndef NPY_SIMD
+ /// SIMD width in bits or 0 if there's no SIMD extension available.
#define NPY_SIMD 0
+ /// SIMD width in bytes or 0 if there's no SIMD extension available.
#define NPY_SIMD_WIDTH 0
+ /// 1 if the enabled SIMD extension supports single-precision otherwise 0.
+ #define NPY_SIMD_F32 0
+ /// 1 if the enabled SIMD extension supports double-precision otherwise 0.
#define NPY_SIMD_F64 0
+ /// 1 if the enabled SIMD extension supports native FMA otherwise 0.
+ /// note: we still emulate(fast) FMA intrinsics even if they
+ /// aren't supported, but they shouldn't be used if precision matters.
#define NPY_SIMD_FMA3 0
+ /// 1 if the enabled SIMD extension is running on big-endian mode otherwise 0.
+ #define NPY_SIMD_BIGENDIAN 0
#endif
// enable emulated mask operations for all SIMD extension except for AVX512
diff --git a/numpy/core/src/common/simd/sse/sse.h b/numpy/core/src/common/simd/sse/sse.h
index 0bb404312..c21bbfda7 100644
--- a/numpy/core/src/common/simd/sse/sse.h
+++ b/numpy/core/src/common/simd/sse/sse.h
@@ -4,12 +4,15 @@
#define NPY_SIMD 128
#define NPY_SIMD_WIDTH 16
+#define NPY_SIMD_F32 1
#define NPY_SIMD_F64 1
#if defined(NPY_HAVE_FMA3) || defined(NPY_HAVE_FMA4)
#define NPY_SIMD_FMA3 1 // native support
#else
#define NPY_SIMD_FMA3 0 // fast emulated
#endif
+#define NPY_SIMD_BIGENDIAN 0
+
typedef __m128i npyv_u8;
typedef __m128i npyv_s8;
typedef __m128i npyv_u16;
diff --git a/numpy/core/src/common/simd/vsx/arithmetic.h b/numpy/core/src/common/simd/vec/arithmetic.h
index 01dbf5480..a2e9d07eb 100644
--- a/numpy/core/src/common/simd/vsx/arithmetic.h
+++ b/numpy/core/src/common/simd/vec/arithmetic.h
@@ -2,8 +2,8 @@
#error "Not a standalone header"
#endif
-#ifndef _NPY_SIMD_VSX_ARITHMETIC_H
-#define _NPY_SIMD_VSX_ARITHMETIC_H
+#ifndef _NPY_SIMD_VEC_ARITHMETIC_H
+#define _NPY_SIMD_VEC_ARITHMETIC_H
/***************************
* Addition
@@ -17,15 +17,32 @@
#define npyv_add_s32 vec_add
#define npyv_add_u64 vec_add
#define npyv_add_s64 vec_add
+#if NPY_SIMD_F32
#define npyv_add_f32 vec_add
+#endif
#define npyv_add_f64 vec_add
// saturated
-#define npyv_adds_u8 vec_adds
-#define npyv_adds_s8 vec_adds
-#define npyv_adds_u16 vec_adds
-#define npyv_adds_s16 vec_adds
+#ifdef NPY_HAVE_VX // VX: unpack both halves, add them, then narrow again with vec_pack##PSFX
+ #define NPYV_IMPL_VX_ADDS(SFX, PSFX) \
+ NPY_FINLINE npyv_##SFX npyv_adds_##SFX(npyv_##SFX a, npyv_##SFX b)\
+ { \
+ return vec_pack##PSFX( \
+ vec_add(vec_unpackh(a), vec_unpackh(b)), \
+ vec_add(vec_unpackl(a), vec_unpackl(b)) \
+ ); \
+ }
+ NPYV_IMPL_VX_ADDS(u8, su) // unsigned lanes are narrowed with vec_packsu
+ NPYV_IMPL_VX_ADDS(s8, s) // signed lanes are narrowed with vec_packs
+ NPYV_IMPL_VX_ADDS(u16, su)
+ NPYV_IMPL_VX_ADDS(s16, s)
+#else // VSX: map directly onto vec_adds
+ #define npyv_adds_u8 vec_adds
+ #define npyv_adds_s8 vec_adds
+ #define npyv_adds_u16 vec_adds
+ #define npyv_adds_s16 vec_adds
+#endif
/***************************
* Subtraction
***************************/
@@ -38,21 +55,39 @@
#define npyv_sub_s32 vec_sub
#define npyv_sub_u64 vec_sub
#define npyv_sub_s64 vec_sub
+#if NPY_SIMD_F32
#define npyv_sub_f32 vec_sub
+#endif
#define npyv_sub_f64 vec_sub
// saturated
-#define npyv_subs_u8 vec_subs
-#define npyv_subs_s8 vec_subs
-#define npyv_subs_u16 vec_subs
-#define npyv_subs_s16 vec_subs
+#ifdef NPY_HAVE_VX // VX: unpack both halves, subtract them, then narrow again with vec_pack##PSFX
+ #define NPYV_IMPL_VX_SUBS(SFX, PSFX) \
+ NPY_FINLINE npyv_##SFX npyv_subs_##SFX(npyv_##SFX a, npyv_##SFX b)\
+ { \
+ return vec_pack##PSFX( \
+ vec_sub(vec_unpackh(a), vec_unpackh(b)), \
+ vec_sub(vec_unpackl(a), vec_unpackl(b)) \
+ ); \
+ }
+
+ NPYV_IMPL_VX_SUBS(u8, su) // unsigned lanes are narrowed with vec_packsu
+ NPYV_IMPL_VX_SUBS(s8, s) // signed lanes are narrowed with vec_packs
+ NPYV_IMPL_VX_SUBS(u16, su)
+ NPYV_IMPL_VX_SUBS(s16, s)
+#else // VSX: map directly onto vec_subs
+ #define npyv_subs_u8 vec_subs
+ #define npyv_subs_s8 vec_subs
+ #define npyv_subs_u16 vec_subs
+ #define npyv_subs_s16 vec_subs
+#endif
/***************************
* Multiplication
***************************/
// non-saturated
// up to GCC 6 vec_mul only supports precisions and llong
-#if defined(__GNUC__) && __GNUC__ < 7
+#if defined(NPY_HAVE_VSX) && defined(__GNUC__) && __GNUC__ < 7
#define NPYV_IMPL_VSX_MUL(T_VEC, SFX, ...) \
NPY_FINLINE T_VEC npyv_mul_##SFX(T_VEC a, T_VEC b) \
{ \
@@ -91,7 +126,9 @@
#define npyv_mul_u32 vec_mul
#define npyv_mul_s32 vec_mul
#endif
+#if NPY_SIMD_F32
#define npyv_mul_f32 vec_mul
+#endif
#define npyv_mul_f64 vec_mul
/***************************
@@ -101,6 +138,9 @@
// divide each unsigned 8-bit element by a precomputed divisor
NPY_FINLINE npyv_u8 npyv_divc_u8(npyv_u8 a, const npyv_u8x3 divisor)
{
+#ifdef NPY_HAVE_VX
+ npyv_u8 mulhi = vec_mulh(a, divisor.val[0]);
+#else // VSX
const npyv_u8 mergeo_perm = {
1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31
};
@@ -108,6 +148,7 @@ NPY_FINLINE npyv_u8 npyv_divc_u8(npyv_u8 a, const npyv_u8x3 divisor)
npyv_u16 mul_even = vec_mule(a, divisor.val[0]);
npyv_u16 mul_odd = vec_mulo(a, divisor.val[0]);
npyv_u8 mulhi = (npyv_u8)vec_perm(mul_even, mul_odd, mergeo_perm);
+#endif
// floor(a/d) = (mulhi + ((a-mulhi) >> sh1)) >> sh2
npyv_u8 q = vec_sub(a, mulhi);
q = vec_sr(q, divisor.val[1]);
@@ -118,6 +159,9 @@ NPY_FINLINE npyv_u8 npyv_divc_u8(npyv_u8 a, const npyv_u8x3 divisor)
// divide each signed 8-bit element by a precomputed divisor
NPY_FINLINE npyv_s8 npyv_divc_s8(npyv_s8 a, const npyv_s8x3 divisor)
{
+#ifdef NPY_HAVE_VX
+ npyv_s8 mulhi = vec_mulh(a, divisor.val[0]);
+#else
const npyv_u8 mergeo_perm = {
1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31
};
@@ -125,16 +169,20 @@ NPY_FINLINE npyv_s8 npyv_divc_s8(npyv_s8 a, const npyv_s8x3 divisor)
npyv_s16 mul_even = vec_mule(a, divisor.val[0]);
npyv_s16 mul_odd = vec_mulo(a, divisor.val[0]);
npyv_s8 mulhi = (npyv_s8)vec_perm(mul_even, mul_odd, mergeo_perm);
+#endif
// q = ((a + mulhi) >> sh1) - XSIGN(a)
// trunc(a/d) = (q ^ dsign) - dsign
- npyv_s8 q = vec_sra(vec_add(a, mulhi), (npyv_u8)divisor.val[1]);
- q = vec_sub(q, vec_sra(a, npyv_setall_u8(7)));
+ npyv_s8 q = vec_sra_s8(vec_add(a, mulhi), (npyv_u8)divisor.val[1]);
+ q = vec_sub(q, vec_sra_s8(a, npyv_setall_u8(7)));
q = vec_sub(vec_xor(q, divisor.val[2]), divisor.val[2]);
return q;
}
// divide each unsigned 16-bit element by a precomputed divisor
NPY_FINLINE npyv_u16 npyv_divc_u16(npyv_u16 a, const npyv_u16x3 divisor)
{
+#ifdef NPY_HAVE_VX
+ npyv_u16 mulhi = vec_mulh(a, divisor.val[0]);
+#else // VSX
const npyv_u8 mergeo_perm = {
2, 3, 18, 19, 6, 7, 22, 23, 10, 11, 26, 27, 14, 15, 30, 31
};
@@ -142,6 +190,7 @@ NPY_FINLINE npyv_u16 npyv_divc_u16(npyv_u16 a, const npyv_u16x3 divisor)
npyv_u32 mul_even = vec_mule(a, divisor.val[0]);
npyv_u32 mul_odd = vec_mulo(a, divisor.val[0]);
npyv_u16 mulhi = (npyv_u16)vec_perm(mul_even, mul_odd, mergeo_perm);
+#endif
// floor(a/d) = (mulhi + ((a-mulhi) >> sh1)) >> sh2
npyv_u16 q = vec_sub(a, mulhi);
q = vec_sr(q, divisor.val[1]);
@@ -152,6 +201,9 @@ NPY_FINLINE npyv_u16 npyv_divc_u16(npyv_u16 a, const npyv_u16x3 divisor)
// divide each signed 16-bit element by a precomputed divisor (round towards zero)
NPY_FINLINE npyv_s16 npyv_divc_s16(npyv_s16 a, const npyv_s16x3 divisor)
{
+#ifdef NPY_HAVE_VX
+ npyv_s16 mulhi = vec_mulh(a, divisor.val[0]);
+#else // VSX
const npyv_u8 mergeo_perm = {
2, 3, 18, 19, 6, 7, 22, 23, 10, 11, 26, 27, 14, 15, 30, 31
};
@@ -159,30 +211,31 @@ NPY_FINLINE npyv_s16 npyv_divc_s16(npyv_s16 a, const npyv_s16x3 divisor)
npyv_s32 mul_even = vec_mule(a, divisor.val[0]);
npyv_s32 mul_odd = vec_mulo(a, divisor.val[0]);
npyv_s16 mulhi = (npyv_s16)vec_perm(mul_even, mul_odd, mergeo_perm);
+#endif
// q = ((a + mulhi) >> sh1) - XSIGN(a)
// trunc(a/d) = (q ^ dsign) - dsign
- npyv_s16 q = vec_sra(vec_add(a, mulhi), (npyv_u16)divisor.val[1]);
- q = vec_sub(q, vec_sra(a, npyv_setall_u16(15)));
+ npyv_s16 q = vec_sra_s16(vec_add(a, mulhi), (npyv_u16)divisor.val[1]);
+ q = vec_sub(q, vec_sra_s16(a, npyv_setall_u16(15)));
q = vec_sub(vec_xor(q, divisor.val[2]), divisor.val[2]);
return q;
}
// divide each unsigned 32-bit element by a precomputed divisor
NPY_FINLINE npyv_u32 npyv_divc_u32(npyv_u32 a, const npyv_u32x3 divisor)
{
-#if defined(NPY_HAVE_VSX4)
+#if defined(NPY_HAVE_VSX4) || defined(NPY_HAVE_VX)
// high part of unsigned multiplication
npyv_u32 mulhi = vec_mulh(a, divisor.val[0]);
-#else
-#if defined(__GNUC__) && __GNUC__ < 8
- // Doubleword integer wide multiplication supported by GCC 8+
- npyv_u64 mul_even, mul_odd;
- __asm__ ("vmulouw %0,%1,%2" : "=v" (mul_even) : "v" (a), "v" (divisor.val[0]));
- __asm__ ("vmuleuw %0,%1,%2" : "=v" (mul_odd) : "v" (a), "v" (divisor.val[0]));
-#else
- // Doubleword integer wide multiplication supported by GCC 8+
- npyv_u64 mul_even = vec_mule(a, divisor.val[0]);
- npyv_u64 mul_odd = vec_mulo(a, divisor.val[0]);
-#endif
+#else // VSX
+ #if defined(__GNUC__) && __GNUC__ < 8
+ // Doubleword integer wide multiplication supported by GCC 8+
+ npyv_u64 mul_even, mul_odd;
+ __asm__ ("vmulouw %0,%1,%2" : "=v" (mul_even) : "v" (a), "v" (divisor.val[0]));
+ __asm__ ("vmuleuw %0,%1,%2" : "=v" (mul_odd) : "v" (a), "v" (divisor.val[0]));
+ #else
+ // Doubleword integer wide multiplication supported by GCC 8+
+ npyv_u64 mul_even = vec_mule(a, divisor.val[0]);
+ npyv_u64 mul_odd = vec_mulo(a, divisor.val[0]);
+ #endif
// high part of unsigned multiplication
npyv_u32 mulhi = vec_mergeo((npyv_u32)mul_even, (npyv_u32)mul_odd);
#endif
@@ -196,27 +249,27 @@ NPY_FINLINE npyv_u32 npyv_divc_u32(npyv_u32 a, const npyv_u32x3 divisor)
// divide each signed 32-bit element by a precomputed divisor (round towards zero)
NPY_FINLINE npyv_s32 npyv_divc_s32(npyv_s32 a, const npyv_s32x3 divisor)
{
-#if defined(NPY_HAVE_VSX4)
+#if defined(NPY_HAVE_VSX4) || defined(NPY_HAVE_VX)
// high part of signed multiplication
npyv_s32 mulhi = vec_mulh(a, divisor.val[0]);
#else
-#if defined(__GNUC__) && __GNUC__ < 8
- // Doubleword integer wide multiplication supported by GCC8+
- npyv_s64 mul_even, mul_odd;
- __asm__ ("vmulosw %0,%1,%2" : "=v" (mul_even) : "v" (a), "v" (divisor.val[0]));
- __asm__ ("vmulesw %0,%1,%2" : "=v" (mul_odd) : "v" (a), "v" (divisor.val[0]));
-#else
- // Doubleword integer wide multiplication supported by GCC8+
- npyv_s64 mul_even = vec_mule(a, divisor.val[0]);
- npyv_s64 mul_odd = vec_mulo(a, divisor.val[0]);
-#endif
+ #if defined(__GNUC__) && __GNUC__ < 8
+ // Doubleword integer wide multiplication supported by GCC8+
+ npyv_s64 mul_even, mul_odd;
+ __asm__ ("vmulosw %0,%1,%2" : "=v" (mul_even) : "v" (a), "v" (divisor.val[0]));
+ __asm__ ("vmulesw %0,%1,%2" : "=v" (mul_odd) : "v" (a), "v" (divisor.val[0]));
+ #else
+ // Doubleword integer wide multiplication supported by GCC8+
+ npyv_s64 mul_even = vec_mule(a, divisor.val[0]);
+ npyv_s64 mul_odd = vec_mulo(a, divisor.val[0]);
+ #endif
// high part of signed multiplication
npyv_s32 mulhi = vec_mergeo((npyv_s32)mul_even, (npyv_s32)mul_odd);
#endif
// q = ((a + mulhi) >> sh1) - XSIGN(a)
// trunc(a/d) = (q ^ dsign) - dsign
- npyv_s32 q = vec_sra(vec_add(a, mulhi), (npyv_u32)divisor.val[1]);
- q = vec_sub(q, vec_sra(a, npyv_setall_u32(31)));
+ npyv_s32 q = vec_sra_s32(vec_add(a, mulhi), (npyv_u32)divisor.val[1]);
+ q = vec_sub(q, vec_sra_s32(a, npyv_setall_u32(31)));
q = vec_sub(vec_xor(q, divisor.val[2]), divisor.val[2]);
return q;
}
@@ -240,45 +293,67 @@ NPY_FINLINE npyv_s64 npyv_divc_s64(npyv_s64 a, const npyv_s64x3 divisor)
/***************************
* Division
***************************/
-#define npyv_div_f32 vec_div
+#if NPY_SIMD_F32
+ #define npyv_div_f32 vec_div
+#endif
#define npyv_div_f64 vec_div
/***************************
* FUSED
***************************/
// multiply and add, a*b + c
-#define npyv_muladd_f32 vec_madd
#define npyv_muladd_f64 vec_madd
// multiply and subtract, a*b - c
-#define npyv_mulsub_f32 vec_msub
#define npyv_mulsub_f64 vec_msub
-// negate multiply and add, -(a*b) + c
-#define npyv_nmuladd_f32 vec_nmsub // equivalent to -(a*b - c)
-#define npyv_nmuladd_f64 vec_nmsub
-// negate multiply and subtract, -(a*b) - c
-#define npyv_nmulsub_f32 vec_nmadd // equivalent to -(a*b + c)
-#define npyv_nmulsub_f64 vec_nmadd
-
+#if NPY_SIMD_F32
+ #define npyv_muladd_f32 vec_madd
+ #define npyv_mulsub_f32 vec_msub
+#endif
+#if defined(NPY_HAVE_VXE) || defined(NPY_HAVE_VSX)
+ // negate multiply and add, -(a*b) + c
+ #define npyv_nmuladd_f32 vec_nmsub // equivalent to -(a*b - c)
+ #define npyv_nmuladd_f64 vec_nmsub // equivalent to -(a*b - c)
+ // negate multiply and subtract, -(a*b) - c
+ #define npyv_nmulsub_f64 vec_nmadd // equivalent to -(a*b + c)
+ #define npyv_nmulsub_f32 vec_nmadd // equivalent to -(a*b + c)
+#else // neither VXE nor VSX: negate vec_msub/vec_madd explicitly; only the f64 variants are defined here
+ NPY_FINLINE npyv_f64 npyv_nmuladd_f64(npyv_f64 a, npyv_f64 b, npyv_f64 c)
+ { return vec_neg(vec_msub(a, b, c)); } // -(a*b - c)
+ NPY_FINLINE npyv_f64 npyv_nmulsub_f64(npyv_f64 a, npyv_f64 b, npyv_f64 c)
+ { return vec_neg(vec_madd(a, b, c)); } // -(a*b + c)
+#endif
/***************************
* Summation
***************************/
// reduce sum across vector
NPY_FINLINE npy_uint64 npyv_sum_u64(npyv_u64 a)
{
+#ifdef NPY_HAVE_VX
+ const npyv_u64 zero = npyv_zero_u64();
+ return vec_extract((npyv_u64)vec_sum_u128(a, zero), 1);
+#else
return vec_extract(vec_add(a, vec_mergel(a, a)), 0);
+#endif
}
NPY_FINLINE npy_uint32 npyv_sum_u32(npyv_u32 a)
{
+#ifdef NPY_HAVE_VX
+ const npyv_u32 zero = npyv_zero_u32();
+ return vec_extract((npyv_u32)vec_sum_u128(a, zero), 3);
+#else
const npyv_u32 rs = vec_add(a, vec_sld(a, a, 8));
return vec_extract(vec_add(rs, vec_sld(rs, rs, 4)), 0);
+#endif
}
+#if NPY_SIMD_F32
NPY_FINLINE float npyv_sum_f32(npyv_f32 a)
{
npyv_f32 sum = vec_add(a, npyv_combineh_f32(a, a));
return vec_extract(sum, 0) + vec_extract(sum, 1);
}
+#endif
NPY_FINLINE double npyv_sum_f64(npyv_f64 a)
{
@@ -288,19 +363,30 @@ NPY_FINLINE double npyv_sum_f64(npyv_f64 a)
// expand the source vector and performs sum reduce
NPY_FINLINE npy_uint16 npyv_sumup_u8(npyv_u8 a)
{
+#ifdef NPY_HAVE_VX
+ const npyv_u8 zero = npyv_zero_u8();
+ npyv_u32 sum4 = vec_sum4(a, zero);
+ return (npy_uint16)npyv_sum_u32(sum4);
+#else
const npyv_u32 zero = npyv_zero_u32();
npyv_u32 four = vec_sum4s(a, zero);
npyv_s32 one = vec_sums((npyv_s32)four, (npyv_s32)zero);
return (npy_uint16)vec_extract(one, 3);
+#endif
}
NPY_FINLINE npy_uint32 npyv_sumup_u16(npyv_u16 a)
{
+#ifdef NPY_HAVE_VX
+ npyv_u64 sum = vec_sum2(a, npyv_zero_u16());
+ return (npy_uint32)npyv_sum_u64(sum);
+#else // VSX
const npyv_s32 zero = npyv_zero_s32();
npyv_u32x2 eight = npyv_expand_u32_u16(a);
npyv_u32 four = vec_add(eight.val[0], eight.val[1]);
npyv_s32 one = vec_sums((npyv_s32)four, zero);
return (npy_uint32)vec_extract(one, 3);
+#endif
}
-#endif // _NPY_SIMD_VSX_ARITHMETIC_H
+#endif // _NPY_SIMD_VEC_ARITHMETIC_H
diff --git a/numpy/core/src/common/simd/vec/conversion.h b/numpy/core/src/common/simd/vec/conversion.h
new file mode 100644
index 000000000..f0d625c55
--- /dev/null
+++ b/numpy/core/src/common/simd/vec/conversion.h
@@ -0,0 +1,228 @@
+#ifndef NPY_SIMD
+ #error "Not a standalone header"
+#endif
+
+#ifndef _NPY_SIMD_VEC_CVT_H
+#define _NPY_SIMD_VEC_CVT_H
+
+// convert boolean vectors to integer/float vectors (each macro is a plain vector cast of BL)
+#define npyv_cvt_u8_b8(BL) ((npyv_u8) BL)
+#define npyv_cvt_s8_b8(BL) ((npyv_s8) BL)
+#define npyv_cvt_u16_b16(BL) ((npyv_u16) BL)
+#define npyv_cvt_s16_b16(BL) ((npyv_s16) BL)
+#define npyv_cvt_u32_b32(BL) ((npyv_u32) BL)
+#define npyv_cvt_s32_b32(BL) ((npyv_s32) BL)
+#define npyv_cvt_u64_b64(BL) ((npyv_u64) BL)
+#define npyv_cvt_s64_b64(BL) ((npyv_s64) BL)
+#if NPY_SIMD_F32 // the f32 cast exists only when single precision is enabled
+ #define npyv_cvt_f32_b32(BL) ((npyv_f32) BL)
+#endif
+#define npyv_cvt_f64_b64(BL) ((npyv_f64) BL)
+
+// convert integer/float vectors to boolean vectors (each macro is a plain vector cast of A)
+#define npyv_cvt_b8_u8(A) ((npyv_b8) A)
+#define npyv_cvt_b8_s8(A) ((npyv_b8) A)
+#define npyv_cvt_b16_u16(A) ((npyv_b16) A)
+#define npyv_cvt_b16_s16(A) ((npyv_b16) A)
+#define npyv_cvt_b32_u32(A) ((npyv_b32) A)
+#define npyv_cvt_b32_s32(A) ((npyv_b32) A)
+#define npyv_cvt_b64_u64(A) ((npyv_b64) A)
+#define npyv_cvt_b64_s64(A) ((npyv_b64) A)
+#if NPY_SIMD_F32 // the f32 cast exists only when single precision is enabled
+ #define npyv_cvt_b32_f32(A) ((npyv_b32) A)
+#endif
+#define npyv_cvt_b64_f64(A) ((npyv_b64) A)
+
+// expand: widen one vector of u8 lanes into a pair of u16 vectors (val[0], val[1])
+NPY_FINLINE npyv_u16x2 npyv_expand_u16_u8(npyv_u8 data)
+{
+ npyv_u16x2 r;
+#ifdef NPY_HAVE_VX // VX: use the unpack intrinsics directly
+ r.val[0] = vec_unpackh(data);
+ r.val[1] = vec_unpackl(data);
+#else // VSX: merge data with a zero vector and reinterpret the result as u16 lanes
+ npyv_u8 zero = npyv_zero_u8();
+ r.val[0] = (npyv_u16)vec_mergeh(data, zero);
+ r.val[1] = (npyv_u16)vec_mergel(data, zero);
+#endif
+ return r;
+}
+
+NPY_FINLINE npyv_u32x2 npyv_expand_u32_u16(npyv_u16 data) // widen u16 lanes into a pair of u32 vectors
+{
+ npyv_u32x2 r;
+#ifdef NPY_HAVE_VX // VX: use the unpack intrinsics directly
+ r.val[0] = vec_unpackh(data);
+ r.val[1] = vec_unpackl(data);
+#else // VSX: merge data with a zero vector and reinterpret the result as u32 lanes
+ npyv_u16 zero = npyv_zero_u16();
+ r.val[0] = (npyv_u32)vec_mergeh(data, zero);
+ r.val[1] = (npyv_u32)vec_mergel(data, zero);
+#endif
+ return r;
+}
+
+// pack two 16-bit boolean vectors into one 8-bit boolean vector
+NPY_FINLINE npyv_b8 npyv_pack_b8_b16(npyv_b16 a, npyv_b16 b) {
+ return vec_pack(a, b);
+}
+
+// pack four 32-bit boolean vectors into one 8-bit boolean vector
+NPY_FINLINE npyv_b8 npyv_pack_b8_b32(npyv_b32 a, npyv_b32 b, npyv_b32 c, npyv_b32 d) {
+ npyv_b16 ab = vec_pack(a, b); // first stage: pairs of b32 vectors become b16 vectors
+ npyv_b16 cd = vec_pack(c, d);
+ return npyv_pack_b8_b16(ab, cd); // second stage: the two b16 vectors become one b8 vector
+}
+
+// pack eight 64-bit boolean vectors into one 8-bit boolean vector
+NPY_FINLINE npyv_b8
+npyv_pack_b8_b64(npyv_b64 a, npyv_b64 b, npyv_b64 c, npyv_b64 d,
+ npyv_b64 e, npyv_b64 f, npyv_b64 g, npyv_b64 h) {
+ npyv_b32 ab = vec_pack(a, b); // first stage: pairs of b64 vectors become b32 vectors
+ npyv_b32 cd = vec_pack(c, d);
+ npyv_b32 ef = vec_pack(e, f);
+ npyv_b32 gh = vec_pack(g, h);
+ return npyv_pack_b8_b32(ab, cd, ef, gh); // remaining stages are delegated to the b32 packer
+}
+
+// convert boolean vector to integer bitfield (vec_vbpermq when VXE or VSX2 is available, lane sums otherwise)
+#if defined(NPY_HAVE_VXE) || defined(NPY_HAVE_VSX2)
+ NPY_FINLINE npy_uint64 npyv_tobits_b8(npyv_b8 a)
+ {
+ const npyv_u8 qperm = npyv_set_u8(120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0); // one selector per byte lane: multiples of 8, descending
+ npyv_u16 r = (npyv_u16)vec_vbpermq((npyv_u8)a, qperm);
+ #ifdef NPY_HAVE_VXE // the u16 lane that is read back differs between VXE and VSX
+ return vec_extract(r, 3);
+ #else
+ return vec_extract(r, 4);
+ #endif
+ }
+ NPY_FINLINE npy_uint64 npyv_tobits_b16(npyv_b16 a)
+ {
+ const npyv_u8 qperm = npyv_setf_u8(128, 112, 96, 80, 64, 48, 32, 16, 0); // selectors step by 16, one per 16-bit lane
+ npyv_u8 r = (npyv_u8)vec_vbpermq((npyv_u8)a, qperm);
+ #ifdef NPY_HAVE_VXE // the byte lane that is read back differs between VXE and VSX
+ return vec_extract(r, 6);
+ #else
+ return vec_extract(r, 8);
+ #endif
+ }
+ NPY_FINLINE npy_uint64 npyv_tobits_b32(npyv_b32 a)
+ {
+ #ifdef NPY_HAVE_VXE // VXE passes four extra leading 128 arguments compared with VSX
+ const npyv_u8 qperm = npyv_setf_u8(128, 128, 128, 128, 128, 96, 64, 32, 0);
+ #else
+ const npyv_u8 qperm = npyv_setf_u8(128, 96, 64, 32, 0);
+ #endif
+ npyv_u8 r = (npyv_u8)vec_vbpermq((npyv_u8)a, qperm);
+ #ifdef NPY_HAVE_VXE
+ return vec_extract(r, 6);
+ #else
+ return vec_extract(r, 8);
+ #endif
+ }
+ NPY_FINLINE npy_uint64 npyv_tobits_b64(npyv_b64 a)
+ {
+ #ifdef NPY_HAVE_VXE // VXE passes six extra leading 128 arguments compared with VSX
+ const npyv_u8 qperm = npyv_setf_u8(128, 128, 128, 128, 128, 128, 128, 64, 0);
+ #else
+ const npyv_u8 qperm = npyv_setf_u8(128, 64, 0);
+ #endif
+ npyv_u8 r = (npyv_u8)vec_vbpermq((npyv_u8)a, qperm);
+ #ifdef NPY_HAVE_VXE
+ return vec_extract(r, 6);
+ #else
+ return vec_extract(r, 8);
+ #endif
+ }
+#else // no vec_vbpermq path: mask every lane with its bit weight, then sum the lanes
+ NPY_FINLINE npy_uint64 npyv_tobits_b8(npyv_b8 a)
+ {
+ const npyv_u8 scale = npyv_set_u8(1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128); // weights 1..128, repeated for each group of eight lanes
+ npyv_u8 seq_scale = vec_and((npyv_u8)a, scale);
+ npyv_u64 sum = vec_sum2(vec_sum4(seq_scale, npyv_zero_u8()), npyv_zero_u32());
+ return vec_extract(sum, 0) + ((int)vec_extract(sum, 1) << 8); // the second 64-bit sum supplies bits 8..15
+ }
+ NPY_FINLINE npy_uint64 npyv_tobits_b16(npyv_b16 a)
+ {
+ const npyv_u16 scale = npyv_set_u16(1, 2, 4, 8, 16, 32, 64, 128); // one weight per 16-bit lane
+ npyv_u16 seq_scale = vec_and((npyv_u16)a, scale);
+ npyv_u64 sum = vec_sum2(seq_scale, npyv_zero_u16());
+ return vec_extract(vec_sum_u128(sum, npyv_zero_u64()), 15); // read element 15 of the 128-bit sum
+ }
+ NPY_FINLINE npy_uint64 npyv_tobits_b32(npyv_b32 a)
+ {
+ const npyv_u32 scale = npyv_set_u32(1, 2, 4, 8); // one weight per 32-bit lane
+ npyv_u32 seq_scale = vec_and((npyv_u32)a, scale);
+ return vec_extract(vec_sum_u128(seq_scale, npyv_zero_u32()), 15);
+ }
+ NPY_FINLINE npy_uint64 npyv_tobits_b64(npyv_b64 a)
+ {
+ const npyv_u64 scale = npyv_set_u64(1, 2); // one weight per 64-bit lane
+ npyv_u64 seq_scale = vec_and((npyv_u64)a, scale);
+ return vec_extract(vec_sum_u128(seq_scale, npyv_zero_u64()), 15);
+ }
+#endif
+// f32 -> s32 conversion written to build with every supported compiler (internal use for now)
+#if NPY_SIMD_F32
+ NPY_FINLINE npyv_s32 npyv__trunc_s32_f32(npyv_f32 a)
+ {
+ #ifdef NPY_HAVE_VXE2
+ return vec_signed(a);
+ #elif defined(NPY_HAVE_VXE) // VXE without VXE2: go through npyv_doublee + vec_signed, then narrow with vec_packs
+ return vec_packs(vec_signed(npyv_doublee(a)), vec_signed(npyv_doublee(vec_mergel(a, a))));
+ // VSX
+ #elif defined(__IBMC__)
+ return vec_cts(a, 0);
+ #elif defined(__clang__)
+ /**
+ * old versions of Clang don't support %x<n> in the inline asm template
+ * which fixes the register number when using any of the register constraints wa, wd, wf.
+ * therefore, we count on built-in functions.
+ */
+ return __builtin_convertvector(a, npyv_s32);
+ #else // gcc
+ npyv_s32 ret;
+ __asm__ ("xvcvspsxws %x0,%x1" : "=wa" (ret) : "wa" (a));
+ return ret;
+ #endif
+ }
+#endif
+
+NPY_FINLINE npyv_s32 npyv__trunc_s32_f64(npyv_f64 a, npyv_f64 b) // convert two f64 vectors into a single s32 vector
+{
+#ifdef NPY_HAVE_VX
+ return vec_packs(vec_signed(a), vec_signed(b));
+// VSX
+#elif defined(__IBMC__)
+ const npyv_u8 seq_even = npyv_set_u8(0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27); // byte indices of the even 32-bit words of both inputs
+ // unfortunately, XLC is missing the asm register VSX fixer
+ // hopefully, XLC can optimize around big-endian compatibility
+ npyv_s32 lo_even = vec_cts(a, 0);
+ npyv_s32 hi_even = vec_cts(b, 0);
+ return vec_perm(lo_even, hi_even, seq_even);
+#else
+ const npyv_u8 seq_odd = npyv_set_u8(4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31); // byte indices of the odd 32-bit words of both inputs
+ #ifdef __clang__
+ // __builtin_convertvector doesn't support this conversion on a wide range of versions
+ // fortunately, almost all versions have a direct builtin for 'xvcvdpsxws'
+ npyv_s32 lo_odd = __builtin_vsx_xvcvdpsxws(a);
+ npyv_s32 hi_odd = __builtin_vsx_xvcvdpsxws(b);
+ #else // gcc
+ npyv_s32 lo_odd, hi_odd;
+ __asm__ ("xvcvdpsxws %x0,%x1" : "=wa" (lo_odd) : "wa" (a));
+ __asm__ ("xvcvdpsxws %x0,%x1" : "=wa" (hi_odd) : "wa" (b));
+ #endif
+ return vec_perm(lo_odd, hi_odd, seq_odd);
+#endif
+}
+
+// round to nearest integer (assuming even): apply vec_rint, then the npyv__trunc_* conversion
+#if NPY_SIMD_F32
+ NPY_FINLINE npyv_s32 npyv_round_s32_f32(npyv_f32 a)
+ { return npyv__trunc_s32_f32(vec_rint(a)); }
+#endif
+NPY_FINLINE npyv_s32 npyv_round_s32_f64(npyv_f64 a, npyv_f64 b)
+{ return npyv__trunc_s32_f64(vec_rint(a), vec_rint(b)); }
+
+#endif // _NPY_SIMD_VEC_CVT_H
diff --git a/numpy/core/src/common/simd/vsx/math.h b/numpy/core/src/common/simd/vec/math.h
index 444bc9e54..7714a612d 100644
--- a/numpy/core/src/common/simd/vsx/math.h
+++ b/numpy/core/src/common/simd/vec/math.h
@@ -2,21 +2,25 @@
#error "Not a standalone header"
#endif
-#ifndef _NPY_SIMD_VSX_MATH_H
-#define _NPY_SIMD_VSX_MATH_H
+#ifndef _NPY_SIMD_VEC_MATH_H
+#define _NPY_SIMD_VEC_MATH_H
/***************************
* Elementary
***************************/
// Square root
-#define npyv_sqrt_f32 vec_sqrt
+#if NPY_SIMD_F32
+ #define npyv_sqrt_f32 vec_sqrt
+#endif
#define npyv_sqrt_f64 vec_sqrt
// Reciprocal
-NPY_FINLINE npyv_f32 npyv_recip_f32(npyv_f32 a)
-{
- const npyv_f32 one = npyv_setall_f32(1.0f);
- return vec_div(one, a);
-}
+#if NPY_SIMD_F32
+ NPY_FINLINE npyv_f32 npyv_recip_f32(npyv_f32 a)
+ {
+ const npyv_f32 one = npyv_setall_f32(1.0f);
+ return vec_div(one, a);
+ }
+#endif
NPY_FINLINE npyv_f64 npyv_recip_f64(npyv_f64 a)
{
const npyv_f64 one = npyv_setall_f64(1.0);
@@ -24,23 +28,41 @@ NPY_FINLINE npyv_f64 npyv_recip_f64(npyv_f64 a)
}
// Absolute
-#define npyv_abs_f32 vec_abs
+#if NPY_SIMD_F32
+ #define npyv_abs_f32 vec_abs
+#endif
#define npyv_abs_f64 vec_abs
// Square
-NPY_FINLINE npyv_f32 npyv_square_f32(npyv_f32 a)
-{ return vec_mul(a, a); }
+#if NPY_SIMD_F32
+ NPY_FINLINE npyv_f32 npyv_square_f32(npyv_f32 a)
+ { return vec_mul(a, a); }
+#endif
NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a)
{ return vec_mul(a, a); }
// Maximum, natively mapping with no guarantees to handle NaN.
-#define npyv_max_f32 vec_max
+#if NPY_SIMD_F32
+ #define npyv_max_f32 vec_max
+#endif
#define npyv_max_f64 vec_max
// Maximum, supports IEEE floating-point arithmetic (IEC 60559),
// - If one of the two vectors contains NaN, the equivalent element of the other vector is set
// - Only if both corresponded elements are NaN, NaN is set.
-#define npyv_maxp_f32 vec_max
-#define npyv_maxp_f64 vec_max
+#if NPY_SIMD_F32
+ #define npyv_maxp_f32 vec_max
+#endif
+#if defined(NPY_HAVE_VXE) || defined(NPY_HAVE_VSX)
+ #define npyv_maxp_f64 vec_max
+#else
+ // vfmindb & vfmaxdb appear in zarch12
+ NPY_FINLINE npyv_f64 npyv_maxp_f64(npyv_f64 a, npyv_f64 b)
+ {
+ npyv_b64 nn_a = npyv_notnan_f64(a); // mask from npyv_notnan_f64 for a
+ npyv_b64 nn_b = npyv_notnan_f64(b); // mask from npyv_notnan_f64 for b
+ return vec_max(vec_sel(b, a, nn_a), vec_sel(a, b, nn_b)); // each vec_sel swaps in the other operand's lane where the mask reports NaN
+ }
+#endif
// Maximum, integer operations
#define npyv_max_u8 vec_max
#define npyv_max_s8 vec_max
@@ -52,13 +74,27 @@ NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a)
#define npyv_max_s64 vec_max
// Minimum, natively mapping with no guarantees to handle NaN.
-#define npyv_min_f32 vec_min
+#if NPY_SIMD_F32
+ #define npyv_min_f32 vec_min
+#endif
#define npyv_min_f64 vec_min
// Minimum, supports IEEE floating-point arithmetic (IEC 60559),
// - If one of the two vectors contains NaN, the equivalent element of the other vector is set
// - Only if both corresponded elements are NaN, NaN is set.
-#define npyv_minp_f32 vec_min
-#define npyv_minp_f64 vec_min
+#if NPY_SIMD_F32
+ #define npyv_minp_f32 vec_min
+#endif
+#if defined(NPY_HAVE_VXE) || defined(NPY_HAVE_VSX)
+ #define npyv_minp_f64 vec_min
+#else
+ // vfmindb & vfmaxdb appear in zarch12
+ NPY_FINLINE npyv_f64 npyv_minp_f64(npyv_f64 a, npyv_f64 b)
+ {
+ npyv_b64 nn_a = npyv_notnan_f64(a); // mask from npyv_notnan_f64 for a
+ npyv_b64 nn_b = npyv_notnan_f64(b); // mask from npyv_notnan_f64 for b
+ return vec_min(vec_sel(b, a, nn_a), vec_sel(a, b, nn_b)); // each vec_sel swaps in the other operand's lane where the mask reports NaN
+ }
+#endif
// Minimum, integer operations
#define npyv_min_u8 vec_min
#define npyv_min_s8 vec_min
@@ -70,19 +106,18 @@ NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a)
#define npyv_min_s64 vec_min
// round to nearest int even
-#define npyv_rint_f32 vec_rint
#define npyv_rint_f64 vec_rint
-
// ceil
-#define npyv_ceil_f32 vec_ceil
#define npyv_ceil_f64 vec_ceil
-
// trunc
-#define npyv_trunc_f32 vec_trunc
#define npyv_trunc_f64 vec_trunc
-
// floor
-#define npyv_floor_f32 vec_floor
#define npyv_floor_f64 vec_floor
+#if NPY_SIMD_F32
+ #define npyv_rint_f32 vec_rint
+ #define npyv_ceil_f32 vec_ceil
+ #define npyv_trunc_f32 vec_trunc
+ #define npyv_floor_f32 vec_floor
+#endif
-#endif // _NPY_SIMD_VSX_MATH_H
+#endif // _NPY_SIMD_VEC_MATH_H
diff --git a/numpy/core/src/common/simd/vsx/memory.h b/numpy/core/src/common/simd/vec/memory.h
index 3007584ef..e8f588ef2 100644
--- a/numpy/core/src/common/simd/vsx/memory.h
+++ b/numpy/core/src/common/simd/vec/memory.h
@@ -2,8 +2,8 @@
#error "Not a standalone header"
#endif
-#ifndef _NPY_SIMD_VSX_MEMORY_H
-#define _NPY_SIMD_VSX_MEMORY_H
+#ifndef _NPY_SIMD_VEC_MEMORY_H
+#define _NPY_SIMD_VEC_MEMORY_H
#include "misc.h"
@@ -19,19 +19,32 @@
* CLANG fails to load unaligned addresses via vec_xl, vec_xst
* so we failback to vec_vsx_ld, vec_vsx_st
*/
- #if (defined(__GNUC__) && !defined(vec_xl)) || (defined(__clang__) && !defined(__IBMC__))
+ #if defined (NPY_HAVE_VSX2) && ( \
+ (defined(__GNUC__) && !defined(vec_xl)) || (defined(__clang__) && !defined(__IBMC__)) \
+ )
#define npyv__load(T_VEC, PTR) vec_vsx_ld(0, PTR)
- #else
+ #else // VX
#define npyv__load(T_VEC, PTR) vec_xl(0, PTR)
#endif
#endif
// unaligned store
-#if (defined(__GNUC__) && !defined(vec_xl)) || (defined(__clang__) && !defined(__IBMC__))
+#if defined (NPY_HAVE_VSX2) && ( \
+ (defined(__GNUC__) && !defined(vec_xl)) || (defined(__clang__) && !defined(__IBMC__)) \
+)
#define npyv__store(PTR, VEC) vec_vsx_st(VEC, 0, PTR)
-#else
+#else // VX
#define npyv__store(PTR, VEC) vec_xst(VEC, 0, PTR)
#endif
+// aligned load/store
+#if defined (NPY_HAVE_VSX)
+ #define npyv__loada(PTR) vec_ld(0, PTR)
+ #define npyv__storea(PTR, VEC) vec_st(VEC, 0, PTR)
+#else // VX
+ #define npyv__loada(PTR) vec_xl(0, PTR)
+ #define npyv__storea(PTR, VEC) vec_xst(VEC, 0, PTR)
+#endif
+
// avoid aliasing rules
#ifdef __cplusplus
template<typename T_PTR>
@@ -45,12 +58,16 @@
// load lower part
NPY_FINLINE npyv_u64 npyv__loadl(const void *ptr)
{
+#ifdef NPY_HAVE_VSX
#if defined(__clang__) && !defined(__IBMC__)
// vec_promote doesn't support doubleword on clang
return npyv_setall_u64(*npyv__ptr2u64(ptr));
#else
return vec_promote(*npyv__ptr2u64(ptr), 0);
#endif
+#else // VX
+ return vec_load_len((const unsigned long long*)ptr, 7);
+#endif
}
// store lower part
#define npyv__storel(PTR, VEC) \
@@ -62,11 +79,11 @@ NPY_FINLINE npyv_u64 npyv__loadl(const void *ptr)
/****************************
* load/store
****************************/
-#define NPYV_IMPL_VSX_MEM(SFX, DW_CAST) \
+#define NPYV_IMPL_VEC_MEM(SFX, DW_CAST) \
NPY_FINLINE npyv_##SFX npyv_load_##SFX(const npyv_lanetype_##SFX *ptr) \
{ return (npyv_##SFX)npyv__load(npyv_##SFX, (const npyv_lanetype_##DW_CAST*)ptr); } \
NPY_FINLINE npyv_##SFX npyv_loada_##SFX(const npyv_lanetype_##SFX *ptr) \
- { return (npyv_##SFX)vec_ld(0, (const npyv_lanetype_u32*)ptr); } \
+ { return (npyv_##SFX)npyv__loada((const npyv_lanetype_u32*)ptr); } \
NPY_FINLINE npyv_##SFX npyv_loads_##SFX(const npyv_lanetype_##SFX *ptr) \
{ return npyv_loada_##SFX(ptr); } \
NPY_FINLINE npyv_##SFX npyv_loadl_##SFX(const npyv_lanetype_##SFX *ptr) \
@@ -74,7 +91,7 @@ NPY_FINLINE npyv_u64 npyv__loadl(const void *ptr)
NPY_FINLINE void npyv_store_##SFX(npyv_lanetype_##SFX *ptr, npyv_##SFX vec) \
{ npyv__store((npyv_lanetype_##DW_CAST*)ptr, (npyv_##DW_CAST)vec); } \
NPY_FINLINE void npyv_storea_##SFX(npyv_lanetype_##SFX *ptr, npyv_##SFX vec) \
- { vec_st((npyv_u32)vec, 0, (npyv_lanetype_u32*)ptr); } \
+ { npyv__storea((npyv_lanetype_##DW_CAST*)ptr, (npyv_##DW_CAST)vec); } \
NPY_FINLINE void npyv_stores_##SFX(npyv_lanetype_##SFX *ptr, npyv_##SFX vec) \
{ npyv_storea_##SFX(ptr, vec); } \
NPY_FINLINE void npyv_storel_##SFX(npyv_lanetype_##SFX *ptr, npyv_##SFX vec) \
@@ -82,16 +99,18 @@ NPY_FINLINE npyv_u64 npyv__loadl(const void *ptr)
NPY_FINLINE void npyv_storeh_##SFX(npyv_lanetype_##SFX *ptr, npyv_##SFX vec) \
{ npyv__storeh(ptr, vec); }
-NPYV_IMPL_VSX_MEM(u8, u8)
-NPYV_IMPL_VSX_MEM(s8, s8)
-NPYV_IMPL_VSX_MEM(u16, u16)
-NPYV_IMPL_VSX_MEM(s16, s16)
-NPYV_IMPL_VSX_MEM(u32, u32)
-NPYV_IMPL_VSX_MEM(s32, s32)
-NPYV_IMPL_VSX_MEM(u64, f64)
-NPYV_IMPL_VSX_MEM(s64, f64)
-NPYV_IMPL_VSX_MEM(f32, f32)
-NPYV_IMPL_VSX_MEM(f64, f64)
+NPYV_IMPL_VEC_MEM(u8, u8)
+NPYV_IMPL_VEC_MEM(s8, s8)
+NPYV_IMPL_VEC_MEM(u16, u16)
+NPYV_IMPL_VEC_MEM(s16, s16)
+NPYV_IMPL_VEC_MEM(u32, u32)
+NPYV_IMPL_VEC_MEM(s32, s32)
+NPYV_IMPL_VEC_MEM(u64, f64)
+NPYV_IMPL_VEC_MEM(s64, f64)
+#if NPY_SIMD_F32
+NPYV_IMPL_VEC_MEM(f32, f32)
+#endif
+NPYV_IMPL_VEC_MEM(f64, f64)
/***************************
* Non-contiguous Load
@@ -106,8 +125,10 @@ NPY_FINLINE npyv_u32 npyv_loadn_u32(const npy_uint32 *ptr, npy_intp stride)
}
NPY_FINLINE npyv_s32 npyv_loadn_s32(const npy_int32 *ptr, npy_intp stride)
{ return (npyv_s32)npyv_loadn_u32((const npy_uint32*)ptr, stride); }
+#if NPY_SIMD_F32
NPY_FINLINE npyv_f32 npyv_loadn_f32(const float *ptr, npy_intp stride)
{ return (npyv_f32)npyv_loadn_u32((const npy_uint32*)ptr, stride); }
+#endif
//// 64
NPY_FINLINE npyv_u64 npyv_loadn_u64(const npy_uint64 *ptr, npy_intp stride)
{ return npyv_set_u64(ptr[0], ptr[stride]); }
@@ -128,8 +149,10 @@ NPY_FINLINE void npyv_storen_u32(npy_uint32 *ptr, npy_intp stride, npyv_u32 a)
}
NPY_FINLINE void npyv_storen_s32(npy_int32 *ptr, npy_intp stride, npyv_s32 a)
{ npyv_storen_u32((npy_uint32*)ptr, stride, (npyv_u32)a); }
+#if NPY_SIMD_F32
NPY_FINLINE void npyv_storen_f32(float *ptr, npy_intp stride, npyv_f32 a)
{ npyv_storen_u32((npy_uint32*)ptr, stride, (npyv_u32)a); }
+#endif
//// 64
NPY_FINLINE void npyv_storen_u64(npy_uint64 *ptr, npy_intp stride, npyv_u64 a)
{
@@ -149,6 +172,14 @@ NPY_FINLINE npyv_s32 npyv_load_till_s32(const npy_int32 *ptr, npy_uintp nlane, n
{
assert(nlane > 0);
npyv_s32 vfill = npyv_setall_s32(fill);
+#ifdef NPY_HAVE_VX
+ const unsigned blane = (unsigned short)nlane;
+ const npyv_u32 steps = npyv_set_u32(0, 1, 2, 3);
+ const npyv_u32 vlane = npyv_setall_u32((unsigned)blane);
+ const npyv_b32 mask = vec_cmpgt(vlane, steps);
+ npyv_s32 a = vec_load_len(ptr, blane*4-1);
+ return vec_sel(vfill, a, mask);
+#else
switch(nlane) {
case 1:
return vec_insert(ptr[0], vfill, 0);
@@ -164,10 +195,18 @@ NPY_FINLINE npyv_s32 npyv_load_till_s32(const npy_int32 *ptr, npy_uintp nlane, n
default:
return npyv_load_s32(ptr);
}
+#endif
}
// fill zero to rest lanes
NPY_FINLINE npyv_s32 npyv_load_tillz_s32(const npy_int32 *ptr, npy_uintp nlane)
-{ return npyv_load_till_s32(ptr, nlane, 0); }
+{
+#ifdef NPY_HAVE_VX
+ unsigned blane = ((nlane > 4) ? 4 : (unsigned)nlane)*4 - 1; // clamp to the 4 lanes; a 16-bit cast would wrap for nlane >= 65536
+ return vec_load_len(ptr, blane);
+#else
+ return npyv_load_till_s32(ptr, nlane, 0);
+#endif
+}
//// 64
NPY_FINLINE npyv_s64 npyv_load_till_s64(const npy_int64 *ptr, npy_uintp nlane, npy_int64 fill)
{
@@ -179,7 +218,14 @@ NPY_FINLINE npyv_s64 npyv_load_till_s64(const npy_int64 *ptr, npy_uintp nlane, n
}
// fill zero to rest lanes
NPY_FINLINE npyv_s64 npyv_load_tillz_s64(const npy_int64 *ptr, npy_uintp nlane)
-{ return npyv_load_till_s64(ptr, nlane, 0); }
+{
+#ifdef NPY_HAVE_VX
+ unsigned blane = (nlane > 2) ? 2 : (unsigned)nlane; // clamp to the 2 lanes; a 16-bit cast would wrap for nlane >= 65536
+ return vec_load_len((const signed long long*)ptr, blane*8-1);
+#else
+ return npyv_load_till_s64(ptr, nlane, 0);
+#endif
+}
/*********************************
* Non-contiguous partial load
*********************************/
@@ -226,6 +272,10 @@ NPY_FINLINE npyv_s64 npyv_loadn_tillz_s64(const npy_int64 *ptr, npy_intp stride,
NPY_FINLINE void npyv_store_till_s32(npy_int32 *ptr, npy_uintp nlane, npyv_s32 a)
{
assert(nlane > 0);
+#ifdef NPY_HAVE_VX
+ unsigned blane = (unsigned short)nlane;
+ vec_store_len(a, ptr, blane*4-1);
+#else
switch(nlane) {
case 1:
*ptr = vec_extract(a, 0);
@@ -240,16 +290,22 @@ NPY_FINLINE void npyv_store_till_s32(npy_int32 *ptr, npy_uintp nlane, npyv_s32 a
default:
npyv_store_s32(ptr, a);
}
+#endif
}
//// 64
NPY_FINLINE void npyv_store_till_s64(npy_int64 *ptr, npy_uintp nlane, npyv_s64 a)
{
assert(nlane > 0);
+#ifdef NPY_HAVE_VX
+ unsigned blane = (nlane > 2) ? 2 : (unsigned)nlane; // clamp to the 2 lanes; a 16-bit cast would wrap for nlane >= 65536
+ vec_store_len(a, (signed long long*)ptr, blane*8-1);
+#else
if (nlane == 1) {
npyv_storel_s64(ptr, a);
return;
}
npyv_store_s64(ptr, a);
+#endif
}
/*********************************
* Non-contiguous partial store
@@ -283,7 +339,7 @@ NPY_FINLINE void npyv_storen_till_s64(npy_int64 *ptr, npy_intp stride, npy_uintp
/*****************************************************************
* Implement partial load/store for u32/f32/u64/f64... via casting
*****************************************************************/
-#define NPYV_IMPL_VSX_REST_PARTIAL_TYPES(F_SFX, T_SFX) \
+#define NPYV_IMPL_VEC_REST_PARTIAL_TYPES(F_SFX, T_SFX) \
NPY_FINLINE npyv_##F_SFX npyv_load_till_##F_SFX \
(const npyv_lanetype_##F_SFX *ptr, npy_uintp nlane, npyv_lanetype_##F_SFX fill) \
{ \
@@ -338,39 +394,47 @@ NPY_FINLINE void npyv_storen_till_s64(npy_int64 *ptr, npy_intp stride, npy_uintp
); \
}
-NPYV_IMPL_VSX_REST_PARTIAL_TYPES(u32, s32)
-NPYV_IMPL_VSX_REST_PARTIAL_TYPES(f32, s32)
-NPYV_IMPL_VSX_REST_PARTIAL_TYPES(u64, s64)
-NPYV_IMPL_VSX_REST_PARTIAL_TYPES(f64, s64)
+NPYV_IMPL_VEC_REST_PARTIAL_TYPES(u32, s32)
+#if NPY_SIMD_F32
+NPYV_IMPL_VEC_REST_PARTIAL_TYPES(f32, s32)
+#endif
+NPYV_IMPL_VEC_REST_PARTIAL_TYPES(u64, s64)
+NPYV_IMPL_VEC_REST_PARTIAL_TYPES(f64, s64)
/*********************************
* Lookup table
*********************************/
// uses vector as indexes into a table
// that contains 32 elements of float32.
-NPY_FINLINE npyv_f32 npyv_lut32_f32(const float *table, npyv_u32 idx)
+NPY_FINLINE npyv_u32 npyv_lut32_u32(const npy_uint32 *table, npyv_u32 idx)
{
const unsigned i0 = vec_extract(idx, 0);
const unsigned i1 = vec_extract(idx, 1);
const unsigned i2 = vec_extract(idx, 2);
const unsigned i3 = vec_extract(idx, 3);
- npyv_f32 r = vec_promote(table[i0], 0);
+ npyv_u32 r = vec_promote(table[i0], 0);
r = vec_insert(table[i1], r, 1);
r = vec_insert(table[i2], r, 2);
r = vec_insert(table[i3], r, 3);
return r;
}
-NPY_FINLINE npyv_u32 npyv_lut32_u32(const npy_uint32 *table, npyv_u32 idx)
-{ return npyv_reinterpret_u32_f32(npyv_lut32_f32((const float*)table, idx)); }
NPY_FINLINE npyv_s32 npyv_lut32_s32(const npy_int32 *table, npyv_u32 idx)
-{ return npyv_reinterpret_s32_f32(npyv_lut32_f32((const float*)table, idx)); }
-
+{ return (npyv_s32)npyv_lut32_u32((const npy_uint32*)table, idx); }
+#if NPY_SIMD_F32
+ NPY_FINLINE npyv_f32 npyv_lut32_f32(const float *table, npyv_u32 idx)
+ { return (npyv_f32)npyv_lut32_u32((const npy_uint32*)table, idx); }
+#endif
// uses vector as indexes into a table
// that contains 16 elements of float64.
NPY_FINLINE npyv_f64 npyv_lut16_f64(const double *table, npyv_u64 idx)
{
+#ifdef NPY_HAVE_VX
+ const unsigned i0 = vec_extract((npyv_u32)idx, 1);
+ const unsigned i1 = vec_extract((npyv_u32)idx, 3);
+#else
const unsigned i0 = vec_extract((npyv_u32)idx, 0);
const unsigned i1 = vec_extract((npyv_u32)idx, 2);
+#endif
npyv_f64 r = vec_promote(table[i0], 0);
r = vec_insert(table[i1], r, 1);
return r;
@@ -380,4 +444,4 @@ NPY_FINLINE npyv_u64 npyv_lut16_u64(const npy_uint64 *table, npyv_u64 idx)
NPY_FINLINE npyv_s64 npyv_lut16_s64(const npy_int64 *table, npyv_u64 idx)
{ return npyv_reinterpret_s64_f64(npyv_lut16_f64((const double*)table, idx)); }
-#endif // _NPY_SIMD_VSX_MEMORY_H
+#endif // _NPY_SIMD_VEC_MEMORY_H
diff --git a/numpy/core/src/common/simd/vsx/misc.h b/numpy/core/src/common/simd/vec/misc.h
index f7a0cdd5c..c4f35cfc0 100644
--- a/numpy/core/src/common/simd/vsx/misc.h
+++ b/numpy/core/src/common/simd/vec/misc.h
@@ -2,8 +2,8 @@
#error "Not a standalone header"
#endif
-#ifndef _NPY_SIMD_VSX_MISC_H
-#define _NPY_SIMD_VSX_MISC_H
+#ifndef _NPY_SIMD_VEC_MISC_H
+#define _NPY_SIMD_VEC_MISC_H
// vector with zero lanes
#define npyv_zero_u8() ((npyv_u8) npyv_setall_s32(0))
@@ -14,26 +14,30 @@
#define npyv_zero_s32() npyv_setall_s32(0)
#define npyv_zero_u64() ((npyv_u64) npyv_setall_s32(0))
#define npyv_zero_s64() ((npyv_s64) npyv_setall_s32(0))
-#define npyv_zero_f32() npyv_setall_f32(0.0f)
+#if NPY_SIMD_F32
+ #define npyv_zero_f32() npyv_setall_f32(0.0f)
+#endif
#define npyv_zero_f64() npyv_setall_f64(0.0)
// vector with a specific value set to all lanes
// the safest way to generate vsplti* and vsplt* instructions
-#define NPYV_IMPL_VSX_SPLTB(T_VEC, V) ((T_VEC){V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V})
-#define NPYV_IMPL_VSX_SPLTH(T_VEC, V) ((T_VEC){V, V, V, V, V, V, V, V})
-#define NPYV_IMPL_VSX_SPLTW(T_VEC, V) ((T_VEC){V, V, V, V})
-#define NPYV_IMPL_VSX_SPLTD(T_VEC, V) ((T_VEC){V, V})
-
-#define npyv_setall_u8(VAL) NPYV_IMPL_VSX_SPLTB(npyv_u8, (unsigned char)VAL)
-#define npyv_setall_s8(VAL) NPYV_IMPL_VSX_SPLTB(npyv_s8, (signed char)VAL)
-#define npyv_setall_u16(VAL) NPYV_IMPL_VSX_SPLTH(npyv_u16, (unsigned short)VAL)
-#define npyv_setall_s16(VAL) NPYV_IMPL_VSX_SPLTH(npyv_s16, (short)VAL)
-#define npyv_setall_u32(VAL) NPYV_IMPL_VSX_SPLTW(npyv_u32, (unsigned int)VAL)
-#define npyv_setall_s32(VAL) NPYV_IMPL_VSX_SPLTW(npyv_s32, (int)VAL)
-#define npyv_setall_f32(VAL) NPYV_IMPL_VSX_SPLTW(npyv_f32, VAL)
-#define npyv_setall_u64(VAL) NPYV_IMPL_VSX_SPLTD(npyv_u64, (npy_uint64)VAL)
-#define npyv_setall_s64(VAL) NPYV_IMPL_VSX_SPLTD(npyv_s64, (npy_int64)VAL)
-#define npyv_setall_f64(VAL) NPYV_IMPL_VSX_SPLTD(npyv_f64, VAL)
+#define NPYV_IMPL_VEC_SPLTB(T_VEC, V) ((T_VEC){V, V, V, V, V, V, V, V, V, V, V, V, V, V, V, V})
+#define NPYV_IMPL_VEC_SPLTH(T_VEC, V) ((T_VEC){V, V, V, V, V, V, V, V})
+#define NPYV_IMPL_VEC_SPLTW(T_VEC, V) ((T_VEC){V, V, V, V})
+#define NPYV_IMPL_VEC_SPLTD(T_VEC, V) ((T_VEC){V, V})
+
+#define npyv_setall_u8(VAL) NPYV_IMPL_VEC_SPLTB(npyv_u8, (unsigned char)VAL)
+#define npyv_setall_s8(VAL) NPYV_IMPL_VEC_SPLTB(npyv_s8, (signed char)VAL)
+#define npyv_setall_u16(VAL) NPYV_IMPL_VEC_SPLTH(npyv_u16, (unsigned short)VAL)
+#define npyv_setall_s16(VAL) NPYV_IMPL_VEC_SPLTH(npyv_s16, (short)VAL)
+#define npyv_setall_u32(VAL) NPYV_IMPL_VEC_SPLTW(npyv_u32, (unsigned int)VAL)
+#define npyv_setall_s32(VAL) NPYV_IMPL_VEC_SPLTW(npyv_s32, (int)VAL)
+#if NPY_SIMD_F32
+ #define npyv_setall_f32(VAL) NPYV_IMPL_VEC_SPLTW(npyv_f32, VAL)
+#endif
+#define npyv_setall_u64(VAL) NPYV_IMPL_VEC_SPLTD(npyv_u64, (npy_uint64)VAL)
+#define npyv_setall_s64(VAL) NPYV_IMPL_VEC_SPLTD(npyv_s64, (npy_int64)VAL)
+#define npyv_setall_f64(VAL) NPYV_IMPL_VEC_SPLTD(npyv_f64, VAL)
// vector with specific values set to each lane and
// set a specific value to all remained lanes
@@ -45,7 +49,9 @@
#define npyv_setf_s32(FILL, ...) ((npyv_s32){NPYV__SET_FILL_4(int, FILL, __VA_ARGS__)})
#define npyv_setf_u64(FILL, ...) ((npyv_u64){NPYV__SET_FILL_2(npy_int64, FILL, __VA_ARGS__)})
#define npyv_setf_s64(FILL, ...) ((npyv_s64){NPYV__SET_FILL_2(npy_int64, FILL, __VA_ARGS__)})
-#define npyv_setf_f32(FILL, ...) ((npyv_f32){NPYV__SET_FILL_4(float, FILL, __VA_ARGS__)})
+#if NPY_SIMD_F32
+ #define npyv_setf_f32(FILL, ...) ((npyv_f32){NPYV__SET_FILL_4(float, FILL, __VA_ARGS__)})
+#endif
#define npyv_setf_f64(FILL, ...) ((npyv_f64){NPYV__SET_FILL_2(double, FILL, __VA_ARGS__)})
// vector with specific values set to each lane and
@@ -58,7 +64,9 @@
#define npyv_set_s32(...) npyv_setf_s32(0, __VA_ARGS__)
#define npyv_set_u64(...) npyv_setf_u64(0, __VA_ARGS__)
#define npyv_set_s64(...) npyv_setf_s64(0, __VA_ARGS__)
-#define npyv_set_f32(...) npyv_setf_f32(0, __VA_ARGS__)
+#if NPY_SIMD_F32
+ #define npyv_set_f32(...) npyv_setf_f32(0, __VA_ARGS__)
+#endif
#define npyv_set_f64(...) npyv_setf_f64(0, __VA_ARGS__)
// Per lane select
@@ -70,7 +78,9 @@
#define npyv_select_s32 npyv_select_u8
#define npyv_select_u64 npyv_select_u8
#define npyv_select_s64 npyv_select_u8
-#define npyv_select_f32 npyv_select_u8
+#if NPY_SIMD_F32
+ #define npyv_select_f32 npyv_select_u8
+#endif
#define npyv_select_f64 npyv_select_u8
// Reinterpret
@@ -82,7 +92,9 @@
#define npyv_reinterpret_u8_s32 npyv_reinterpret_u8_s8
#define npyv_reinterpret_u8_u64 npyv_reinterpret_u8_s8
#define npyv_reinterpret_u8_s64 npyv_reinterpret_u8_s8
-#define npyv_reinterpret_u8_f32 npyv_reinterpret_u8_s8
+#if NPY_SIMD_F32
+ #define npyv_reinterpret_u8_f32 npyv_reinterpret_u8_s8
+#endif
#define npyv_reinterpret_u8_f64 npyv_reinterpret_u8_s8
#define npyv_reinterpret_s8_s8(X) X
@@ -93,7 +105,9 @@
#define npyv_reinterpret_s8_s32 npyv_reinterpret_s8_u8
#define npyv_reinterpret_s8_u64 npyv_reinterpret_s8_u8
#define npyv_reinterpret_s8_s64 npyv_reinterpret_s8_u8
-#define npyv_reinterpret_s8_f32 npyv_reinterpret_s8_u8
+#if NPY_SIMD_F32
+ #define npyv_reinterpret_s8_f32 npyv_reinterpret_s8_u8
+#endif
#define npyv_reinterpret_s8_f64 npyv_reinterpret_s8_u8
#define npyv_reinterpret_u16_u16(X) X
@@ -104,7 +118,9 @@
#define npyv_reinterpret_u16_s32 npyv_reinterpret_u16_u8
#define npyv_reinterpret_u16_u64 npyv_reinterpret_u16_u8
#define npyv_reinterpret_u16_s64 npyv_reinterpret_u16_u8
-#define npyv_reinterpret_u16_f32 npyv_reinterpret_u16_u8
+#if NPY_SIMD_F32
+ #define npyv_reinterpret_u16_f32 npyv_reinterpret_u16_u8
+#endif
#define npyv_reinterpret_u16_f64 npyv_reinterpret_u16_u8
#define npyv_reinterpret_s16_s16(X) X
@@ -115,7 +131,9 @@
#define npyv_reinterpret_s16_s32 npyv_reinterpret_s16_u8
#define npyv_reinterpret_s16_u64 npyv_reinterpret_s16_u8
#define npyv_reinterpret_s16_s64 npyv_reinterpret_s16_u8
-#define npyv_reinterpret_s16_f32 npyv_reinterpret_s16_u8
+#if NPY_SIMD_F32
+ #define npyv_reinterpret_s16_f32 npyv_reinterpret_s16_u8
+#endif
#define npyv_reinterpret_s16_f64 npyv_reinterpret_s16_u8
#define npyv_reinterpret_u32_u32(X) X
@@ -126,7 +144,9 @@
#define npyv_reinterpret_u32_s32 npyv_reinterpret_u32_u8
#define npyv_reinterpret_u32_u64 npyv_reinterpret_u32_u8
#define npyv_reinterpret_u32_s64 npyv_reinterpret_u32_u8
-#define npyv_reinterpret_u32_f32 npyv_reinterpret_u32_u8
+#if NPY_SIMD_F32
+ #define npyv_reinterpret_u32_f32 npyv_reinterpret_u32_u8
+#endif
#define npyv_reinterpret_u32_f64 npyv_reinterpret_u32_u8
#define npyv_reinterpret_s32_s32(X) X
@@ -137,7 +157,9 @@
#define npyv_reinterpret_s32_u32 npyv_reinterpret_s32_u8
#define npyv_reinterpret_s32_u64 npyv_reinterpret_s32_u8
#define npyv_reinterpret_s32_s64 npyv_reinterpret_s32_u8
-#define npyv_reinterpret_s32_f32 npyv_reinterpret_s32_u8
+#if NPY_SIMD_F32
+ #define npyv_reinterpret_s32_f32 npyv_reinterpret_s32_u8
+#endif
#define npyv_reinterpret_s32_f64 npyv_reinterpret_s32_u8
#define npyv_reinterpret_u64_u64(X) X
@@ -148,7 +170,9 @@
#define npyv_reinterpret_u64_u32 npyv_reinterpret_u64_u8
#define npyv_reinterpret_u64_s32 npyv_reinterpret_u64_u8
#define npyv_reinterpret_u64_s64 npyv_reinterpret_u64_u8
-#define npyv_reinterpret_u64_f32 npyv_reinterpret_u64_u8
+#if NPY_SIMD_F32
+ #define npyv_reinterpret_u64_f32 npyv_reinterpret_u64_u8
+#endif
#define npyv_reinterpret_u64_f64 npyv_reinterpret_u64_u8
#define npyv_reinterpret_s64_s64(X) X
@@ -159,19 +183,23 @@
#define npyv_reinterpret_s64_u32 npyv_reinterpret_s64_u8
#define npyv_reinterpret_s64_s32 npyv_reinterpret_s64_u8
#define npyv_reinterpret_s64_u64 npyv_reinterpret_s64_u8
-#define npyv_reinterpret_s64_f32 npyv_reinterpret_s64_u8
+#if NPY_SIMD_F32
+ #define npyv_reinterpret_s64_f32 npyv_reinterpret_s64_u8
+#endif
#define npyv_reinterpret_s64_f64 npyv_reinterpret_s64_u8
-#define npyv_reinterpret_f32_f32(X) X
-#define npyv_reinterpret_f32_u8(X) ((npyv_f32)X)
-#define npyv_reinterpret_f32_s8 npyv_reinterpret_f32_u8
-#define npyv_reinterpret_f32_u16 npyv_reinterpret_f32_u8
-#define npyv_reinterpret_f32_s16 npyv_reinterpret_f32_u8
-#define npyv_reinterpret_f32_u32 npyv_reinterpret_f32_u8
-#define npyv_reinterpret_f32_s32 npyv_reinterpret_f32_u8
-#define npyv_reinterpret_f32_u64 npyv_reinterpret_f32_u8
-#define npyv_reinterpret_f32_s64 npyv_reinterpret_f32_u8
-#define npyv_reinterpret_f32_f64 npyv_reinterpret_f32_u8
+#if NPY_SIMD_F32
+ #define npyv_reinterpret_f32_f32(X) X
+ #define npyv_reinterpret_f32_u8(X) ((npyv_f32)X)
+ #define npyv_reinterpret_f32_s8 npyv_reinterpret_f32_u8
+ #define npyv_reinterpret_f32_u16 npyv_reinterpret_f32_u8
+ #define npyv_reinterpret_f32_s16 npyv_reinterpret_f32_u8
+ #define npyv_reinterpret_f32_u32 npyv_reinterpret_f32_u8
+ #define npyv_reinterpret_f32_s32 npyv_reinterpret_f32_u8
+ #define npyv_reinterpret_f32_u64 npyv_reinterpret_f32_u8
+ #define npyv_reinterpret_f32_s64 npyv_reinterpret_f32_u8
+ #define npyv_reinterpret_f32_f64 npyv_reinterpret_f32_u8
+#endif
#define npyv_reinterpret_f64_f64(X) X
#define npyv_reinterpret_f64_u8(X) ((npyv_f64)X)
@@ -182,9 +210,10 @@
#define npyv_reinterpret_f64_s32 npyv_reinterpret_f64_u8
#define npyv_reinterpret_f64_u64 npyv_reinterpret_f64_u8
#define npyv_reinterpret_f64_s64 npyv_reinterpret_f64_u8
-#define npyv_reinterpret_f64_f32 npyv_reinterpret_f64_u8
-
+#if NPY_SIMD_F32
+ #define npyv_reinterpret_f64_f32 npyv_reinterpret_f64_u8
+#endif
// Only required by AVX2/AVX512
#define npyv_cleanup() ((void)0)
-#endif // _NPY_SIMD_VSX_MISC_H
+#endif // _NPY_SIMD_VEC_MISC_H
diff --git a/numpy/core/src/common/simd/vsx/operators.h b/numpy/core/src/common/simd/vec/operators.h
index b01d85321..8b58676e7 100644
--- a/numpy/core/src/common/simd/vsx/operators.h
+++ b/numpy/core/src/common/simd/vec/operators.h
@@ -2,8 +2,8 @@
#error "Not a standalone header"
#endif
-#ifndef _NPY_SIMD_VSX_OPERATORS_H
-#define _NPY_SIMD_VSX_OPERATORS_H
+#ifndef _NPY_SIMD_VEC_OPERATORS_H
+#define _NPY_SIMD_VEC_OPERATORS_H
/***************************
* Shifting
@@ -11,11 +11,11 @@
// Left
#define npyv_shl_u16(A, C) vec_sl(A, npyv_setall_u16(C))
-#define npyv_shl_s16(A, C) vec_sl(A, npyv_setall_u16(C))
+#define npyv_shl_s16(A, C) vec_sl_s16(A, npyv_setall_u16(C))
#define npyv_shl_u32(A, C) vec_sl(A, npyv_setall_u32(C))
-#define npyv_shl_s32(A, C) vec_sl(A, npyv_setall_u32(C))
+#define npyv_shl_s32(A, C) vec_sl_s32(A, npyv_setall_u32(C))
#define npyv_shl_u64(A, C) vec_sl(A, npyv_setall_u64(C))
-#define npyv_shl_s64(A, C) vec_sl(A, npyv_setall_u64(C))
+#define npyv_shl_s64(A, C) vec_sl_s64(A, npyv_setall_u64(C))
// Left by an immediate constant
#define npyv_shli_u16 npyv_shl_u16
@@ -27,11 +27,11 @@
// Right
#define npyv_shr_u16(A, C) vec_sr(A, npyv_setall_u16(C))
-#define npyv_shr_s16(A, C) vec_sra(A, npyv_setall_u16(C))
+#define npyv_shr_s16(A, C) vec_sra_s16(A, npyv_setall_u16(C))
#define npyv_shr_u32(A, C) vec_sr(A, npyv_setall_u32(C))
-#define npyv_shr_s32(A, C) vec_sra(A, npyv_setall_u32(C))
+#define npyv_shr_s32(A, C) vec_sra_s32(A, npyv_setall_u32(C))
#define npyv_shr_u64(A, C) vec_sr(A, npyv_setall_u64(C))
-#define npyv_shr_s64(A, C) vec_sra(A, npyv_setall_u64(C))
+#define npyv_shr_s64(A, C) vec_sra_s64(A, npyv_setall_u64(C))
// Right by an immediate constant
#define npyv_shri_u16 npyv_shr_u16
@@ -44,15 +44,15 @@
/***************************
* Logical
***************************/
-#define NPYV_IMPL_VSX_BIN_CAST(INTRIN, SFX, CAST) \
+#define NPYV_IMPL_VEC_BIN_CAST(INTRIN, SFX, CAST) \
NPY_FINLINE npyv_##SFX npyv_##INTRIN##_##SFX(npyv_##SFX a, npyv_##SFX b) \
{ return (npyv_##SFX)vec_##INTRIN((CAST)a, (CAST)b); }
// Up to GCC 6 logical intrinsics don't support bool long long
#if defined(__GNUC__) && __GNUC__ <= 6
- #define NPYV_IMPL_VSX_BIN_B64(INTRIN) NPYV_IMPL_VSX_BIN_CAST(INTRIN, b64, npyv_u64)
+ #define NPYV_IMPL_VEC_BIN_B64(INTRIN) NPYV_IMPL_VEC_BIN_CAST(INTRIN, b64, npyv_u64)
#else
- #define NPYV_IMPL_VSX_BIN_B64(INTRIN) NPYV_IMPL_VSX_BIN_CAST(INTRIN, b64, npyv_b64)
+ #define NPYV_IMPL_VEC_BIN_B64(INTRIN) NPYV_IMPL_VEC_BIN_CAST(INTRIN, b64, npyv_b64)
#endif
// AND
#define npyv_and_u8 vec_and
@@ -63,12 +63,14 @@
#define npyv_and_s32 vec_and
#define npyv_and_u64 vec_and
#define npyv_and_s64 vec_and
-#define npyv_and_f32 vec_and
+#if NPY_SIMD_F32
+ #define npyv_and_f32 vec_and
+#endif
#define npyv_and_f64 vec_and
#define npyv_and_b8 vec_and
#define npyv_and_b16 vec_and
#define npyv_and_b32 vec_and
-NPYV_IMPL_VSX_BIN_B64(and)
+NPYV_IMPL_VEC_BIN_B64(and)
// OR
#define npyv_or_u8 vec_or
@@ -79,12 +81,14 @@ NPYV_IMPL_VSX_BIN_B64(and)
#define npyv_or_s32 vec_or
#define npyv_or_u64 vec_or
#define npyv_or_s64 vec_or
-#define npyv_or_f32 vec_or
+#if NPY_SIMD_F32
+ #define npyv_or_f32 vec_or
+#endif
#define npyv_or_f64 vec_or
#define npyv_or_b8 vec_or
#define npyv_or_b16 vec_or
#define npyv_or_b32 vec_or
-NPYV_IMPL_VSX_BIN_B64(or)
+NPYV_IMPL_VEC_BIN_B64(or)
// XOR
#define npyv_xor_u8 vec_xor
@@ -95,16 +99,18 @@ NPYV_IMPL_VSX_BIN_B64(or)
#define npyv_xor_s32 vec_xor
#define npyv_xor_u64 vec_xor
#define npyv_xor_s64 vec_xor
-#define npyv_xor_f32 vec_xor
+#if NPY_SIMD_F32
+ #define npyv_xor_f32 vec_xor
+#endif
#define npyv_xor_f64 vec_xor
#define npyv_xor_b8 vec_xor
#define npyv_xor_b16 vec_xor
#define npyv_xor_b32 vec_xor
-NPYV_IMPL_VSX_BIN_B64(xor)
+NPYV_IMPL_VEC_BIN_B64(xor)
// NOT
// note: we implement npyv_not_b*(boolean types) for internal use*/
-#define NPYV_IMPL_VSX_NOT_INT(VEC_LEN) \
+#define NPYV_IMPL_VEC_NOT_INT(VEC_LEN) \
NPY_FINLINE npyv_u##VEC_LEN npyv_not_u##VEC_LEN(npyv_u##VEC_LEN a) \
{ return vec_nor(a, a); } \
NPY_FINLINE npyv_s##VEC_LEN npyv_not_s##VEC_LEN(npyv_s##VEC_LEN a) \
@@ -112,13 +118,13 @@ NPYV_IMPL_VSX_BIN_B64(xor)
NPY_FINLINE npyv_b##VEC_LEN npyv_not_b##VEC_LEN(npyv_b##VEC_LEN a) \
{ return vec_nor(a, a); }
-NPYV_IMPL_VSX_NOT_INT(8)
-NPYV_IMPL_VSX_NOT_INT(16)
-NPYV_IMPL_VSX_NOT_INT(32)
+NPYV_IMPL_VEC_NOT_INT(8)
+NPYV_IMPL_VEC_NOT_INT(16)
+NPYV_IMPL_VEC_NOT_INT(32)
-// up to gcc5 vec_nor doesn't support bool long long
-#if defined(__GNUC__) && __GNUC__ > 5
- NPYV_IMPL_VSX_NOT_INT(64)
+// on ppc64, up to gcc5 vec_nor doesn't support bool long long
+#if defined(NPY_HAVE_VSX) && defined(__GNUC__) && __GNUC__ > 5
+ NPYV_IMPL_VEC_NOT_INT(64)
#else
NPY_FINLINE npyv_u64 npyv_not_u64(npyv_u64 a)
{ return vec_nor(a, a); }
@@ -128,16 +134,23 @@ NPYV_IMPL_VSX_NOT_INT(32)
{ return (npyv_b64)vec_nor((npyv_u64)a, (npyv_u64)a); }
#endif
-NPY_FINLINE npyv_f32 npyv_not_f32(npyv_f32 a)
-{ return vec_nor(a, a); }
+#if NPY_SIMD_F32
+ NPY_FINLINE npyv_f32 npyv_not_f32(npyv_f32 a)
+ { return vec_nor(a, a); }
+#endif
NPY_FINLINE npyv_f64 npyv_not_f64(npyv_f64 a)
{ return vec_nor(a, a); }
// ANDC, ORC and XNOR
#define npyv_andc_u8 vec_andc
#define npyv_andc_b8 vec_andc
-#define npyv_orc_b8 vec_orc
-#define npyv_xnor_b8 vec_eqv
+#if defined(NPY_HAVE_VXE) || defined(NPY_HAVE_VSX)
+ #define npyv_orc_b8 vec_orc
+ #define npyv_xnor_b8 vec_eqv
+#else
+ #define npyv_orc_b8(A, B) npyv_or_b8(npyv_not_b8(B), A)
+ #define npyv_xnor_b8(A, B) npyv_not_b8(npyv_xor_b8(B, A))
+#endif
/***************************
* Comparison
@@ -152,7 +165,9 @@ NPY_FINLINE npyv_f64 npyv_not_f64(npyv_f64 a)
#define npyv_cmpeq_s32 vec_cmpeq
#define npyv_cmpeq_u64 vec_cmpeq
#define npyv_cmpeq_s64 vec_cmpeq
-#define npyv_cmpeq_f32 vec_cmpeq
+#if NPY_SIMD_F32
+ #define npyv_cmpeq_f32 vec_cmpeq
+#endif
#define npyv_cmpeq_f64 vec_cmpeq
// Int Not Equal
@@ -177,7 +192,9 @@ NPY_FINLINE npyv_f64 npyv_not_f64(npyv_f64 a)
#define npyv_cmpneq_s32(A, B) npyv_not_b32(vec_cmpeq(A, B))
#define npyv_cmpneq_u64(A, B) npyv_not_b64(vec_cmpeq(A, B))
#define npyv_cmpneq_s64(A, B) npyv_not_b64(vec_cmpeq(A, B))
- #define npyv_cmpneq_f32(A, B) npyv_not_b32(vec_cmpeq(A, B))
+ #if NPY_SIMD_F32
+ #define npyv_cmpneq_f32(A, B) npyv_not_b32(vec_cmpeq(A, B))
+ #endif
#define npyv_cmpneq_f64(A, B) npyv_not_b64(vec_cmpeq(A, B))
#endif
@@ -190,12 +207,14 @@ NPY_FINLINE npyv_f64 npyv_not_f64(npyv_f64 a)
#define npyv_cmpgt_s32 vec_cmpgt
#define npyv_cmpgt_u64 vec_cmpgt
#define npyv_cmpgt_s64 vec_cmpgt
-#define npyv_cmpgt_f32 vec_cmpgt
+#if NPY_SIMD_F32
+ #define npyv_cmpgt_f32 vec_cmpgt
+#endif
#define npyv_cmpgt_f64 vec_cmpgt
// Greater than or equal
-// up to gcc5 vec_cmpge only supports single and double precision
-#if defined(__GNUC__) && __GNUC__ > 5
+// On ppc64le, up to gcc5 vec_cmpge only supports single and double precision
+#if defined(NPY_HAVE_VX) || (defined(__GNUC__) && __GNUC__ > 5)
#define npyv_cmpge_u8 vec_cmpge
#define npyv_cmpge_s8 vec_cmpge
#define npyv_cmpge_u16 vec_cmpge
@@ -214,7 +233,9 @@ NPY_FINLINE npyv_f64 npyv_not_f64(npyv_f64 a)
#define npyv_cmpge_u64(A, B) npyv_not_b64(vec_cmpgt(B, A))
#define npyv_cmpge_s64(A, B) npyv_not_b64(vec_cmpgt(B, A))
#endif
-#define npyv_cmpge_f32 vec_cmpge
+#if NPY_SIMD_F32
+ #define npyv_cmpge_f32 vec_cmpge
+#endif
#define npyv_cmpge_f64 vec_cmpge
// Less than
@@ -226,7 +247,9 @@ NPY_FINLINE npyv_f64 npyv_not_f64(npyv_f64 a)
#define npyv_cmplt_s32(A, B) npyv_cmpgt_s32(B, A)
#define npyv_cmplt_u64(A, B) npyv_cmpgt_u64(B, A)
#define npyv_cmplt_s64(A, B) npyv_cmpgt_s64(B, A)
-#define npyv_cmplt_f32(A, B) npyv_cmpgt_f32(B, A)
+#if NPY_SIMD_F32
+ #define npyv_cmplt_f32(A, B) npyv_cmpgt_f32(B, A)
+#endif
#define npyv_cmplt_f64(A, B) npyv_cmpgt_f64(B, A)
// Less than or equal
@@ -238,13 +261,17 @@ NPY_FINLINE npyv_f64 npyv_not_f64(npyv_f64 a)
#define npyv_cmple_s32(A, B) npyv_cmpge_s32(B, A)
#define npyv_cmple_u64(A, B) npyv_cmpge_u64(B, A)
#define npyv_cmple_s64(A, B) npyv_cmpge_s64(B, A)
-#define npyv_cmple_f32(A, B) npyv_cmpge_f32(B, A)
+#if NPY_SIMD_F32
+ #define npyv_cmple_f32(A, B) npyv_cmpge_f32(B, A)
+#endif
#define npyv_cmple_f64(A, B) npyv_cmpge_f64(B, A)
// check special cases
-NPY_FINLINE npyv_b32 npyv_notnan_f32(npyv_f32 a)
-{ return vec_cmpeq(a, a); }
+#if NPY_SIMD_F32
+ NPY_FINLINE npyv_b32 npyv_notnan_f32(npyv_f32 a)
+ { return vec_cmpeq(a, a); }
+#endif
NPY_FINLINE npyv_b64 npyv_notnan_f64(npyv_f64 a)
{ return vec_cmpeq(a, a); }
-#endif // _NPY_SIMD_VSX_OPERATORS_H
+#endif // _NPY_SIMD_VEC_OPERATORS_H
diff --git a/numpy/core/src/common/simd/vsx/reorder.h b/numpy/core/src/common/simd/vec/reorder.h
index 6533e5093..b60b9287d 100644
--- a/numpy/core/src/common/simd/vsx/reorder.h
+++ b/numpy/core/src/common/simd/vec/reorder.h
@@ -2,8 +2,8 @@
#error "Not a standalone header"
#endif
-#ifndef _NPY_SIMD_VSX_REORDER_H
-#define _NPY_SIMD_VSX_REORDER_H
+#ifndef _NPY_SIMD_VEC_REORDER_H
+#define _NPY_SIMD_VEC_REORDER_H
// combine lower part of two vectors
#define npyv__combinel(A, B) vec_mergeh((npyv_u64)(A), (npyv_u64)(B))
@@ -15,7 +15,9 @@
#define npyv_combinel_s32(A, B) ((npyv_s32)npyv__combinel(A, B))
#define npyv_combinel_u64 vec_mergeh
#define npyv_combinel_s64 vec_mergeh
-#define npyv_combinel_f32(A, B) ((npyv_f32)npyv__combinel(A, B))
+#if NPY_SIMD_F32
+ #define npyv_combinel_f32(A, B) ((npyv_f32)npyv__combinel(A, B))
+#endif
#define npyv_combinel_f64 vec_mergeh
// combine higher part of two vectors
@@ -28,14 +30,16 @@
#define npyv_combineh_s32(A, B) ((npyv_s32)npyv__combineh(A, B))
#define npyv_combineh_u64 vec_mergel
#define npyv_combineh_s64 vec_mergel
-#define npyv_combineh_f32(A, B) ((npyv_f32)npyv__combineh(A, B))
+#if NPY_SIMD_F32
+ #define npyv_combineh_f32(A, B) ((npyv_f32)npyv__combineh(A, B))
+#endif
#define npyv_combineh_f64 vec_mergel
/*
* combine: combine two vectors from lower and higher parts of two other vectors
* zip: interleave two vectors
*/
-#define NPYV_IMPL_VSX_COMBINE_ZIP(T_VEC, SFX) \
+#define NPYV_IMPL_VEC_COMBINE_ZIP(T_VEC, SFX) \
NPY_FINLINE T_VEC##x2 npyv_combine_##SFX(T_VEC a, T_VEC b) \
{ \
T_VEC##x2 r; \
@@ -51,16 +55,18 @@
return r; \
}
-NPYV_IMPL_VSX_COMBINE_ZIP(npyv_u8, u8)
-NPYV_IMPL_VSX_COMBINE_ZIP(npyv_s8, s8)
-NPYV_IMPL_VSX_COMBINE_ZIP(npyv_u16, u16)
-NPYV_IMPL_VSX_COMBINE_ZIP(npyv_s16, s16)
-NPYV_IMPL_VSX_COMBINE_ZIP(npyv_u32, u32)
-NPYV_IMPL_VSX_COMBINE_ZIP(npyv_s32, s32)
-NPYV_IMPL_VSX_COMBINE_ZIP(npyv_u64, u64)
-NPYV_IMPL_VSX_COMBINE_ZIP(npyv_s64, s64)
-NPYV_IMPL_VSX_COMBINE_ZIP(npyv_f32, f32)
-NPYV_IMPL_VSX_COMBINE_ZIP(npyv_f64, f64)
+NPYV_IMPL_VEC_COMBINE_ZIP(npyv_u8, u8)
+NPYV_IMPL_VEC_COMBINE_ZIP(npyv_s8, s8)
+NPYV_IMPL_VEC_COMBINE_ZIP(npyv_u16, u16)
+NPYV_IMPL_VEC_COMBINE_ZIP(npyv_s16, s16)
+NPYV_IMPL_VEC_COMBINE_ZIP(npyv_u32, u32)
+NPYV_IMPL_VEC_COMBINE_ZIP(npyv_s32, s32)
+NPYV_IMPL_VEC_COMBINE_ZIP(npyv_u64, u64)
+NPYV_IMPL_VEC_COMBINE_ZIP(npyv_s64, s64)
+#if NPY_SIMD_F32
+ NPYV_IMPL_VEC_COMBINE_ZIP(npyv_f32, f32)
+#endif
+NPYV_IMPL_VEC_COMBINE_ZIP(npyv_f64, f64)
// Reverse elements of each 64-bit lane
NPY_FINLINE npyv_u8 npyv_rev64_u8(npyv_u8 a)
@@ -100,7 +106,9 @@ NPY_FINLINE npyv_u32 npyv_rev64_u32(npyv_u32 a)
}
NPY_FINLINE npyv_s32 npyv_rev64_s32(npyv_s32 a)
{ return (npyv_s32)npyv_rev64_u32((npyv_u32)a); }
-NPY_FINLINE npyv_f32 npyv_rev64_f32(npyv_f32 a)
-{ return (npyv_f32)npyv_rev64_u32((npyv_u32)a); }
+#if NPY_SIMD_F32
+ NPY_FINLINE npyv_f32 npyv_rev64_f32(npyv_f32 a)
+ { return (npyv_f32)npyv_rev64_u32((npyv_u32)a); }
+#endif
-#endif // _NPY_SIMD_VSX_REORDER_H
+#endif // _NPY_SIMD_VEC_REORDER_H
diff --git a/numpy/core/src/common/simd/vec/utils.h b/numpy/core/src/common/simd/vec/utils.h
new file mode 100644
index 000000000..f8b28cfeb
--- /dev/null
+++ b/numpy/core/src/common/simd/vec/utils.h
@@ -0,0 +1,84 @@
+#ifndef NPY_SIMD
+ #error "Not a standalone header"
+#endif
+
+#ifndef _NPY_SIMD_VEC_UTILS_H
+#define _NPY_SIMD_VEC_UTILS_H
+
+// the zvector API on gcc/clang may lack some or all of the following intrinsics
+#ifdef NPY_HAVE_VX
+ #ifndef vec_neg
+ #define vec_neg(a) (-(a)) // Vector Negate
+ #endif
+ #ifndef vec_add
+ #define vec_add(a, b) ((a) + (b)) // Vector Add
+ #endif
+ #ifndef vec_sub
+ #define vec_sub(a, b) ((a) - (b)) // Vector Subtract
+ #endif
+ #ifndef vec_mul
+ #define vec_mul(a, b) ((a) * (b)) // Vector Multiply
+ #endif
+ #ifndef vec_div
+ #define vec_div(a, b) ((a) / (b)) // Vector Divide
+ #endif
+ #ifndef vec_neg // NOTE(review): repeats the vec_neg guard above; looks redundant
+ #define vec_neg(a) (-(a))
+ #endif
+ #ifndef vec_and
+ #define vec_and(a, b) ((a) & (b)) // Vector AND
+ #endif
+ #ifndef vec_or
+ #define vec_or(a, b) ((a) | (b)) // Vector OR
+ #endif
+ #ifndef vec_xor
+ #define vec_xor(a, b) ((a) ^ (b)) // Vector XOR
+ #endif
+ #ifndef vec_sl
+ #define vec_sl(a, b) ((a) << (b)) // Vector Shift Left
+ #endif
+ #ifndef vec_sra
+ #define vec_sra(a, b) ((a) >> (b)) // Vector Shift Right Algebraic
+ #endif
+ #ifndef vec_sr
+ #define vec_sr(a, b) ((a) >> (b)) // Vector Shift Right
+ #endif
+ #ifndef vec_slo
+ #define vec_slo(a, b) vec_slb(a, (b) << 64) // Vector Shift Left by Octet
+ #endif
+ #ifndef vec_sro
+ #define vec_sro(a, b) vec_srb(a, (b) << 64) // Vector Shift Right by Octet
+ #endif
+ // vec_doublee maps to wrong intrin "vfll".
+ // see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100871
+ #if defined(__GNUC__) && !defined(__clang__)
+ #define npyv_doublee __builtin_s390_vflls
+ #else
+ #define npyv_doublee vec_doublee
+ #endif
+ // compatibility with vsx
+ #ifndef vec_vbpermq
+ #define vec_vbpermq vec_bperm_u128
+ #endif
+ // zvector requires the second operand to be signed while the vsx api expects
+ // it to be unsigned; the following macros are set to remove this conflict
+ #define vec_sl_s8(a, b) vec_sl(a, (npyv_s8)(b))
+ #define vec_sl_s16(a, b) vec_sl(a, (npyv_s16)(b))
+ #define vec_sl_s32(a, b) vec_sl(a, (npyv_s32)(b))
+ #define vec_sl_s64(a, b) vec_sl(a, (npyv_s64)(b))
+ #define vec_sra_s8(a, b) vec_sra(a, (npyv_s8)(b))
+ #define vec_sra_s16(a, b) vec_sra(a, (npyv_s16)(b))
+ #define vec_sra_s32(a, b) vec_sra(a, (npyv_s32)(b))
+ #define vec_sra_s64(a, b) vec_sra(a, (npyv_s64)(b))
+#else
+ #define vec_sl_s8 vec_sl
+ #define vec_sl_s16 vec_sl
+ #define vec_sl_s32 vec_sl
+ #define vec_sl_s64 vec_sl
+ #define vec_sra_s8 vec_sra
+ #define vec_sra_s16 vec_sra
+ #define vec_sra_s32 vec_sra
+ #define vec_sra_s64 vec_sra
+#endif
+
+#endif // _NPY_SIMD_VEC_UTILS_H
diff --git a/numpy/core/src/common/simd/vsx/vsx.h b/numpy/core/src/common/simd/vec/vec.h
index b4d8172a2..abcd33ce1 100644
--- a/numpy/core/src/common/simd/vsx/vsx.h
+++ b/numpy/core/src/common/simd/vec/vec.h
@@ -1,7 +1,22 @@
+/**
+ * branch /vec(altivec-like) provides the SIMD operations for
+ * both IBM VSX(Power) and VX(ZArch).
+*/
#ifndef _NPY_SIMD_H_
#error "Not a standalone header"
#endif
+#if !defined(NPY_HAVE_VX) && !defined(NPY_HAVE_VSX2)
+ #error "require minimum support VX(zarch11) or VSX2(Power8/ISA2.07)"
+#endif
+
+#if defined(NPY_HAVE_VSX) && !defined(__LITTLE_ENDIAN__)
+ #error "VSX support doesn't cover big-endian mode yet, only zarch."
+#endif
+#if defined(NPY_HAVE_VX) && defined(__LITTLE_ENDIAN__)
+ #error "VX(zarch) support doesn't cover little-endian mode."
+#endif
+
#if defined(__GNUC__) && __GNUC__ <= 7
/**
* GCC <= 7 produces ambiguous warning caused by -Werror=maybe-uninitialized,
@@ -15,8 +30,19 @@
#define NPY_SIMD 128
#define NPY_SIMD_WIDTH 16
#define NPY_SIMD_F64 1
+#if defined(NPY_HAVE_VXE) || defined(NPY_HAVE_VSX)
+ #define NPY_SIMD_F32 1
+#else
+ #define NPY_SIMD_F32 0
+#endif
#define NPY_SIMD_FMA3 1 // native support
+#ifdef NPY_HAVE_VX
+ #define NPY_SIMD_BIGENDIAN 1
+#else
+ #define NPY_SIMD_BIGENDIAN 0
+#endif
+
typedef __vector unsigned char npyv_u8;
typedef __vector signed char npyv_s8;
typedef __vector unsigned short npyv_u16;
@@ -25,7 +51,9 @@ typedef __vector unsigned int npyv_u32;
typedef __vector signed int npyv_s32;
typedef __vector unsigned long long npyv_u64;
typedef __vector signed long long npyv_s64;
+#if NPY_SIMD_F32
typedef __vector float npyv_f32;
+#endif
typedef __vector double npyv_f64;
typedef struct { npyv_u8 val[2]; } npyv_u8x2;
@@ -36,7 +64,9 @@ typedef struct { npyv_u32 val[2]; } npyv_u32x2;
typedef struct { npyv_s32 val[2]; } npyv_s32x2;
typedef struct { npyv_u64 val[2]; } npyv_u64x2;
typedef struct { npyv_s64 val[2]; } npyv_s64x2;
+#if NPY_SIMD_F32
typedef struct { npyv_f32 val[2]; } npyv_f32x2;
+#endif
typedef struct { npyv_f64 val[2]; } npyv_f64x2;
typedef struct { npyv_u8 val[3]; } npyv_u8x3;
@@ -47,7 +77,9 @@ typedef struct { npyv_u32 val[3]; } npyv_u32x3;
typedef struct { npyv_s32 val[3]; } npyv_s32x3;
typedef struct { npyv_u64 val[3]; } npyv_u64x3;
typedef struct { npyv_s64 val[3]; } npyv_s64x3;
+#if NPY_SIMD_F32
typedef struct { npyv_f32 val[3]; } npyv_f32x3;
+#endif
typedef struct { npyv_f64 val[3]; } npyv_f64x3;
#define npyv_nlanes_u8 16
@@ -67,6 +99,7 @@ typedef struct { npyv_f64 val[3]; } npyv_f64x3;
#define npyv_b32 __vector __bool int
#define npyv_b64 __vector __bool long long
+#include "utils.h"
#include "memory.h"
#include "misc.h"
#include "reorder.h"
diff --git a/numpy/core/src/common/simd/vsx/conversion.h b/numpy/core/src/common/simd/vsx/conversion.h
deleted file mode 100644
index a599f3950..000000000
--- a/numpy/core/src/common/simd/vsx/conversion.h
+++ /dev/null
@@ -1,146 +0,0 @@
-#ifndef NPY_SIMD
- #error "Not a standalone header"
-#endif
-
-#ifndef _NPY_SIMD_VSX_CVT_H
-#define _NPY_SIMD_VSX_CVT_H
-
-// convert boolean vectors to integer vectors
-#define npyv_cvt_u8_b8(BL) ((npyv_u8) BL)
-#define npyv_cvt_s8_b8(BL) ((npyv_s8) BL)
-#define npyv_cvt_u16_b16(BL) ((npyv_u16) BL)
-#define npyv_cvt_s16_b16(BL) ((npyv_s16) BL)
-#define npyv_cvt_u32_b32(BL) ((npyv_u32) BL)
-#define npyv_cvt_s32_b32(BL) ((npyv_s32) BL)
-#define npyv_cvt_u64_b64(BL) ((npyv_u64) BL)
-#define npyv_cvt_s64_b64(BL) ((npyv_s64) BL)
-#define npyv_cvt_f32_b32(BL) ((npyv_f32) BL)
-#define npyv_cvt_f64_b64(BL) ((npyv_f64) BL)
-
-// convert integer vectors to boolean vectors
-#define npyv_cvt_b8_u8(A) ((npyv_b8) A)
-#define npyv_cvt_b8_s8(A) ((npyv_b8) A)
-#define npyv_cvt_b16_u16(A) ((npyv_b16) A)
-#define npyv_cvt_b16_s16(A) ((npyv_b16) A)
-#define npyv_cvt_b32_u32(A) ((npyv_b32) A)
-#define npyv_cvt_b32_s32(A) ((npyv_b32) A)
-#define npyv_cvt_b64_u64(A) ((npyv_b64) A)
-#define npyv_cvt_b64_s64(A) ((npyv_b64) A)
-#define npyv_cvt_b32_f32(A) ((npyv_b32) A)
-#define npyv_cvt_b64_f64(A) ((npyv_b64) A)
-
-//expand
-NPY_FINLINE npyv_u16x2 npyv_expand_u16_u8(npyv_u8 data)
-{
- npyv_u16x2 r;
- npyv_u8 zero = npyv_zero_u8();
- r.val[0] = (npyv_u16)vec_mergeh(data, zero);
- r.val[1] = (npyv_u16)vec_mergel(data, zero);
- return r;
-}
-
-NPY_FINLINE npyv_u32x2 npyv_expand_u32_u16(npyv_u16 data)
-{
- npyv_u32x2 r;
- npyv_u16 zero = npyv_zero_u16();
- r.val[0] = (npyv_u32)vec_mergeh(data, zero);
- r.val[1] = (npyv_u32)vec_mergel(data, zero);
- return r;
-}
-
-// pack two 16-bit boolean into one 8-bit boolean vector
-NPY_FINLINE npyv_b8 npyv_pack_b8_b16(npyv_b16 a, npyv_b16 b) {
- return vec_pack(a, b);
-}
-
-// pack four 32-bit boolean vectors into one 8-bit boolean vector
-NPY_FINLINE npyv_b8 npyv_pack_b8_b32(npyv_b32 a, npyv_b32 b, npyv_b32 c, npyv_b32 d) {
- npyv_b16 ab = vec_pack(a, b);
- npyv_b16 cd = vec_pack(c, d);
- return npyv_pack_b8_b16(ab, cd);
-}
-
-// pack eight 64-bit boolean vectors into one 8-bit boolean vector
-NPY_FINLINE npyv_b8
-npyv_pack_b8_b64(npyv_b64 a, npyv_b64 b, npyv_b64 c, npyv_b64 d,
- npyv_b64 e, npyv_b64 f, npyv_b64 g, npyv_b64 h) {
- npyv_b32 ab = vec_pack(a, b);
- npyv_b32 cd = vec_pack(c, d);
- npyv_b32 ef = vec_pack(e, f);
- npyv_b32 gh = vec_pack(g, h);
- return npyv_pack_b8_b32(ab, cd, ef, gh);
-}
-
-// convert boolean vector to integer bitfield
-NPY_FINLINE npy_uint64 npyv_tobits_b8(npyv_b8 a)
-{
- const npyv_u8 qperm = npyv_set_u8(120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0);
- return vec_extract((npyv_u32)vec_vbpermq((npyv_u8)a, qperm), 2);
-}
-NPY_FINLINE npy_uint64 npyv_tobits_b16(npyv_b16 a)
-{
- const npyv_u8 qperm = npyv_setf_u8(128, 112, 96, 80, 64, 48, 32, 16, 0);
- return vec_extract((npyv_u32)vec_vbpermq((npyv_u8)a, qperm), 2);
-}
-NPY_FINLINE npy_uint64 npyv_tobits_b32(npyv_b32 a)
-{
- const npyv_u8 qperm = npyv_setf_u8(128, 96, 64, 32, 0);
- return vec_extract((npyv_u32)vec_vbpermq((npyv_u8)a, qperm), 2);
-}
-NPY_FINLINE npy_uint64 npyv_tobits_b64(npyv_b64 a)
-{
- npyv_u64 bit = npyv_shri_u64((npyv_u64)a, 63);
- return vec_extract(bit, 0) | (int)vec_extract(bit, 1) << 1;
-}
-
-// truncate compatible with all compilers(internal use for now)
-NPY_FINLINE npyv_s32 npyv__trunc_s32_f32(npyv_f32 a)
-{
-#ifdef __IBMC__
- return vec_cts(a, 0);
-#elif defined(__clang__)
- /**
- * old versions of CLANG doesn't support %x<n> in the inline asm template
- * which fixes register number when using any of the register constraints wa, wd, wf.
- * therefore, we count on built-in functions.
- */
- return __builtin_convertvector(a, npyv_s32);
-#else // gcc
- npyv_s32 ret;
- __asm__ ("xvcvspsxws %x0,%x1" : "=wa" (ret) : "wa" (a));
- return ret;
-#endif
-}
-NPY_FINLINE npyv_s32 npyv__trunc_s32_f64(npyv_f64 a, npyv_f64 b)
-{
-#ifdef __IBMC__
- const npyv_u8 seq_even = npyv_set_u8(0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27);
- // unfortunately, XLC missing asm register vsx fixer
- // hopefully, xlc can optimize around big-endian compatibility
- npyv_s32 lo_even = vec_cts(a, 0);
- npyv_s32 hi_even = vec_cts(b, 0);
- return vec_perm(lo_even, hi_even, seq_even);
-#else
- const npyv_u8 seq_odd = npyv_set_u8(4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31);
- #ifdef __clang__
- // __builtin_convertvector doesn't support this conversion on wide range of versions
- // fortunately, almost all versions have direct builtin of 'xvcvdpsxws'
- npyv_s32 lo_odd = __builtin_vsx_xvcvdpsxws(a);
- npyv_s32 hi_odd = __builtin_vsx_xvcvdpsxws(b);
- #else // gcc
- npyv_s32 lo_odd, hi_odd;
- __asm__ ("xvcvdpsxws %x0,%x1" : "=wa" (lo_odd) : "wa" (a));
- __asm__ ("xvcvdpsxws %x0,%x1" : "=wa" (hi_odd) : "wa" (b));
- #endif
- return vec_perm(lo_odd, hi_odd, seq_odd);
-#endif
-}
-
-// round to nearest integer (assuming even)
-NPY_FINLINE npyv_s32 npyv_round_s32_f32(npyv_f32 a)
-{ return npyv__trunc_s32_f32(vec_rint(a)); }
-
-NPY_FINLINE npyv_s32 npyv_round_s32_f64(npyv_f64 a, npyv_f64 b)
-{ return npyv__trunc_s32_f64(vec_rint(a), vec_rint(b)); }
-
-#endif // _NPY_SIMD_VSX_CVT_H
diff --git a/numpy/core/src/common/umathmodule.h b/numpy/core/src/common/umathmodule.h
index fe44fe403..0c69f8f54 100644
--- a/numpy/core/src/common/umathmodule.h
+++ b/numpy/core/src/common/umathmodule.h
@@ -7,8 +7,14 @@
NPY_NO_EXPORT PyObject *
get_sfloat_dtype(PyObject *NPY_UNUSED(mod), PyObject *NPY_UNUSED(args));
+/* Defined in umath/extobj.c */
+NPY_NO_EXPORT int
+PyUFunc_GiveFloatingpointErrors(const char *name, int fpe_errors);
+
PyObject * add_newdoc_ufunc(PyObject *NPY_UNUSED(dummy), PyObject *args);
PyObject * ufunc_frompyfunc(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject *NPY_UNUSED(kwds));
+
+
int initumath(PyObject *m);
#endif /* NUMPY_CORE_SRC_COMMON_UMATHMODULE_H_ */
diff --git a/numpy/core/src/multiarray/argfunc.dispatch.c.src b/numpy/core/src/multiarray/argfunc.dispatch.c.src
index cbfaebdb4..1d7753275 100644
--- a/numpy/core/src/multiarray/argfunc.dispatch.c.src
+++ b/numpy/core/src/multiarray/argfunc.dispatch.c.src
@@ -4,6 +4,7 @@
** sse2 sse42 xop avx2 avx512_skx
** vsx2
** neon asimd
+ ** vx vxe
**/
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
@@ -123,7 +124,7 @@ simd_@func@_@sfx@(npyv_lanetype_@sfx@ *ip, npy_intp len)
* #bsfx = b32, b32, b64, b64, b32, b64#
* #is_fp = 0*4, 1*2#
* #is_idx32 = 1*2, 0*2, 1, 0#
- * #chk_simd = NPY_SIMD*5, NPY_SIMD_F64#
+ * #chk_simd = NPY_SIMD*4, NPY_SIMD_F32, NPY_SIMD_F64#
*/
#if @chk_simd@
/**begin repeat1
@@ -298,6 +299,9 @@ scalar_loop:
#if NPY_BITSOF_@BTYPE@ == 64 && !NPY_SIMD_F64
#undef TO_SIMD_SFX
#endif
+ #if NPY_BITSOF_@BTYPE@ == 32 && !NPY_SIMD_F32
+ #undef TO_SIMD_SFX
+ #endif
#elif @is_unsigned@
#define TO_SIMD_SFX(X) X##_u@len@
#else
diff --git a/numpy/core/src/multiarray/array_assign_array.c b/numpy/core/src/multiarray/array_assign_array.c
index 020a7f29a..9d5bf6875 100644
--- a/numpy/core/src/multiarray/array_assign_array.c
+++ b/numpy/core/src/multiarray/array_assign_array.c
@@ -8,11 +8,13 @@
*/
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
#define _MULTIARRAYMODULE
+#define _UMATHMODULE
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include "numpy/ndarraytypes.h"
+#include "numpy/npy_math.h"
#include "npy_config.h"
#include "npy_pycompat.h"
@@ -25,6 +27,8 @@
#include "array_assign.h"
#include "dtype_transfer.h"
+#include "umathmodule.h"
+
/*
* Check that array data is both uint-aligned and true-aligned for all array
* elements, as required by the copy/casting code in lowlevel_strided_loops.c
@@ -83,7 +87,7 @@ raw_array_assign_array(int ndim, npy_intp const *shape,
npy_intp src_strides_it[NPY_MAXDIMS];
npy_intp coord[NPY_MAXDIMS];
- int aligned, needs_api = 0;
+ int aligned;
NPY_BEGIN_THREADS_DEF;
@@ -116,15 +120,19 @@ raw_array_assign_array(int ndim, npy_intp const *shape,
/* Get the function to do the casting */
NPY_cast_info cast_info;
+ NPY_ARRAYMETHOD_FLAGS flags;
if (PyArray_GetDTypeTransferFunction(aligned,
src_strides_it[0], dst_strides_it[0],
src_dtype, dst_dtype,
0,
- &cast_info, &needs_api) != NPY_SUCCEED) {
+ &cast_info, &flags) != NPY_SUCCEED) {
return -1;
}
- if (!needs_api) {
+ if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+ npy_clear_floatstatus_barrier(src_data);
+ }
+ if (!(flags & NPY_METH_REQUIRES_PYAPI)) {
NPY_BEGIN_THREADS;
}
@@ -143,6 +151,14 @@ raw_array_assign_array(int ndim, npy_intp const *shape,
NPY_END_THREADS;
NPY_cast_info_xfree(&cast_info);
+
+ if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+ int fpes = npy_get_floatstatus_barrier(src_data);
+ if (fpes && PyUFunc_GiveFloatingpointErrors("cast", fpes) < 0) {
+ return -1;
+ }
+ }
+
return 0;
fail:
NPY_END_THREADS;
@@ -170,7 +186,7 @@ raw_array_wheremasked_assign_array(int ndim, npy_intp const *shape,
npy_intp wheremask_strides_it[NPY_MAXDIMS];
npy_intp coord[NPY_MAXDIMS];
- int aligned, needs_api = 0;
+ int aligned;
NPY_BEGIN_THREADS_DEF;
@@ -207,17 +223,21 @@ raw_array_wheremasked_assign_array(int ndim, npy_intp const *shape,
/* Get the function to do the casting */
NPY_cast_info cast_info;
+ NPY_ARRAYMETHOD_FLAGS flags;
if (PyArray_GetMaskedDTypeTransferFunction(aligned,
src_strides_it[0],
dst_strides_it[0],
wheremask_strides_it[0],
src_dtype, dst_dtype, wheremask_dtype,
0,
- &cast_info, &needs_api) != NPY_SUCCEED) {
+ &cast_info, &flags) != NPY_SUCCEED) {
return -1;
}
- if (!needs_api) {
+ if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+ npy_clear_floatstatus_barrier(src_data);
+ }
+ if (!(flags & NPY_METH_REQUIRES_PYAPI)) {
NPY_BEGIN_THREADS;
}
npy_intp strides[2] = {src_strides_it[0], dst_strides_it[0]};
@@ -232,7 +252,7 @@ raw_array_wheremasked_assign_array(int ndim, npy_intp const *shape,
args, &shape_it[0], strides,
(npy_bool *)wheremask_data, wheremask_strides_it[0],
cast_info.auxdata) < 0) {
- break;
+ goto fail;
}
} NPY_RAW_ITER_THREE_NEXT(idim, ndim, coord, shape_it,
dst_data, dst_strides_it,
@@ -241,7 +261,20 @@ raw_array_wheremasked_assign_array(int ndim, npy_intp const *shape,
NPY_END_THREADS;
NPY_cast_info_xfree(&cast_info);
- return (needs_api && PyErr_Occurred()) ? -1 : 0;
+
+ if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+ int fpes = npy_get_floatstatus_barrier(src_data);
+ if (fpes && PyUFunc_GiveFloatingpointErrors("cast", fpes) < 0) {
+ return -1;
+ }
+ }
+
+ return 0;
+
+fail:
+ NPY_END_THREADS;
+ NPY_cast_info_xfree(&cast_info);
+ return -1;
}
/*
diff --git a/numpy/core/src/multiarray/array_assign_scalar.c b/numpy/core/src/multiarray/array_assign_scalar.c
index 4ffef7ecc..ba964b86d 100644
--- a/numpy/core/src/multiarray/array_assign_scalar.c
+++ b/numpy/core/src/multiarray/array_assign_scalar.c
@@ -8,11 +8,13 @@
*/
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
#define _MULTIARRAYMODULE
+#define _UMATHMODULE
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <numpy/ndarraytypes.h>
+#include "numpy/npy_math.h"
#include "npy_config.h"
#include "npy_pycompat.h"
@@ -25,6 +27,8 @@
#include "array_assign.h"
#include "dtype_transfer.h"
+#include "umathmodule.h"
+
/*
* Assigns the scalar value to every element of the destination raw array.
*
@@ -39,7 +43,7 @@ raw_array_assign_scalar(int ndim, npy_intp const *shape,
npy_intp shape_it[NPY_MAXDIMS], dst_strides_it[NPY_MAXDIMS];
npy_intp coord[NPY_MAXDIMS];
- int aligned, needs_api = 0;
+ int aligned;
NPY_BEGIN_THREADS_DEF;
@@ -62,15 +66,19 @@ raw_array_assign_scalar(int ndim, npy_intp const *shape,
/* Get the function to do the casting */
NPY_cast_info cast_info;
+ NPY_ARRAYMETHOD_FLAGS flags;
if (PyArray_GetDTypeTransferFunction(aligned,
0, dst_strides_it[0],
src_dtype, dst_dtype,
0,
- &cast_info, &needs_api) != NPY_SUCCEED) {
+ &cast_info, &flags) != NPY_SUCCEED) {
return -1;
}
- if (!needs_api) {
+ if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+ npy_clear_floatstatus_barrier(src_data);
+ }
+ if (!(flags & NPY_METH_REQUIRES_PYAPI)) {
npy_intp nitems = 1, i;
for (i = 0; i < ndim; i++) {
nitems *= shape_it[i];
@@ -92,6 +100,14 @@ raw_array_assign_scalar(int ndim, npy_intp const *shape,
NPY_END_THREADS;
NPY_cast_info_xfree(&cast_info);
+
+ if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+ int fpes = npy_get_floatstatus_barrier(src_data);
+ if (fpes && PyUFunc_GiveFloatingpointErrors("cast", fpes) < 0) {
+ return -1;
+ }
+ }
+
return 0;
fail:
NPY_END_THREADS;
@@ -117,7 +133,7 @@ raw_array_wheremasked_assign_scalar(int ndim, npy_intp const *shape,
npy_intp wheremask_strides_it[NPY_MAXDIMS];
npy_intp coord[NPY_MAXDIMS];
- int aligned, needs_api = 0;
+ int aligned;
NPY_BEGIN_THREADS_DEF;
@@ -142,15 +158,19 @@ raw_array_wheremasked_assign_scalar(int ndim, npy_intp const *shape,
/* Get the function to do the casting */
NPY_cast_info cast_info;
+ NPY_ARRAYMETHOD_FLAGS flags;
if (PyArray_GetMaskedDTypeTransferFunction(aligned,
0, dst_strides_it[0], wheremask_strides_it[0],
src_dtype, dst_dtype, wheremask_dtype,
0,
- &cast_info, &needs_api) != NPY_SUCCEED) {
+ &cast_info, &flags) != NPY_SUCCEED) {
return -1;
}
- if (!needs_api) {
+ if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+ npy_clear_floatstatus_barrier(src_data);
+ }
+ if (!(flags & NPY_METH_REQUIRES_PYAPI)) {
npy_intp nitems = 1, i;
for (i = 0; i < ndim; i++) {
nitems *= shape_it[i];
@@ -170,7 +190,7 @@ raw_array_wheremasked_assign_scalar(int ndim, npy_intp const *shape,
args, &shape_it[0], strides,
(npy_bool *)wheremask_data, wheremask_strides_it[0],
cast_info.auxdata) < 0) {
- break;
+ goto fail;
}
} NPY_RAW_ITER_TWO_NEXT(idim, ndim, coord, shape_it,
dst_data, dst_strides_it,
@@ -178,7 +198,20 @@ raw_array_wheremasked_assign_scalar(int ndim, npy_intp const *shape,
NPY_END_THREADS;
NPY_cast_info_xfree(&cast_info);
- return (needs_api && PyErr_Occurred()) ? -1 : 0;
+
+ if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+ int fpes = npy_get_floatstatus_barrier(src_data);
+ if (fpes && PyUFunc_GiveFloatingpointErrors("cast", fpes) < 0) {
+ return -1;
+ }
+ }
+
+ return 0;
+
+fail:
+ NPY_END_THREADS;
+ NPY_cast_info_xfree(&cast_info);
+ return -1;
}
/*
diff --git a/numpy/core/src/multiarray/array_coercion.c b/numpy/core/src/multiarray/array_coercion.c
index 1559f3485..e703e7382 100644
--- a/numpy/core/src/multiarray/array_coercion.c
+++ b/numpy/core/src/multiarray/array_coercion.c
@@ -9,6 +9,7 @@
#include "lowlevel_strided_loops.h"
#include "numpy/arrayobject.h"
+#include "numpy/npy_math.h"
#include "descriptor.h"
#include "convert_datatype.h"
@@ -22,6 +23,7 @@
#include "_datetime.h"
#include "npy_import.h"
+#include "umathmodule.h"
/*
* This file defines helpers for some of the ctors.c functions which
@@ -378,6 +380,49 @@ find_scalar_descriptor(
}
+/*
+ * Cast one raw item from `from_descr` (read at `from_item`) to `to_descr`
+ * (written to `to_item`) with the normal casting machinery; cast safety is
+ * not checked. Returns 0 on success, -1 if lookup, cast or FPE report fails.
+ */
+static int
+cast_raw_scalar_item(
+ PyArray_Descr *from_descr, char *from_item,
+ PyArray_Descr *to_descr, char *to_item)
+{
+ NPY_cast_info cast_info;
+ NPY_ARRAYMETHOD_FLAGS flags;
+ if (PyArray_GetDTypeTransferFunction(
+ 0, 0, 0, from_descr, to_descr, 0, &cast_info,
+ &flags) == NPY_FAIL) {
+ return -1;
+ }
+
+ if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { /* FPE tracking needed */
+ npy_clear_floatstatus_barrier(from_item); /* start from a clean status */
+ }
+
+ char *args[2] = {from_item, to_item};
+ const npy_intp strides[2] = {0, 0};
+ const npy_intp length = 1; /* exactly one item is cast */
+ if (cast_info.func(&cast_info.context,
+ args, &length, strides, cast_info.auxdata) < 0) {
+ NPY_cast_info_xfree(&cast_info); /* release cast info on failure too */
+ return -1;
+ }
+ NPY_cast_info_xfree(&cast_info);
+
+ if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+ int fpes = npy_get_floatstatus_barrier(to_item); /* status set by cast */
+ if (fpes && PyUFunc_GiveFloatingpointErrors("cast", fpes) < 0) {
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+
/**
* Assign a single element in an array from a python value.
*
@@ -388,26 +433,35 @@ find_scalar_descriptor(
* This function handles the cast, which is for example hit when assigning
* a float128 to complex128.
*
- * At this time, this function does not support arrays (historically we
- * mainly supported arrays through `__float__()`, etc.). Such support should
- * possibly be added (although when called from `PyArray_AssignFromCache`
- * the input cannot be an array).
- * Note that this is also problematic for some array-likes, such as
- * `astropy.units.Quantity` and `np.ma.masked`. These are used to us calling
- * `__float__`/`__int__` for 0-D instances in many cases.
- * Eventually, we may want to define this as wrong: They must use DTypes
- * instead of (only) subclasses. Until then, here as well as in
- * `PyArray_AssignFromCache` (which already does this), we need to special
- * case 0-D array-likes to behave like arbitrary (unknown!) Python objects.
+ * TODO: This function probably needs to be passed an "owner" for the sake of
+ * future HPy (non CPython) support
+ *
+ * NOTE: We do support 0-D exact NumPy arrays correctly via casting here.
+ * There be dragons, because we must NOT support generic array-likes.
+ * The problem is that some (e.g. astropy's Quantity and our masked
+ * arrays) have divergent behaviour for `__array__` as opposed to
+ * `__float__`. And they rely on that.
+ * That is arguably bad as it limits the things that work seamlessly
+ * because `__float__`, etc. cannot even begin to cover all of casting.
+ * However, we have no choice. We simply CANNOT support array-likes
+ * here without finding a solution for this first.
+ * And the only plausible one I see currently, is expanding protocols
+ * in some form, either to indicate that we want a scalar or to indicate
+ * that we want the unsafe version that `__array__` currently gives
+ * for both objects.
+ *
+ * If we ever figure out how to expand this to other array-likes, care
+ * may need to be taken. `PyArray_FromAny`/`PyArray_AssignFromCache`
+ * use this function but know whether the input is an array, array-like,
+ * or scalar. Relaxing things here should be OK, but looks a bit
+ * like possible recursion, so it may make sense to make a "scalars only"
+ * version of this function.
*
* @param descr
* @param item
* @param value
* @return 0 on success -1 on failure.
*/
-/*
- * TODO: This function should possibly be public API.
- */
NPY_NO_EXPORT int
PyArray_Pack(PyArray_Descr *descr, char *item, PyObject *value)
{
@@ -433,6 +487,29 @@ PyArray_Pack(PyArray_Descr *descr, char *item, PyObject *value)
if (DType == NULL) {
return -1;
}
+ if (DType == (PyArray_DTypeMeta *)Py_None && PyArray_CheckExact(value)
+ && PyArray_NDIM((PyArrayObject *)value) == 0) {
+ /*
+ * WARNING: Do NOT relax the above `PyArray_CheckExact`, unless you
+ * read the function doc NOTE carefully and understood it.
+ *
+ * NOTE: The ndim == 0 check should probably be an error, but
+ * unfortunately `arr.__float__()` works for 1-element arrays,
+ * so in some contexts we need to let it be handled like a scalar.
+ * (If we manage to deprecate the above, we can do that.)
+ */
+ Py_DECREF(DType);
+
+ PyArrayObject *arr = (PyArrayObject *)value;
+ if (PyArray_DESCR(arr) == descr && !PyDataType_REFCHK(descr)) {
+ /* light-weight fast-path for when the descrs obviously matches */
+ memcpy(item, PyArray_BYTES(arr), descr->elsize);
+ return 0; /* success (it was an array-like) */
+ }
+ return cast_raw_scalar_item(
+ PyArray_DESCR(arr), PyArray_BYTES(arr), descr, item);
+
+ }
if (DType == NPY_DTYPE(descr) || DType == (PyArray_DTypeMeta *)Py_None) {
/* We can set the element directly (or at least will try to) */
Py_XDECREF(DType);
@@ -461,30 +538,8 @@ PyArray_Pack(PyArray_Descr *descr, char *item, PyObject *value)
Py_DECREF(tmp_descr);
return -1;
}
- if (PyDataType_REFCHK(tmp_descr)) {
- /* We could probably use move-references above */
- PyArray_Item_INCREF(data, tmp_descr);
- }
-
- int res = 0;
- int needs_api = 0;
- NPY_cast_info cast_info;
- if (PyArray_GetDTypeTransferFunction(
- 0, 0, 0, tmp_descr, descr, 0, &cast_info,
- &needs_api) == NPY_FAIL) {
- res = -1;
- goto finish;
- }
- char *args[2] = {data, item};
- const npy_intp strides[2] = {0, 0};
- const npy_intp length = 1;
- if (cast_info.func(&cast_info.context,
- args, &length, strides, cast_info.auxdata) < 0) {
- res = -1;
- }
- NPY_cast_info_xfree(&cast_info);
+ int res = cast_raw_scalar_item(tmp_descr, data, descr, item);
- finish:
if (PyDataType_REFCHK(tmp_descr)) {
/* We could probably use move-references above */
PyArray_Item_XDECREF(data, tmp_descr);
diff --git a/numpy/core/src/multiarray/array_method.h b/numpy/core/src/multiarray/array_method.h
index 30dd94a80..c9ec8903d 100644
--- a/numpy/core/src/multiarray/array_method.h
+++ b/numpy/core/src/multiarray/array_method.h
@@ -7,6 +7,9 @@
#include <Python.h>
#include <numpy/ndarraytypes.h>
+#ifdef __cplusplus
+extern "C" {
+#endif
typedef enum {
/* Flag for whether the GIL is required */
@@ -17,7 +20,11 @@ typedef enum {
* setup/check. No function should set error flags and ignore them
* since it would interfere with chaining operations (e.g. casting).
*/
- /* TODO: Change this into a positive flag */
+ /*
+ * TODO: Change this into a positive flag? That would make "combining"
+ * multiple methods easier. OTOH, if we add more flags, the default
+ * would be 0 just like it is here.
+ */
NPY_METH_NO_FLOATINGPOINT_ERRORS = 1 << 2,
/* Whether the method supports unaligned access (not runtime) */
NPY_METH_SUPPORTS_UNALIGNED = 1 << 3,
@@ -40,6 +47,20 @@ typedef enum {
} NPY_ARRAYMETHOD_FLAGS;
+/*
+ * Combine the flags of two methods. A plain `|` is not enough: 0 bits should
+ * indicate "default", so the "minimal" flags (currently only the
+ * no-floating-point-errors flag) stay set only when both inputs set them.
+ * Both arguments are evaluated twice; pass expressions without side effects.
+ * NOTE: If made public, maybe make this a function so flags are easier to add?
+ */
+#define PyArrayMethod_MINIMAL_FLAGS NPY_METH_NO_FLOATINGPOINT_ERRORS
+#define PyArrayMethod_COMBINED_FLAGS(flags1, flags2) \
+ ((NPY_ARRAYMETHOD_FLAGS)( \
+ (((flags1) | (flags2)) & ~PyArrayMethod_MINIMAL_FLAGS) \
+ | ((flags1) & (flags2))))
+
+
struct PyArrayMethodObject_tag;
/*
@@ -249,6 +270,10 @@ PyArrayMethod_FromSpec(PyArrayMethod_Spec *spec);
* need better tests when a public version is exposed.
*/
NPY_NO_EXPORT PyBoundArrayMethodObject *
-PyArrayMethod_FromSpec_int(PyArrayMethod_Spec *spec, int private);
+PyArrayMethod_FromSpec_int(PyArrayMethod_Spec *spec, int priv);
+
+#ifdef __cplusplus
+}
+#endif
#endif /* NUMPY_CORE_SRC_MULTIARRAY_ARRAY_METHOD_H_ */
diff --git a/numpy/core/src/multiarray/arrayobject.c b/numpy/core/src/multiarray/arrayobject.c
index a1f0e2d5b..d18fe1b10 100644
--- a/numpy/core/src/multiarray/arrayobject.c
+++ b/numpy/core/src/multiarray/arrayobject.c
@@ -641,375 +641,11 @@ PyArray_FailUnlessWriteable(PyArrayObject *obj, const char *name)
return 0;
}
-/* This also handles possibly mis-aligned data */
-/* Compare s1 and s2 which are not necessarily NULL-terminated.
- s1 is of length len1
- s2 is of length len2
- If they are NULL terminated, then stop comparison.
-*/
-static int
-_myunincmp(npy_ucs4 const *s1, npy_ucs4 const *s2, int len1, int len2)
-{
- npy_ucs4 const *sptr;
- npy_ucs4 *s1t = NULL;
- npy_ucs4 *s2t = NULL;
- int val;
- npy_intp size;
- int diff;
-
- /* Replace `s1` and `s2` with aligned copies if needed */
- if ((npy_intp)s1 % sizeof(npy_ucs4) != 0) {
- size = len1*sizeof(npy_ucs4);
- s1t = malloc(size);
- memcpy(s1t, s1, size);
- s1 = s1t;
- }
- if ((npy_intp)s2 % sizeof(npy_ucs4) != 0) {
- size = len2*sizeof(npy_ucs4);
- s2t = malloc(size);
- memcpy(s2t, s2, size);
- s2 = s1t;
- }
-
- val = PyArray_CompareUCS4(s1, s2, PyArray_MIN(len1,len2));
- if ((val != 0) || (len1 == len2)) {
- goto finish;
- }
- if (len2 > len1) {
- sptr = s2+len1;
- val = -1;
- diff = len2-len1;
- }
- else {
- sptr = s1+len2;
- val = 1;
- diff=len1-len2;
- }
- while (diff--) {
- if (*sptr != 0) {
- goto finish;
- }
- sptr++;
- }
- val = 0;
-
- finish:
- /* Cleanup the aligned copies */
- if (s1t) {
- free(s1t);
- }
- if (s2t) {
- free(s2t);
- }
- return val;
-}
-
-
-
-
-/*
- * Compare s1 and s2 which are not necessarily NULL-terminated.
- * s1 is of length len1
- * s2 is of length len2
- * If they are NULL terminated, then stop comparison.
- */
-static int
-_mystrncmp(char const *s1, char const *s2, int len1, int len2)
-{
- char const *sptr;
- int val;
- int diff;
-
- val = memcmp(s1, s2, PyArray_MIN(len1, len2));
- if ((val != 0) || (len1 == len2)) {
- return val;
- }
- if (len2 > len1) {
- sptr = s2 + len1;
- val = -1;
- diff = len2 - len1;
- }
- else {
- sptr = s1 + len2;
- val = 1;
- diff = len1 - len2;
- }
- while (diff--) {
- if (*sptr != 0) {
- return val;
- }
- sptr++;
- }
- return 0; /* Only happens if NULLs are everywhere */
-}
-
-/* Borrowed from Numarray */
-
-#define SMALL_STRING 2048
-
-static void _rstripw(char *s, int n)
-{
- int i;
- for (i = n - 1; i >= 1; i--) { /* Never strip to length 0. */
- int c = s[i];
-
- if (!c || NumPyOS_ascii_isspace((int)c)) {
- s[i] = 0;
- }
- else {
- break;
- }
- }
-}
-
-static void _unistripw(npy_ucs4 *s, int n)
-{
- int i;
- for (i = n - 1; i >= 1; i--) { /* Never strip to length 0. */
- npy_ucs4 c = s[i];
- if (!c || NumPyOS_ascii_isspace((int)c)) {
- s[i] = 0;
- }
- else {
- break;
- }
- }
-}
-
-
-static char *
-_char_copy_n_strip(char const *original, char *temp, int nc)
-{
- if (nc > SMALL_STRING) {
- temp = malloc(nc);
- if (!temp) {
- PyErr_NoMemory();
- return NULL;
- }
- }
- memcpy(temp, original, nc);
- _rstripw(temp, nc);
- return temp;
-}
-
-static void
-_char_release(char *ptr, int nc)
-{
- if (nc > SMALL_STRING) {
- free(ptr);
- }
-}
-
-static char *
-_uni_copy_n_strip(char const *original, char *temp, int nc)
-{
- if (nc*sizeof(npy_ucs4) > SMALL_STRING) {
- temp = malloc(nc*sizeof(npy_ucs4));
- if (!temp) {
- PyErr_NoMemory();
- return NULL;
- }
- }
- memcpy(temp, original, nc*sizeof(npy_ucs4));
- _unistripw((npy_ucs4 *)temp, nc);
- return temp;
-}
-
-static void
-_uni_release(char *ptr, int nc)
-{
- if (nc*sizeof(npy_ucs4) > SMALL_STRING) {
- free(ptr);
- }
-}
-
-
-/* End borrowed from numarray */
-
-#define _rstrip_loop(CMP) { \
- void *aptr, *bptr; \
- char atemp[SMALL_STRING], btemp[SMALL_STRING]; \
- while(size--) { \
- aptr = stripfunc(iself->dataptr, atemp, N1); \
- if (!aptr) return -1; \
- bptr = stripfunc(iother->dataptr, btemp, N2); \
- if (!bptr) { \
- relfunc(aptr, N1); \
- return -1; \
- } \
- val = compfunc(aptr, bptr, N1, N2); \
- *dptr = (val CMP 0); \
- PyArray_ITER_NEXT(iself); \
- PyArray_ITER_NEXT(iother); \
- dptr += 1; \
- relfunc(aptr, N1); \
- relfunc(bptr, N2); \
- } \
- }
-
-#define _reg_loop(CMP) { \
- while(size--) { \
- val = compfunc((void *)iself->dataptr, \
- (void *)iother->dataptr, \
- N1, N2); \
- *dptr = (val CMP 0); \
- PyArray_ITER_NEXT(iself); \
- PyArray_ITER_NEXT(iother); \
- dptr += 1; \
- } \
- }
-
-static int
-_compare_strings(PyArrayObject *result, PyArrayMultiIterObject *multi,
- int cmp_op, void *func, int rstrip)
-{
- PyArrayIterObject *iself, *iother;
- npy_bool *dptr;
- npy_intp size;
- int val;
- int N1, N2;
- int (*compfunc)(void *, void *, int, int);
- void (*relfunc)(char *, int);
- char* (*stripfunc)(char const *, char *, int);
-
- compfunc = func;
- dptr = (npy_bool *)PyArray_DATA(result);
- iself = multi->iters[0];
- iother = multi->iters[1];
- size = multi->size;
- N1 = PyArray_DESCR(iself->ao)->elsize;
- N2 = PyArray_DESCR(iother->ao)->elsize;
- if ((void *)compfunc == (void *)_myunincmp) {
- N1 >>= 2;
- N2 >>= 2;
- stripfunc = _uni_copy_n_strip;
- relfunc = _uni_release;
- }
- else {
- stripfunc = _char_copy_n_strip;
- relfunc = _char_release;
- }
- switch (cmp_op) {
- case Py_EQ:
- if (rstrip) {
- _rstrip_loop(==);
- } else {
- _reg_loop(==);
- }
- break;
- case Py_NE:
- if (rstrip) {
- _rstrip_loop(!=);
- } else {
- _reg_loop(!=);
- }
- break;
- case Py_LT:
- if (rstrip) {
- _rstrip_loop(<);
- } else {
- _reg_loop(<);
- }
- break;
- case Py_LE:
- if (rstrip) {
- _rstrip_loop(<=);
- } else {
- _reg_loop(<=);
- }
- break;
- case Py_GT:
- if (rstrip) {
- _rstrip_loop(>);
- } else {
- _reg_loop(>);
- }
- break;
- case Py_GE:
- if (rstrip) {
- _rstrip_loop(>=);
- } else {
- _reg_loop(>=);
- }
- break;
- default:
- PyErr_SetString(PyExc_RuntimeError, "bad comparison operator");
- return -1;
- }
- return 0;
-}
-
-#undef _reg_loop
-#undef _rstrip_loop
-#undef SMALL_STRING
+/* From umath/string_ufuncs.cpp/h */
NPY_NO_EXPORT PyObject *
-_strings_richcompare(PyArrayObject *self, PyArrayObject *other, int cmp_op,
- int rstrip)
-{
- PyArrayObject *result;
- PyArrayMultiIterObject *mit;
- int val;
-
- if (PyArray_TYPE(self) != PyArray_TYPE(other)) {
- /*
- * Comparison between Bytes and Unicode is not defined in Py3K;
- * we follow.
- */
- Py_INCREF(Py_NotImplemented);
- return Py_NotImplemented;
- }
- if (PyArray_ISNOTSWAPPED(self) != PyArray_ISNOTSWAPPED(other)) {
- /* Cast `other` to the same byte order as `self` (both unicode here) */
- PyArray_Descr* unicode = PyArray_DescrNew(PyArray_DESCR(self));
- if (unicode == NULL) {
- return NULL;
- }
- unicode->elsize = PyArray_DESCR(other)->elsize;
- PyObject *new = PyArray_FromAny((PyObject *)other,
- unicode, 0, 0, 0, NULL);
- if (new == NULL) {
- return NULL;
- }
- other = (PyArrayObject *)new;
- }
- else {
- Py_INCREF(other);
- }
-
- /* Broad-cast the arrays to a common shape */
- mit = (PyArrayMultiIterObject *)PyArray_MultiIterNew(2, self, other);
- Py_DECREF(other);
- if (mit == NULL) {
- return NULL;
- }
-
- result = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type,
- PyArray_DescrFromType(NPY_BOOL),
- mit->nd,
- mit->dimensions,
- NULL, NULL, 0,
- NULL);
- if (result == NULL) {
- goto finish;
- }
-
- if (PyArray_TYPE(self) == NPY_UNICODE) {
- val = _compare_strings(result, mit, cmp_op, _myunincmp, rstrip);
- }
- else {
- val = _compare_strings(result, mit, cmp_op, _mystrncmp, rstrip);
- }
-
- if (val < 0) {
- Py_DECREF(result);
- result = NULL;
- }
-
- finish:
- Py_DECREF(mit);
- return (PyObject *)result;
-}
+_umath_strings_richcompare(
+ PyArrayObject *self, PyArrayObject *other, int cmp_op, int rstrip);
/*
* VOID-type arrays can only be compared equal and not-equal
@@ -1130,7 +766,15 @@ _void_compare(PyArrayObject *self, PyArrayObject *other, int cmp_op)
memcpy(dimensions, PyArray_DIMS((PyArrayObject *)temp),
sizeof(npy_intp)*result_ndim);
}
- dimensions[result_ndim] = -1;
+
+ /*
+ * Compute the new dimension size manually, as reshaping
+ * with -1 does not work on empty arrays.
+ */
+ dimensions[result_ndim] = PyArray_MultiplyList(
+ PyArray_DIMS((PyArrayObject *)temp) + result_ndim,
+ PyArray_NDIM((PyArrayObject *)temp) - result_ndim);
+
temp2 = PyArray_Newshape((PyArrayObject *)temp,
&newdims, NPY_ANYORDER);
if (temp2 == NULL) {
@@ -1207,7 +851,7 @@ _void_compare(PyArrayObject *self, PyArrayObject *other, int cmp_op)
return NULL;
}
/* compare as a string. Assumes self and other have same descr->type */
- return _strings_richcompare(self, other, cmp_op, 0);
+ return _umath_strings_richcompare(self, other, cmp_op, 0);
}
}
@@ -1341,36 +985,6 @@ array_richcompare(PyArrayObject *self, PyObject *other, int cmp_op)
PyObject *obj_self = (PyObject *)self;
PyObject *result = NULL;
- /* Special case for string arrays (which don't and currently can't have
- * ufunc loops defined, so there's no point in trying).
- */
- if (PyArray_ISSTRING(self)) {
- array_other = (PyArrayObject *)PyArray_FromObject(other,
- NPY_NOTYPE, 0, 0);
- if (array_other == NULL) {
- PyErr_Clear();
- /* Never mind, carry on, see what happens */
- }
- else if (!PyArray_ISSTRING(array_other)) {
- Py_DECREF(array_other);
- /* Never mind, carry on, see what happens */
- }
- else {
- result = _strings_richcompare(self, array_other, cmp_op, 0);
- Py_DECREF(array_other);
- return result;
- }
- /* If we reach this point, it means that we are not comparing
- * string-to-string. It's possible that this will still work out,
- * e.g. if the other array is an object array, then both will be cast
- * to object or something? I don't know how that works actually, but
- * it does, b/c this works:
- * l = ["a", "b"]
- * assert np.array(l, dtype="S1") == np.array(l, dtype="O")
- * So we fall through and see what happens.
- */
- }
-
switch (cmp_op) {
case Py_LT:
RICHCMP_GIVE_UP_IF_NEEDED(obj_self, other);
diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src
index ee4f5f312..a9f8dfdd2 100644
--- a/numpy/core/src/multiarray/arraytypes.c.src
+++ b/numpy/core/src/multiarray/arraytypes.c.src
@@ -7,6 +7,7 @@
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
#define _MULTIARRAYMODULE
+#define _UMATHMODULE
#define _NPY_NO_DEPRECATIONS /* for NPY_CHAR */
#include "numpy/npy_common.h"
@@ -37,6 +38,9 @@
#include "npy_buffer.h"
#include "arraytypes.h"
+
+#include "umathmodule.h"
+
/*
* Define a stack allocated dummy array with only the minimum information set:
* 1. The descr, the main field interesting here.
@@ -96,10 +100,32 @@ MyPyFloat_AsDouble(PyObject *obj)
return ret;
}
+
+/*
+ * Convert `obj` to a C float by narrowing the result of MyPyFloat_AsDouble.
+ *
+ * If the narrowed value is infinite although the double was not, an
+ * overflow is reported through PyUFunc_GiveFloatingpointErrors; -1 is
+ * returned when that call signals failure (negative result).
+ */
+static float
+MyPyFloat_AsFloat(PyObject *obj)
+{
+ double d_val = MyPyFloat_AsDouble(obj);
+ float res = (float)d_val;
+ /* inf as float but not as double: the narrowing itself overflowed */
+ if (NPY_UNLIKELY(npy_isinf(res) && !npy_isinf(d_val))) {
+ if (PyUFunc_GiveFloatingpointErrors("cast", NPY_FPE_OVERFLOW) < 0) {
+ return -1;
+ }
+ }
+ return res;
+}
+
+
+/*
+ * Convert `obj` to npy_half via MyPyFloat_AsDouble and npy_double_to_half.
+ *
+ * If the half value is infinite although the double was not, an overflow
+ * is reported through PyUFunc_GiveFloatingpointErrors; when that call
+ * signals failure, the half representation of -1 is returned.
+ */
static npy_half
MyPyFloat_AsHalf(PyObject *obj)
{
- return npy_double_to_half(MyPyFloat_AsDouble(obj));
+ double d_val = MyPyFloat_AsDouble(obj);
+ npy_half res = npy_double_to_half(d_val);
+ /* inf as half but not as double: the narrowing itself overflowed */
+ if (NPY_UNLIKELY(npy_half_isinf(res) && !npy_isinf(d_val))) {
+ if (PyUFunc_GiveFloatingpointErrors("cast", NPY_FPE_OVERFLOW) < 0) {
+ return npy_double_to_half(-1.);
+ }
+ }
+ return res;
}
static PyObject *
@@ -200,7 +226,7 @@ MyPyLong_AsUnsigned@Type@ (PyObject *obj)
* MyPyFloat_FromHalf, PyFloat_FromDouble*2#
* #func2 = PyObject_IsTrue, MyPyLong_AsLong*6, MyPyLong_AsUnsignedLong*2,
* MyPyLong_AsLongLong, MyPyLong_AsUnsignedLongLong,
- * MyPyFloat_AsHalf, MyPyFloat_AsDouble*2#
+ * MyPyFloat_AsHalf, MyPyFloat_AsFloat, MyPyFloat_AsDouble#
* #type = npy_bool,
* npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int,
* npy_long, npy_uint, npy_ulong, npy_longlong, npy_ulonglong,
@@ -363,6 +389,26 @@ static int
}
temp.real = (@ftype@) oop.real;
temp.imag = (@ftype@) oop.imag;
+
+#if NPY_SIZEOF_@NAME@ < NPY_SIZEOF_CDOUBLE /* really just float... */
+ /* Overflow could have occurred converting double to float */
+ if (NPY_UNLIKELY((npy_isinf(temp.real) && !npy_isinf(oop.real)) ||
+ (npy_isinf(temp.imag) && !npy_isinf(oop.imag)))) {
+ int bufsize, errmask;
+ PyObject *errobj;
+
+ if (PyUFunc_GetPyValues("assignment", &bufsize, &errmask,
+ &errobj) < 0) {
+ return -1;
+ }
+ int first = 1;
+ if (PyUFunc_handlefperr(errmask, errobj, NPY_FPE_OVERFLOW, &first)) {
+ Py_XDECREF(errobj);
+ return -1;
+ }
+ Py_XDECREF(errobj);
+ }
+#endif
}
memcpy(ov, &temp, PyArray_DESCR(ap)->elsize);
@@ -1151,13 +1197,22 @@ static void
@totype@ *op = output;
while (n--) {
- @fromtype@ f = *ip++;
- @totype@ t = (@totype@)f;
#if @supports_nat@ && @floatingpoint@
- /* Avoid undefined behaviour for NaN -> NaT */
+ /*
+ * volatile works around clang (and gcc sometimes) not branching
+ * correctly, leading to floating point errors in the test suite.
+ */
+ volatile @fromtype@ f = *ip++;
+ @totype@ t;
+ /* Avoid undefined behaviour and warning for NaN -> NaT */
if (npy_isnan(f)) {
t = (@totype@)NPY_DATETIME_NAT;
}
+ else {
+ t = (@totype@)f;
+ }
+#else
+ @totype@ t = (@totype@)*ip++;
#endif
*op++ = t;
}
@@ -1177,13 +1232,22 @@ static void
@totype@ *op = output;
while (n--) {
- @fromtype@ f = *ip;
- @totype@ t = (@totype@)f;
#if @supports_nat@
- /* Avoid undefined behaviour for NaN -> NaT */
+ /*
+ * volatile works around clang (and gcc sometimes) not branching
+ * correctly, leading to floating point errors in the test suite.
+ */
+ volatile @fromtype@ f = *ip;
+ @totype@ t;
+ /* Avoid undefined behaviour and warning for NaN -> NaT */
if (npy_isnan(f)) {
t = (@totype@)NPY_DATETIME_NAT;
}
+ else {
+ t = (@totype@)f;
+ }
+#else
+ @totype@ t = (@totype@)*ip;
#endif
*op++ = t;
ip += 2;
diff --git a/numpy/core/src/multiarray/common_dtype.h b/numpy/core/src/multiarray/common_dtype.h
index 13d38ddf8..9f25fc14e 100644
--- a/numpy/core/src/multiarray/common_dtype.h
+++ b/numpy/core/src/multiarray/common_dtype.h
@@ -7,6 +7,10 @@
#include <numpy/ndarraytypes.h>
#include "dtypemeta.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
NPY_NO_EXPORT PyArray_DTypeMeta *
PyArray_CommonDType(PyArray_DTypeMeta *dtype1, PyArray_DTypeMeta *dtype2);
@@ -14,4 +18,8 @@ NPY_NO_EXPORT PyArray_DTypeMeta *
PyArray_PromoteDTypeSequence(
npy_intp length, PyArray_DTypeMeta **dtypes_in);
+#ifdef __cplusplus
+}
+#endif
+
#endif /* NUMPY_CORE_SRC_MULTIARRAY_COMMON_DTYPE_H_ */
diff --git a/numpy/core/src/multiarray/convert.c b/numpy/core/src/multiarray/convert.c
index 630253e38..2aed0bbb4 100644
--- a/numpy/core/src/multiarray/convert.c
+++ b/numpy/core/src/multiarray/convert.c
@@ -20,6 +20,7 @@
#include "array_assign.h"
#include "convert.h"
+#include "array_coercion.h"
int
fallocate(int fd, int mode, off_t offset, off_t len);
@@ -358,151 +359,42 @@ PyArray_ToString(PyArrayObject *self, NPY_ORDER order)
NPY_NO_EXPORT int
PyArray_FillWithScalar(PyArrayObject *arr, PyObject *obj)
{
- PyArray_Descr *dtype = NULL;
- npy_longlong value_buffer[4];
- char *value = NULL;
- int retcode = 0;
-
/*
- * If 'arr' is an object array, copy the object as is unless
- * 'obj' is a zero-dimensional array, in which case we copy
- * the element in that array instead.
+ * If we knew that the output array has at least one element, we would
+ * not actually need a helper buffer. We always zero it, just in case.
+ *
+ * (The longlong here should help with alignment.)
*/
- if (PyArray_DESCR(arr)->type_num == NPY_OBJECT &&
- !(PyArray_Check(obj) &&
- PyArray_NDIM((PyArrayObject *)obj) == 0)) {
- value = (char *)&obj;
-
- dtype = PyArray_DescrFromType(NPY_OBJECT);
- if (dtype == NULL) {
- return -1;
- }
- }
- /* NumPy scalar */
- else if (PyArray_IsScalar(obj, Generic)) {
- dtype = PyArray_DescrFromScalar(obj);
- if (dtype == NULL) {
- return -1;
- }
- value = scalar_value(obj, dtype);
- if (value == NULL) {
- Py_DECREF(dtype);
- return -1;
- }
- }
- /* Python boolean */
- else if (PyBool_Check(obj)) {
- value = (char *)value_buffer;
- *value = (obj == Py_True);
-
- dtype = PyArray_DescrFromType(NPY_BOOL);
- if (dtype == NULL) {
- return -1;
- }
- }
- /* Python integer */
- else if (PyLong_Check(obj)) {
- /* Try long long before unsigned long long */
- npy_longlong ll_v = PyLong_AsLongLong(obj);
- if (error_converting(ll_v)) {
- /* Long long failed, try unsigned long long */
- npy_ulonglong ull_v;
- PyErr_Clear();
- ull_v = PyLong_AsUnsignedLongLong(obj);
- if (ull_v == (unsigned long long)-1 && PyErr_Occurred()) {
- return -1;
- }
- value = (char *)value_buffer;
- *(npy_ulonglong *)value = ull_v;
-
- dtype = PyArray_DescrFromType(NPY_ULONGLONG);
- if (dtype == NULL) {
- return -1;
- }
- }
- else {
- /* Long long succeeded */
- value = (char *)value_buffer;
- *(npy_longlong *)value = ll_v;
-
- dtype = PyArray_DescrFromType(NPY_LONGLONG);
- if (dtype == NULL) {
- return -1;
- }
- }
- }
- /* Python float */
- else if (PyFloat_Check(obj)) {
- npy_double v = PyFloat_AsDouble(obj);
- if (error_converting(v)) {
- return -1;
- }
- value = (char *)value_buffer;
- *(npy_double *)value = v;
-
- dtype = PyArray_DescrFromType(NPY_DOUBLE);
- if (dtype == NULL) {
+ /* Zeroed scratch space for one element; `value` points at whichever is used */
+ npy_longlong value_buffer_stack[4] = {0};
+ char *value_buffer_heap = NULL;
+ char *value = (char *)value_buffer_stack;
+ PyArray_Descr *descr = PyArray_DESCR(arr);
+
+ if (descr->elsize > sizeof(value_buffer_stack)) {
+ /*
+ * The element does not fit the stack buffer, so use a zeroed heap
+ * buffer. It has to come from the PyMem_* allocator domain, because
+ * it is released with PyMem_FREE() on the exit paths of this function.
+ */
+ value_buffer_heap = PyMem_Calloc(1, descr->elsize);
+ if (value_buffer_heap == NULL) {
+ PyErr_NoMemory();
+ return -1;
+ }
+ value = value_buffer_heap;
}
- /* Python complex */
- else if (PyComplex_Check(obj)) {
- npy_double re, im;
-
- re = PyComplex_RealAsDouble(obj);
- if (error_converting(re)) {
- return -1;
- }
- im = PyComplex_ImagAsDouble(obj);
- if (error_converting(im)) {
- return -1;
- }
- value = (char *)value_buffer;
- ((npy_double *)value)[0] = re;
- ((npy_double *)value)[1] = im;
-
- dtype = PyArray_DescrFromType(NPY_CDOUBLE);
- if (dtype == NULL) {
- return -1;
- }
- }
-
- /* Use the value pointer we got if possible */
- if (value != NULL) {
- /* TODO: switch to SAME_KIND casting */
- retcode = PyArray_AssignRawScalar(arr, dtype, value,
- NULL, NPY_UNSAFE_CASTING);
- Py_DECREF(dtype);
- return retcode;
+ if (PyArray_Pack(descr, value, obj) < 0) {
+ PyMem_FREE(value_buffer_heap);
+ return -1;
}
- /* Otherwise convert to an array to do the assignment */
- else {
- PyArrayObject *src_arr;
- /**
- * The dtype of the destination is used when converting
- * from the pyobject, so that for example a tuple gets
- * recognized as a struct scalar of the required type.
- */
- Py_INCREF(PyArray_DTYPE(arr));
- src_arr = (PyArrayObject *)PyArray_FromAny(obj,
- PyArray_DTYPE(arr), 0, 0, 0, NULL);
- if (src_arr == NULL) {
- return -1;
- }
-
- if (PyArray_NDIM(src_arr) != 0) {
- PyErr_SetString(PyExc_ValueError,
- "Input object to FillWithScalar is not a scalar");
- Py_DECREF(src_arr);
- return -1;
- }
-
- retcode = PyArray_CopyInto(arr, src_arr);
+ /*
+ * There is no cast anymore, the above already coerced using scalar
+ * coercion rules
+ */
+ int retcode = raw_array_assign_scalar(
+ PyArray_NDIM(arr), PyArray_DIMS(arr), descr,
+ PyArray_BYTES(arr), PyArray_STRIDES(arr),
+ descr, value);
- Py_DECREF(src_arr);
- return retcode;
- }
+ PyMem_FREE(value_buffer_heap);
+ return retcode;
}
/*
diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c
index 8d0a4cd56..bc8a3bf88 100644
--- a/numpy/core/src/multiarray/convert_datatype.c
+++ b/numpy/core/src/multiarray/convert_datatype.c
@@ -1691,8 +1691,12 @@ PyArray_ResultType(
all_DTypes[i_all] = &PyArray_PyComplexAbstractDType;
}
else {
- /* N.B.: Could even be an object dtype here for large ints */
+ /* This could even be an object dtype here for large ints */
all_DTypes[i_all] = &PyArray_PyIntAbstractDType;
+ if (PyArray_TYPE(arrs[i]) != NPY_LONG) {
+ /* Not a "normal" scalar, so we cannot avoid the legacy path */
+ all_pyscalar = 0;
+ }
}
Py_INCREF(all_DTypes[i_all]);
/*
@@ -3042,26 +3046,22 @@ nonstructured_to_structured_get_loop(
NPY_ARRAYMETHOD_FLAGS *flags)
{
if (context->descriptors[1]->names != NULL) {
- int needs_api = 0;
if (get_fields_transfer_function(
aligned, strides[0], strides[1],
context->descriptors[0], context->descriptors[1],
move_references, out_loop, out_transferdata,
- &needs_api) == NPY_FAIL) {
+ flags) == NPY_FAIL) {
return -1;
}
- *flags = needs_api ? NPY_METH_REQUIRES_PYAPI : 0;
}
else if (context->descriptors[1]->subarray != NULL) {
- int needs_api = 0;
if (get_subarray_transfer_function(
aligned, strides[0], strides[1],
context->descriptors[0], context->descriptors[1],
move_references, out_loop, out_transferdata,
- &needs_api) == NPY_FAIL) {
+ flags) == NPY_FAIL) {
return -1;
}
- *flags = needs_api ? NPY_METH_REQUIRES_PYAPI : 0;
}
else {
/*
@@ -3204,26 +3204,22 @@ structured_to_nonstructured_get_loop(
NPY_ARRAYMETHOD_FLAGS *flags)
{
if (context->descriptors[0]->names != NULL) {
- int needs_api = 0;
if (get_fields_transfer_function(
aligned, strides[0], strides[1],
context->descriptors[0], context->descriptors[1],
move_references, out_loop, out_transferdata,
- &needs_api) == NPY_FAIL) {
+ flags) == NPY_FAIL) {
return -1;
}
- *flags = needs_api ? NPY_METH_REQUIRES_PYAPI : 0;
}
else if (context->descriptors[0]->subarray != NULL) {
- int needs_api = 0;
if (get_subarray_transfer_function(
aligned, strides[0], strides[1],
context->descriptors[0], context->descriptors[1],
move_references, out_loop, out_transferdata,
- &needs_api) == NPY_FAIL) {
+ flags) == NPY_FAIL) {
return -1;
}
- *flags = needs_api ? NPY_METH_REQUIRES_PYAPI : 0;
}
else {
/*
@@ -3513,27 +3509,23 @@ void_to_void_get_loop(
{
if (context->descriptors[0]->names != NULL ||
context->descriptors[1]->names != NULL) {
- int needs_api = 0;
if (get_fields_transfer_function(
aligned, strides[0], strides[1],
context->descriptors[0], context->descriptors[1],
move_references, out_loop, out_transferdata,
- &needs_api) == NPY_FAIL) {
+ flags) == NPY_FAIL) {
return -1;
}
- *flags = needs_api ? NPY_METH_REQUIRES_PYAPI : 0;
}
else if (context->descriptors[0]->subarray != NULL ||
context->descriptors[1]->subarray != NULL) {
- int needs_api = 0;
if (get_subarray_transfer_function(
aligned, strides[0], strides[1],
context->descriptors[0], context->descriptors[1],
move_references, out_loop, out_transferdata,
- &needs_api) == NPY_FAIL) {
+ flags) == NPY_FAIL) {
return -1;
}
- *flags = needs_api ? NPY_METH_REQUIRES_PYAPI : 0;
}
else {
/*
@@ -3546,7 +3538,7 @@ void_to_void_get_loop(
out_loop, out_transferdata) == NPY_FAIL) {
return -1;
}
- *flags = 0;
+ *flags = PyArrayMethod_MINIMAL_FLAGS;
}
return 0;
}
diff --git a/numpy/core/src/multiarray/convert_datatype.h b/numpy/core/src/multiarray/convert_datatype.h
index d1865d1c2..af6d790cf 100644
--- a/numpy/core/src/multiarray/convert_datatype.h
+++ b/numpy/core/src/multiarray/convert_datatype.h
@@ -3,6 +3,10 @@
#include "array_method.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
extern NPY_NO_EXPORT npy_intp REQUIRED_STR_LEN[];
NPY_NO_EXPORT PyObject *
@@ -34,7 +38,7 @@ dtype_kind_to_ordering(char kind);
/* Used by PyArray_CanCastArrayTo and in the legacy ufunc type resolution */
NPY_NO_EXPORT npy_bool
can_cast_scalar_to(PyArray_Descr *scal_type, char *scal_data,
- PyArray_Descr *to, NPY_CASTING casting);
+ PyArray_Descr *to, NPY_CASTING casting);
NPY_NO_EXPORT int
should_use_min_scalar(npy_intp narrs, PyArrayObject **arr,
@@ -59,7 +63,7 @@ NPY_NO_EXPORT int
PyArray_AddCastingImplementation(PyBoundArrayMethodObject *meth);
NPY_NO_EXPORT int
-PyArray_AddCastingImplementation_FromSpec(PyArrayMethod_Spec *spec, int private);
+PyArray_AddCastingImplementation_FromSpec(PyArrayMethod_Spec *spec, int private_);
NPY_NO_EXPORT NPY_CASTING
PyArray_MinCastSafety(NPY_CASTING casting1, NPY_CASTING casting2);
@@ -99,4 +103,8 @@ simple_cast_resolve_descriptors(
NPY_NO_EXPORT int
PyArray_InitializeCasts(void);
+#ifdef __cplusplus
+}
+#endif
+
#endif /* NUMPY_CORE_SRC_MULTIARRAY_CONVERT_DATATYPE_H_ */
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index c780f4b2b..c3d66dd6b 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -1,5 +1,6 @@
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
#define _MULTIARRAYMODULE
+#define _UMATHMODULE
#define PY_SSIZE_T_CLEAN
#include <Python.h>
@@ -33,6 +34,8 @@
#include "get_attr_string.h"
#include "array_coercion.h"
+#include "umathmodule.h"
+
/*
* Reading from a file or a string.
*
@@ -465,55 +468,12 @@ PyArray_AssignFromCache_Recursive(
PyArrayObject *self, const int ndim, coercion_cache_obj **cache)
{
/* Consume first cache element by extracting information and freeing it */
- PyObject *original_obj = (*cache)->converted_obj;
PyObject *obj = (*cache)->arr_or_sequence;
Py_INCREF(obj);
npy_bool sequence = (*cache)->sequence;
int depth = (*cache)->depth;
*cache = npy_unlink_coercion_cache(*cache);
- /*
- * The maximum depth is special (specifically for objects), but usually
- * unrolled in the sequence branch below.
- */
- if (NPY_UNLIKELY(depth == ndim)) {
- /*
- * We have reached the maximum depth. We should simply assign to the
- * element in principle. There is one exception. If this is a 0-D
- * array being stored into a 0-D array (but we do not reach here then).
- */
- if (PyArray_ISOBJECT(self)) {
- assert(ndim != 0); /* guaranteed by PyArray_AssignFromCache */
- assert(PyArray_NDIM(self) == 0);
- Py_DECREF(obj);
- return PyArray_Pack(PyArray_DESCR(self), PyArray_BYTES(self),
- original_obj);
- }
- if (sequence) {
- /*
- * Sanity check which may be removed, the error is raised already
- * in `PyArray_DiscoverDTypeAndShape`.
- */
- assert(0);
- PyErr_SetString(PyExc_RuntimeError,
- "setting an array element with a sequence");
- goto fail;
- }
- else if (original_obj != obj || !PyArray_CheckExact(obj)) {
- /*
- * If the leave node is an array-like, but not a numpy array,
- * we pretend it is an arbitrary scalar. This means that in
- * most cases (where the dtype is int or float), we will end
- * up using float(array-like), or int(array-like). That does
- * not support general casting, but helps Quantity and masked
- * arrays, because it allows them to raise an error when
- * `__float__()` or `__int__()` is called.
- */
- Py_DECREF(obj);
- return PyArray_SETITEM(self, PyArray_BYTES(self), original_obj);
- }
- }
-
/* The element is either a sequence, or an array */
if (!sequence) {
/* Straight forward array assignment */
@@ -535,20 +495,24 @@ PyArray_AssignFromCache_Recursive(
for (npy_intp i = 0; i < length; i++) {
PyObject *value = PySequence_Fast_GET_ITEM(obj, i);
- if (*cache == NULL || (*cache)->converted_obj != value ||
- (*cache)->depth != depth + 1) {
- if (ndim != depth + 1) {
- PyErr_SetString(PyExc_RuntimeError,
- "Inconsistent object during array creation? "
- "Content of sequences changed (now too shallow).");
- goto fail;
- }
- /* Straight forward assignment of elements */
+ if (ndim == depth + 1) {
+ /*
+ * Straight forward assignment of elements. Note that it is
+ * possible for such an element to be a 0-D array or array-like.
+ * `PyArray_Pack` supports arrays as well as we want: We
+ * support exact NumPy arrays, but at this point ignore others.
+ * (Please see the `PyArray_Pack` function comment if this
+ * rightly confuses you.)
+ */
char *item;
item = (PyArray_BYTES(self) + i * PyArray_STRIDES(self)[0]);
if (PyArray_Pack(PyArray_DESCR(self), item, value) < 0) {
goto fail;
}
+ /* If this was an array(-like) we still need to unlink it: */
+ if (*cache != NULL && (*cache)->converted_obj == value) {
+ *cache = npy_unlink_coercion_cache(*cache);
+ }
}
else {
PyArrayObject *view;
@@ -2780,18 +2744,22 @@ PyArray_CopyAsFlat(PyArrayObject *dst, PyArrayObject *src, NPY_ORDER order)
* contiguous strides, etc.
*/
NPY_cast_info cast_info;
+ NPY_ARRAYMETHOD_FLAGS flags;
if (PyArray_GetDTypeTransferFunction(
IsUintAligned(src) && IsAligned(src) &&
IsUintAligned(dst) && IsAligned(dst),
src_stride, dst_stride,
PyArray_DESCR(src), PyArray_DESCR(dst),
0,
- &cast_info, &needs_api) != NPY_SUCCEED) {
+ &cast_info, &flags) != NPY_SUCCEED) {
NpyIter_Deallocate(dst_iter);
NpyIter_Deallocate(src_iter);
return -1;
}
-
+ needs_api |= (flags & NPY_METH_REQUIRES_PYAPI) != 0;
+ if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+ npy_clear_floatstatus_barrier((char *)src_iter);
+ }
if (!needs_api) {
NPY_BEGIN_THREADS;
}
@@ -2843,8 +2811,20 @@ PyArray_CopyAsFlat(PyArrayObject *dst, PyArrayObject *src, NPY_ORDER order)
NPY_END_THREADS;
NPY_cast_info_xfree(&cast_info);
- NpyIter_Deallocate(dst_iter);
- NpyIter_Deallocate(src_iter);
+ /* Deallocation can fail; report that through the return value */
+ if (!NpyIter_Deallocate(dst_iter)) {
+ res = -1;
+ }
+ if (!NpyIter_Deallocate(src_iter)) {
+ res = -1;
+ }
+
+ if (res == 0 && !(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+ /*
+ * Both iterators have been deallocated above, so `src_iter` must not
+ * be handed to the barrier (which may read through its argument).
+ * The address of the local pointer variable is still valid.
+ */
+ int fpes = npy_get_floatstatus_barrier((char *)&src_iter);
+ if (fpes && PyUFunc_GiveFloatingpointErrors("cast", fpes) < 0) {
+ return -1;
+ }
+ }
+
return res;
}
diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c
index 18de5d132..f8458d2d7 100644
--- a/numpy/core/src/multiarray/dtype_transfer.c
+++ b/numpy/core/src/multiarray/dtype_transfer.c
@@ -11,12 +11,14 @@
*/
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
#define _MULTIARRAYMODULE
+#define _UMATHMODULE
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <structmember.h>
#include "numpy/arrayobject.h"
+#include "numpy/npy_math.h"
#include "lowlevel_strided_loops.h"
#include "npy_pycompat.h"
@@ -35,6 +37,8 @@
#include "array_method.h"
#include "array_coercion.h"
+#include "umathmodule.h"
+
#define NPY_LOWLEVEL_BUFFER_BLOCKSIZE 128
/********** PRINTF DEBUG TRACING **************/
@@ -1506,7 +1510,7 @@ get_one_to_n_transfer_function(int aligned,
npy_intp N,
PyArrayMethod_StridedLoop **out_stransfer,
NpyAuxData **out_transferdata,
- int *out_needs_api)
+ NPY_ARRAYMETHOD_FLAGS *out_flags)
{
_one_to_n_data *data = PyMem_Malloc(sizeof(_one_to_n_data));
if (data == NULL) {
@@ -1530,18 +1534,19 @@ get_one_to_n_transfer_function(int aligned,
src_dtype, dst_dtype,
0,
&data->wrapped,
- out_needs_api) != NPY_SUCCEED) {
+ out_flags) != NPY_SUCCEED) {
NPY_AUXDATA_FREE((NpyAuxData *)data);
return NPY_FAIL;
}
/* If the src object will need a DECREF, set src_dtype */
if (move_references && PyDataType_REFCHK(src_dtype)) {
+ *out_flags |= NPY_METH_REQUIRES_PYAPI;
if (get_decref_transfer_function(aligned,
src_stride,
src_dtype,
&data->decref_src,
- out_needs_api) != NPY_SUCCEED) {
+ NULL) != NPY_SUCCEED) {
NPY_AUXDATA_FREE((NpyAuxData *)data);
return NPY_FAIL;
}
@@ -1667,7 +1672,7 @@ get_n_to_n_transfer_function(int aligned,
npy_intp N,
PyArrayMethod_StridedLoop **out_stransfer,
NpyAuxData **out_transferdata,
- int *out_needs_api)
+ NPY_ARRAYMETHOD_FLAGS *out_flags)
{
_n_to_n_data *data = PyMem_Malloc(sizeof(_n_to_n_data));
if (data == NULL) {
@@ -1699,7 +1704,7 @@ get_n_to_n_transfer_function(int aligned,
src_dtype, dst_dtype,
move_references,
&data->wrapped,
- out_needs_api) != NPY_SUCCEED) {
+ out_flags) != NPY_SUCCEED) {
NPY_AUXDATA_FREE((NpyAuxData *)data);
return NPY_FAIL;
}
@@ -1913,7 +1918,7 @@ get_subarray_broadcast_transfer_function(int aligned,
int move_references,
PyArrayMethod_StridedLoop **out_stransfer,
NpyAuxData **out_transferdata,
- int *out_needs_api)
+ NPY_ARRAYMETHOD_FLAGS *out_flags)
{
_subarray_broadcast_data *data;
npy_intp structsize, loop_index, run, run_size,
@@ -1946,7 +1951,7 @@ get_subarray_broadcast_transfer_function(int aligned,
src_dtype, dst_dtype,
0,
&data->wrapped,
- out_needs_api) != NPY_SUCCEED) {
+ out_flags) != NPY_SUCCEED) {
NPY_AUXDATA_FREE((NpyAuxData *)data);
return NPY_FAIL;
}
@@ -1958,7 +1963,7 @@ get_subarray_broadcast_transfer_function(int aligned,
src_dtype, NULL,
1,
&data->decref_src,
- out_needs_api) != NPY_SUCCEED) {
+ out_flags) != NPY_SUCCEED) {
NPY_AUXDATA_FREE((NpyAuxData *)data);
return NPY_FAIL;
}
@@ -1971,7 +1976,7 @@ get_subarray_broadcast_transfer_function(int aligned,
dst_dtype, NULL,
1,
&data->decref_dst,
- out_needs_api) != NPY_SUCCEED) {
+ out_flags) != NPY_SUCCEED) {
NPY_AUXDATA_FREE((NpyAuxData *)data);
return NPY_FAIL;
}
@@ -2087,7 +2092,7 @@ get_subarray_transfer_function(int aligned,
int move_references,
PyArrayMethod_StridedLoop **out_stransfer,
NpyAuxData **out_transferdata,
- int *out_needs_api)
+ NPY_ARRAYMETHOD_FLAGS *out_flags)
{
PyArray_Dims src_shape = {NULL, -1}, dst_shape = {NULL, -1};
npy_intp src_size = 1, dst_size = 1;
@@ -2132,7 +2137,7 @@ get_subarray_transfer_function(int aligned,
move_references,
src_size,
out_stransfer, out_transferdata,
- out_needs_api);
+ out_flags);
}
/* Copy the src value to all the dst values */
else if (src_size == 1) {
@@ -2145,7 +2150,7 @@ get_subarray_transfer_function(int aligned,
move_references,
dst_size,
out_stransfer, out_transferdata,
- out_needs_api);
+ out_flags);
}
/*
* Copy the subarray with broadcasting, truncating, and zero-padding
@@ -2159,7 +2164,7 @@ get_subarray_transfer_function(int aligned,
src_shape, dst_shape,
move_references,
out_stransfer, out_transferdata,
- out_needs_api);
+ out_flags);
npy_free_cache_dim_obj(src_shape);
npy_free_cache_dim_obj(dst_shape);
@@ -2277,7 +2282,7 @@ get_fields_transfer_function(int NPY_UNUSED(aligned),
int move_references,
PyArrayMethod_StridedLoop **out_stransfer,
NpyAuxData **out_transferdata,
- int *out_needs_api)
+ NPY_ARRAYMETHOD_FLAGS *out_flags)
{
PyObject *key, *tup, *title;
PyArray_Descr *src_fld_dtype, *dst_fld_dtype;
@@ -2308,6 +2313,7 @@ get_fields_transfer_function(int NPY_UNUSED(aligned),
data->base.clone = &_field_transfer_data_clone;
data->field_count = 0;
+ *out_flags = PyArrayMethod_MINIMAL_FLAGS;
for (i = 0; i < field_count; ++i) {
key = PyTuple_GET_ITEM(dst_dtype->names, i);
tup = PyDict_GetItem(dst_dtype->fields, key);
@@ -2316,15 +2322,17 @@ get_fields_transfer_function(int NPY_UNUSED(aligned),
PyMem_Free(data);
return NPY_FAIL;
}
+ NPY_ARRAYMETHOD_FLAGS field_flags;
if (PyArray_GetDTypeTransferFunction(0,
src_stride, dst_stride,
src_dtype, dst_fld_dtype,
0,
&data->fields[i].info,
- out_needs_api) != NPY_SUCCEED) {
+ &field_flags) != NPY_SUCCEED) {
NPY_AUXDATA_FREE((NpyAuxData *)data);
return NPY_FAIL;
}
+ *out_flags = PyArrayMethod_COMBINED_FLAGS(*out_flags, field_flags);
data->fields[i].src_offset = 0;
data->fields[i].dst_offset = dst_offset;
data->field_count++;
@@ -2336,11 +2344,12 @@ get_fields_transfer_function(int NPY_UNUSED(aligned),
* input, the second one (normally output) just does not matter here.
*/
if (move_references && PyDataType_REFCHK(src_dtype)) {
+ *out_flags |= NPY_METH_REQUIRES_PYAPI;
if (get_decref_transfer_function(0,
src_stride,
src_dtype,
&data->fields[field_count].info,
- out_needs_api) != NPY_SUCCEED) {
+ NULL) != NPY_SUCCEED) {
NPY_AUXDATA_FREE((NpyAuxData *)data);
return NPY_FAIL;
}
@@ -2388,7 +2397,7 @@ get_fields_transfer_function(int NPY_UNUSED(aligned),
src_fld_dtype, dst_dtype,
move_references,
&data->fields[0].info,
- out_needs_api) != NPY_SUCCEED) {
+ out_flags) != NPY_SUCCEED) {
PyMem_Free(data);
return NPY_FAIL;
}
@@ -2423,6 +2432,7 @@ get_fields_transfer_function(int NPY_UNUSED(aligned),
data->base.clone = &_field_transfer_data_clone;
data->field_count = 0;
+ *out_flags = PyArrayMethod_MINIMAL_FLAGS;
/* set up the transfer function for each field */
for (i = 0; i < field_count; ++i) {
key = PyTuple_GET_ITEM(dst_dtype->names, i);
@@ -2440,15 +2450,17 @@ get_fields_transfer_function(int NPY_UNUSED(aligned),
return NPY_FAIL;
}
+ NPY_ARRAYMETHOD_FLAGS field_flags;
if (PyArray_GetDTypeTransferFunction(0,
src_stride, dst_stride,
src_fld_dtype, dst_fld_dtype,
move_references,
&data->fields[i].info,
- out_needs_api) != NPY_SUCCEED) {
+ &field_flags) != NPY_SUCCEED) {
NPY_AUXDATA_FREE((NpyAuxData *)data);
return NPY_FAIL;
}
+ *out_flags = PyArrayMethod_COMBINED_FLAGS(*out_flags, field_flags);
data->fields[i].src_offset = src_offset;
data->fields[i].dst_offset = dst_offset;
data->field_count++;
@@ -2748,11 +2760,12 @@ get_decref_transfer_function(int aligned,
src_size = PyArray_MultiplyList(src_shape.ptr, src_shape.len);
npy_free_cache_dim_obj(src_shape);
+ NPY_ARRAYMETHOD_FLAGS ignored_flags;
if (get_n_to_n_transfer_function(aligned,
src_stride, 0,
src_dtype->subarray->base, NULL, 1, src_size,
&cast_info->func, &cast_info->auxdata,
- out_needs_api) != NPY_SUCCEED) {
+ &ignored_flags) != NPY_SUCCEED) {
return NPY_FAIL;
}
@@ -3098,7 +3111,7 @@ define_cast_for_descrs(
npy_intp src_stride, npy_intp dst_stride,
PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype,
int move_references,
- NPY_cast_info *cast_info, int *out_needs_api)
+ NPY_cast_info *cast_info, NPY_ARRAYMETHOD_FLAGS *out_flags)
{
/* Storage for all cast info in case multi-step casting is necessary */
_multistep_castdata castdata;
@@ -3109,6 +3122,7 @@ define_cast_for_descrs(
/* `view_offset` passed to `init_cast_info` but unused for the main cast */
npy_intp view_offset = NPY_MIN_INTP;
NPY_CASTING casting = -1;
+ *out_flags = PyArrayMethod_MINIMAL_FLAGS;
if (init_cast_info(
cast_info, &casting, &view_offset, src_dtype, dst_dtype, 1) < 0) {
@@ -3159,7 +3173,7 @@ define_cast_for_descrs(
}
assert(castdata.from.func != NULL);
- *out_needs_api |= (flags & NPY_METH_REQUIRES_PYAPI) != 0;
+ *out_flags = PyArrayMethod_COMBINED_FLAGS(*out_flags, flags);
/* The main cast now uses a buffered input: */
src_stride = strides[1];
move_references = 1; /* main cast has to clear the buffer */
@@ -3198,7 +3212,7 @@ define_cast_for_descrs(
}
assert(castdata.to.func != NULL);
- *out_needs_api |= (flags & NPY_METH_REQUIRES_PYAPI) != 0;
+ *out_flags = PyArrayMethod_COMBINED_FLAGS(*out_flags, flags);
/* The main cast now uses a buffered input: */
dst_stride = strides[0];
if (castdata.from.func != NULL) {
@@ -3219,7 +3233,7 @@ define_cast_for_descrs(
goto fail;
}
- *out_needs_api |= (flags & NPY_METH_REQUIRES_PYAPI) != 0;
+ *out_flags = PyArrayMethod_COMBINED_FLAGS(*out_flags, flags);
if (castdata.from.func == NULL && castdata.to.func == NULL) {
/* Most of the time, there will be only one step required. */
@@ -3256,7 +3270,7 @@ PyArray_GetDTypeTransferFunction(int aligned,
PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype,
int move_references,
NPY_cast_info *cast_info,
- int *out_needs_api)
+ NPY_ARRAYMETHOD_FLAGS *out_flags)
{
assert(src_dtype != NULL);
@@ -3271,17 +3285,24 @@ PyArray_GetDTypeTransferFunction(int aligned,
*/
if (dst_dtype == NULL) {
assert(move_references);
- return get_decref_transfer_function(aligned,
+ int needs_api = 0;
+ int res = get_decref_transfer_function(aligned,
src_dtype->elsize,
src_dtype,
cast_info,
- out_needs_api);
+ &needs_api);
+ /* decref'ing never creates floating point errors, so just ignore it */
+ *out_flags = PyArrayMethod_MINIMAL_FLAGS;
+ if (needs_api) {
+ *out_flags |= NPY_METH_REQUIRES_PYAPI;
+ }
+ return res;
}
if (define_cast_for_descrs(aligned,
src_stride, dst_stride,
src_dtype, dst_dtype, move_references,
- cast_info, out_needs_api) < 0) {
+ cast_info, out_flags) < 0) {
return NPY_FAIL;
}
@@ -3353,21 +3374,29 @@ wrap_aligned_transferfunction(
* have an explicit implementation instead if we want performance.
*/
if (must_wrap || src_wrapped_dtype != src_dtype) {
+ NPY_ARRAYMETHOD_FLAGS flags;
if (PyArray_GetDTypeTransferFunction(aligned,
src_stride, castdata.main.descriptors[0]->elsize,
src_dtype, castdata.main.descriptors[0], 0,
- &castdata.from, out_needs_api) != NPY_SUCCEED) {
+ &castdata.from, &flags) != NPY_SUCCEED) {
goto fail;
}
+ if (flags & NPY_METH_REQUIRES_PYAPI) {
+ *out_needs_api = 1;
+ }
}
if (must_wrap || dst_wrapped_dtype != dst_dtype) {
+ NPY_ARRAYMETHOD_FLAGS flags;
if (PyArray_GetDTypeTransferFunction(aligned,
castdata.main.descriptors[1]->elsize, dst_stride,
castdata.main.descriptors[1], dst_dtype,
1, /* clear buffer if it includes references */
- &castdata.to, out_needs_api) != NPY_SUCCEED) {
+ &castdata.to, &flags) != NPY_SUCCEED) {
goto fail;
}
+ if (flags & NPY_METH_REQUIRES_PYAPI) {
+ *out_needs_api = 1;
+ }
}
*out_transferdata = _multistep_cast_auxdata_clone_int(&castdata, 1);
@@ -3492,7 +3521,7 @@ PyArray_GetMaskedDTypeTransferFunction(int aligned,
PyArray_Descr *mask_dtype,
int move_references,
NPY_cast_info *cast_info,
- int *out_needs_api)
+ NPY_ARRAYMETHOD_FLAGS *out_flags)
{
NPY_cast_info_init(cast_info);
@@ -3520,18 +3549,19 @@ PyArray_GetMaskedDTypeTransferFunction(int aligned,
src_dtype, dst_dtype,
move_references,
&data->wrapped,
- out_needs_api) != NPY_SUCCEED) {
+ out_flags) != NPY_SUCCEED) {
PyMem_Free(data);
return NPY_FAIL;
}
/* If the src object will need a DECREF, get a function to handle that */
if (move_references && PyDataType_REFCHK(src_dtype)) {
+ *out_flags |= NPY_METH_REQUIRES_PYAPI;
if (get_decref_transfer_function(aligned,
src_stride,
src_dtype,
&data->decref_src,
- out_needs_api) != NPY_SUCCEED) {
+ NULL) != NPY_SUCCEED) {
NPY_AUXDATA_FREE((NpyAuxData *)data);
return NPY_FAIL;
}
@@ -3562,7 +3592,7 @@ PyArray_CastRawArrays(npy_intp count,
PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype,
int move_references)
{
- int aligned = 1, needs_api = 0;
+ int aligned;
/* Make sure the copy is reasonable */
if (dst_stride == 0 && count > 1) {
@@ -3586,15 +3616,20 @@ PyArray_CastRawArrays(npy_intp count,
/* Get the function to do the casting */
NPY_cast_info cast_info;
+ NPY_ARRAYMETHOD_FLAGS flags;
if (PyArray_GetDTypeTransferFunction(aligned,
src_stride, dst_stride,
src_dtype, dst_dtype,
move_references,
&cast_info,
- &needs_api) != NPY_SUCCEED) {
+ &flags) != NPY_SUCCEED) {
return NPY_FAIL;
}
+ if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+ npy_clear_floatstatus_barrier((char*)&cast_info);
+ }
+
/* Cast */
char *args[2] = {src, dst};
npy_intp strides[2] = {src_stride, dst_stride};
@@ -3603,8 +3638,16 @@ PyArray_CastRawArrays(npy_intp count,
/* Cleanup */
NPY_cast_info_xfree(&cast_info);
- /* If needs_api was set to 1, it may have raised a Python exception */
- return (needs_api && PyErr_Occurred()) ? NPY_FAIL : NPY_SUCCEED;
+ if (flags & NPY_METH_REQUIRES_PYAPI && PyErr_Occurred()) {
+ return NPY_FAIL;
+ }
+ if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+ int fpes = npy_get_floatstatus_barrier(*args);
+ if (fpes && PyUFunc_GiveFloatingpointErrors("cast", fpes) < 0) {
+ return NPY_FAIL;
+ }
+ }
+ return NPY_SUCCEED;
}
/*
diff --git a/numpy/core/src/multiarray/dtypemeta.c b/numpy/core/src/multiarray/dtypemeta.c
index 577478d2a..cc99a3eca 100644
--- a/numpy/core/src/multiarray/dtypemeta.c
+++ b/numpy/core/src/multiarray/dtypemeta.c
@@ -613,6 +613,7 @@ string_unicode_common_dtype(PyArray_DTypeMeta *cls, PyArray_DTypeMeta *other)
return cls;
}
+
static PyArray_DTypeMeta *
datetime_common_dtype(PyArray_DTypeMeta *cls, PyArray_DTypeMeta *other)
{
diff --git a/numpy/core/src/multiarray/dtypemeta.h b/numpy/core/src/multiarray/dtypemeta.h
index e7d5505d8..618491c98 100644
--- a/numpy/core/src/multiarray/dtypemeta.h
+++ b/numpy/core/src/multiarray/dtypemeta.h
@@ -1,6 +1,9 @@
#ifndef NUMPY_CORE_SRC_MULTIARRAY_DTYPEMETA_H_
#define NUMPY_CORE_SRC_MULTIARRAY_DTYPEMETA_H_
+#ifdef __cplusplus
+extern "C" {
+#endif
/* DType flags, currently private, since we may just expose functions */
#define NPY_DT_LEGACY 1 << 0
@@ -126,4 +129,8 @@ python_builtins_are_known_scalar_types(
NPY_NO_EXPORT int
dtypemeta_wrap_legacy_descriptor(PyArray_Descr *dtypem);
+#ifdef __cplusplus
+}
+#endif
+
#endif /* NUMPY_CORE_SRC_MULTIARRAY_DTYPEMETA_H_ */
diff --git a/numpy/core/src/multiarray/einsum_sumprod.c.src b/numpy/core/src/multiarray/einsum_sumprod.c.src
index 3114a5896..e7b2f2c2c 100644
--- a/numpy/core/src/multiarray/einsum_sumprod.c.src
+++ b/numpy/core/src/multiarray/einsum_sumprod.c.src
@@ -68,7 +68,7 @@
* 0*3#
* #NPYV_CHK = 0*5,
* 0*5,
- * 0, NPY_SIMD, NPY_SIMD_F64, 0,
+ * 0, NPY_SIMD_F32, NPY_SIMD_F64, 0,
* 0*3#
*/
diff --git a/numpy/core/src/multiarray/experimental_public_dtype_api.c b/numpy/core/src/multiarray/experimental_public_dtype_api.c
index cf5f152ab..441dbdc1f 100644
--- a/numpy/core/src/multiarray/experimental_public_dtype_api.c
+++ b/numpy/core/src/multiarray/experimental_public_dtype_api.c
@@ -300,37 +300,13 @@ PyArrayInitDTypeMeta_FromSpec(
}
-/* Function is defined in umath/dispatching.c (same/one compilation unit) */
+/* Functions defined in umath/dispatching.c (same/one compilation unit) */
NPY_NO_EXPORT int
PyUFunc_AddLoop(PyUFuncObject *ufunc, PyObject *info, int ignore_duplicate);
-static int
-PyUFunc_AddLoopFromSpec(PyObject *ufunc, PyArrayMethod_Spec *spec)
-{
- if (!PyObject_TypeCheck(ufunc, &PyUFunc_Type)) {
- PyErr_SetString(PyExc_TypeError,
- "ufunc object passed is not a ufunc!");
- return -1;
- }
- PyBoundArrayMethodObject *bmeth =
- (PyBoundArrayMethodObject *)PyArrayMethod_FromSpec(spec);
- if (bmeth == NULL) {
- return -1;
- }
- int nargs = bmeth->method->nin + bmeth->method->nout;
- PyObject *dtypes = PyArray_TupleFromItems(
- nargs, (PyObject **)bmeth->dtypes, 1);
- if (dtypes == NULL) {
- return -1;
- }
- PyObject *info = PyTuple_Pack(2, dtypes, bmeth->method);
- Py_DECREF(bmeth);
- Py_DECREF(dtypes);
- if (info == NULL) {
- return -1;
- }
- return PyUFunc_AddLoop((PyUFuncObject *)ufunc, info, 0);
-}
+NPY_NO_EXPORT int
+PyUFunc_AddLoopFromSpec(PyUFuncObject *ufunc, PyObject *info, int ignore_duplicate);
+
/*
* Function is defined in umath/wrapping_array_method.c
diff --git a/numpy/core/src/multiarray/iterators.c b/numpy/core/src/multiarray/iterators.c
index f959162fd..95aa11d2d 100644
--- a/numpy/core/src/multiarray/iterators.c
+++ b/numpy/core/src/multiarray/iterators.c
@@ -827,7 +827,8 @@ iter_ass_subscript(PyArrayIterObject *self, PyObject *ind, PyObject *val)
if (PyBool_Check(ind)) {
retval = 0;
if (PyObject_IsTrue(ind)) {
- retval = PyArray_Pack(PyArray_DESCR(self->ao), self->dataptr, val);
+ retval = PyArray_Pack(
+ PyArray_DESCR(self->ao), self->dataptr, val);
}
goto finish;
}
diff --git a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
index e313d2447..8e3afd3cc 100644
--- a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
+++ b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
@@ -13,6 +13,7 @@
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
#define _MULTIARRAYMODULE
+#define _UMATHMODULE
#include <numpy/arrayobject.h>
#include <numpy/npy_cpu.h>
#include <numpy/halffloat.h>
@@ -22,6 +23,7 @@
#include "array_method.h"
#include "usertypes.h"
+#include "umathmodule.h"
/*
* x86 platform works with unaligned access but the compiler is allowed to
@@ -1557,14 +1559,16 @@ mapiter_trivial_@name@(PyArrayObject *self, PyArrayObject *ind,
* General advanced indexing iteration.
*/
NPY_NO_EXPORT int
-mapiter_@name@(PyArrayMapIterObject *mit)
+mapiter_@name@(
+ PyArrayMapIterObject *mit, NPY_cast_info *cast_info,
+ NPY_ARRAYMETHOD_FLAGS flags, int is_aligned)
{
npy_intp *counter, count;
- int i, is_aligned;
+ int i;
/* Cached mit info */
int numiter = mit->numiter;
- int needs_api = mit->needs_api;
+ int needs_api = (flags & NPY_METH_REQUIRES_PYAPI) != 0;
/* Constant information */
npy_intp fancy_dims[NPY_MAXDIMS];
npy_intp fancy_strides[NPY_MAXDIMS];
@@ -1586,13 +1590,6 @@ mapiter_@name@(PyArrayMapIterObject *mit)
fancy_strides[i] = mit->fancy_strides[i];
}
- /*
- * Alignment information (swapping is never needed, since we buffer),
- * could also check extra_op is buffered, but it should rarely matter.
- */
-
- is_aligned = IsUintAligned(array) && IsUintAligned(mit->extra_op);
-
if (mit->size == 0) {
return 0;
}
@@ -1600,9 +1597,11 @@ mapiter_@name@(PyArrayMapIterObject *mit)
if (mit->subspace_iter == NULL) {
/*
* Item by item copy situation, the operand is buffered
- * so use copyswap.
+ * so use copyswap. The iterator may not do any transfers, so may
+ * not have set `needs_api` yet, set it if necessary:
*/
- PyArray_CopySwapFunc *copyswap = PyArray_DESCR(array)->f->copyswap;
+ needs_api |= PyDataType_REFCHK(PyArray_DESCR(array));
+ PyArray_CopySwapFunc *copyswap = PyArray_DESCR(array)->f->copyswap;
/* We have only one iterator handling everything */
counter = NpyIter_GetInnerLoopSizePtr(mit->outer);
@@ -1715,28 +1714,9 @@ mapiter_@name@(PyArrayMapIterObject *mit)
int is_subiter_trivial = 0; /* has three states */
npy_intp reset_offsets[2] = {0, 0};
- /* Use strided transfer functions for the inner loop */
- npy_intp fixed_strides[2];
-
- /*
- * Get a dtype transfer function, since there are no
- * buffers, this is safe.
- */
- NpyIter_GetInnerFixedStrideArray(mit->subspace_iter, fixed_strides);
-
- NPY_cast_info cast_info;
- if (PyArray_GetDTypeTransferFunction(is_aligned,
-#if @isget@
- fixed_strides[0], fixed_strides[1],
- PyArray_DESCR(array), PyArray_DESCR(mit->extra_op),
-#else
- fixed_strides[1], fixed_strides[0],
- PyArray_DESCR(mit->extra_op), PyArray_DESCR(array),
-#endif
- 0,
- &cast_info,
- &needs_api) != NPY_SUCCEED) {
- return -1;
+ /* Note: it may make sense to refactor `needs_api` out in this branch */
+ if (flags & NPY_METH_REQUIRES_PYAPI) {
+ needs_api = 1;
}
counter = NpyIter_GetInnerLoopSizePtr(mit->subspace_iter);
@@ -1771,7 +1751,6 @@ mapiter_@name@(PyArrayMapIterObject *mit)
#if @isget@ && @one_iter@
if (check_and_adjust_index(&indval, fancy_dims[i],
iteraxis, _save) < 0 ) {
- NPY_cast_info_xfree(&cast_info);
return -1;
}
#else
@@ -1803,7 +1782,6 @@ mapiter_@name@(PyArrayMapIterObject *mit)
&errmsg)) {
NPY_END_THREADS;
PyErr_SetString(PyExc_ValueError, errmsg);
- NPY_cast_info_xfree(&cast_info);
return -1;
}
if (is_subiter_trivial != 0) {
@@ -1833,7 +1811,6 @@ mapiter_@name@(PyArrayMapIterObject *mit)
* not at all...
*/
if (needs_api && PyErr_Occurred()) {
- NPY_cast_info_xfree(&cast_info);
return -1;
}
#endif
@@ -1841,21 +1818,19 @@ mapiter_@name@(PyArrayMapIterObject *mit)
do {
#if @isget@
- if (NPY_UNLIKELY(cast_info.func(&cast_info.context,
+ if (NPY_UNLIKELY(cast_info->func(&cast_info->context,
subspace_ptrs, counter, subspace_strides,
- cast_info.auxdata) < 0)) {
+ cast_info->auxdata) < 0)) {
NPY_END_THREADS;
- NPY_cast_info_xfree(&cast_info);
return -1;
}
#else
/* The operand order is reversed here */
char *args[2] = {subspace_ptrs[1], subspace_ptrs[0]};
npy_intp strides[2] = {subspace_strides[1], subspace_strides[0]};
- if (NPY_UNLIKELY(cast_info.func(&cast_info.context,
- args, counter, strides, cast_info.auxdata) < 0)) {
+ if (NPY_UNLIKELY(cast_info->func(&cast_info->context,
+ args, counter, strides, cast_info->auxdata) < 0)) {
NPY_END_THREADS;
- NPY_cast_info_xfree(&cast_info);
return -1;
}
#endif
@@ -1866,8 +1841,6 @@ mapiter_@name@(PyArrayMapIterObject *mit)
NPY_END_THREADS;
}
/**end repeat1**/
-
- NPY_cast_info_xfree(&cast_info);
}
return 0;
}
diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index 1a2ade11b..98c2d7eda 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -1,11 +1,14 @@
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
#define _MULTIARRAYMODULE
+#define _UMATHMODULE
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <structmember.h>
#include "numpy/arrayobject.h"
+#include "numpy/npy_math.h"
+
#include "arrayobject.h"
#include "npy_config.h"
@@ -23,6 +26,11 @@
#include "mem_overlap.h"
#include "array_assign.h"
#include "array_coercion.h"
+/* TODO: Only for `NpyIter_GetTransferFlags` until it is public */
+#define NPY_ITERATOR_IMPLEMENTATION_CODE
+#include "nditer_impl.h"
+
+#include "umathmodule.h"
#define HAS_INTEGER 1
@@ -914,7 +922,6 @@ array_boolean_subscript(PyArrayObject *self,
char *ret_data;
PyArray_Descr *dtype;
PyArrayObject *ret;
- int needs_api = 0;
size = count_boolean_trues(PyArray_NDIM(bmask), PyArray_DATA(bmask),
PyArray_DIMS(bmask), PyArray_STRIDES(bmask));
@@ -962,13 +969,18 @@ array_boolean_subscript(PyArrayObject *self,
/* Get a dtype transfer function */
NpyIter_GetInnerFixedStrideArray(iter, fixed_strides);
NPY_cast_info cast_info;
+ /*
+ * TODO: Ignoring cast flags, since this is only ever a copy. In
+ * principle that may not be quite right in some future?
+ */
+ NPY_ARRAYMETHOD_FLAGS cast_flags;
if (PyArray_GetDTypeTransferFunction(
IsUintAligned(self) && IsAligned(self),
fixed_strides[0], itemsize,
dtype, dtype,
0,
&cast_info,
- &needs_api) != NPY_SUCCEED) {
+ &cast_flags) != NPY_SUCCEED) {
Py_DECREF(ret);
NpyIter_Deallocate(iter);
return NULL;
@@ -1068,7 +1080,6 @@ array_assign_boolean_subscript(PyArrayObject *self,
{
npy_intp size, v_stride;
char *v_data;
- int needs_api = 0;
npy_intp bmask_size;
if (PyArray_DESCR(bmask)->type_num != NPY_BOOL) {
@@ -1164,6 +1175,7 @@ array_assign_boolean_subscript(PyArrayObject *self,
/* Get a dtype transfer function */
NpyIter_GetInnerFixedStrideArray(iter, fixed_strides);
NPY_cast_info cast_info;
+ NPY_ARRAYMETHOD_FLAGS cast_flags;
if (PyArray_GetDTypeTransferFunction(
IsUintAligned(self) && IsAligned(self) &&
IsUintAligned(v) && IsAligned(v),
@@ -1171,14 +1183,17 @@ array_assign_boolean_subscript(PyArrayObject *self,
PyArray_DESCR(v), PyArray_DESCR(self),
0,
&cast_info,
- &needs_api) != NPY_SUCCEED) {
+ &cast_flags) != NPY_SUCCEED) {
NpyIter_Deallocate(iter);
return -1;
}
- if (!needs_api) {
+ if (!(cast_flags & NPY_METH_REQUIRES_PYAPI)) {
NPY_BEGIN_THREADS_NDITER(iter);
}
+ if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+ npy_clear_floatstatus_barrier((char *)self);
+ }
npy_intp strides[2] = {v_stride, self_stride};
@@ -1209,7 +1224,7 @@ array_assign_boolean_subscript(PyArrayObject *self,
}
} while (iternext(iter));
- if (!needs_api) {
+ if (!(cast_flags & NPY_METH_REQUIRES_PYAPI)) {
NPY_END_THREADS;
}
@@ -1217,6 +1232,12 @@ array_assign_boolean_subscript(PyArrayObject *self,
if (!NpyIter_Deallocate(iter)) {
res = -1;
}
+ if (res == 0 && !(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+ int fpes = npy_get_floatstatus_barrier((char *)self);
+ if (fpes && PyUFunc_GiveFloatingpointErrors("cast", fpes) < 0) {
+ return -1;
+ }
+ }
}
return res;
@@ -1414,6 +1435,8 @@ array_subscript(PyArrayObject *self, PyObject *op)
int index_type;
int index_num;
int i, ndim, fancy_ndim;
+ NPY_cast_info cast_info = {.func = NULL};
+
/*
* Index info array. We can have twice as many indices as dimensions
* (because of None). The + 1 is to not need to check as much.
@@ -1579,7 +1602,43 @@ array_subscript(PyArrayObject *self, PyObject *op)
goto finish;
}
- if (mapiter_get(mit) < 0) {
+ /*
+ * Alignment information (swapping is never needed, since we buffer),
+ * could also check extra_op is buffered, but it should rarely matter.
+ */
+ int is_aligned = IsUintAligned(self) && IsUintAligned(mit->extra_op);
+ /*
+ * NOTE: Getting never actually casts, so we currently do not bother to do
+ * the full checks (floating point errors) here (unlike assignment).
+ */
+ int meth_flags = NpyIter_GetTransferFlags(mit->outer);
+ if (mit->extra_op_iter) {
+ int extra_op_flags = NpyIter_GetTransferFlags(mit->extra_op_iter);
+ meth_flags = PyArrayMethod_COMBINED_FLAGS(meth_flags, extra_op_flags);
+ }
+
+ if (mit->subspace_iter != NULL) {
+ int extra_op_flags = NpyIter_GetTransferFlags(mit->subspace_iter);
+ meth_flags = PyArrayMethod_COMBINED_FLAGS(meth_flags, extra_op_flags);
+
+ NPY_ARRAYMETHOD_FLAGS transfer_flags;
+ npy_intp fixed_strides[2];
+ /*
+ * Get a dtype transfer function, since there are no
+ * buffers, this is safe.
+ */
+ NpyIter_GetInnerFixedStrideArray(mit->subspace_iter, fixed_strides);
+
+ if (PyArray_GetDTypeTransferFunction(is_aligned,
+ fixed_strides[0], fixed_strides[1],
+ PyArray_DESCR(self), PyArray_DESCR(mit->extra_op),
+ 0, &cast_info, &transfer_flags) != NPY_SUCCEED) {
+ goto finish;
+ }
+ meth_flags = PyArrayMethod_COMBINED_FLAGS(meth_flags, transfer_flags);
+ }
+
+ if (mapiter_get(mit, &cast_info, meth_flags, is_aligned) < 0) {
goto finish;
}
@@ -1614,6 +1673,7 @@ array_subscript(PyArrayObject *self, PyObject *op)
}
finish:
+ NPY_cast_info_xfree(&cast_info);
Py_XDECREF(mit);
Py_XDECREF(view);
/* Clean up indices */
@@ -1699,6 +1759,9 @@ array_assign_subscript(PyArrayObject *self, PyObject *ind, PyObject *op)
PyArrayMapIterObject *mit = NULL;
+ /* When a subspace is used, casting is done manually. */
+ NPY_cast_info cast_info = {.func = NULL};
+
if (op == NULL) {
PyErr_SetString(PyExc_ValueError,
"cannot delete array elements");
@@ -1871,7 +1934,6 @@ array_assign_subscript(PyArrayObject *self, PyObject *ind, PyObject *op)
index_num == 1 && tmp_arr) {
/* The array being indexed has one dimension and it is a fancy index */
PyArrayObject *ind = (PyArrayObject*)indices[0].object;
-
/* Check if the type is equivalent */
if (PyArray_EquivTypes(PyArray_DESCR(self),
PyArray_DESCR(tmp_arr)) &&
@@ -1935,12 +1997,50 @@ array_assign_subscript(PyArrayObject *self, PyObject *ind, PyObject *op)
}
}
- /* Can now reset the outer iterator (delayed bufalloc) */
- if (NpyIter_Reset(mit->outer, NULL) < 0) {
+ if (PyArray_MapIterCheckIndices(mit) < 0) {
goto fail;
}
- if (PyArray_MapIterCheckIndices(mit) < 0) {
+ /*
+ * Alignment information (swapping is never needed, since we buffer),
+ * could also check extra_op is buffered, but it should rarely matter.
+ */
+ int is_aligned = IsUintAligned(self) && IsUintAligned(mit->extra_op);
+ int meth_flags = NpyIter_GetTransferFlags(mit->outer);
+
+ if (mit->extra_op_iter) {
+ int extra_op_flags = NpyIter_GetTransferFlags(mit->extra_op_iter);
+ meth_flags = PyArrayMethod_COMBINED_FLAGS(meth_flags, extra_op_flags);
+ }
+
+ if (mit->subspace_iter != NULL) {
+ int extra_op_flags = NpyIter_GetTransferFlags(mit->subspace_iter);
+ meth_flags = PyArrayMethod_COMBINED_FLAGS(meth_flags, extra_op_flags);
+
+ NPY_ARRAYMETHOD_FLAGS transfer_flags;
+ npy_intp fixed_strides[2];
+
+ /*
+ * Get a dtype transfer function, since there are no
+ * buffers, this is safe.
+ */
+ NpyIter_GetInnerFixedStrideArray(mit->subspace_iter, fixed_strides);
+
+ if (PyArray_GetDTypeTransferFunction(is_aligned,
+ fixed_strides[1], fixed_strides[0],
+ PyArray_DESCR(mit->extra_op), PyArray_DESCR(self),
+ 0, &cast_info, &transfer_flags) != NPY_SUCCEED) {
+ goto fail;
+ }
+ meth_flags = PyArrayMethod_COMBINED_FLAGS(meth_flags, transfer_flags);
+ }
+
+ if (!(meth_flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+ npy_clear_floatstatus_barrier((char *)mit);
+ }
+
+ /* Can now reset the outer iterator (delayed bufalloc) */
+ if (NpyIter_Reset(mit->outer, NULL) < 0) {
goto fail;
}
@@ -1948,11 +2048,17 @@ array_assign_subscript(PyArrayObject *self, PyObject *ind, PyObject *op)
* Could add a casting check, but apparently most assignments do
* not care about safe casting.
*/
-
- if (mapiter_set(mit) < 0) {
+ if (mapiter_set(mit, &cast_info, meth_flags, is_aligned) < 0) {
goto fail;
}
+ if (!(meth_flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+ int fpes = npy_get_floatstatus_barrier((char *)mit);
+ if (fpes && PyUFunc_GiveFloatingpointErrors("cast", fpes) < 0) {
+ goto fail;
+ }
+ }
+
Py_DECREF(mit);
goto success;
@@ -1961,6 +2067,8 @@ array_assign_subscript(PyArrayObject *self, PyObject *ind, PyObject *op)
Py_XDECREF((PyObject *)view);
Py_XDECREF((PyObject *)tmp_arr);
Py_XDECREF((PyObject *)mit);
+ NPY_cast_info_xfree(&cast_info);
+
for (i=0; i < index_num; i++) {
Py_XDECREF(indices[i].object);
}
@@ -1969,6 +2077,8 @@ array_assign_subscript(PyArrayObject *self, PyObject *ind, PyObject *op)
success:
Py_XDECREF((PyObject *)view);
Py_XDECREF((PyObject *)tmp_arr);
+ NPY_cast_info_xfree(&cast_info);
+
for (i=0; i < index_num; i++) {
Py_XDECREF(indices[i].object);
}
@@ -2089,7 +2199,7 @@ _nonzero_indices(PyObject *myBool, PyArrayObject **arrays)
/* Reset the map iterator to the beginning */
-NPY_NO_EXPORT void
+NPY_NO_EXPORT int
PyArray_MapIterReset(PyArrayMapIterObject *mit)
{
npy_intp indval;
@@ -2097,12 +2207,16 @@ PyArray_MapIterReset(PyArrayMapIterObject *mit)
int i;
if (mit->size == 0) {
- return;
+ return 0;
}
- NpyIter_Reset(mit->outer, NULL);
+ if (!NpyIter_Reset(mit->outer, NULL)) {
+ return -1;
+ }
if (mit->extra_op_iter) {
- NpyIter_Reset(mit->extra_op_iter, NULL);
+ if (!NpyIter_Reset(mit->extra_op_iter, NULL)) {
+ return -1;
+ }
baseptrs[1] = mit->extra_op_ptrs[0];
}
@@ -2119,14 +2233,16 @@ PyArray_MapIterReset(PyArrayMapIterObject *mit)
mit->dataptr = baseptrs[0];
if (mit->subspace_iter) {
- NpyIter_ResetBasePointers(mit->subspace_iter, baseptrs, NULL);
+ if (!NpyIter_ResetBasePointers(mit->subspace_iter, baseptrs, NULL)) {
+ return -1;
+ }
mit->iter_count = *NpyIter_GetInnerLoopSizePtr(mit->subspace_iter);
}
else {
mit->iter_count = *NpyIter_GetInnerLoopSizePtr(mit->outer);
}
- return;
+ return 0;
}
@@ -2592,13 +2708,14 @@ PyArray_MapIterNew(npy_index_info *indices , int index_num, int index_type,
}
/* create new MapIter object */
- mit = (PyArrayMapIterObject *)PyArray_malloc(sizeof(PyArrayMapIterObject));
+ mit = (PyArrayMapIterObject *)PyArray_malloc(
+ sizeof(PyArrayMapIterObject) + sizeof(NPY_cast_info));
if (mit == NULL) {
Py_DECREF(intp_descr);
return NULL;
}
/* set all attributes of mapiter to zero */
- memset(mit, 0, sizeof(PyArrayMapIterObject));
+ memset(mit, 0, sizeof(PyArrayMapIterObject) + sizeof(NPY_cast_info));
PyObject_Init((PyObject *)mit, &PyArrayMapIter_Type);
Py_INCREF(arr);
@@ -2874,6 +2991,11 @@ PyArray_MapIterNew(npy_index_info *indices , int index_num, int index_type,
/* If external array is iterated, and no subspace is needed */
nops = mit->numiter;
+
+ if (!uses_subspace) {
+ outer_flags |= NPY_ITER_EXTERNAL_LOOP;
+ }
+
if (extra_op_flags && !uses_subspace) {
/*
* NOTE: This small limitation should practically not matter.
@@ -2921,9 +3043,6 @@ PyArray_MapIterNew(npy_index_info *indices , int index_num, int index_type,
if (mit->outer == NULL) {
goto fail;
}
- if (!uses_subspace) {
- NpyIter_EnableExternalLoop(mit->outer);
- }
mit->outer_next = NpyIter_GetIterNext(mit->outer, NULL);
if (mit->outer_next == NULL) {
@@ -3061,7 +3180,7 @@ PyArray_MapIterNew(npy_index_info *indices , int index_num, int index_type,
mit->subspace_ptrs = NpyIter_GetDataPtrArray(mit->subspace_iter);
mit->subspace_strides = NpyIter_GetInnerStrideArray(mit->subspace_iter);
- if (NpyIter_IterationNeedsAPI(mit->outer)) {
+ if (NpyIter_IterationNeedsAPI(mit->subspace_iter)) {
mit->needs_api = 1;
/*
* NOTE: In this case, need to call PyErr_Occurred() after
@@ -3212,9 +3331,12 @@ PyArray_MapIterArrayCopyIfOverlap(PyArrayObject * a, PyObject * index,
goto fail;
}
+ if (PyArray_MapIterReset(mit) < 0) {
+ goto fail;
+ }
+
Py_XDECREF(a_copy);
Py_XDECREF(subspace);
- PyArray_MapIterReset(mit);
for (i=0; i < index_num; i++) {
Py_XDECREF(indices[i].object);
diff --git a/numpy/core/src/multiarray/mapping.h b/numpy/core/src/multiarray/mapping.h
index e929b8b3f..4e5d06238 100644
--- a/numpy/core/src/multiarray/mapping.h
+++ b/numpy/core/src/multiarray/mapping.h
@@ -51,7 +51,7 @@ array_assign_item(PyArrayObject *self, Py_ssize_t i, PyObject *v);
* Prototypes for Mapping calls --- not part of the C-API
* because only useful as part of a getitem call.
*/
-NPY_NO_EXPORT void
+NPY_NO_EXPORT int
PyArray_MapIterReset(PyArrayMapIterObject *mit);
NPY_NO_EXPORT void
diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c
index 5209d6914..96d0c893d 100644
--- a/numpy/core/src/multiarray/multiarraymodule.c
+++ b/numpy/core/src/multiarray/multiarraymodule.c
@@ -85,6 +85,10 @@ NPY_NO_EXPORT int NPY_NUMUSERTYPES = 0;
NPY_NO_EXPORT int initscalarmath(PyObject *);
NPY_NO_EXPORT int set_matmul_flags(PyObject *d); /* in ufunc_object.c */
+/* From umath/string_ufuncs.cpp/h */
+NPY_NO_EXPORT PyObject *
+_umath_strings_richcompare(
+ PyArrayObject *self, PyArrayObject *other, int cmp_op, int rstrip);
/*
* global variable to determine if legacy printing is enabled, accessible from
@@ -138,12 +142,12 @@ PyArray_GetPriority(PyObject *obj, double default_)
}
priority = PyFloat_AsDouble(ret);
+ Py_DECREF(ret);
if (error_converting(priority)) {
/* TODO[gh-14801]: propagate crashes for bad priority? */
PyErr_Clear();
return default_;
}
- Py_DECREF(ret);
return priority;
}
@@ -3726,6 +3730,12 @@ format_longfloat(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject *kwds)
TrimMode_LeaveOneZero, -1, -1);
}
+
+/*
+ * The only purpose of this function is that it allows the "rstrip".
+ * From my (@seberg's) perspective, this function should be deprecated
+ * and I do not think it matters if it is not particularly fast.
+ */
static PyObject *
compare_chararrays(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject *kwds)
{
@@ -3791,7 +3801,7 @@ compare_chararrays(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject *kwds)
return NULL;
}
if (PyArray_ISSTRING(newarr) && PyArray_ISSTRING(newoth)) {
- res = _strings_richcompare(newarr, newoth, cmp_op, rstrip != 0);
+ res = _umath_strings_richcompare(newarr, newoth, cmp_op, rstrip != 0);
}
else {
PyErr_SetString(PyExc_TypeError,
diff --git a/numpy/core/src/multiarray/nditer_api.c b/numpy/core/src/multiarray/nditer_api.c
index 860c8c1f6..b80312e06 100644
--- a/numpy/core/src/multiarray/nditer_api.c
+++ b/numpy/core/src/multiarray/nditer_api.c
@@ -857,6 +857,13 @@ NpyIter_RequiresBuffering(NpyIter *iter)
* Whether the iteration loop, and in particular the iternext()
* function, needs API access. If this is true, the GIL must
* be retained while iterating.
+ *
+ * NOTE: Internally (currently), `NpyIter_GetTransferFlags` will
+ * additionally provide information on whether floating point errors
+ * may be given during casts. The flags only require the API use
+ * necessary for buffering though. So an iterator which does not require
+ * buffering may indicate `NpyIter_IterationNeedsAPI`, but not include
+ * the flag in `NpyIter_GetTransferFlags`.
*/
NPY_NO_EXPORT npy_bool
NpyIter_IterationNeedsAPI(NpyIter *iter)
@@ -864,6 +871,21 @@ NpyIter_IterationNeedsAPI(NpyIter *iter)
return (NIT_ITFLAGS(iter)&NPY_ITFLAG_NEEDSAPI) != 0;
}
+
+/*
+ * Fetch the ArrayMethod (runtime) flags for all "transfer functions" (i.e.
+ * copy to buffer/casts).
+ *
+ * The value is read from the bits of the iterator's internal flags located
+ * at and above `NPY_ITFLAG_TRANSFERFLAGS_SHIFT`.
+ *
+ * TODO: This should be public API, but that only makes sense when the
+ * ArrayMethod API is made public.
+ */
+NPY_NO_EXPORT int
+NpyIter_GetTransferFlags(NpyIter *iter)
+{
+ /* Shift out the regular iterator flags; only the transfer flags remain. */
+ return NIT_ITFLAGS(iter) >> NPY_ITFLAG_TRANSFERFLAGS_SHIFT;
+}
+
+
/*NUMPY_API
* Gets the number of dimensions being iterated
*/
diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c
index f82a9624e..a383c63e8 100644
--- a/numpy/core/src/multiarray/nditer_constr.c
+++ b/numpy/core/src/multiarray/nditer_constr.c
@@ -3141,7 +3141,9 @@ npyiter_allocate_transfer_functions(NpyIter *iter)
npy_intp *strides = NAD_STRIDES(axisdata), op_stride;
NpyIter_TransferInfo *transferinfo = NBF_TRANSFERINFO(bufferdata);
- int needs_api = 0;
+ /* combined cast flags, the new cast flags for each cast: */
+ NPY_ARRAYMETHOD_FLAGS cflags = PyArrayMethod_MINIMAL_FLAGS;
+ NPY_ARRAYMETHOD_FLAGS nc_flags;
for (iop = 0; iop < nop; ++iop) {
npyiter_opitflags flags = op_itflags[iop];
@@ -3167,10 +3169,11 @@ npyiter_allocate_transfer_functions(NpyIter *iter)
op_dtype[iop],
move_references,
&transferinfo[iop].read,
- &needs_api) != NPY_SUCCEED) {
+ &nc_flags) != NPY_SUCCEED) {
iop -= 1; /* This one cannot be cleaned up yet. */
goto fail;
}
+ cflags = PyArrayMethod_COMBINED_FLAGS(cflags, nc_flags);
}
else {
transferinfo[iop].read.func = NULL;
@@ -3199,9 +3202,10 @@ npyiter_allocate_transfer_functions(NpyIter *iter)
mask_dtype,
move_references,
&transferinfo[iop].write,
- &needs_api) != NPY_SUCCEED) {
+ &nc_flags) != NPY_SUCCEED) {
goto fail;
}
+ cflags = PyArrayMethod_COMBINED_FLAGS(cflags, nc_flags);
}
else {
if (PyArray_GetDTypeTransferFunction(
@@ -3212,9 +3216,10 @@ npyiter_allocate_transfer_functions(NpyIter *iter)
PyArray_DESCR(op[iop]),
move_references,
&transferinfo[iop].write,
- &needs_api) != NPY_SUCCEED) {
+ &nc_flags) != NPY_SUCCEED) {
goto fail;
}
+ cflags = PyArrayMethod_COMBINED_FLAGS(cflags, nc_flags);
}
}
/* If no write back but there are references make a decref fn */
@@ -3230,9 +3235,10 @@ npyiter_allocate_transfer_functions(NpyIter *iter)
op_dtype[iop], NULL,
1,
&transferinfo[iop].write,
- &needs_api) != NPY_SUCCEED) {
+ &nc_flags) != NPY_SUCCEED) {
goto fail;
}
+ cflags = PyArrayMethod_COMBINED_FLAGS(cflags, nc_flags);
}
else {
transferinfo[iop].write.func = NULL;
@@ -3244,8 +3250,12 @@ npyiter_allocate_transfer_functions(NpyIter *iter)
}
}
- /* If any of the dtype transfer functions needed the API, flag it */
- if (needs_api) {
+ /* Store the combined transfer flags on the iterator */
+ NIT_ITFLAGS(iter) |= cflags << NPY_ITFLAG_TRANSFERFLAGS_SHIFT;
+ assert(NIT_ITFLAGS(iter) >> NPY_ITFLAG_TRANSFERFLAGS_SHIFT == cflags);
+
+ /* If any of the dtype transfer functions needed the API, flag it. */
+ if (cflags & NPY_METH_REQUIRES_PYAPI) {
NIT_ITFLAGS(iter) |= NPY_ITFLAG_NEEDSAPI;
}
diff --git a/numpy/core/src/multiarray/nditer_impl.h b/numpy/core/src/multiarray/nditer_impl.h
index 2a82b7e54..459675ea8 100644
--- a/numpy/core/src/multiarray/nditer_impl.h
+++ b/numpy/core/src/multiarray/nditer_impl.h
@@ -76,33 +76,38 @@
/* Internal iterator flags */
/* The perm is the identity */
-#define NPY_ITFLAG_IDENTPERM 0x0001
+#define NPY_ITFLAG_IDENTPERM (1 << 0)
/* The perm has negative entries (indicating flipped axes) */
-#define NPY_ITFLAG_NEGPERM 0x0002
+#define NPY_ITFLAG_NEGPERM (1 << 1)
/* The iterator is tracking an index */
-#define NPY_ITFLAG_HASINDEX 0x0004
+#define NPY_ITFLAG_HASINDEX (1 << 2)
/* The iterator is tracking a multi-index */
-#define NPY_ITFLAG_HASMULTIINDEX 0x0008
+#define NPY_ITFLAG_HASMULTIINDEX (1 << 3)
/* The iteration order was forced on construction */
-#define NPY_ITFLAG_FORCEDORDER 0x0010
+#define NPY_ITFLAG_FORCEDORDER (1 << 4)
/* The inner loop is handled outside the iterator */
-#define NPY_ITFLAG_EXLOOP 0x0020
+#define NPY_ITFLAG_EXLOOP (1 << 5)
/* The iterator is ranged */
-#define NPY_ITFLAG_RANGE 0x0040
+#define NPY_ITFLAG_RANGE (1 << 6)
/* The iterator is buffered */
-#define NPY_ITFLAG_BUFFER 0x0080
+#define NPY_ITFLAG_BUFFER (1 << 7)
/* The iterator should grow the buffered inner loop when possible */
-#define NPY_ITFLAG_GROWINNER 0x0100
+#define NPY_ITFLAG_GROWINNER (1 << 8)
/* There is just one iteration, can specialize iternext for that */
-#define NPY_ITFLAG_ONEITERATION 0x0200
+#define NPY_ITFLAG_ONEITERATION (1 << 9)
/* Delay buffer allocation until first Reset* call */
-#define NPY_ITFLAG_DELAYBUF 0x0400
+#define NPY_ITFLAG_DELAYBUF (1 << 10)
/* Iteration needs API access during iternext */
-#define NPY_ITFLAG_NEEDSAPI 0x0800
+#define NPY_ITFLAG_NEEDSAPI (1 << 11)
/* Iteration includes one or more operands being reduced */
-#define NPY_ITFLAG_REDUCE 0x1000
+#define NPY_ITFLAG_REDUCE (1 << 12)
/* Reduce iteration doesn't need to recalculate reduce loops next time */
-#define NPY_ITFLAG_REUSE_REDUCE_LOOPS 0x2000
+#define NPY_ITFLAG_REUSE_REDUCE_LOOPS (1 << 13)
+/*
+ * Offset of (combined) ArrayMethod flags for all transfer functions.
+ * For now, we use the top 8 bits.
+ */
+#define NPY_ITFLAG_TRANSFERFLAGS_SHIFT 24
/* Internal iterator per-operand iterator flags */
@@ -356,4 +361,12 @@ npyiter_copy_to_buffers(NpyIter *iter, char **prev_dataptrs);
NPY_NO_EXPORT void
npyiter_clear_buffers(NpyIter *iter);
+/*
+ * Function to get the ArrayMethod flags of the transfer functions.
+ * TODO: This function should be public and removed from `nditer_impl.h`, but
+ * this requires making the ArrayMethod flags public API first.
+ */
+NPY_NO_EXPORT int
+NpyIter_GetTransferFlags(NpyIter *iter);
+
#endif /* NUMPY_CORE_SRC_MULTIARRAY_NDITER_IMPL_H_ */
diff --git a/numpy/core/src/multiarray/textreading/readtext.c b/numpy/core/src/multiarray/textreading/readtext.c
index 9804fd462..a5db1cb77 100644
--- a/numpy/core/src/multiarray/textreading/readtext.c
+++ b/numpy/core/src/multiarray/textreading/readtext.c
@@ -270,6 +270,10 @@ _load_from_filelike(PyObject *NPY_UNUSED(mod),
}
/* Calloc just to not worry about overflow */
usecols = PyMem_Calloc(num_usecols, sizeof(Py_ssize_t));
+ if (usecols == NULL) {
+ PyErr_NoMemory();
+ return NULL;
+ }
for (Py_ssize_t i = 0; i < num_usecols; i++) {
PyObject *tmp = PySequence_GetItem(usecols_obj, i);
if (tmp == NULL) {
diff --git a/numpy/core/src/multiarray/textreading/rows.c b/numpy/core/src/multiarray/textreading/rows.c
index e30ff835e..a72fb79d9 100644
--- a/numpy/core/src/multiarray/textreading/rows.c
+++ b/numpy/core/src/multiarray/textreading/rows.c
@@ -91,7 +91,7 @@ create_conv_funcs(
if (column < -num_fields || column >= num_fields) {
PyErr_Format(PyExc_ValueError,
"converter specified for column %zd, which is invalid "
- "for the number of fields %d.", column, num_fields);
+ "for the number of fields %zd.", column, num_fields);
goto error;
}
if (column < 0) {
@@ -319,7 +319,7 @@ read_rows(stream *s,
if (!usecols && (actual_num_fields != current_num_fields)) {
PyErr_Format(PyExc_ValueError,
- "the number of columns changed from %d to %d at row %zu; "
+ "the number of columns changed from %zd to %zd at row %zd; "
"use `usecols` to select a subset and avoid this error",
actual_num_fields, current_num_fields, row_count+1);
goto error;
@@ -382,9 +382,9 @@ read_rows(stream *s,
}
if (NPY_UNLIKELY((col < 0) || (col >= current_num_fields))) {
PyErr_Format(PyExc_ValueError,
- "invalid column index %d at row %zu with %d "
+ "invalid column index %zd at row %zd with %zd "
"columns",
- usecols[i], current_num_fields, row_count+1);
+ usecols[i], row_count+1, current_num_fields);
goto error;
}
}
@@ -419,7 +419,7 @@ read_rows(stream *s,
}
PyErr_Format(PyExc_ValueError,
"could not convert string %.100R to %S at "
- "row %zu, column %d.",
+ "row %zd, column %zd.",
string, field_types[f].descr, row_count, col+1);
Py_DECREF(string);
npy_PyErr_ChainExceptionsCause(exc, val, tb);
@@ -432,7 +432,12 @@ read_rows(stream *s,
}
tokenizer_clear(&ts);
- PyMem_FREE(conv_funcs);
+ if (conv_funcs != NULL) {
+ for (Py_ssize_t i = 0; i < actual_num_fields; i++) {
+ Py_XDECREF(conv_funcs[i]);
+ }
+ PyMem_FREE(conv_funcs);
+ }
if (data_array == NULL) {
assert(row_count == 0 && result_shape[0] == 0);
@@ -474,7 +479,12 @@ read_rows(stream *s,
return data_array;
error:
- PyMem_FREE(conv_funcs);
+ if (conv_funcs != NULL) {
+ for (Py_ssize_t i = 0; i < actual_num_fields; i++) {
+ Py_XDECREF(conv_funcs[i]);
+ }
+ PyMem_FREE(conv_funcs);
+ }
tokenizer_clear(&ts);
Py_XDECREF(data_array);
return NULL;
diff --git a/numpy/core/src/npymath/ieee754.c.src b/numpy/core/src/npymath/ieee754.c.src
index 4e6ddb712..5d1ea3a69 100644
--- a/numpy/core/src/npymath/ieee754.c.src
+++ b/numpy/core/src/npymath/ieee754.c.src
@@ -566,228 +566,38 @@ int npy_get_floatstatus() {
return npy_get_floatstatus_barrier(&x);
}
-/*
- * Functions to set the floating point status word.
- */
-
-#if (defined(__unix__) || defined(unix)) && !defined(USG)
-#include <sys/param.h>
-#endif
-
/*
- * Define floating point status functions. We must define
- * npy_get_floatstatus_barrier, npy_clear_floatstatus_barrier,
- * npy_set_floatstatus_{divbyzero, overflow, underflow, invalid}
- * for all supported platforms.
+ * General C99 code for floating point error handling. These functions mainly
+ * exist because `fenv.h` was not standardized in C89, so they gave better
+ * portability. This should be unnecessary with C99/C++11 and further
+ * functionality can be used from `fenv.h` directly.
*/
-
-
-/* Solaris --------------------------------------------------------*/
-/* --------ignoring SunOS ieee_flags approach, someone else can
-** deal with that! */
-#if defined(sun) || defined(__BSD__) || defined(__OpenBSD__) || \
- (defined(__FreeBSD__) && (__FreeBSD_version < 502114)) || \
- defined(__NetBSD__)
-#include <ieeefp.h>
-
-int npy_get_floatstatus_barrier(char * param)
-{
- int fpstatus = fpgetsticky();
- /*
- * By using a volatile, the compiler cannot reorder this call
- */
- if (param != NULL) {
- volatile char NPY_UNUSED(c) = *(char*)param;
- }
- return ((FP_X_DZ & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
- ((FP_X_OFL & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
- ((FP_X_UFL & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
- ((FP_X_INV & fpstatus) ? NPY_FPE_INVALID : 0);
-}
-
-int npy_clear_floatstatus_barrier(char * param)
-{
- int fpstatus = npy_get_floatstatus_barrier(param);
- fpsetsticky(0);
-
- return fpstatus;
-}
-
-void npy_set_floatstatus_divbyzero(void)
-{
- fpsetsticky(FP_X_DZ);
-}
-
-void npy_set_floatstatus_overflow(void)
-{
- fpsetsticky(FP_X_OFL);
-}
-
-void npy_set_floatstatus_underflow(void)
-{
- fpsetsticky(FP_X_UFL);
-}
-
-void npy_set_floatstatus_invalid(void)
-{
- fpsetsticky(FP_X_INV);
-}
-
-#elif defined(_AIX) && !defined(__GNUC__)
-#include <float.h>
-#include <fpxcp.h>
-
-int npy_get_floatstatus_barrier(char *param)
-{
- int fpstatus = fp_read_flag();
- /*
- * By using a volatile, the compiler cannot reorder this call
- */
- if (param != NULL) {
- volatile char NPY_UNUSED(c) = *(char*)param;
- }
- return ((FP_DIV_BY_ZERO & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
- ((FP_OVERFLOW & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
- ((FP_UNDERFLOW & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
- ((FP_INVALID & fpstatus) ? NPY_FPE_INVALID : 0);
-}
-
-int npy_clear_floatstatus_barrier(char * param)
-{
- int fpstatus = npy_get_floatstatus_barrier(param);
- fp_swap_flag(0);
-
- return fpstatus;
-}
-
-void npy_set_floatstatus_divbyzero(void)
-{
- fp_raise_xcp(FP_DIV_BY_ZERO);
-}
-
-void npy_set_floatstatus_overflow(void)
-{
- fp_raise_xcp(FP_OVERFLOW);
-}
-
-void npy_set_floatstatus_underflow(void)
-{
- fp_raise_xcp(FP_UNDERFLOW);
-}
-
-void npy_set_floatstatus_invalid(void)
-{
- fp_raise_xcp(FP_INVALID);
-}
-
-#elif defined(_MSC_VER) || (defined(__osf__) && defined(__alpha)) || \
- defined (__UCLIBC__) || (defined(__arc__) && defined(__GLIBC__))
+# include <fenv.h>
/*
- * By using a volatile floating point value,
- * the compiler is forced to actually do the requested
- * operations because of potential concurrency.
- *
- * We shouldn't write multiple values to a single
- * global here, because that would cause
- * a race condition.
+ * According to the C99 standard FE_DIVBYZERO, etc. may not be provided when
+ * unsupported. In such cases NumPy will not report these correctly, but we
+ * should still allow compiling (whether tests pass or not).
+ * By defining them as 0 locally, we make them no-ops. Unlike these defines,
+ * for example `musl` still defines all of the functions (as no-ops):
+ * https://git.musl-libc.org/cgit/musl/tree/src/fenv/fenv.c
+ * and does similar replacement in its tests:
+ * http://nsz.repo.hu/git/?p=libc-test;a=blob;f=src/common/mtest.h;h=706c1ba23ea8989b17a2f72ed1a919e187c06b6a;hb=HEAD#l30
*/
-static volatile double _npy_floatstatus_x,
- _npy_floatstatus_zero = 0.0, _npy_floatstatus_big = 1e300,
- _npy_floatstatus_small = 1e-300, _npy_floatstatus_inf;
-
-void npy_set_floatstatus_divbyzero(void)
-{
- _npy_floatstatus_x = 1.0 / _npy_floatstatus_zero;
-}
-
-void npy_set_floatstatus_overflow(void)
-{
- _npy_floatstatus_x = _npy_floatstatus_big * 1e300;
-}
-
-void npy_set_floatstatus_underflow(void)
-{
- _npy_floatstatus_x = _npy_floatstatus_small * 1e-300;
-}
-
-void npy_set_floatstatus_invalid(void)
-{
- _npy_floatstatus_inf = NPY_INFINITY;
- _npy_floatstatus_x = _npy_floatstatus_inf - NPY_INFINITY;
-}
-
-/* MS Windows -----------------------------------------------------*/
-#if defined(_MSC_VER)
-
-#include <float.h>
-
-int npy_get_floatstatus_barrier(char *param)
-{
- /*
- * By using a volatile, the compiler cannot reorder this call
- */
-#if defined(_WIN64)
- int fpstatus = _statusfp();
-#else
- /* windows enables sse on 32 bit, so check both flags */
- int fpstatus, fpstatus2;
- _statusfp2(&fpstatus, &fpstatus2);
- fpstatus |= fpstatus2;
+#ifndef FE_DIVBYZERO
+ #define FE_DIVBYZERO 0
#endif
- if (param != NULL) {
- volatile char NPY_UNUSED(c) = *(char*)param;
- }
- return ((SW_ZERODIVIDE & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
- ((SW_OVERFLOW & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
- ((SW_UNDERFLOW & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
- ((SW_INVALID & fpstatus) ? NPY_FPE_INVALID : 0);
-}
-
-int npy_clear_floatstatus_barrier(char *param)
-{
- int fpstatus = npy_get_floatstatus_barrier(param);
- _clearfp();
-
- return fpstatus;
-}
-
-/* OSF/Alpha (Tru64) ---------------------------------------------*/
-#elif defined(__osf__) && defined(__alpha)
-
-#include <machine/fpu.h>
-
-int npy_get_floatstatus_barrier(char *param)
-{
- unsigned long fpstatus = ieee_get_fp_control();
- /*
- * By using a volatile, the compiler cannot reorder this call
- */
- if (param != NULL) {
- volatile char NPY_UNUSED(c) = *(char*)param;
- }
- return ((IEEE_STATUS_DZE & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
- ((IEEE_STATUS_OVF & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
- ((IEEE_STATUS_UNF & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
- ((IEEE_STATUS_INV & fpstatus) ? NPY_FPE_INVALID : 0);
-}
-
-int npy_clear_floatstatus_barrier(char *param)
-{
- int fpstatus = npy_get_floatstatus_barrier(param);
- /* clear status bits as well as disable exception mode if on */
- ieee_set_fp_control(0);
-
- return fpstatus;
-}
-
+#ifndef FE_OVERFLOW
+ #define FE_OVERFLOW 0
+#endif
+#ifndef FE_UNDERFLOW
+ #define FE_UNDERFLOW 0
+#endif
+#ifndef FE_INVALID
+ #define FE_INVALID 0
#endif
-/* End of defined(_MSC_VER) || (defined(__osf__) && defined(__alpha)) */
-#else
-/* General GCC code, should work on most platforms */
-# include <fenv.h>
int npy_get_floatstatus_barrier(char* param)
{
@@ -839,4 +649,3 @@ void npy_set_floatstatus_invalid(void)
feraiseexcept(FE_INVALID);
}
-#endif
diff --git a/numpy/core/src/npymath/ieee754.cpp b/numpy/core/src/npymath/ieee754.cpp
index 2244004c0..27fcf7c6e 100644
--- a/numpy/core/src/npymath/ieee754.cpp
+++ b/numpy/core/src/npymath/ieee754.cpp
@@ -655,6 +655,30 @@ npy_get_floatstatus()
*/
#include <fenv.h>
+/*
+ * According to the C99 standard FE_DIVBYZERO, etc. may not be provided when
+ * unsupported. In such cases NumPy will not report these correctly, but we
+ * should still allow compiling (whether tests pass or not).
+ * By defining them as 0 locally, we make them no-ops. Unlike these defines,
+ * for example `musl` still defines all of the functions (as no-ops):
+ * https://git.musl-libc.org/cgit/musl/tree/src/fenv/fenv.c
+ * and does similar replacement in its tests:
+ * http://nsz.repo.hu/git/?p=libc-test;a=blob;f=src/common/mtest.h;h=706c1ba23ea8989b17a2f72ed1a919e187c06b6a;hb=HEAD#l30
+ */
+#ifndef FE_DIVBYZERO
+ #define FE_DIVBYZERO 0
+#endif
+#ifndef FE_OVERFLOW
+ #define FE_OVERFLOW 0
+#endif
+#ifndef FE_UNDERFLOW
+ #define FE_UNDERFLOW 0
+#endif
+#ifndef FE_INVALID
+ #define FE_INVALID 0
+#endif
+
+
extern "C" int
npy_get_floatstatus_barrier(char *param)
{
diff --git a/numpy/core/src/umath/dispatching.c b/numpy/core/src/umath/dispatching.c
index b8f102b3d..620335d88 100644
--- a/numpy/core/src/umath/dispatching.c
+++ b/numpy/core/src/umath/dispatching.c
@@ -145,6 +145,38 @@ PyUFunc_AddLoop(PyUFuncObject *ufunc, PyObject *info, int ignore_duplicate)
}
+/*
+ * Add loop directly to a ufunc from a given ArrayMethod spec.
+ *
+ * `ufunc` must be an instance of `PyUFunc_Type`, otherwise a TypeError is
+ * set.  A bound ArrayMethod is created from `spec` and registered on the
+ * ufunc as an `(dtypes, method)` info tuple via `PyUFunc_AddLoop`.
+ *
+ * Returns -1 with an error set if any step fails, otherwise the result of
+ * `PyUFunc_AddLoop`.  All temporary references are released on every path.
+ */
+NPY_NO_EXPORT int
+PyUFunc_AddLoopFromSpec(PyObject *ufunc, PyArrayMethod_Spec *spec)
+{
+    if (!PyObject_TypeCheck(ufunc, &PyUFunc_Type)) {
+        PyErr_SetString(PyExc_TypeError,
+                "ufunc object passed is not a ufunc!");
+        return -1;
+    }
+    PyBoundArrayMethodObject *bmeth =
+            (PyBoundArrayMethodObject *)PyArrayMethod_FromSpec(spec);
+    if (bmeth == NULL) {
+        return -1;
+    }
+    /* One DType per operand: inputs followed by outputs. */
+    int nargs = bmeth->method->nin + bmeth->method->nout;
+    PyObject *dtypes = PyArray_TupleFromItems(
+            nargs, (PyObject **)bmeth->dtypes, 1);
+    if (dtypes == NULL) {
+        /* The bound method is still owned here; do not leak it. */
+        Py_DECREF(bmeth);
+        return -1;
+    }
+    /* PyTuple_Pack takes its own references to both items. */
+    PyObject *info = PyTuple_Pack(2, dtypes, bmeth->method);
+    Py_DECREF(bmeth);
+    Py_DECREF(dtypes);
+    if (info == NULL) {
+        return -1;
+    }
+    /*
+     * NOTE(review): assumes PyUFunc_AddLoop does not steal `info` (it keeps
+     * its own reference when it stores the tuple), so our reference must be
+     * dropped here on success and on failure alike.
+     */
+    int res = PyUFunc_AddLoop((PyUFuncObject *)ufunc, info, 0);
+    Py_DECREF(info);
+    return res;
+}
+
+
/**
* Resolves the implementation to use, this uses typical multiple dispatching
* methods of finding the best matching implementation or resolver.
diff --git a/numpy/core/src/umath/dispatching.h b/numpy/core/src/umath/dispatching.h
index a7e9e88d0..f2ab0be2e 100644
--- a/numpy/core/src/umath/dispatching.h
+++ b/numpy/core/src/umath/dispatching.h
@@ -6,6 +6,9 @@
#include <numpy/ufuncobject.h>
#include "array_method.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
typedef int promoter_function(PyUFuncObject *ufunc,
PyArray_DTypeMeta *op_dtypes[], PyArray_DTypeMeta *signature[],
@@ -14,6 +17,9 @@ typedef int promoter_function(PyUFuncObject *ufunc,
NPY_NO_EXPORT int
PyUFunc_AddLoop(PyUFuncObject *ufunc, PyObject *info, int ignore_duplicate);
+NPY_NO_EXPORT int
+PyUFunc_AddLoopFromSpec(PyObject *ufunc, PyArrayMethod_Spec *spec);
+
NPY_NO_EXPORT PyArrayMethodObject *
promote_and_get_ufuncimpl(PyUFuncObject *ufunc,
PyArrayObject *const ops[],
@@ -41,5 +47,8 @@ object_only_ufunc_promoter(PyUFuncObject *ufunc,
NPY_NO_EXPORT int
install_logical_ufunc_promoter(PyObject *ufunc);
+#ifdef __cplusplus
+}
+#endif
#endif /*_NPY_DISPATCHING_H */
diff --git a/numpy/core/src/umath/extobj.c b/numpy/core/src/umath/extobj.c
index 6b9a27e26..893429107 100644
--- a/numpy/core/src/umath/extobj.c
+++ b/numpy/core/src/umath/extobj.c
@@ -267,6 +267,33 @@ _extract_pyvals(PyObject *ref, const char *name, int *bufsize,
}
/*
+ * Handler which uses the default `np.errstate` given that `fpe_errors` is
+ * already set. `fpe_errors` is typically the (nonzero) result of
+ * `npy_get_floatstatus_barrier`.
+ *
+ * Returns -1 on failure (an error was raised) and 0 on success.
+ */
+NPY_NO_EXPORT int
+PyUFunc_GiveFloatingpointErrors(const char *name, int fpe_errors)
+{
+    int bufsize, errmask;
+    PyObject *errobj;
+
+    /* Fetch bufsize, errmask and errobj for `name`; bail out on failure. */
+    if (PyUFunc_GetPyValues((char *)name, &bufsize, &errmask,
+                            &errobj) < 0) {
+        return -1;
+    }
+
+    int first = 1;
+    int status = 0;
+    if (PyUFunc_handlefperr(errmask, errobj, fpe_errors, &first)) {
+        /* The handler reported an error. */
+        status = -1;
+    }
+    /* Single exit: errobj is released on success and on failure. */
+    Py_XDECREF(errobj);
+    return status;
+}
+
+
+/*
* check the floating point status
* - errmask: mask of status to check
* - extobj: ufunc pyvals object
diff --git a/numpy/core/src/umath/loops_arithm_fp.dispatch.c.src b/numpy/core/src/umath/loops_arithm_fp.dispatch.c.src
index 51b167844..bf8142880 100644
--- a/numpy/core/src/umath/loops_arithm_fp.dispatch.c.src
+++ b/numpy/core/src/umath/loops_arithm_fp.dispatch.c.src
@@ -1,6 +1,7 @@
/*@targets
** $maxopt baseline
** sse2 avx2 avx512f
+ ** vx vxe
**/
#define _UMATHMODULE
#define _MULTIARRAYMODULE
@@ -364,7 +365,7 @@ sse2_binary_scalar2_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_i
* #type = npy_float, npy_double#
* #TYPE = FLOAT, DOUBLE#
* #sfx = f32, f64#
- * #CHK = , _F64#
+ * #CHK = _F32, _F64#
*/
#if NPY_SIMD@CHK@
/**begin repeat1
@@ -444,7 +445,7 @@ simd_binary_scalar2_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_i
* #type = npy_float, npy_double, npy_longdouble#
* #TYPE = FLOAT, DOUBLE, LONGDOUBLE#
* #vector = 1, 1, 0#
- * #VECTOR = NPY_SIMD, NPY_SIMD_F64, 0 #
+ * #VECTOR = NPY_SIMD_F32, NPY_SIMD_F64, 0 #
*/
/**begin repeat1
* Arithmetic
diff --git a/numpy/core/src/umath/loops_arithmetic.dispatch.c.src b/numpy/core/src/umath/loops_arithmetic.dispatch.c.src
index 16a9eac2e..5b5f13ad1 100644
--- a/numpy/core/src/umath/loops_arithmetic.dispatch.c.src
+++ b/numpy/core/src/umath/loops_arithmetic.dispatch.c.src
@@ -3,6 +3,7 @@
** sse2 sse41 avx2 avx512f avx512_skx
** vsx2 vsx4
** neon
+ ** vx
**/
#define _UMATHMODULE
#define _MULTIARRAYMODULE
@@ -51,13 +52,14 @@ simd_divide_by_scalar_contig_@sfx@(char **args, npy_intp len)
const npyv_@sfx@x3 divisor = npyv_divisor_@sfx@(scalar);
if (scalar == -1) {
- npyv_b@len@ noverflow = npyv_cvt_b@len@_@sfx@(npyv_setall_@sfx@(-1));
- npyv_@sfx@ vzero = npyv_zero_@sfx@();
+ npyv_b@len@ noverflow = npyv_cvt_b@len@_@sfx@(npyv_setall_@sfx@(-1));
+ const npyv_@sfx@ vzero = npyv_zero_@sfx@();
+ const npyv_@sfx@ vmin = npyv_setall_@sfx@(NPY_MIN_INT@len@);
for (; len >= vstep; len -= vstep, src += vstep, dst += vstep) {
npyv_@sfx@ a = npyv_load_@sfx@(src);
npyv_b@len@ gt_min = npyv_cmpgt_@sfx@(a, npyv_setall_@sfx@(NPY_MIN_INT@len@));
noverflow = npyv_and_b@len@(noverflow, gt_min);
- npyv_@sfx@ neg = npyv_ifsub_@sfx@(gt_min, vzero, a, vzero);
+ npyv_@sfx@ neg = npyv_ifsub_@sfx@(gt_min, vzero, a, vmin);
npyv_store_@sfx@(dst, neg);
}
@@ -66,13 +68,13 @@ simd_divide_by_scalar_contig_@sfx@(char **args, npy_intp len)
npyv_lanetype_@sfx@ a = *src;
if (a == NPY_MIN_INT@len@) {
raise_err = 1;
- *dst = 0;
+ *dst = NPY_MIN_INT@len@;
} else {
*dst = -a;
}
}
if (raise_err) {
- npy_set_floatstatus_divbyzero();
+ npy_set_floatstatus_overflow();
}
} else {
for (; len >= vstep; len -= vstep, src += vstep, dst += vstep) {
@@ -253,7 +255,8 @@ vsx4_simd_divide_contig_@sfx@(char **args, npy_intp len)
const npyv_@sfx@ vneg_one = npyv_setall_@sfx@(-1);
const npyv_@sfx@ vzero = npyv_zero_@sfx@();
const npyv_@sfx@ vmin = npyv_setall_@sfx@(NPY_MIN_INT@len@);
- npyv_b@len@ warn = npyv_cvt_b@len@_@sfx@(npyv_zero_@sfx@());
+ npyv_b@len@ warn_zero = npyv_cvt_b@len@_@sfx@(npyv_zero_@sfx@());
+ npyv_b@len@ warn_overflow = npyv_cvt_b@len@_@sfx@(npyv_zero_@sfx@());
const int vstep = npyv_nlanes_@sfx@;
for (; len >= vstep; len -= vstep, src1 += vstep, src2 += vstep,
@@ -267,10 +270,8 @@ vsx4_simd_divide_contig_@sfx@(char **args, npy_intp len)
npyv_b@len@ amin = npyv_cmpeq_@sfx@(a, vmin);
npyv_b@len@ bneg_one = npyv_cmpeq_@sfx@(b, vneg_one);
npyv_b@len@ overflow = npyv_and_@sfx@(bneg_one, amin);
- npyv_b@len@ error = npyv_or_@sfx@(bzero, overflow);
- // in case of overflow or b = 0, 'cvtozero' forces quo/rem to be 0
- npyv_@sfx@ cvtozero = npyv_select_@sfx@(error, vzero, vneg_one);
- warn = npyv_or_@sfx@(error, warn);
+ warn_zero = npyv_or_@sfx@(bzero, warn_zero);
+ warn_overflow = npyv_or_@sfx@(overflow, warn_overflow);
// handle mixed case the way Python does
// ((a > 0) == (b > 0) || rem == 0)
npyv_b@len@ a_gt_zero = npyv_cmpgt_@sfx@(a, vzero);
@@ -280,21 +281,30 @@ vsx4_simd_divide_contig_@sfx@(char **args, npy_intp len)
npyv_b@len@ or = npyv_or_@sfx@(ab_eq_cond, rem_zero);
npyv_@sfx@ to_sub = npyv_select_@sfx@(or, vzero, vneg_one);
quo = npyv_add_@sfx@(quo, to_sub);
- npyv_store_@sfx@(dst1, npyv_and_@sfx@(cvtozero, quo));
+ // Divide by zero
+ quo = npyv_select_@sfx@(bzero, vzero, quo);
+ // Overflow
+ quo = npyv_select_@sfx@(overflow, vmin, quo);
+ npyv_store_@sfx@(dst1, quo);
}
- if (!vec_all_eq(warn, vzero)) {
+ if (!vec_all_eq(warn_zero, vzero)) {
npy_set_floatstatus_divbyzero();
}
+ if (!vec_all_eq(warn_overflow, vzero)) {
+ npy_set_floatstatus_overflow();
+ }
for (; len > 0; --len, ++src1, ++src2, ++dst1) {
const npyv_lanetype_@sfx@ a = *src1;
const npyv_lanetype_@sfx@ b = *src2;
- if (b == 0 || (a == NPY_MIN_INT@len@ && b == -1)) {
+ if (NPY_UNLIKELY(b == 0)) {
npy_set_floatstatus_divbyzero();
*dst1 = 0;
- }
- else {
+ } else if (NPY_UNLIKELY((a == NPY_MIN_INT@len@) && (b == -1))) {
+ npy_set_floatstatus_overflow();
+ *dst1 = NPY_MIN_INT@len@;
+ } else {
*dst1 = a / b;
if (((a > 0) != (b > 0)) && ((*dst1 * b) != a)) {
*dst1 -= 1;
@@ -340,8 +350,14 @@ NPY_FINLINE @type@ floor_div_@TYPE@(const @type@ n, const @type@ d)
* (i.e. a different approach than npy_set_floatstatus_divbyzero()).
*/
if (NPY_UNLIKELY(d == 0 || (n == NPY_MIN_@TYPE@ && d == -1))) {
- npy_set_floatstatus_divbyzero();
- return 0;
+ if (d == 0) {
+ npy_set_floatstatus_divbyzero();
+ return 0;
+ }
+ else {
+ npy_set_floatstatus_overflow();
+ return NPY_MIN_@TYPE@;
+ }
}
@type@ r = n / d;
// Negative quotients needs to be rounded down
diff --git a/numpy/core/src/umath/loops_comparison.dispatch.c.src b/numpy/core/src/umath/loops_comparison.dispatch.c.src
index 01d58fbf9..2f75593a5 100644
--- a/numpy/core/src/umath/loops_comparison.dispatch.c.src
+++ b/numpy/core/src/umath/loops_comparison.dispatch.c.src
@@ -3,6 +3,7 @@
** sse2 sse42 avx2 avx512f avx512_skx
** vsx2 vsx3
** neon
+ ** vx vxe
**/
#define _UMATHMODULE
#define _MULTIARRAYMODULE
@@ -22,7 +23,7 @@
* #sfx = u8, s8, u16, s16, u32, s32, u64, s64, f32, f64#
* #len = 8, 8, 16, 16, 32, 32, 64, 64, 32, 64#
* #signed = 0, 1, 0, 1, 0, 1, 0, 1, 0, 0#
- * #VECTOR = NPY_SIMD*9, NPY_SIMD_F64#
+ * #VECTOR = NPY_SIMD*8, NPY_SIMD_F32, NPY_SIMD_F64#
*/
/**begin repeat1
* #kind = equal, not_equal, less, less_equal#
@@ -298,7 +299,7 @@ static void simd_binary_scalar2_@kind@_b8(char **args, npy_intp len)
* #bool = 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0#
* #fp = 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1#
* #signed = 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0#
- * #VECTOR = NPY_SIMD*10, NPY_SIMD_F64#
+ * #VECTOR = NPY_SIMD*9, NPY_SIMD_F32, NPY_SIMD_F64#
*/
/**begin repeat1
* #kind = equal, not_equal, less, less_equal#
diff --git a/numpy/core/src/umath/loops_hyperbolic.dispatch.c.src b/numpy/core/src/umath/loops_hyperbolic.dispatch.c.src
index 8cccc18f0..ce4962ce3 100644
--- a/numpy/core/src/umath/loops_hyperbolic.dispatch.c.src
+++ b/numpy/core/src/umath/loops_hyperbolic.dispatch.c.src
@@ -3,6 +3,7 @@
** (avx2 fma3) AVX512_SKX
** vsx2 vsx4
** neon_vfpv4
+ ** vx vxe
**/
#include "numpy/npy_math.h"
#include "simd/simd.h"
@@ -240,6 +241,8 @@ simd_tanh_f64(const double *src, npy_intp ssrc, double *dst, npy_intp sdst, npy_
}
}
#endif // NPY_SIMD_F64
+
+#if NPY_SIMD_F32
static void
simd_tanh_f32(const float *src, npy_intp ssrc, float *dst, npy_intp sdst, npy_intp len)
{
@@ -335,6 +338,7 @@ simd_tanh_f32(const float *src, npy_intp ssrc, float *dst, npy_intp sdst, npy_in
}
}
}
+#endif // NPY_SIMD_F32
#endif // NPY_SIMD_FMA3
/**begin repeat
@@ -342,7 +346,7 @@ simd_tanh_f32(const float *src, npy_intp ssrc, float *dst, npy_intp sdst, npy_in
* #type = float, double#
* #sfx = f32, f64#
* #ssfx = f, #
- * #simd = NPY_SIMD_FMA3, NPY_SIMD_FMA3 && NPY_SIMD_F64#
+ * #simd = NPY_SIMD_FMA3 && NPY_SIMD_F32, NPY_SIMD_FMA3 && NPY_SIMD_F64#
*/
/**begin repeat1
* #func = tanh#
diff --git a/numpy/core/src/umath/loops_minmax.dispatch.c.src b/numpy/core/src/umath/loops_minmax.dispatch.c.src
index ba2288f0b..b4fb205a0 100644
--- a/numpy/core/src/umath/loops_minmax.dispatch.c.src
+++ b/numpy/core/src/umath/loops_minmax.dispatch.c.src
@@ -3,6 +3,7 @@
** neon asimd
** sse2 avx2 avx512_skx
** vsx2
+ ** vx vxe
**/
#define _UMATHMODULE
#define _MULTIARRAYMODULE
@@ -144,7 +145,7 @@ NPY_FINLINE @type@ scalar_@op@_@c_sfx@(@type@ a, @type@ b) {
/**begin repeat
* #sfx = f32, f64#
* #bsfx = b32, b64#
- * #simd_chk = NPY_SIMD, NPY_SIMD_F64#
+ * #simd_chk = NPY_SIMD_F32, NPY_SIMD_F64#
* #scalar_sfx = f, d#
*/
#if @simd_chk@
@@ -196,7 +197,7 @@ NPY_FINLINE @type@ scalar_@op@_@c_sfx@(@type@ a, @type@ b) {
******************************************************************************/
/**begin repeat
* #sfx = s8, u8, s16, u16, s32, u32, s64, u64, f32, f64#
- * #simd_chk = NPY_SIMD*9, NPY_SIMD_F64#
+ * #simd_chk = NPY_SIMD*8, NPY_SIMD_F32, NPY_SIMD_F64#
* #is_fp = 0*8, 1, 1#
* #scalar_sfx = i*8, f, d#
*/
@@ -395,6 +396,9 @@ simd_binary_@intrin@_@sfx@(const npyv_lanetype_@sfx@ *ip1, npy_intp sip1,
#elif NPY_SIMD && NPY_BITSOF_@BTYPE@ == @len@
#if @is_fp@
#define TO_SIMD_SFX(X) X##_f@len@
+ #if NPY_BITSOF_@BTYPE@ == 32 && !NPY_SIMD_F32
+ #undef TO_SIMD_SFX
+ #endif
#if NPY_BITSOF_@BTYPE@ == 64 && !NPY_SIMD_F64
#undef TO_SIMD_SFX
#endif
diff --git a/numpy/core/src/umath/loops_trigonometric.dispatch.c.src b/numpy/core/src/umath/loops_trigonometric.dispatch.c.src
index 44c47d14f..78685e807 100644
--- a/numpy/core/src/umath/loops_trigonometric.dispatch.c.src
+++ b/numpy/core/src/umath/loops_trigonometric.dispatch.c.src
@@ -3,6 +3,7 @@
** (avx2 fma3) avx512f
** vsx2 vsx3 vsx4
** neon_vfpv4
+ ** vxe vxe2
**/
#include "numpy/npy_math.h"
#include "simd/simd.h"
@@ -13,7 +14,7 @@
* - use vectorized version of Payne-Hanek style reduction for large elements or
* when there's no native FUSED support instead of fallback to libc
*/
-#if NPY_SIMD_FMA3 // native support
+#if NPY_SIMD_F32 && NPY_SIMD_FMA3 // native support
/*
* Vectorized Cody-Waite range reduction technique
* Performs the reduction step x* = x - y*C in three steps:
@@ -210,7 +211,7 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(FLOAT_@func@)
const npy_intp sdst = steps[1] / lsize;
npy_intp len = dimensions[0];
assert(len <= 1 || (steps[0] % lsize == 0 && steps[1] % lsize == 0));
-#if NPY_SIMD_FMA3
+#if NPY_SIMD_F32 && NPY_SIMD_FMA3
if (is_mem_overlap(src, steps[0], dst, steps[1], len) ||
!npyv_loadable_stride_f32(ssrc) || !npyv_storable_stride_f32(sdst)
) {
diff --git a/numpy/core/src/umath/loops_unary_fp.dispatch.c.src b/numpy/core/src/umath/loops_unary_fp.dispatch.c.src
index 78e231965..0ac39a9b1 100644
--- a/numpy/core/src/umath/loops_unary_fp.dispatch.c.src
+++ b/numpy/core/src/umath/loops_unary_fp.dispatch.c.src
@@ -3,6 +3,7 @@
** sse2 sse41
** vsx2
** neon asimd
+ ** vx vxe
**/
/**
* Force use SSE only on x86, even if AVX2 or AVX512F are enabled
@@ -18,7 +19,7 @@
/**********************************************************
** Scalars
**********************************************************/
-#if !NPY_SIMD
+#if !NPY_SIMD_F32
NPY_FINLINE float c_recip_f32(float a)
{ return 1.0f / a; }
NPY_FINLINE float c_abs_f32(float a)
@@ -29,7 +30,7 @@ NPY_FINLINE float c_abs_f32(float a)
}
NPY_FINLINE float c_square_f32(float a)
{ return a * a; }
-#endif // !NPY_SIMD
+#endif // !NPY_SIMD_F32
#if !NPY_SIMD_F64
NPY_FINLINE double c_recip_f64(double a)
@@ -147,7 +148,7 @@ NPY_FINLINE double c_square_f64(double a)
/**begin repeat
* #TYPE = FLOAT, DOUBLE#
* #sfx = f32, f64#
- * #VCHK = NPY_SIMD, NPY_SIMD_F64#
+ * #VCHK = NPY_SIMD_F32, NPY_SIMD_F64#
*/
#if @VCHK@
/**begin repeat1
@@ -259,7 +260,7 @@ static void simd_@TYPE@_@kind@_@STYPE@_@DTYPE@
/**begin repeat
* #TYPE = FLOAT, DOUBLE#
* #sfx = f32, f64#
- * #VCHK = NPY_SIMD, NPY_SIMD_F64#
+ * #VCHK = NPY_SIMD_F32, NPY_SIMD_F64#
*/
/**begin repeat1
* #kind = rint, floor, ceil, trunc, sqrt, absolute, square, reciprocal#
diff --git a/numpy/core/src/umath/scalarmath.c.src b/numpy/core/src/umath/scalarmath.c.src
index 4993546f8..ef608378a 100644
--- a/numpy/core/src/umath/scalarmath.c.src
+++ b/numpy/core/src/umath/scalarmath.c.src
@@ -499,17 +499,26 @@ half_ctype_power(npy_half a, npy_half b, npy_half *out)
* #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint,
* npy_long, npy_ulong, npy_longlong, npy_ulonglong,
* npy_float, npy_double, npy_longdouble#
+ * #NAME = BYTE, UBYTE, SHORT, USHORT, INT, UINT,
+ * LONG, ULONG, LONGLONG, ULONGLONG,
+ * FLOAT, DOUBLE, LONGDOUBLE#
* #uns = (0,1)*5,0*3#
+ * #int = 1*10,0*3#
*/
static NPY_INLINE int
@name@_ctype_negative(@type@ a, @type@ *out)
{
- *out = -a;
#if @uns@
+ *out = -a;
return NPY_FPE_OVERFLOW;
-#else
- return 0;
+#elif @int@
+ if(a == NPY_MIN_@NAME@){
+ *out = a;
+ return NPY_FPE_OVERFLOW;
+ }
#endif
+ *out = -a;
+ return 0;
}
/**end repeat**/
@@ -584,10 +593,15 @@ static NPY_INLINE int
/**begin repeat
* #name = byte, short, int, long, longlong#
* #type = npy_byte, npy_short, npy_int, npy_long, npy_longlong#
+ * #NAME = BYTE, SHORT, INT, LONG, LONGLONG#
*/
static NPY_INLINE int
@name@_ctype_absolute(@type@ a, @type@ *out)
{
+ if (a == NPY_MIN_@NAME@) {
+ *out = a;
+ return NPY_FPE_OVERFLOW;
+ }
*out = (a < 0 ? -a : a);
return 0;
}
@@ -1564,8 +1578,23 @@ static PyObject *
val = PyArrayScalar_VAL(a, @Name@);
+ int retstatus = @name@_ctype_@oper@(val, &out);
- @name@_ctype_@oper@(val, &out);
+ if (retstatus) {
+ int bufsize, errmask;
+ PyObject *errobj;
+
+ if (PyUFunc_GetPyValues("@name@_scalars", &bufsize, &errmask,
+ &errobj) < 0) {
+ return NULL;
+ }
+ int first = 1;
+ if (PyUFunc_handlefperr(errmask, errobj, retstatus, &first)) {
+ Py_XDECREF(errobj);
+ return NULL;
+ }
+ Py_XDECREF(errobj);
+ }
/*
* TODO: Complex absolute should check floating point flags.
diff --git a/numpy/core/src/umath/string_ufuncs.cpp b/numpy/core/src/umath/string_ufuncs.cpp
new file mode 100644
index 000000000..5a35c318b
--- /dev/null
+++ b/numpy/core/src/umath/string_ufuncs.cpp
@@ -0,0 +1,449 @@
+#include <Python.h>
+
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+#define _MULTIARRAYMODULE
+#define _UMATHMODULE
+
+#include "numpy/ndarraytypes.h"
+
+#include "numpyos.h"
+#include "dispatching.h"
+#include "dtypemeta.h"
+#include "common_dtype.h"
+#include "convert_datatype.h"
+
+#include "string_ufuncs.h"
+
+
+template <typename character>
+static NPY_INLINE int
+character_cmp(character a, character b)
+{
+ if (a == b) {
+ return 0;
+ }
+ else if (a < b) {
+ return -1;
+ }
+ else {
+ return 1;
+ }
+}
+
+
+/*
+ * Compare two strings of different length. Note that either string may be
+ * zero padded (trailing zeros are ignored; in other words, the shorter word
+ * is always padded with zeros).
+ */
+template <bool rstrip, typename character>
+static NPY_INLINE int
+string_cmp(int len1, const character *str1, int len2, const character *str2)
+{
+ if (rstrip) {
+ /*
+ * Ignore/"trim" trailing whitespace (and 0s). Note that this function
+ * does not support unicode whitespace (and never has).
+ */
+ while (len1 > 0) {
+ character c = str1[len1-1];
+ if (c != (character)0 && !NumPyOS_ascii_isspace(c)) {
+ break;
+ }
+ len1--;
+ }
+ while (len2 > 0) {
+ character c = str2[len2-1];
+ if (c != (character)0 && !NumPyOS_ascii_isspace(c)) {
+ break;
+ }
+ len2--;
+ }
+ }
+
+ int n = PyArray_MIN(len1, len2);
+
+ if (sizeof(character) == 1) {
+ /*
+ * TODO: `memcmp` makes things 2x faster for longer words that match
+ * exactly, but at least 2x slower for short or mismatching ones.
+ */
+ int cmp = memcmp(str1, str2, n);
+ if (cmp != 0) {
+ return cmp;
+ }
+ str1 += n;
+ str2 += n;
+ }
+ else {
+ for (int i = 0; i < n; i++) {
+ int cmp = character_cmp(*str1, *str2);
+ if (cmp != 0) {
+ return cmp;
+ }
+ str1++;
+ str2++;
+ }
+ }
+ if (len1 > len2) {
+ for (int i = n; i < len1; i++) {
+ int cmp = character_cmp(*str1, (character)0);
+ if (cmp != 0) {
+ return cmp;
+ }
+ str1++;
+ }
+ }
+ else if (len2 > len1) {
+ for (int i = n; i < len2; i++) {
+ int cmp = character_cmp((character)0, *str2);
+ if (cmp != 0) {
+ return cmp;
+ }
+ str2++;
+ }
+ }
+ return 0;
+}
+
+
+/*
+ * Helper for templating, avoids warnings about uncovered switch paths.
+ */
+enum class COMP {
+ EQ, NE, LT, LE, GT, GE,
+};
+
+static char const *
+comp_name(COMP comp) {
+ switch(comp) {
+ case COMP::EQ: return "equal";
+ case COMP::NE: return "not_equal";
+ case COMP::LT: return "less";
+ case COMP::LE: return "less_equal";
+ case COMP::GT: return "greater";
+ case COMP::GE: return "greater_equal";
+ default:
+ assert(0);
+ return nullptr;
+ }
+}
+
+
+template <bool rstrip, COMP comp, typename character>
+static int
+string_comparison_loop(PyArrayMethod_Context *context,
+ char *const data[], npy_intp const dimensions[],
+ npy_intp const strides[], NpyAuxData *NPY_UNUSED(auxdata))
+{
+ /*
+ * Note, fetching `elsize` from the descriptor is OK even without the GIL,
+ * however it may be that this should be moved into `auxdata` eventually,
+ * which may also be slightly faster/cleaner (but more involved).
+ */
+ int len1 = context->descriptors[0]->elsize / sizeof(character);
+ int len2 = context->descriptors[1]->elsize / sizeof(character);
+
+ char *in1 = data[0];
+ char *in2 = data[1];
+ char *out = data[2];
+
+ npy_intp N = dimensions[0];
+
+ while (N--) {
+ int cmp = string_cmp<rstrip>(
+ len1, (character *)in1, len2, (character *)in2);
+ npy_bool res;
+ switch (comp) {
+ case COMP::EQ:
+ res = cmp == 0;
+ break;
+ case COMP::NE:
+ res = cmp != 0;
+ break;
+ case COMP::LT:
+ res = cmp < 0;
+ break;
+ case COMP::LE:
+ res = cmp <= 0;
+ break;
+ case COMP::GT:
+ res = cmp > 0;
+ break;
+ case COMP::GE:
+ res = cmp >= 0;
+ break;
+ }
+ *(npy_bool *)out = res;
+
+ in1 += strides[0];
+ in2 += strides[1];
+ out += strides[2];
+ }
+ return 0;
+}
+
+
+/*
+ * Machinery to add the string loops to the existing ufuncs.
+ */
+
+/*
+ * This function replaces the strided loop with the passed in one,
+ * and registers it with the given ufunc.
+ */
+static int
+add_loop(PyObject *umath, const char *ufunc_name,
+ PyArrayMethod_Spec *spec, PyArrayMethod_StridedLoop *loop)
+{
+ PyObject *name = PyUnicode_FromString(ufunc_name);
+ if (name == nullptr) {
+ return -1;
+ }
+ PyObject *ufunc = PyObject_GetItem(umath, name);
+ Py_DECREF(name);
+ if (ufunc == nullptr) {
+ return -1;
+ }
+ spec->slots[0].pfunc = (void *)loop;
+
+ int res = PyUFunc_AddLoopFromSpec(ufunc, spec);
+ Py_DECREF(ufunc);
+ return res;
+}
+
+
+template<bool rstrip, typename character, COMP...>
+struct add_loops;
+
+template<bool rstrip, typename character>
+struct add_loops<rstrip, character> {
+ int operator()(PyObject*, PyArrayMethod_Spec*) {
+ return 0;
+ }
+};
+
+template<bool rstrip, typename character, COMP comp, COMP... comps>
+struct add_loops<rstrip, character, comp, comps...> {
+ int operator()(PyObject* umath, PyArrayMethod_Spec* spec) {
+ PyArrayMethod_StridedLoop* loop = string_comparison_loop<rstrip, comp, character>;
+
+ if (add_loop(umath, comp_name(comp), spec, loop) < 0) {
+ return -1;
+ }
+ else {
+ return add_loops<rstrip, character, comps...>()(umath, spec);
+ }
+ }
+};
+
+
+NPY_NO_EXPORT int
+init_string_ufuncs(PyObject *umath)
+{
+ int res = -1;
+ /* NOTE: This should receive global symbols? */
+ PyArray_DTypeMeta *String = PyArray_DTypeFromTypeNum(NPY_STRING);
+ PyArray_DTypeMeta *Unicode = PyArray_DTypeFromTypeNum(NPY_UNICODE);
+ PyArray_DTypeMeta *Bool = PyArray_DTypeFromTypeNum(NPY_BOOL);
+
+ /* We start with the string loops: */
+ PyArray_DTypeMeta *dtypes[] = {String, String, Bool};
+ /*
+ * We only have one loop right now, the strided one. The default type
+ * resolver ensures native byte order/canonical representation.
+ */
+ PyType_Slot slots[] = {
+ {NPY_METH_strided_loop, nullptr},
+ {0, nullptr}
+ };
+
+ PyArrayMethod_Spec spec = {};
+ spec.name = "templated_string_comparison";
+ spec.nin = 2;
+ spec.nout = 1;
+ spec.dtypes = dtypes;
+ spec.slots = slots;
+ spec.flags = NPY_METH_NO_FLOATINGPOINT_ERRORS;
+
+ /* All String loops */
+ using string_looper = add_loops<false, npy_byte, COMP::EQ, COMP::NE, COMP::LT, COMP::LE, COMP::GT, COMP::GE>;
+ if (string_looper()(umath, &spec) < 0) {
+ goto finish;
+ }
+
+ /* All Unicode loops */
+ using ucs_looper = add_loops<false, npy_ucs4, COMP::EQ, COMP::NE, COMP::LT, COMP::LE, COMP::GT, COMP::GE>;
+ dtypes[0] = Unicode;
+ dtypes[1] = Unicode;
+ if (ucs_looper()(umath, &spec) < 0) {
+ goto finish;
+ }
+
+ res = 0;
+ finish:
+ Py_DECREF(String);
+ Py_DECREF(Unicode);
+ Py_DECREF(Bool);
+ return res;
+}
+
+
+template <bool rstrip, typename character>
+static PyArrayMethod_StridedLoop *
+get_strided_loop(int comp)
+{
+ switch (comp) {
+ case Py_EQ:
+ return string_comparison_loop<rstrip, COMP::EQ, character>;
+ case Py_NE:
+ return string_comparison_loop<rstrip, COMP::NE, character>;
+ case Py_LT:
+ return string_comparison_loop<rstrip, COMP::LT, character>;
+ case Py_LE:
+ return string_comparison_loop<rstrip, COMP::LE, character>;
+ case Py_GT:
+ return string_comparison_loop<rstrip, COMP::GT, character>;
+ case Py_GE:
+ return string_comparison_loop<rstrip, COMP::GE, character>;
+ default:
+ assert(false); /* caller ensures this */
+ }
+ return nullptr;
+}
+
+
+/*
+ * This function is used for `compare_chararrays` and currently also void
+ * comparisons (unstructured voids). The first could probably be deprecated
+ * and removed but is used by `np.char.chararray`; the latter should also be
+ * moved to the ufunc probably (removing the need for manual looping).
+ *
+ * The `rstrip` mechanism is presumably for some fortran compat, but the
+ * question is whether it would not be better to have/use `rstrip` on such
+ * an array first...
+ *
+ * NOTE: This function is also used for unstructured voids, this works because
+ * `npy_byte` is correct.
+ */
+NPY_NO_EXPORT PyObject *
+_umath_strings_richcompare(
+ PyArrayObject *self, PyArrayObject *other, int cmp_op, int rstrip)
+{
+ NpyIter *iter = nullptr;
+ PyObject *result = nullptr;
+
+ char **dataptr = nullptr;
+ npy_intp *strides = nullptr;
+ npy_intp *countptr = nullptr;
+ npy_intp size = 0;
+
+ PyArrayMethod_Context context = {};
+ NpyIter_IterNextFunc *iternext = nullptr;
+
+ npy_uint32 it_flags = (
+ NPY_ITER_EXTERNAL_LOOP | NPY_ITER_ZEROSIZE_OK |
+ NPY_ITER_BUFFERED | NPY_ITER_GROWINNER);
+ npy_uint32 op_flags[3] = {
+ NPY_ITER_READONLY | NPY_ITER_ALIGNED,
+ NPY_ITER_READONLY | NPY_ITER_ALIGNED,
+ NPY_ITER_WRITEONLY | NPY_ITER_ALLOCATE | NPY_ITER_ALIGNED};
+
+ PyArrayMethod_StridedLoop *strided_loop = nullptr;
+ NPY_BEGIN_THREADS_DEF;
+
+ if (PyArray_TYPE(self) != PyArray_TYPE(other)) {
+ /*
+ * Comparison between Bytes and Unicode is not defined in Py3K;
+ * we follow.
+ * TODO: This makes no sense at all for `compare_chararrays`, kept
+ * only under the assumption that we are more likely to deprecate
+ * than fix it to begin with.
+ */
+ Py_INCREF(Py_NotImplemented);
+ return Py_NotImplemented;
+ }
+
+ PyArrayObject *ops[3] = {self, other, nullptr};
+ PyArray_Descr *descrs[3] = {nullptr, nullptr, PyArray_DescrFromType(NPY_BOOL)};
+ /* TODO: ensuring native byte order is not really necessary for == and != */
+ descrs[0] = NPY_DT_CALL_ensure_canonical(PyArray_DESCR(self));
+ if (descrs[0] == nullptr) {
+ goto finish;
+ }
+ descrs[1] = NPY_DT_CALL_ensure_canonical(PyArray_DESCR(other));
+ if (descrs[1] == nullptr) {
+ goto finish;
+ }
+
+ /*
+ * Create the iterator:
+ */
+ iter = NpyIter_AdvancedNew(
+ 3, ops, it_flags, NPY_KEEPORDER, NPY_SAFE_CASTING, op_flags, descrs,
+ -1, nullptr, nullptr, 0);
+ if (iter == nullptr) {
+ goto finish;
+ }
+
+ size = NpyIter_GetIterSize(iter);
+ if (size == 0) {
+ result = (PyObject *)NpyIter_GetOperandArray(iter)[2];
+ Py_INCREF(result);
+ goto finish;
+ }
+
+ iternext = NpyIter_GetIterNext(iter, nullptr);
+ if (iternext == nullptr) {
+ goto finish;
+ }
+
+ /*
+ * Prepare the inner-loop and execute it (we only need descriptors to be
+ * passed in).
+ */
+ context.descriptors = descrs;
+
+ dataptr = NpyIter_GetDataPtrArray(iter);
+ strides = NpyIter_GetInnerStrideArray(iter);
+ countptr = NpyIter_GetInnerLoopSizePtr(iter);
+
+ if (rstrip == 0) {
+ /* NOTE: Also used for VOID, so can be STRING, UNICODE, or VOID: */
+ if (descrs[0]->type_num != NPY_UNICODE) {
+ strided_loop = get_strided_loop<false, npy_byte>(cmp_op);
+ }
+ else {
+ strided_loop = get_strided_loop<false, npy_ucs4>(cmp_op);
+ }
+ }
+ else {
+ if (descrs[0]->type_num != NPY_UNICODE) {
+ strided_loop = get_strided_loop<true, npy_byte>(cmp_op);
+ }
+ else {
+ strided_loop = get_strided_loop<true, npy_ucs4>(cmp_op);
+ }
+ }
+
+ NPY_BEGIN_THREADS_THRESHOLDED(size);
+
+ do {
+ /* We know the loop cannot fail */
+ strided_loop(&context, dataptr, countptr, strides, nullptr);
+ } while (iternext(iter) != 0);
+
+ NPY_END_THREADS;
+
+ result = (PyObject *)NpyIter_GetOperandArray(iter)[2];
+ Py_INCREF(result);
+
+ finish:
+ if (NpyIter_Deallocate(iter) < 0) {
+ Py_CLEAR(result);
+ }
+ Py_XDECREF(descrs[0]);
+ Py_XDECREF(descrs[1]);
+ Py_XDECREF(descrs[2]);
+ return result;
+}
diff --git a/numpy/core/src/umath/string_ufuncs.h b/numpy/core/src/umath/string_ufuncs.h
new file mode 100644
index 000000000..aa1719954
--- /dev/null
+++ b/numpy/core/src/umath/string_ufuncs.h
@@ -0,0 +1,19 @@
+#ifndef _NPY_CORE_SRC_UMATH_STRING_UFUNCS_H_
+#define _NPY_CORE_SRC_UMATH_STRING_UFUNCS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+NPY_NO_EXPORT int
+init_string_ufuncs(PyObject *umath);
+
+NPY_NO_EXPORT PyObject *
+_umath_strings_richcompare(
+ PyArrayObject *self, PyArrayObject *other, int cmp_op, int rstrip);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _NPY_CORE_SRC_UMATH_STRING_UFUNCS_H_ */ \ No newline at end of file
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index fce7d61de..2636396d3 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -57,6 +57,10 @@
#include "legacy_array_method.h"
#include "abstractdtypes.h"
+/* TODO: Only for `NpyIter_GetTransferFlags` until it is public */
+#define NPY_ITERATOR_IMPLEMENTATION_CODE
+#include "nditer_impl.h"
+
/********** PRINTF DEBUG TRACING **************/
#define NPY_UF_DBG_TRACING 0
@@ -1544,10 +1548,6 @@ execute_ufunc_loop(PyArrayMethod_Context *context, int masked,
if (masked) {
baseptrs[nop] = PyArray_BYTES(op_it[nop]);
}
- if (NpyIter_ResetBasePointers(iter, baseptrs, NULL) != NPY_SUCCEED) {
- NpyIter_Deallocate(iter);
- return -1;
- }
/*
* Get the inner loop, with the possibility of specialization
@@ -1584,17 +1584,25 @@ execute_ufunc_loop(PyArrayMethod_Context *context, int masked,
char **dataptr = NpyIter_GetDataPtrArray(iter);
npy_intp *strides = NpyIter_GetInnerStrideArray(iter);
npy_intp *countptr = NpyIter_GetInnerLoopSizePtr(iter);
- int needs_api = NpyIter_IterationNeedsAPI(iter);
NPY_BEGIN_THREADS_DEF;
+ flags = PyArrayMethod_COMBINED_FLAGS(flags, NpyIter_GetTransferFlags(iter));
+
if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
npy_clear_floatstatus_barrier((char *)context);
}
- if (!needs_api && !(flags & NPY_METH_REQUIRES_PYAPI)) {
+ if (!(flags & NPY_METH_REQUIRES_PYAPI)) {
NPY_BEGIN_THREADS_THRESHOLDED(full_size);
}
+ /* The reset may copy the first buffer chunk, which could cause FPEs */
+ if (NpyIter_ResetBasePointers(iter, baseptrs, NULL) != NPY_SUCCEED) {
+ NPY_AUXDATA_FREE(auxdata);
+ NpyIter_Deallocate(iter);
+ return -1;
+ }
+
NPY_UF_DBG_PRINT("Actual inner loop:\n");
/* Execute the loop */
int res;
@@ -2388,7 +2396,8 @@ PyUFunc_GeneralizedFunctionInternal(PyUFuncObject *ufunc,
NPY_ITER_MULTI_INDEX |
NPY_ITER_REFS_OK |
NPY_ITER_ZEROSIZE_OK |
- NPY_ITER_COPY_IF_OVERLAP;
+ NPY_ITER_COPY_IF_OVERLAP |
+ NPY_ITER_DELAY_BUFALLOC;
/* Create the iterator */
iter = NpyIter_AdvancedNew(nop, op, iter_flags,
diff --git a/numpy/core/src/umath/umathmodule.c b/numpy/core/src/umath/umathmodule.c
index 49328d19e..17fedec6f 100644
--- a/numpy/core/src/umath/umathmodule.c
+++ b/numpy/core/src/umath/umathmodule.c
@@ -23,11 +23,13 @@
#include "numpy/npy_math.h"
#include "number.h"
#include "dispatching.h"
+#include "string_ufuncs.h"
/* Automatically generated code to define all ufuncs: */
#include "funcs.inc"
#include "__umath_generated.c"
+
static PyUFuncGenericFunction pyfunc_functions[] = {PyUFunc_On_Om};
static int
@@ -347,5 +349,10 @@ int initumath(PyObject *m)
if (install_logical_ufunc_promoter(s) < 0) {
return -1;
}
+
+ if (init_string_ufuncs(d) < 0) {
+ return -1;
+ }
+
return 0;
}
diff --git a/numpy/core/tests/test_abc.py b/numpy/core/tests/test_abc.py
index 30e5748af..8b12d07ac 100644
--- a/numpy/core/tests/test_abc.py
+++ b/numpy/core/tests/test_abc.py
@@ -20,35 +20,35 @@ class TestABC:
def test_floats(self):
for t in sctypes['float']:
assert_(isinstance(t(), numbers.Real),
- "{0} is not instance of Real".format(t.__name__))
+ f"{t.__name__} is not instance of Real")
assert_(issubclass(t, numbers.Real),
- "{0} is not subclass of Real".format(t.__name__))
+ f"{t.__name__} is not subclass of Real")
assert_(not isinstance(t(), numbers.Rational),
- "{0} is instance of Rational".format(t.__name__))
+ f"{t.__name__} is instance of Rational")
assert_(not issubclass(t, numbers.Rational),
- "{0} is subclass of Rational".format(t.__name__))
+ f"{t.__name__} is subclass of Rational")
def test_complex(self):
for t in sctypes['complex']:
assert_(isinstance(t(), numbers.Complex),
- "{0} is not instance of Complex".format(t.__name__))
+ f"{t.__name__} is not instance of Complex")
assert_(issubclass(t, numbers.Complex),
- "{0} is not subclass of Complex".format(t.__name__))
+ f"{t.__name__} is not subclass of Complex")
assert_(not isinstance(t(), numbers.Real),
- "{0} is instance of Real".format(t.__name__))
+ f"{t.__name__} is instance of Real")
assert_(not issubclass(t, numbers.Real),
- "{0} is subclass of Real".format(t.__name__))
+ f"{t.__name__} is subclass of Real")
def test_int(self):
for t in sctypes['int']:
assert_(isinstance(t(), numbers.Integral),
- "{0} is not instance of Integral".format(t.__name__))
+ f"{t.__name__} is not instance of Integral")
assert_(issubclass(t, numbers.Integral),
- "{0} is not subclass of Integral".format(t.__name__))
+ f"{t.__name__} is not subclass of Integral")
def test_uint(self):
for t in sctypes['uint']:
assert_(isinstance(t(), numbers.Integral),
- "{0} is not instance of Integral".format(t.__name__))
+ f"{t.__name__} is not instance of Integral")
assert_(issubclass(t, numbers.Integral),
- "{0} is not subclass of Integral".format(t.__name__))
+ f"{t.__name__} is not subclass of Integral")
diff --git a/numpy/core/tests/test_array_coercion.py b/numpy/core/tests/test_array_coercion.py
index e858cd8b6..ed3ef7e67 100644
--- a/numpy/core/tests/test_array_coercion.py
+++ b/numpy/core/tests/test_array_coercion.py
@@ -373,28 +373,29 @@ class TestScalarDiscovery:
assert discovered_dtype.itemsize == dtype.itemsize
@pytest.mark.parametrize("dtype", np.typecodes["Integer"])
- def test_scalar_to_int_coerce_does_not_cast(self, dtype):
+ @pytest.mark.parametrize(["scalar", "error"],
+ [(np.float64(np.nan), ValueError),
+ (np.ulonglong(-1), OverflowError)])
+ def test_scalar_to_int_coerce_does_not_cast(self, dtype, scalar, error):
"""
Signed integers are currently different in that they do not cast other
NumPy scalar, but instead use scalar.__int__(). The hardcoded
exception to this rule is `np.array(scalar, dtype=integer)`.
"""
dtype = np.dtype(dtype)
- invalid_int = np.ulonglong(-1)
- float_nan = np.float64(np.nan)
-
- for scalar in [float_nan, invalid_int]:
- # This is a special case using casting logic and thus not failing:
+ # This is a special case using casting logic. It warns for the NaN
+ # but allows the cast (giving undefined behaviour).
+ with np.errstate(invalid="ignore"):
coerced = np.array(scalar, dtype=dtype)
cast = np.array(scalar).astype(dtype)
- assert_array_equal(coerced, cast)
+ assert_array_equal(coerced, cast)
- # However these fail:
- with pytest.raises((ValueError, OverflowError)):
- np.array([scalar], dtype=dtype)
- with pytest.raises((ValueError, OverflowError)):
- cast[()] = scalar
+ # However these fail:
+ with pytest.raises(error):
+ np.array([scalar], dtype=dtype)
+ with pytest.raises(error):
+ cast[()] = scalar
class TestTimeScalars:
@@ -614,8 +615,8 @@ class TestBadSequences:
obj.append([2, 3])
obj.append(mylist([1, 2]))
- with pytest.raises(RuntimeError):
- np.array(obj)
+ # Does not crash:
+ np.array(obj)
def test_replace_0d_array(self):
# List to coerce, `mylist` will mutate the first element
diff --git a/numpy/core/tests/test_casting_floatingpoint_errors.py b/numpy/core/tests/test_casting_floatingpoint_errors.py
new file mode 100644
index 000000000..4fafc4ed8
--- /dev/null
+++ b/numpy/core/tests/test_casting_floatingpoint_errors.py
@@ -0,0 +1,153 @@
+import pytest
+from pytest import param
+
+import numpy as np
+
+
+def values_and_dtypes():
+ """
+ Generate value+dtype pairs that generate floating point errors during
+ casts. The invalid casts to integers will generate "invalid" value
+ warnings, the float casts all generate "overflow".
+
+ (The Python int/float paths don't need to get tested in all the same
+ situations, but it does not hurt.)
+ """
+ # Casting to float16:
+ yield param(70000, "float16", id="int-to-f2")
+ yield param("70000", "float16", id="str-to-f2")
+ yield param(70000.0, "float16", id="float-to-f2")
+ yield param(np.longdouble(70000.), "float16", id="longdouble-to-f2")
+ yield param(np.float64(70000.), "float16", id="double-to-f2")
+ yield param(np.float32(70000.), "float16", id="float-to-f2")
+ # Casting to float32:
+ yield param(10**100, "float32", id="int-to-f4")
+ yield param(1e100, "float32", id="float-to-f2")
+ yield param(np.longdouble(1e300), "float32", id="longdouble-to-f2")
+ yield param(np.float64(1e300), "float32", id="double-to-f2")
+ # Casting to float64:
+ # If longdouble is double-double, its max can be rounded down to the double
+ # max. So we correct the double spacing (a bit weird, admittedly):
+ max_ld = np.finfo(np.longdouble).max
+ spacing = np.spacing(np.nextafter(np.finfo("f8").max, 0))
+ if max_ld - spacing > np.finfo("f8").max:
+ yield param(np.finfo(np.longdouble).max, "float64",
+ id="longdouble-to-f8")
+
+ # Cast to complex64:
+ yield param(2e300, "complex64", id="float-to-c8")
+ yield param(2e300+0j, "complex64", id="complex-to-c8")
+ yield param(2e300j, "complex64", id="complex-to-c8")
+ yield param(np.longdouble(2e300), "complex64", id="longdouble-to-c8")
+
+ # Invalid float to integer casts:
+ with np.errstate(over="ignore"):
+ for to_dt in np.typecodes["AllInteger"]:
+ for value in [np.inf, np.nan]:
+ for from_dt in np.typecodes["AllFloat"]:
+ from_dt = np.dtype(from_dt)
+ from_val = from_dt.type(value)
+
+ yield param(from_val, to_dt, id=f"{from_val}-to-{to_dt}")
+
+
+def check_operations(dtype, value):
+ """
+ There are many dedicated paths in NumPy which cast and should check for
+ floating point errors which occurred during those casts.
+ """
+ if dtype.kind != 'i':
+ # These assignments use the stricter setitem logic:
+ def assignment():
+ arr = np.empty(3, dtype=dtype)
+ arr[0] = value
+
+ yield assignment
+
+ def fill():
+ arr = np.empty(3, dtype=dtype)
+ arr.fill(value)
+
+ yield fill
+
+ def copyto_scalar():
+ arr = np.empty(3, dtype=dtype)
+ np.copyto(arr, value, casting="unsafe")
+
+ yield copyto_scalar
+
+ def copyto():
+ arr = np.empty(3, dtype=dtype)
+ np.copyto(arr, np.array([value, value, value]), casting="unsafe")
+
+ yield copyto
+
+ def copyto_scalar_masked():
+ arr = np.empty(3, dtype=dtype)
+ np.copyto(arr, value, casting="unsafe",
+ where=[True, False, True])
+
+ yield copyto_scalar_masked
+
+ def copyto_masked():
+ arr = np.empty(3, dtype=dtype)
+ np.copyto(arr, np.array([value, value, value]), casting="unsafe",
+ where=[True, False, True])
+
+ yield copyto_masked
+
+ def direct_cast():
+ np.array([value, value, value]).astype(dtype)
+
+ yield direct_cast
+
+ def direct_cast_nd_strided():
+ arr = np.full((5, 5, 5), fill_value=value)[:, ::2, :]
+ arr.astype(dtype)
+
+ yield direct_cast_nd_strided
+
+ def boolean_array_assignment():
+ arr = np.empty(3, dtype=dtype)
+ arr[[True, False, True]] = np.array([value, value])
+
+ yield boolean_array_assignment
+
+ def integer_array_assignment():
+ arr = np.empty(3, dtype=dtype)
+ values = np.array([value, value])
+
+ arr[[0, 1]] = values
+
+ yield integer_array_assignment
+
+ def integer_array_assignment_with_subspace():
+ arr = np.empty((5, 3), dtype=dtype)
+ values = np.array([value, value, value])
+
+ arr[[0, 2]] = values
+
+ yield integer_array_assignment_with_subspace
+
+ def flat_assignment():
+ arr = np.empty((3,), dtype=dtype)
+ values = np.array([value, value, value])
+ arr.flat[:] = values
+
+ yield flat_assignment
+
+@pytest.mark.parametrize(["value", "dtype"], values_and_dtypes())
+@pytest.mark.filterwarnings("ignore::numpy.ComplexWarning")
+def test_floatingpoint_errors_casting(dtype, value):
+ dtype = np.dtype(dtype)
+ for operation in check_operations(dtype, value):
+ dtype = np.dtype(dtype)
+
+ match = "invalid" if dtype.kind in 'iu' else "overflow"
+ with pytest.warns(RuntimeWarning, match=match):
+ operation()
+
+ with np.errstate(all="raise"):
+ with pytest.raises(FloatingPointError, match=match):
+ operation()
+
diff --git a/numpy/core/tests/test_deprecations.py b/numpy/core/tests/test_deprecations.py
index 2b7864433..2255cb2a3 100644
--- a/numpy/core/tests/test_deprecations.py
+++ b/numpy/core/tests/test_deprecations.py
@@ -166,7 +166,7 @@ class TestComparisonDeprecations(_DeprecationTestCase):
# For two string arrays, strings always raised the broadcasting error:
a = np.array(['a', 'b'])
b = np.array(['a', 'b', 'c'])
- assert_raises(ValueError, lambda x, y: x == y, a, b)
+ assert_warns(FutureWarning, lambda x, y: x == y, a, b)
# The empty list is not cast to string, and this used to pass due
# to dtype mismatch; now (2018-06-21) it correctly leads to a
diff --git a/numpy/core/tests/test_dtype.py b/numpy/core/tests/test_dtype.py
index 32e2c6842..b37bded73 100644
--- a/numpy/core/tests/test_dtype.py
+++ b/numpy/core/tests/test_dtype.py
@@ -1346,6 +1346,16 @@ class TestPromotion:
match=r".* no common DType exists for the given inputs"):
np.result_type(1j, rational(1, 2))
+ @pytest.mark.parametrize("val", [2, 2**32, 2**63, 2**64, 2*100])
+ def test_python_integer_promotion(self, val):
+ # If we only pass scalars (mainly python ones!), the result must take
+ # into account that the integer may be considered int32, int64, uint64,
+ # or object depending on the input value. So test those paths!
+ expected_dtype = np.result_type(np.array(val).dtype, np.array(0).dtype)
+ assert np.result_type(val, 0) == expected_dtype
+ # For completeness' sake, also check with a NumPy scalar as second arg:
+ assert np.result_type(val, np.int8(0)) == expected_dtype
+
@pytest.mark.parametrize(["other", "expected"],
[(1, rational), (1., np.float64)])
def test_float_int_pyscalar_promote_rational(self, other, expected):
diff --git a/numpy/core/tests/test_half.py b/numpy/core/tests/test_half.py
index 1b6fd21e1..6743dfb51 100644
--- a/numpy/core/tests/test_half.py
+++ b/numpy/core/tests/test_half.py
@@ -104,9 +104,9 @@ class TestHalf:
# Increase the float by a minimal value:
if offset == "up":
- f16s_float = np.nextafter(f16s_float, float_t(1e50))
+ f16s_float = np.nextafter(f16s_float, float_t(np.inf))
elif offset == "down":
- f16s_float = np.nextafter(f16s_float, float_t(-1e50))
+ f16s_float = np.nextafter(f16s_float, float_t(-np.inf))
# Convert back to float16 and its bit pattern:
res_patterns = f16s_float.astype(np.float16).view(np.uint16)
@@ -233,12 +233,14 @@ class TestHalf:
np.inf]
# Check float64->float16 rounding
- b = np.array(a, dtype=float16)
+ with np.errstate(over="ignore"):
+ b = np.array(a, dtype=float16)
assert_equal(b, rounded)
# Check float32->float16 rounding
a = np.array(a, dtype=float32)
- b = np.array(a, dtype=float16)
+ with np.errstate(over="ignore"):
+ b = np.array(a, dtype=float16)
assert_equal(b, rounded)
def test_half_correctness(self):
diff --git a/numpy/core/tests/test_indexing.py b/numpy/core/tests/test_indexing.py
index efcb92c2e..9ef30eae2 100644
--- a/numpy/core/tests/test_indexing.py
+++ b/numpy/core/tests/test_indexing.py
@@ -1297,11 +1297,10 @@ class TestBooleanIndexing:
def test_boolean_indexing_weirdness(self):
# Weird boolean indexing things
a = np.ones((2, 3, 4))
- a[False, True, ...].shape == (0, 2, 3, 4)
- a[True, [0, 1], True, True, [1], [[2]]] == (1, 2)
+ assert a[False, True, ...].shape == (0, 2, 3, 4)
+ assert a[True, [0, 1], True, True, [1], [[2]]].shape == (1, 2)
assert_raises(IndexError, lambda: a[False, [0, 1], ...])
-
def test_boolean_indexing_fast_path(self):
# These used to either give the wrong error, or incorrectly give no
# error.
diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py
index f4454130d..84fdf545f 100644
--- a/numpy/core/tests/test_multiarray.py
+++ b/numpy/core/tests/test_multiarray.py
@@ -68,8 +68,8 @@ def _aligned_zeros(shape, dtype=float, order="C", align=None):
# Note: slices producing 0-size arrays do not necessarily change
# data pointer --- so we use and allocate size+1
buf = buf[offset:offset+size+1][:-1]
+ buf.fill(0)
data = np.ndarray(shape, dtype, buf, order=order)
- data.fill(0)
return data
@@ -1244,6 +1244,18 @@ class TestStructured:
# The main importance is that it does not return True:
with pytest.raises(TypeError):
x == y
+
+ def test_empty_structured_array_comparison(self):
+ # Check that comparison works on empty arrays with nontrivially
+ # shaped fields
+ a = np.zeros(0, [('a', '<f8', (1, 1))])
+ assert_equal(a, a)
+ a = np.zeros(0, [('a', '<f8', (1,))])
+ assert_equal(a, a)
+ a = np.zeros((0, 0), [('a', '<f8', (1, 1))])
+ assert_equal(a, a)
+ a = np.zeros((1, 0, 1), [('a', '<f8', (1, 1))])
+ assert_equal(a, a)
def test_structured_comparisons_with_promotion(self):
# Check that structured arrays can be compared so long as their
diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py
index 0b03c6576..5b15e29b4 100644
--- a/numpy/core/tests/test_numeric.py
+++ b/numpy/core/tests/test_numeric.py
@@ -2939,7 +2939,9 @@ class TestLikeFuncs:
self.check_like_function(np.full_like, 1, True)
self.check_like_function(np.full_like, 1000, True)
self.check_like_function(np.full_like, 123.456, True)
- self.check_like_function(np.full_like, np.inf, True)
+ # Inf to integer casts cause invalid-value errors: ignore them.
+ with np.errstate(invalid="ignore"):
+ self.check_like_function(np.full_like, np.inf, True)
@pytest.mark.parametrize('likefunc', [np.empty_like, np.full_like,
np.zeros_like, np.ones_like])
diff --git a/numpy/core/tests/test_overrides.py b/numpy/core/tests/test_overrides.py
index 36970dbc0..e68406ebd 100644
--- a/numpy/core/tests/test_overrides.py
+++ b/numpy/core/tests/test_overrides.py
@@ -355,6 +355,45 @@ class TestArrayFunctionImplementation:
TypeError, "no implementation found for 'my.func'"):
func(MyArray())
+ def test_signature_error_message(self):
+ # The dispatcher is named "_dispatcher" (not a lambda), but the
+ # TypeError should show the name as "func"
+ def _dispatcher():
+ return ()
+
+ @array_function_dispatch(_dispatcher)
+ def func():
+ pass
+
+ try:
+ func(bad_arg=3)
+ except TypeError as e:
+ expected_exception = e
+
+ try:
+ func(bad_arg=3)
+ raise AssertionError("must fail")
+ except TypeError as exc:
+ assert exc.args == expected_exception.args
+
+ @pytest.mark.parametrize("value", [234, "this func is not replaced"])
+ def test_dispatcher_error(self, value):
+ # If the dispatcher raises an error, we must not attempt to mutate it
+ error = TypeError(value)
+
+ def dispatcher():
+ raise error
+
+ @array_function_dispatch(dispatcher)
+ def func():
+ return 3
+
+ try:
+ func()
+ raise AssertionError("must fail")
+ except TypeError as exc:
+ assert exc is error # unmodified exception
+
class TestNDArrayMethods:
diff --git a/numpy/core/tests/test_regression.py b/numpy/core/tests/test_regression.py
index 98e0df9b8..4538c825d 100644
--- a/numpy/core/tests/test_regression.py
+++ b/numpy/core/tests/test_regression.py
@@ -326,20 +326,20 @@ class TestRegression:
assert_raises(ValueError, bfa)
assert_raises(ValueError, bfb)
- def test_nonarray_assignment(self):
+ @pytest.mark.parametrize("index",
+ [np.ones(10, dtype=bool), np.arange(10)],
+ ids=["boolean-arr-index", "integer-arr-index"])
+ def test_nonarray_assignment(self, index):
# See also Issue gh-2870, test for non-array assignment
# and equivalent unsafe casted array assignment
a = np.arange(10)
- b = np.ones(10, dtype=bool)
- r = np.arange(10)
- def assign(a, b, c):
- a[b] = c
+ with pytest.raises(ValueError):
+ a[index] = np.nan
- assert_raises(ValueError, assign, a, b, np.nan)
- a[b] = np.array(np.nan) # but not this.
- assert_raises(ValueError, assign, a, r, np.nan)
- a[r] = np.array(np.nan)
+ with np.errstate(invalid="warn"):
+ with pytest.warns(RuntimeWarning, match="invalid value"):
+ a[index] = np.array(np.nan) # Only warns
def test_unpickle_dtype_with_object(self):
# Implemented in r2840
@@ -1496,7 +1496,7 @@ class TestRegression:
min = np.array([np.iinfo(t).min])
min //= -1
- with np.errstate(divide="ignore"):
+ with np.errstate(over="ignore"):
for t in (np.int8, np.int16, np.int32, np.int64, int):
test_type(t)
diff --git a/numpy/core/tests/test_scalarmath.py b/numpy/core/tests/test_scalarmath.py
index b7fe5183e..8b14284ff 100644
--- a/numpy/core/tests/test_scalarmath.py
+++ b/numpy/core/tests/test_scalarmath.py
@@ -683,8 +683,12 @@ class TestNegative:
sup.filter(RuntimeWarning)
for dt in types:
a = np.ones((), dtype=dt)[()]
- assert_equal(operator.neg(a) + a, 0)
-
+ if dt in np.typecodes['UnsignedInteger']:
+ st = np.dtype(dt).type
+ max = st(np.iinfo(dt).max)
+ assert_equal(operator.neg(a), max)
+ else:
+ assert_equal(operator.neg(a) + a, 0)
class TestSubtract:
def test_exceptions(self):
@@ -896,9 +900,13 @@ def test_scalar_integer_operation_overflow(dtype, operation):
@pytest.mark.parametrize("dtype", np.typecodes["Integer"])
@pytest.mark.parametrize("operation", [
+ lambda min, neg_1: -min,
lambda min, neg_1: abs(min),
- lambda min, neg_1: min * neg_1,
- lambda min, neg_1: min // neg_1], ids=["abs", "*", "//"])
+ pytest.param(lambda min, neg_1: min * neg_1,
+ marks=pytest.mark.xfail(reason="broken on some platforms")),
+ pytest.param(lambda min, neg_1: min // neg_1,
+ marks=pytest.mark.skip(reason="broken on some platforms"))],
+ ids=["neg", "abs", "*", "//"])
def test_scalar_signed_integer_overflow(dtype, operation):
# The minimum signed integer can "overflow" for some additional operations
st = np.dtype(dtype).type
@@ -910,8 +918,7 @@ def test_scalar_signed_integer_overflow(dtype, operation):
@pytest.mark.parametrize("dtype", np.typecodes["UnsignedInteger"])
-@pytest.mark.xfail # TODO: the check is quite simply missing!
-def test_scalar_signed_integer_overflow(dtype):
+def test_scalar_unsigned_integer_overflow(dtype):
val = np.dtype(dtype).type(8)
with pytest.warns(RuntimeWarning, match="overflow encountered"):
-val
diff --git a/numpy/core/tests/test_simd.py b/numpy/core/tests/test_simd.py
index 324948cf2..c4488533a 100644
--- a/numpy/core/tests/test_simd.py
+++ b/numpy/core/tests/test_simd.py
@@ -85,16 +85,13 @@ class _Test_Utility:
return getattr(self.npyv, cvt_intrin.format(sfx[1:], sfx))(vector)
def _pinfinity(self):
- v = self.npyv.setall_u32(0x7f800000)
- return self.npyv.reinterpret_f32_u32(v)[0]
+ return float("inf")
def _ninfinity(self):
- v = self.npyv.setall_u32(0xff800000)
- return self.npyv.reinterpret_f32_u32(v)[0]
+ return -float("inf")
def _nan(self):
- v = self.npyv.setall_u32(0x7fc00000)
- return self.npyv.reinterpret_f32_u32(v)[0]
+ return float("nan")
def _cpu_features(self):
target = self.target_name
@@ -170,8 +167,9 @@ class _SIMD_BOOL(_Test_Utility):
for data in (self._data(), self._data(reverse=True)):
vdata = self._load_b(data)
data_bits = data2bits(data)
- tobits = bin(self.tobits(vdata))
- assert tobits == bin(data_bits)
+ tobits = self.tobits(vdata)
+ bin_tobits = bin(tobits)
+ assert bin_tobits == bin(data_bits)
def test_pack(self):
"""
@@ -746,9 +744,11 @@ class _SIMD_ALL(_Test_Utility):
# We're testing the sanity of _simd's type-vector,
# reinterpret* intrinsics itself are tested via compiler
# during the build of _simd module
- sfxes = ["u8", "s8", "u16", "s16", "u32", "s32", "u64", "s64", "f32"]
+ sfxes = ["u8", "s8", "u16", "s16", "u32", "s32", "u64", "s64"]
if self.npyv.simd_f64:
sfxes.append("f64")
+ if self.npyv.simd_f32:
+ sfxes.append("f32")
for sfx in sfxes:
vec_name = getattr(self, "reinterpret_" + sfx)(vdata_a).__name__
assert vec_name == "npyv_" + sfx
@@ -1077,8 +1077,13 @@ for target_name, npyv in targets.items():
skip = f"target '{pretty_name}' isn't supported by current machine"
elif not npyv.simd:
skip = f"target '{pretty_name}' isn't supported by NPYV"
- elif not npyv.simd_f64:
- skip_sfx["f64"] = f"target '{pretty_name}' doesn't support double-precision"
+ else:
+ if not npyv.simd_f32:
+ skip_sfx["f32"] = f"target '{pretty_name}' "\
+ "doesn't support single-precision"
+ if not npyv.simd_f64:
+ skip_sfx["f64"] = f"target '{pretty_name}' doesn't"\
+ "support double-precision"
for sfxes, cls in tests_registry.items():
for sfx in sfxes:
diff --git a/numpy/core/tests/test_simd_module.py b/numpy/core/tests/test_simd_module.py
index 3d710884a..44dc58dac 100644
--- a/numpy/core/tests/test_simd_module.py
+++ b/numpy/core/tests/test_simd_module.py
@@ -12,7 +12,9 @@ npyv, npyv2 = (npyvs + [None, None])[:2]
unsigned_sfx = ["u8", "u16", "u32", "u64"]
signed_sfx = ["s8", "s16", "s32", "s64"]
-fp_sfx = ["f32"]
+fp_sfx = []
+if npyv and npyv.simd_f32:
+ fp_sfx.append("f32")
if npyv and npyv.simd_f64:
fp_sfx.append("f64")
diff --git a/numpy/core/tests/test_strings.py b/numpy/core/tests/test_strings.py
new file mode 100644
index 000000000..2b87ed654
--- /dev/null
+++ b/numpy/core/tests/test_strings.py
@@ -0,0 +1,85 @@
+import pytest
+
+import operator
+import numpy as np
+
+from numpy.testing import assert_array_equal
+
+
+COMPARISONS = [
+ (operator.eq, np.equal, "=="),
+ (operator.ne, np.not_equal, "!="),
+ (operator.lt, np.less, "<"),
+ (operator.le, np.less_equal, "<="),
+ (operator.gt, np.greater, ">"),
+ (operator.ge, np.greater_equal, ">="),
+]
+
+
+@pytest.mark.parametrize(["op", "ufunc", "sym"], COMPARISONS)
+def test_mixed_string_comparison_ufuncs_fail(op, ufunc, sym):
+ arr_string = np.array(["a", "b"], dtype="S")
+ arr_unicode = np.array(["a", "c"], dtype="U")
+
+ with pytest.raises(TypeError, match="did not contain a loop"):
+ ufunc(arr_string, arr_unicode)
+
+ with pytest.raises(TypeError, match="did not contain a loop"):
+ ufunc(arr_unicode, arr_string)
+
+@pytest.mark.parametrize(["op", "ufunc", "sym"], COMPARISONS)
+def test_mixed_string_comparisons_ufuncs_with_cast(op, ufunc, sym):
+ arr_string = np.array(["a", "b"], dtype="S")
+ arr_unicode = np.array(["a", "c"], dtype="U")
+
+ # While there is no loop, manual casting is acceptable:
+ res1 = ufunc(arr_string, arr_unicode, signature="UU->?", casting="unsafe")
+ res2 = ufunc(arr_string, arr_unicode, signature="SS->?", casting="unsafe")
+
+ expected = op(arr_string.astype('U'), arr_unicode)
+ assert_array_equal(res1, expected)
+ assert_array_equal(res2, expected)
+
+
+@pytest.mark.parametrize(["op", "ufunc", "sym"], COMPARISONS)
+@pytest.mark.parametrize("dtypes", [
+ ("S2", "S2"), ("S2", "S10"),
+ ("<U1", "<U1"), ("<U1", ">U1"), (">U1", ">U1"),
+ ("<U1", "<U10"), ("<U1", ">U10")])
+@pytest.mark.parametrize("aligned", [True, False])
+def test_string_comparisons(op, ufunc, sym, dtypes, aligned):
+ # ensure native byte-order for the first view to stay within unicode range
+ native_dt = np.dtype(dtypes[0]).newbyteorder("=")
+ arr = np.arange(2**15).view(native_dt).astype(dtypes[0])
+ if not aligned:
+ # Make `arr` unaligned:
+ new = np.zeros(arr.nbytes + 1, dtype=np.uint8)[1:].view(dtypes[0])
+ new[...] = arr
+ arr = new
+
+ arr2 = arr.astype(dtypes[1], copy=True)
+ np.random.shuffle(arr2)
+ arr[0] = arr2[0] # make sure one matches
+
+ expected = [op(d1, d2) for d1, d2 in zip(arr.tolist(), arr2.tolist())]
+ assert_array_equal(op(arr, arr2), expected)
+ assert_array_equal(ufunc(arr, arr2), expected)
+ assert_array_equal(np.compare_chararrays(arr, arr2, sym, False), expected)
+
+ expected = [op(d2, d1) for d1, d2 in zip(arr.tolist(), arr2.tolist())]
+ assert_array_equal(op(arr2, arr), expected)
+ assert_array_equal(ufunc(arr2, arr), expected)
+ assert_array_equal(np.compare_chararrays(arr2, arr, sym, False), expected)
+
+
+@pytest.mark.parametrize(["op", "ufunc", "sym"], COMPARISONS)
+@pytest.mark.parametrize("dtypes", [
+ ("S2", "S2"), ("S2", "S10"), ("<U1", "<U1"), ("<U1", ">U10")])
+def test_string_comparisons_empty(op, ufunc, sym, dtypes):
+ arr = np.empty((1, 0, 1, 5), dtype=dtypes[0])
+ arr2 = np.empty((100, 1, 0, 1), dtype=dtypes[1])
+
+ expected = np.empty(np.broadcast_shapes(arr.shape, arr2.shape), dtype=bool)
+ assert_array_equal(op(arr, arr2), expected)
+ assert_array_equal(ufunc(arr, arr2), expected)
+ assert_array_equal(np.compare_chararrays(arr, arr2, sym, False), expected)
diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py
index 852044d32..3466178a3 100644
--- a/numpy/core/tests/test_ufunc.py
+++ b/numpy/core/tests/test_ufunc.py
@@ -620,8 +620,9 @@ class TestUfunc:
atol = max(np.finfo(dtout).tiny, 3e-308)
else:
atol = 3e-308
- # Some test values result in invalid for float16.
- with np.errstate(invalid='ignore'):
+ # Some test values result in invalid for float16
+ # and the cast to it may overflow to inf.
+ with np.errstate(invalid='ignore', over='ignore'):
res = np.true_divide(x, y, dtype=dtout)
if not np.isfinite(res) and tcout == 'e':
continue
@@ -665,20 +666,22 @@ class TestUfunc:
for dt in (int, np.float16, np.float32, np.float64, np.longdouble):
for v in (0, 1, 2, 7, 8, 9, 15, 16, 19, 127,
128, 1024, 1235):
- tgt = dt(v * (v + 1) / 2)
- d = np.arange(1, v + 1, dtype=dt)
-
# warning if sum overflows, which it does in float16
- overflow = not np.isfinite(tgt)
-
with warnings.catch_warnings(record=True) as w:
- warnings.simplefilter("always")
- assert_almost_equal(np.sum(d), tgt)
+ warnings.simplefilter("always", RuntimeWarning)
+
+ tgt = dt(v * (v + 1) / 2)
+ overflow = not np.isfinite(tgt)
assert_equal(len(w), 1 * overflow)
- assert_almost_equal(np.sum(d[::-1]), tgt)
+ d = np.arange(1, v + 1, dtype=dt)
+
+ assert_almost_equal(np.sum(d), tgt)
assert_equal(len(w), 2 * overflow)
+ assert_almost_equal(np.sum(d[::-1]), tgt)
+ assert_equal(len(w), 3 * overflow)
+
d = np.ones(500, dtype=dt)
assert_almost_equal(np.sum(d[::2]), 250.)
assert_almost_equal(np.sum(d[1::2]), 250.)
@@ -2454,7 +2457,7 @@ def test_ufunc_warn_with_nan(ufunc):
@pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts")
-def test_ufunc_casterrors():
+def test_ufunc_out_casterrors():
# Tests that casting errors are correctly reported and buffers are
# cleared.
# The following array can be added to itself as an object array, but
@@ -2485,6 +2488,28 @@ def test_ufunc_casterrors():
assert out[-1] == 1
+@pytest.mark.parametrize("bad_offset", [0, int(np.BUFSIZE * 1.5)])
+def test_ufunc_input_casterrors(bad_offset):
+ value = 123
+ arr = np.array([value] * bad_offset +
+ ["string"] +
+ [value] * int(1.5 * np.BUFSIZE), dtype=object)
+ with pytest.raises(ValueError):
+ # Force cast inputs, but the buffered cast of `arr` to intp fails:
+ np.add(arr, arr, dtype=np.intp, casting="unsafe")
+
+
+@pytest.mark.parametrize("bad_offset", [0, int(np.BUFSIZE * 1.5)])
+def test_ufunc_input_floatingpoint_error(bad_offset):
+ value = 123
+ arr = np.array([value] * bad_offset +
+ [np.nan] +
+ [value] * int(1.5 * np.BUFSIZE))
+ with np.errstate(invalid="raise"), pytest.raises(FloatingPointError):
+ # Force cast inputs, but the buffered cast of `arr` to intp fails:
+ np.add(arr, arr, dtype=np.intp, casting="unsafe")
+
+
def test_trivial_loop_invalid_cast():
# This tests the fast-path "invalid cast", see gh-19904.
with pytest.raises(TypeError,
diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py
index 7b6e2ee92..a696fceb8 100644
--- a/numpy/core/tests/test_umath.py
+++ b/numpy/core/tests/test_umath.py
@@ -327,7 +327,9 @@ class TestDivision:
a_lst, b_lst = a.tolist(), b.tolist()
c_div = lambda n, d: (
- 0 if d == 0 or (n and n == fo.min and d == -1) else n//d
+ 0 if d == 0 else (
+ fo.min if (n and n == fo.min and d == -1) else n//d
+ )
)
with np.errstate(divide='ignore'):
ac = a.copy()
@@ -342,7 +344,7 @@ class TestDivision:
for divisor in divisors:
ac = a.copy()
- with np.errstate(divide='ignore'):
+ with np.errstate(divide='ignore', over='ignore'):
div_a = a // divisor
ac //= divisor
div_lst = [c_div(i, divisor) for i in a_lst]
@@ -350,21 +352,25 @@ class TestDivision:
assert all(div_a == div_lst), msg
assert all(ac == div_lst), msg_eq
- with np.errstate(divide='raise'):
- if 0 in b or (fo.min and -1 in b and fo.min in a):
+ with np.errstate(divide='raise', over='raise'):
+ if 0 in b:
# Verify overflow case
- with pytest.raises(FloatingPointError):
+ with pytest.raises(FloatingPointError,
+ match="divide by zero encountered in floor_divide"):
a // b
else:
a // b
if fo.min and fo.min in a:
- with pytest.raises(FloatingPointError):
+ with pytest.raises(FloatingPointError,
+ match='overflow encountered in floor_divide'):
a // -1
elif fo.min:
a // -1
- with pytest.raises(FloatingPointError):
+ with pytest.raises(FloatingPointError,
+ match="divide by zero encountered in floor_divide"):
a // 0
- with pytest.raises(FloatingPointError):
+ with pytest.raises(FloatingPointError,
+ match="divide by zero encountered in floor_divide"):
ac = a.copy()
ac //= 0
@@ -392,11 +398,13 @@ class TestDivision:
msg = "Reduce floor integer division check"
assert div_a == div_lst, msg
- with np.errstate(divide='raise'):
- with pytest.raises(FloatingPointError):
+ with np.errstate(divide='raise', over='raise'):
+ with pytest.raises(FloatingPointError,
+ match="divide by zero encountered in reduce"):
np.floor_divide.reduce(np.arange(-100, 100, dtype=dtype))
if fo.min:
- with pytest.raises(FloatingPointError):
+ with pytest.raises(FloatingPointError,
+ match='overflow encountered in reduce'):
np.floor_divide.reduce(
np.array([fo.min, 1, -1], dtype=dtype)
)
diff --git a/numpy/core/tests/test_unicode.py b/numpy/core/tests/test_unicode.py
index 8e0dd47cb..12de25771 100644
--- a/numpy/core/tests/test_unicode.py
+++ b/numpy/core/tests/test_unicode.py
@@ -1,3 +1,5 @@
+import pytest
+
import numpy as np
from numpy.testing import assert_, assert_equal, assert_array_equal
@@ -33,8 +35,11 @@ def test_string_cast():
uni_arr1 = str_arr.astype('>U')
uni_arr2 = str_arr.astype('<U')
- assert_(str_arr != uni_arr1)
- assert_(str_arr != uni_arr2)
+ with pytest.warns(FutureWarning):
+ assert str_arr != uni_arr1
+ with pytest.warns(FutureWarning):
+ assert str_arr != uni_arr2
+
assert_array_equal(uni_arr1, uni_arr2)
diff --git a/numpy/distutils/ccompiler_opt.py b/numpy/distutils/ccompiler_opt.py
index befc83c16..2019dcb25 100644
--- a/numpy/distutils/ccompiler_opt.py
+++ b/numpy/distutils/ccompiler_opt.py
@@ -955,51 +955,57 @@ class _CCompiler:
def __init__(self):
if hasattr(self, "cc_is_cached"):
return
- # attr regex
+ # attr regex compiler-expression
detect_arch = (
- ("cc_on_x64", ".*(x|x86_|amd)64.*"),
- ("cc_on_x86", ".*(win32|x86|i386|i686).*"),
- ("cc_on_ppc64le", ".*(powerpc|ppc)64(el|le).*"),
- ("cc_on_ppc64", ".*(powerpc|ppc)64.*"),
- ("cc_on_aarch64", ".*(aarch64|arm64).*"),
- ("cc_on_armhf", ".*arm.*"),
- ("cc_on_s390x", ".*s390x.*"),
+ ("cc_on_x64", ".*(x|x86_|amd)64.*", ""),
+ ("cc_on_x86", ".*(win32|x86|i386|i686).*", ""),
+ ("cc_on_ppc64le", ".*(powerpc|ppc)64(el|le).*", ""),
+ ("cc_on_ppc64", ".*(powerpc|ppc)64.*", ""),
+ ("cc_on_aarch64", ".*(aarch64|arm64).*", ""),
+ ("cc_on_armhf", ".*arm.*", "defined(__ARM_ARCH_7__) || "
+ "defined(__ARM_ARCH_7A__)"),
+ ("cc_on_s390x", ".*s390x.*", ""),
# undefined platform
- ("cc_on_noarch", ""),
+ ("cc_on_noarch", "", ""),
)
detect_compiler = (
- ("cc_is_gcc", r".*(gcc|gnu\-g).*"),
- ("cc_is_clang", ".*clang.*"),
- ("cc_is_iccw", ".*(intelw|intelemw|iccw).*"), # intel msvc like
- ("cc_is_icc", ".*(intel|icc).*"), # intel unix like
- ("cc_is_msvc", ".*msvc.*"),
+ ("cc_is_gcc", r".*(gcc|gnu\-g).*", ""),
+ ("cc_is_clang", ".*clang.*", ""),
+ # intel msvc like
+ ("cc_is_iccw", ".*(intelw|intelemw|iccw).*", ""),
+ ("cc_is_icc", ".*(intel|icc).*", ""), # intel unix like
+ ("cc_is_msvc", ".*msvc.*", ""),
# undefined compiler will be treat it as gcc
- ("cc_is_nocc", ""),
+ ("cc_is_nocc", "", ""),
)
detect_args = (
- ("cc_has_debug", ".*(O0|Od|ggdb|coverage|debug:full).*"),
- ("cc_has_native", ".*(-march=native|-xHost|/QxHost).*"),
+ ("cc_has_debug", ".*(O0|Od|ggdb|coverage|debug:full).*", ""),
+ ("cc_has_native", ".*(-march=native|-xHost|/QxHost).*", ""),
# in case if the class run with -DNPY_DISABLE_OPTIMIZATION
- ("cc_noopt", ".*DISABLE_OPT.*"),
+ ("cc_noopt", ".*DISABLE_OPT.*", ""),
)
dist_info = self.dist_info()
platform, compiler_info, extra_args = dist_info
# set False to all attrs
for section in (detect_arch, detect_compiler, detect_args):
- for attr, rgex in section:
+ for attr, rgex, cexpr in section:
setattr(self, attr, False)
for detect, searchin in ((detect_arch, platform), (detect_compiler, compiler_info)):
- for attr, rgex in detect:
+ for attr, rgex, cexpr in detect:
if rgex and not re.match(rgex, searchin, re.IGNORECASE):
continue
+ if cexpr and not self.cc_test_cexpr(cexpr):
+ continue
setattr(self, attr, True)
break
- for attr, rgex in detect_args:
+ for attr, rgex, cexpr in detect_args:
if rgex and not re.match(rgex, extra_args, re.IGNORECASE):
continue
+ if cexpr and not self.cc_test_cexpr(cexpr):
+ continue
setattr(self, attr, True)
if self.cc_on_noarch:
@@ -1071,6 +1077,25 @@ class _CCompiler:
self.dist_log("testing failed", stderr=True)
return test
+ @_Cache.me
+ def cc_test_cexpr(self, cexpr, flags=[]):
+ """
+ Same as the above but supports compile-time expressions.
+ """
+ self.dist_log("testing compiler expression", cexpr)
+ test_path = os.path.join(self.conf_tmp_path, "npy_dist_test_cexpr.c")
+ with open(test_path, "w") as fd:
+ fd.write(textwrap.dedent(f"""\
+ #if !({cexpr})
+ #error "unsupported expression"
+ #endif
+ int dummy;
+ """))
+ test = self.dist_test(test_path, flags)
+ if not test:
+ self.dist_log("testing failed", stderr=True)
+ return test
+
def cc_normalize_flags(self, flags):
"""
Remove the conflicts that caused due gathering implied features flags.
diff --git a/numpy/distutils/checks/cpu_asimd.c b/numpy/distutils/checks/cpu_asimd.c
index 8df556b6c..6bc9022a5 100644
--- a/numpy/distutils/checks/cpu_asimd.c
+++ b/numpy/distutils/checks/cpu_asimd.c
@@ -3,9 +3,10 @@
#endif
#include <arm_neon.h>
-int main(void)
+int main(int argc, char **argv)
{
- float32x4_t v1 = vdupq_n_f32(1.0f), v2 = vdupq_n_f32(2.0f);
+ float *src = (float*)argv[argc-1];
+ float32x4_t v1 = vdupq_n_f32(src[0]), v2 = vdupq_n_f32(src[1]);
/* MAXMIN */
int ret = (int)vgetq_lane_f32(vmaxnmq_f32(v1, v2), 0);
ret += (int)vgetq_lane_f32(vminnmq_f32(v1, v2), 0);
@@ -13,7 +14,8 @@ int main(void)
ret += (int)vgetq_lane_f32(vrndq_f32(v1), 0);
#ifdef __aarch64__
{
- float64x2_t vd1 = vdupq_n_f64(1.0), vd2 = vdupq_n_f64(2.0);
+ double *src2 = (double*)argv[argc-1];
+ float64x2_t vd1 = vdupq_n_f64(src2[0]), vd2 = vdupq_n_f64(src2[1]);
/* MAXMIN */
ret += (int)vgetq_lane_f64(vmaxnmq_f64(vd1, vd2), 0);
ret += (int)vgetq_lane_f64(vminnmq_f64(vd1, vd2), 0);
diff --git a/numpy/distutils/checks/cpu_asimddp.c b/numpy/distutils/checks/cpu_asimddp.c
index 0158d1354..e7068ce02 100644
--- a/numpy/distutils/checks/cpu_asimddp.c
+++ b/numpy/distutils/checks/cpu_asimddp.c
@@ -3,9 +3,10 @@
#endif
#include <arm_neon.h>
-int main(void)
+int main(int argc, char **argv)
{
- uint8x16_t v1 = vdupq_n_u8((unsigned char)1), v2 = vdupq_n_u8((unsigned char)2);
+ unsigned char *src = (unsigned char*)argv[argc-1];
+ uint8x16_t v1 = vdupq_n_u8(src[0]), v2 = vdupq_n_u8(src[1]);
uint32x4_t va = vdupq_n_u32(3);
int ret = (int)vgetq_lane_u32(vdotq_u32(va, v1, v2), 0);
#ifdef __aarch64__
diff --git a/numpy/distutils/checks/cpu_asimdfhm.c b/numpy/distutils/checks/cpu_asimdfhm.c
index cb49751c4..54e328098 100644
--- a/numpy/distutils/checks/cpu_asimdfhm.c
+++ b/numpy/distutils/checks/cpu_asimdfhm.c
@@ -3,12 +3,14 @@
#endif
#include <arm_neon.h>
-int main(void)
+int main(int argc, char **argv)
{
- float16x8_t vhp = vdupq_n_f16((float16_t)1);
- float16x4_t vlhp = vdup_n_f16((float16_t)1);
- float32x4_t vf = vdupq_n_f32(1.0f);
- float32x2_t vlf = vdup_n_f32(1.0f);
+ float16_t *src = (float16_t*)argv[argc-1];
+ float *src2 = (float*)argv[argc-2];
+ float16x8_t vhp = vdupq_n_f16(src[0]);
+ float16x4_t vlhp = vdup_n_f16(src[1]);
+ float32x4_t vf = vdupq_n_f32(src2[0]);
+ float32x2_t vlf = vdup_n_f32(src2[1]);
int ret = (int)vget_lane_f32(vfmlal_low_f16(vlf, vlhp, vlhp), 0);
ret += (int)vgetq_lane_f32(vfmlslq_high_f16(vf, vhp, vhp), 0);
diff --git a/numpy/distutils/checks/cpu_asimdhp.c b/numpy/distutils/checks/cpu_asimdhp.c
index 80b94000f..e2de0306e 100644
--- a/numpy/distutils/checks/cpu_asimdhp.c
+++ b/numpy/distutils/checks/cpu_asimdhp.c
@@ -3,10 +3,11 @@
#endif
#include <arm_neon.h>
-int main(void)
+int main(int argc, char **argv)
{
- float16x8_t vhp = vdupq_n_f16((float16_t)-1);
- float16x4_t vlhp = vdup_n_f16((float16_t)-1);
+ float16_t *src = (float16_t*)argv[argc-1];
+ float16x8_t vhp = vdupq_n_f16(src[0]);
+ float16x4_t vlhp = vdup_n_f16(src[1]);
int ret = (int)vgetq_lane_f16(vabdq_f16(vhp, vhp), 0);
ret += (int)vget_lane_f16(vabd_f16(vlhp, vlhp), 0);
diff --git a/numpy/distutils/checks/cpu_neon.c b/numpy/distutils/checks/cpu_neon.c
index 4eab1f384..8c64f864d 100644
--- a/numpy/distutils/checks/cpu_neon.c
+++ b/numpy/distutils/checks/cpu_neon.c
@@ -3,12 +3,16 @@
#endif
#include <arm_neon.h>
-int main(void)
+int main(int argc, char **argv)
{
- float32x4_t v1 = vdupq_n_f32(1.0f), v2 = vdupq_n_f32(2.0f);
+ // Load the inputs through untraced pointers so the compiler cannot
+ // optimize the constants away, which lets us test against the linker.
+ float *src = (float*)argv[argc-1];
+ float32x4_t v1 = vdupq_n_f32(src[0]), v2 = vdupq_n_f32(src[1]);
int ret = (int)vgetq_lane_f32(vmulq_f32(v1, v2), 0);
#ifdef __aarch64__
- float64x2_t vd1 = vdupq_n_f64(1.0), vd2 = vdupq_n_f64(2.0);
+ double *src2 = (double*)argv[argc-2];
+ float64x2_t vd1 = vdupq_n_f64(src2[0]), vd2 = vdupq_n_f64(src2[1]);
ret += (int)vgetq_lane_f64(vmulq_f64(vd1, vd2), 0);
#endif
return ret;
diff --git a/numpy/distutils/checks/cpu_neon_fp16.c b/numpy/distutils/checks/cpu_neon_fp16.c
index 745d2e793..f3b949770 100644
--- a/numpy/distutils/checks/cpu_neon_fp16.c
+++ b/numpy/distutils/checks/cpu_neon_fp16.c
@@ -3,9 +3,9 @@
#endif
#include <arm_neon.h>
-int main(void)
+int main(int argc, char **argv)
{
- short z4[] = {0, 0, 0, 0, 0, 0, 0, 0};
- float32x4_t v_z4 = vcvt_f32_f16((float16x4_t)vld1_s16((const short*)z4));
+ short *src = (short*)argv[argc-1];
+ float32x4_t v_z4 = vcvt_f32_f16((float16x4_t)vld1_s16(src));
return (int)vgetq_lane_f32(v_z4, 0);
}
diff --git a/numpy/distutils/checks/cpu_neon_vfpv4.c b/numpy/distutils/checks/cpu_neon_vfpv4.c
index 45f7b5d69..a039159dd 100644
--- a/numpy/distutils/checks/cpu_neon_vfpv4.c
+++ b/numpy/distutils/checks/cpu_neon_vfpv4.c
@@ -3,16 +3,18 @@
#endif
#include <arm_neon.h>
-int main(void)
+int main(int argc, char **argv)
{
- float32x4_t v1 = vdupq_n_f32(1.0f);
- float32x4_t v2 = vdupq_n_f32(2.0f);
- float32x4_t v3 = vdupq_n_f32(3.0f);
+ float *src = (float*)argv[argc-1];
+ float32x4_t v1 = vdupq_n_f32(src[0]);
+ float32x4_t v2 = vdupq_n_f32(src[1]);
+ float32x4_t v3 = vdupq_n_f32(src[2]);
int ret = (int)vgetq_lane_f32(vfmaq_f32(v1, v2, v3), 0);
#ifdef __aarch64__
- float64x2_t vd1 = vdupq_n_f64(1.0);
- float64x2_t vd2 = vdupq_n_f64(2.0);
- float64x2_t vd3 = vdupq_n_f64(3.0);
+ double *src2 = (double*)argv[argc-2];
+ float64x2_t vd1 = vdupq_n_f64(src2[0]);
+ float64x2_t vd2 = vdupq_n_f64(src2[1]);
+ float64x2_t vd3 = vdupq_n_f64(src2[2]);
ret += (int)vgetq_lane_f64(vfmaq_f64(vd1, vd2, vd3), 0);
#endif
return ret;
diff --git a/numpy/distutils/misc_util.py b/numpy/distutils/misc_util.py
index 78665d351..b3916a2c8 100644
--- a/numpy/distutils/misc_util.py
+++ b/numpy/distutils/misc_util.py
@@ -358,7 +358,7 @@ if terminal_has_colors():
fgcode = 30 + _colour_codes.get(fg.lower(), 0)
seq.append(str(fgcode))
if bg:
- bgcode = 40 + _colour_codes.get(fg.lower(), 7)
+ bgcode = 40 + _colour_codes.get(bg.lower(), 7)
seq.append(str(bgcode))
if seq:
return '\x1b[%sm%s\x1b[0m' % (';'.join(seq), s)
diff --git a/numpy/f2py/capi_maps.py b/numpy/f2py/capi_maps.py
index e5dc2331a..f07066a09 100644
--- a/numpy/f2py/capi_maps.py
+++ b/numpy/f2py/capi_maps.py
@@ -176,6 +176,7 @@ f2cmap_all = {'real': {'': 'float', '4': 'float', '8': 'double',
f2cmap_default = copy.deepcopy(f2cmap_all)
+f2cmap_mapped = []
def load_f2cmap_file(f2cmap_file):
global f2cmap_all
@@ -212,6 +213,7 @@ def load_f2cmap_file(f2cmap_file):
f2cmap_all[k][k1] = d[k][k1]
outmess('\tMapping "%s(kind=%s)" to "%s"\n' %
(k, k1, d[k][k1]))
+ f2cmap_mapped.append(d[k][k1])
else:
errmess("\tIgnoring map {'%s':{'%s':'%s'}}: '%s' must be in %s\n" % (
k, k1, d[k][k1], d[k][k1], list(c2py_map.keys())))
diff --git a/numpy/f2py/rules.py b/numpy/f2py/rules.py
index c9c3b2383..63c48a878 100755
--- a/numpy/f2py/rules.py
+++ b/numpy/f2py/rules.py
@@ -1323,6 +1323,9 @@ def buildmodule(m, um):
rd = dictappend(rd, ar)
needs = cfuncs.get_needs()
+ # Add mapped definitions
+ needs['typedefs'] += [cvar for cvar in capi_maps.f2cmap_mapped #
+ if cvar in typedef_need_dict.values()]
code = {}
for n in needs.keys():
code[n] = []
diff --git a/numpy/f2py/src/fortranobject.h b/numpy/f2py/src/fortranobject.h
index 376b83dad..abd699c2f 100644
--- a/numpy/f2py/src/fortranobject.h
+++ b/numpy/f2py/src/fortranobject.h
@@ -6,7 +6,9 @@ extern "C" {
#include <Python.h>
-#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
+#ifndef NPY_NO_DEPRECATED_API
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+#endif
#ifdef FORTRANOBJECT_C
#define NO_IMPORT_ARRAY
#endif
diff --git a/numpy/f2py/tests/src/f2cmap/.f2py_f2cmap b/numpy/f2py/tests/src/f2cmap/.f2py_f2cmap
new file mode 100644
index 000000000..a4425f887
--- /dev/null
+++ b/numpy/f2py/tests/src/f2cmap/.f2py_f2cmap
@@ -0,0 +1 @@
+dict(real=dict(real32='float', real64='double'), integer=dict(int64='long_long'))
diff --git a/numpy/f2py/tests/src/f2cmap/isoFortranEnvMap.f90 b/numpy/f2py/tests/src/f2cmap/isoFortranEnvMap.f90
new file mode 100644
index 000000000..3f0e12c76
--- /dev/null
+++ b/numpy/f2py/tests/src/f2cmap/isoFortranEnvMap.f90
@@ -0,0 +1,9 @@
+ subroutine func1(n, x, res)
+ use, intrinsic :: iso_fortran_env, only: int64, real64
+ implicit none
+ integer(int64), intent(in) :: n
+ real(real64), intent(in) :: x(n)
+ real(real64), intent(out) :: res
+Cf2py intent(hide) :: n
+ res = sum(x)
+ end
diff --git a/numpy/f2py/tests/test_f2cmap.py b/numpy/f2py/tests/test_f2cmap.py
new file mode 100644
index 000000000..d2967e4f7
--- /dev/null
+++ b/numpy/f2py/tests/test_f2cmap.py
@@ -0,0 +1,15 @@
+from . import util
+import numpy as np
+
+class TestF2Cmap(util.F2PyTest):
+ sources = [
+ util.getpath("tests", "src", "f2cmap", "isoFortranEnvMap.f90"),
+ util.getpath("tests", "src", "f2cmap", ".f2py_f2cmap")
+ ]
+
+ # gh-15095
+ def test_long_long_map(self):
+ inp = np.ones(3)
+ out = self.module.func1(inp)
+ exp_out = 3
+ assert out == exp_out
diff --git a/numpy/lib/tests/test_loadtxt.py b/numpy/lib/tests/test_loadtxt.py
index 8839ef0a8..0b8fe3c47 100644
--- a/numpy/lib/tests/test_loadtxt.py
+++ b/numpy/lib/tests/test_loadtxt.py
@@ -5,6 +5,7 @@ These tests complement those found in `test_io.py`.
"""
import sys
+import os
import pytest
from tempfile import NamedTemporaryFile, mkstemp
from io import StringIO
@@ -252,7 +253,7 @@ def test_ragged_usecols():
txt = StringIO("0,0,XXX\n0\n0,XXX,XXX,0,XXX\n")
with pytest.raises(ValueError,
- match="invalid column index -2 at row 1 with 2 columns"):
+ match="invalid column index -2 at row 2 with 1 columns"):
# There is no -2 column in the second row:
np.loadtxt(txt, dtype=float, delimiter=",", usecols=[0, -2])
@@ -960,9 +961,11 @@ def test_parametric_unit_discovery(
# file-obj path
fd, fname = mkstemp()
+ os.close(fd)
with open(fname, "w") as fh:
fh.write("\n".join(data))
a = np.loadtxt(fname, dtype=unitless_dtype)
+ os.remove(fname)
assert a.dtype == expected.dtype
assert_equal(a, expected)
@@ -982,9 +985,11 @@ def test_str_dtype_unit_discovery_with_converter():
# file-obj path
fd, fname = mkstemp()
+ os.close(fd)
with open(fname, "w") as fh:
fh.write("\n".join(data))
a = np.loadtxt(fname, dtype="U", converters=conv, encoding=None)
+ os.remove(fname)
assert a.dtype == expected.dtype
assert_equal(a, expected)
diff --git a/numpy/linalg/lapack_lite/f2c.c b/numpy/linalg/lapack_lite/f2c.c
index 9a1e9cec1..f1d3fdfbe 100644
--- a/numpy/linalg/lapack_lite/f2c.c
+++ b/numpy/linalg/lapack_lite/f2c.c
@@ -14,9 +14,9 @@
#include "f2c.h"
-extern void s_wsfe(cilist *f) {;}
-extern void e_wsfe(void) {;}
-extern void do_fio(integer *c, char *s, ftnlen l) {;}
+extern int s_wsfe(cilist *f) {return 0;}
+extern int e_wsfe(void) {return 0;}
+extern int do_fio(integer *c, char *s, ftnlen l) {return 0;}
/* You'll want this if you redo the f2c_*.c files with the -C option
* to f2c for checking array subscripts. (It's not suggested you do that
@@ -377,7 +377,7 @@ p->i = p1.i;
#endif /* NO_OVERWRITE */
- VOID
+ int
#ifdef KR_headers
s_cat(lp, rpp, rnp, np, ll) char *lp, *rpp[]; ftnlen rnp[], *np, ll;
#else
@@ -485,9 +485,9 @@ return(0);
/* assign strings: a = b */
#ifdef KR_headers
-VOID s_copy(a, b, la, lb) register char *a, *b; ftnlen la, lb;
+int s_copy(a, b, la, lb) register char *a, *b; ftnlen la, lb;
#else
-void s_copy(register char *a, register char *b, ftnlen la, ftnlen lb)
+int s_copy(register char *a, register char *b, ftnlen la, ftnlen lb)
#endif
{
register char *aend, *bend;
@@ -524,6 +524,7 @@ void s_copy(register char *a, register char *b, ftnlen la, ftnlen lb)
while(a < aend)
*a++ = ' ';
}
+ return 0;
}
diff --git a/numpy/linalg/lapack_lite/f2c.h b/numpy/linalg/lapack_lite/f2c.h
index d3fbfc177..b44aaac44 100644
--- a/numpy/linalg/lapack_lite/f2c.h
+++ b/numpy/linalg/lapack_lite/f2c.h
@@ -263,7 +263,7 @@ extern double d_tan(double *);
extern double d_tanh(double *);
extern double derf_(double *);
extern double derfc_(double *);
-extern void do_fio(ftnint *, char *, ftnlen);
+extern int do_fio(ftnint *, char *, ftnlen);
extern integer do_lio(ftnint *, ftnint *, char *, ftnlen);
extern integer do_uio(ftnint *, char *, ftnlen);
extern integer e_rdfe(void);
@@ -275,7 +275,7 @@ extern integer e_rsli(void);
extern integer e_rsue(void);
extern integer e_wdfe(void);
extern integer e_wdue(void);
-extern void e_wsfe(void);
+extern int e_wsfe(void);
extern integer e_wsfi(void);
extern integer e_wsle(void);
extern integer e_wsli(void);
@@ -350,9 +350,9 @@ extern double r_sinh(float *);
extern double r_sqrt(float *);
extern double r_tan(float *);
extern double r_tanh(float *);
-extern void s_cat(char *, char **, integer *, integer *, ftnlen);
+extern int s_cat(char *, char **, integer *, integer *, ftnlen);
extern integer s_cmp(char *, char *, ftnlen, ftnlen);
-extern void s_copy(char *, char *, ftnlen, ftnlen);
+extern int s_copy(char *, char *, ftnlen, ftnlen);
extern int s_paus(char *, ftnlen);
extern integer s_rdfe(cilist *);
extern integer s_rdue(cilist *);
@@ -367,7 +367,7 @@ extern integer s_rsue(cilist *);
extern int s_stop(char *, ftnlen);
extern integer s_wdfe(cilist *);
extern integer s_wdue(cilist *);
-extern void s_wsfe( cilist *);
+extern int s_wsfe( cilist *);
extern integer s_wsfi(icilist *);
extern integer s_wsle(cilist *);
extern integer s_wsli(icilist *);
diff --git a/numpy/linalg/setup.py b/numpy/linalg/setup.py
index dc62dff8f..1c4e1295e 100644
--- a/numpy/linalg/setup.py
+++ b/numpy/linalg/setup.py
@@ -1,5 +1,6 @@
import os
import sys
+import sysconfig
def configuration(parent_package='', top_path=None):
from numpy.distutils.misc_util import Configuration
@@ -38,7 +39,14 @@ def configuration(parent_package='', top_path=None):
class numpy_linalg_lapack_lite(system_info):
def calc_info(self):
info = {'language': 'c'}
- if sys.maxsize > 2**32:
+ size_t_size = sysconfig.get_config_var("SIZEOF_SIZE_T")
+ if size_t_size:
+ maxsize = 2**(size_t_size - 1) - 1
+ else:
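+ # We prefer using sysconfig as it allows cross-compilation,
+ # but the information may be missing (e.g. on Windows).
+ # NOTE(review): SIZEOF_SIZE_T is presumably a size in bytes, in which
+ # case the exponent above should be a bit count -- confirm.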
+ maxsize = sys.maxsize
+ if maxsize > 2**32:
# Build lapack-lite in 64-bit integer mode.
# The suffix is arbitrary (lapack_lite symbols follow it),
# but use the "64_" convention here.
diff --git a/numpy/ma/core.py b/numpy/ma/core.py
index 78333ed02..d8fd4f389 100644
--- a/numpy/ma/core.py
+++ b/numpy/ma/core.py
@@ -31,6 +31,7 @@ from functools import reduce
import numpy as np
import numpy.core.umath as umath
import numpy.core.numerictypes as ntypes
+from numpy.core import multiarray as mu
from numpy import ndarray, amax, amin, iscomplexobj, bool_, _NoValue
from numpy import array as narray
from numpy.lib.function_base import angle
@@ -5289,14 +5290,22 @@ class MaskedArray(ndarray):
"""
kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims}
-
if self._mask is nomask:
result = super().mean(axis=axis, dtype=dtype, **kwargs)[()]
else:
+ is_float16_result = False
+ if dtype is None:
+ if issubclass(self.dtype.type, (ntypes.integer, ntypes.bool_)):
+ dtype = mu.dtype('f8')
+ elif issubclass(self.dtype.type, ntypes.float16):
+ dtype = mu.dtype('f4')
+ is_float16_result = True
dsum = self.sum(axis=axis, dtype=dtype, **kwargs)
cnt = self.count(axis=axis, **kwargs)
if cnt.shape == () and (cnt == 0):
result = masked
+ elif is_float16_result:
+ result = self.dtype.type(dsum * 1. / cnt)
else:
result = dsum * 1. / cnt
if out is not None:
diff --git a/numpy/ma/tests/test_core.py b/numpy/ma/tests/test_core.py
index 0dada104d..4fac897de 100644
--- a/numpy/ma/tests/test_core.py
+++ b/numpy/ma/tests/test_core.py
@@ -4036,6 +4036,12 @@ class TestMaskedArrayMathMethods:
assert_equal(a.max(-1), [3, 6])
assert_equal(a.max(1), [3, 6])
+ def test_mean_overflow(self):
+ # Test overflow in masked arrays
+ # gh-20272
+ a = masked_array(np.full((10000, 10000), 65535, dtype=np.uint16),
+ mask=np.zeros((10000, 10000)))
+ assert_equal(a.mean(), 65535.0)
class TestMaskedArrayMathMethodsComplex:
# Test class for miscellaneous MaskedArrays methods.
@@ -4158,7 +4164,11 @@ class TestMaskedArrayFunctions:
# test that masked_where on a structured array sets a structured
# mask (see issue #2972)
a = np.zeros(10, dtype=[("A", "<f2"), ("B", "<f4")])
- am = np.ma.masked_where(a["A"] < 5, a)
+ with np.errstate(over="ignore"):
+ # NOTE: The float16 field "uses" 1e20 as its fill value, which
+ # overflows to inf and warns. This is unrelated to this test, but
+ # probably undesired; NumPy previously did not warn for this overflow.
+ am = np.ma.masked_where(a["A"] < 5, a)
assert_equal(am.mask.dtype.names, am.dtype.names)
assert_equal(am["A"],
np.ma.masked_array(np.zeros(10), np.ones(10)))
@@ -4334,7 +4344,10 @@ class TestMaskedArrayFunctions:
tmp[(xm <= 2).filled(True)] = True
assert_equal(d._mask, tmp)
- ixm = xm.astype(int)
+ with np.errstate(invalid="warn"):
+ # The fill value is 1e20, which cannot be converted to `int`:
+ with pytest.warns(RuntimeWarning, match="invalid value"):
+ ixm = xm.astype(int)
d = where(ixm > 2, ixm, masked)
assert_equal(d, [-9, -9, -9, -9, -9, 4, -9, -9, 10, -9, -9, 3])
assert_equal(d.dtype, ixm.dtype)
diff --git a/numpy/polynomial/__init__.py b/numpy/polynomial/__init__.py
index 5a3addf4c..c4e7baf2c 100644
--- a/numpy/polynomial/__init__.py
+++ b/numpy/polynomial/__init__.py
@@ -156,17 +156,17 @@ def set_default_printstyle(style):
>>> c = np.polynomial.Chebyshev([1, 2, 3])
>>> np.polynomial.set_default_printstyle('unicode')
>>> print(p)
- 1.0 + 2.0·x¹ + 3.0·x²
+ 1.0 + 2.0·x + 3.0·x²
>>> print(c)
1.0 + 2.0·T₁(x) + 3.0·T₂(x)
>>> np.polynomial.set_default_printstyle('ascii')
>>> print(p)
- 1.0 + 2.0 x**1 + 3.0 x**2
+ 1.0 + 2.0 x + 3.0 x**2
>>> print(c)
1.0 + 2.0 T_1(x) + 3.0 T_2(x)
>>> # Formatting supersedes all class/package-level defaults
>>> print(f"{p:unicode}")
- 1.0 + 2.0·x¹ + 3.0·x²
+ 1.0 + 2.0·x + 3.0·x²
"""
if style not in ('unicode', 'ascii'):
raise ValueError(
diff --git a/numpy/polynomial/_polybase.py b/numpy/polynomial/_polybase.py
index 6382732dc..9674dee0b 100644
--- a/numpy/polynomial/_polybase.py
+++ b/numpy/polynomial/_polybase.py
@@ -366,7 +366,7 @@ class ABCPolyBase(abc.ABC):
linewidth = np.get_printoptions().get('linewidth', 75)
if linewidth < 1:
linewidth = 1
- out = f"{self.coef[0]}"
+ out = pu.format_float(self.coef[0])
for i, coef in enumerate(self.coef[1:]):
out += " "
power = str(i + 1)
@@ -376,9 +376,9 @@ class ABCPolyBase(abc.ABC):
# complex). In this case, represent the coefficient as-is.
try:
if coef >= 0:
- next_term = f"+ {coef}"
+ next_term = f"+ " + pu.format_float(coef, parens=True)
else:
- next_term = f"- {-coef}"
+ next_term = f"- " + pu.format_float(-coef, parens=True)
except TypeError:
next_term = f"+ {coef}"
# Polynomial term
@@ -432,10 +432,10 @@ class ABCPolyBase(abc.ABC):
return f"{{{cls.basis_name}}}_{{{i}}}({arg_str})"
@staticmethod
- def _repr_latex_scalar(x):
+ def _repr_latex_scalar(x, parens=False):
# TODO: we're stuck with disabling math formatting until we handle
# exponents in this function
- return r'\text{{{}}}'.format(x)
+ return r'\text{{{}}}'.format(pu.format_float(x, parens=parens))
def _repr_latex_(self):
# get the scaled argument string to the basis functions
@@ -466,9 +466,9 @@ class ABCPolyBase(abc.ABC):
elif not isinstance(c, numbers.Real):
coef_str = f" + ({self._repr_latex_scalar(c)})"
elif not np.signbit(c):
- coef_str = f" + {self._repr_latex_scalar(c)}"
+ coef_str = f" + {self._repr_latex_scalar(c, parens=True)}"
else:
- coef_str = f" - {self._repr_latex_scalar(-c)}"
+ coef_str = f" - {self._repr_latex_scalar(-c, parens=True)}"
# produce the string for the term
term_str = self._repr_latex_term(i, term, needs_parens)
diff --git a/numpy/polynomial/polynomial.py b/numpy/polynomial/polynomial.py
index b4741355f..8e2c6f002 100644
--- a/numpy/polynomial/polynomial.py
+++ b/numpy/polynomial/polynomial.py
@@ -1512,11 +1512,17 @@ class Polynomial(ABCPolyBase):
@classmethod
def _str_term_unicode(cls, i, arg_str):
- return f"·{arg_str}{i.translate(cls._superscript_mapping)}"
+ if i == '1':
+ return f"·{arg_str}"
+ else:
+ return f"·{arg_str}{i.translate(cls._superscript_mapping)}"
@staticmethod
def _str_term_ascii(i, arg_str):
- return f" {arg_str}**{i}"
+ if i == '1':
+ return f" {arg_str}"
+ else:
+ return f" {arg_str}**{i}"
@staticmethod
def _repr_latex_term(i, arg_str, needs_parens):
diff --git a/numpy/polynomial/polyutils.py b/numpy/polynomial/polyutils.py
index a2bc75a4d..482913892 100644
--- a/numpy/polynomial/polyutils.py
+++ b/numpy/polynomial/polyutils.py
@@ -32,9 +32,13 @@ import warnings
import numpy as np
+from numpy.core.multiarray import dragon4_positional, dragon4_scientific
+from numpy.core.umath import absolute
+
__all__ = [
'RankWarning', 'as_series', 'trimseq',
- 'trimcoef', 'getdomain', 'mapdomain', 'mapparms']
+ 'trimcoef', 'getdomain', 'mapdomain', 'mapparms',
+ 'format_float']
#
# Warnings and Exceptions
@@ -748,3 +752,38 @@ def _deprecate_as_int(x, desc):
return ix
raise TypeError(f"{desc} must be an integer") from e
+
+
+def format_float(x, parens=False):
+ if not np.issubdtype(type(x), np.floating):
+ return str(x)
+
+ opts = np.get_printoptions()
+
+ if np.isnan(x):
+ return opts['nanstr']
+ elif np.isinf(x):
+ return opts['infstr']
+
+ exp_format = False
+ if x != 0:
+ a = absolute(x)
+ if a >= 1.e8 or a < 10**min(0, -(opts['precision']-1)//2):
+ exp_format = True
+
+ trim, unique = '0', True
+ if opts['floatmode'] == 'fixed':
+ trim, unique = 'k', False
+
+ if exp_format:
+ s = dragon4_scientific(x, precision=opts['precision'],
+ unique=unique, trim=trim,
+ sign=opts['sign'] == '+')
+ if parens:
+ s = '(' + s + ')'
+ else:
+ s = dragon4_positional(x, precision=opts['precision'],
+ fractional=True,
+ unique=unique, trim=trim,
+ sign=opts['sign'] == '+')
+ return s
diff --git a/numpy/polynomial/polyutils.pyi b/numpy/polynomial/polyutils.pyi
index 06260a9f1..c0bcc6784 100644
--- a/numpy/polynomial/polyutils.pyi
+++ b/numpy/polynomial/polyutils.pyi
@@ -8,3 +8,4 @@ def trimcoef(c, tol=...): ...
def getdomain(x): ...
def mapparms(old, new): ...
def mapdomain(x, old, new): ...
+def format_float(x, parens=...): ...
diff --git a/numpy/polynomial/tests/test_printing.py b/numpy/polynomial/tests/test_printing.py
index 0c4316223..990a0d179 100644
--- a/numpy/polynomial/tests/test_printing.py
+++ b/numpy/polynomial/tests/test_printing.py
@@ -1,3 +1,4 @@
+from math import nan, inf
import pytest
from numpy.core import array, arange, printoptions
import numpy.polynomial as poly
@@ -15,9 +16,9 @@ class TestStrUnicodeSuperSubscripts:
poly.set_default_printstyle('unicode')
@pytest.mark.parametrize(('inp', 'tgt'), (
- ([1, 2, 3], "1.0 + 2.0·x¹ + 3.0·x²"),
- ([-1, 0, 3, -1], "-1.0 + 0.0·x¹ + 3.0·x² - 1.0·x³"),
- (arange(12), ("0.0 + 1.0·x¹ + 2.0·x² + 3.0·x³ + 4.0·x⁴ + 5.0·x⁵ + "
+ ([1, 2, 3], "1.0 + 2.0·x + 3.0·x²"),
+ ([-1, 0, 3, -1], "-1.0 + 0.0·x + 3.0·x² - 1.0·x³"),
+ (arange(12), ("0.0 + 1.0·x + 2.0·x² + 3.0·x³ + 4.0·x⁴ + 5.0·x⁵ + "
"6.0·x⁶ + 7.0·x⁷ +\n8.0·x⁸ + 9.0·x⁹ + 10.0·x¹⁰ + "
"11.0·x¹¹")),
))
@@ -89,9 +90,9 @@ class TestStrAscii:
poly.set_default_printstyle('ascii')
@pytest.mark.parametrize(('inp', 'tgt'), (
- ([1, 2, 3], "1.0 + 2.0 x**1 + 3.0 x**2"),
- ([-1, 0, 3, -1], "-1.0 + 0.0 x**1 + 3.0 x**2 - 1.0 x**3"),
- (arange(12), ("0.0 + 1.0 x**1 + 2.0 x**2 + 3.0 x**3 + 4.0 x**4 + "
+ ([1, 2, 3], "1.0 + 2.0 x + 3.0 x**2"),
+ ([-1, 0, 3, -1], "-1.0 + 0.0 x + 3.0 x**2 - 1.0 x**3"),
+ (arange(12), ("0.0 + 1.0 x + 2.0 x**2 + 3.0 x**3 + 4.0 x**4 + "
"5.0 x**5 + 6.0 x**6 +\n7.0 x**7 + 8.0 x**8 + "
"9.0 x**9 + 10.0 x**10 + 11.0 x**11")),
))
@@ -168,51 +169,51 @@ class TestLinebreaking:
def test_single_line_one_less(self):
# With 'ascii' style, len(str(p)) is default linewidth - 1 (i.e. 74)
- p = poly.Polynomial([123456789, 123456789, 123456789, 1234, 1])
+ p = poly.Polynomial([12345678, 12345678, 12345678, 12345678, 123])
assert_equal(len(str(p)), 74)
assert_equal(str(p), (
- '123456789.0 + 123456789.0 x**1 + 123456789.0 x**2 + '
- '1234.0 x**3 + 1.0 x**4'
+ '12345678.0 + 12345678.0 x + 12345678.0 x**2 + '
+ '12345678.0 x**3 + 123.0 x**4'
))
def test_num_chars_is_linewidth(self):
# len(str(p)) == default linewidth == 75
- p = poly.Polynomial([123456789, 123456789, 123456789, 1234, 10])
+ p = poly.Polynomial([12345678, 12345678, 12345678, 12345678, 1234])
assert_equal(len(str(p)), 75)
assert_equal(str(p), (
- '123456789.0 + 123456789.0 x**1 + 123456789.0 x**2 + '
- '1234.0 x**3 +\n10.0 x**4'
+ '12345678.0 + 12345678.0 x + 12345678.0 x**2 + '
+ '12345678.0 x**3 +\n1234.0 x**4'
))
def test_first_linebreak_multiline_one_less_than_linewidth(self):
# Multiline str where len(first_line) + len(next_term) == lw - 1 == 74
p = poly.Polynomial(
- [123456789, 123456789, 123456789, 12, 1, 123456789]
+ [12345678, 12345678, 12345678, 12345678, 1, 12345678]
)
assert_equal(len(str(p).split('\n')[0]), 74)
assert_equal(str(p), (
- '123456789.0 + 123456789.0 x**1 + 123456789.0 x**2 + '
- '12.0 x**3 + 1.0 x**4 +\n123456789.0 x**5'
+ '12345678.0 + 12345678.0 x + 12345678.0 x**2 + '
+ '12345678.0 x**3 + 1.0 x**4 +\n12345678.0 x**5'
))
def test_first_linebreak_multiline_on_linewidth(self):
# First line is one character longer than previous test
p = poly.Polynomial(
- [123456789, 123456789, 123456789, 123, 1, 123456789]
+ [12345678, 12345678, 12345678, 12345678.12, 1, 12345678]
)
assert_equal(str(p), (
- '123456789.0 + 123456789.0 x**1 + 123456789.0 x**2 + '
- '123.0 x**3 +\n1.0 x**4 + 123456789.0 x**5'
+ '12345678.0 + 12345678.0 x + 12345678.0 x**2 + '
+ '12345678.12 x**3 +\n1.0 x**4 + 12345678.0 x**5'
))
@pytest.mark.parametrize(('lw', 'tgt'), (
- (75, ('0.0 + 10.0 x**1 + 200.0 x**2 + 3000.0 x**3 + 40000.0 x**4 +\n'
- '500000.0 x**5 + 600000.0 x**6 + 70000.0 x**7 + 8000.0 x**8 + '
+ (75, ('0.0 + 10.0 x + 200.0 x**2 + 3000.0 x**3 + 40000.0 x**4 + '
+ '500000.0 x**5 +\n600000.0 x**6 + 70000.0 x**7 + 8000.0 x**8 + '
'900.0 x**9')),
- (45, ('0.0 + 10.0 x**1 + 200.0 x**2 + 3000.0 x**3 +\n40000.0 x**4 + '
+ (45, ('0.0 + 10.0 x + 200.0 x**2 + 3000.0 x**3 +\n40000.0 x**4 + '
'500000.0 x**5 +\n600000.0 x**6 + 70000.0 x**7 + 8000.0 x**8 +\n'
'900.0 x**9')),
- (132, ('0.0 + 10.0 x**1 + 200.0 x**2 + 3000.0 x**3 + 40000.0 x**4 + '
+ (132, ('0.0 + 10.0 x + 200.0 x**2 + 3000.0 x**3 + 40000.0 x**4 + '
'500000.0 x**5 + 600000.0 x**6 + 70000.0 x**7 + 8000.0 x**8 + '
'900.0 x**9')),
))
@@ -230,10 +231,10 @@ def test_set_default_printoptions():
p = poly.Polynomial([1, 2, 3])
c = poly.Chebyshev([1, 2, 3])
poly.set_default_printstyle('ascii')
- assert_equal(str(p), "1.0 + 2.0 x**1 + 3.0 x**2")
+ assert_equal(str(p), "1.0 + 2.0 x + 3.0 x**2")
assert_equal(str(c), "1.0 + 2.0 T_1(x) + 3.0 T_2(x)")
poly.set_default_printstyle('unicode')
- assert_equal(str(p), "1.0 + 2.0·x¹ + 3.0·x²")
+ assert_equal(str(p), "1.0 + 2.0·x + 3.0·x²")
assert_equal(str(c), "1.0 + 2.0·T₁(x) + 3.0·T₂(x)")
with pytest.raises(ValueError):
poly.set_default_printstyle('invalid_input')
@@ -247,22 +248,22 @@ def test_complex_coefficients():
# Python complex
p2 = poly.Polynomial(array(coefs, dtype=object))
poly.set_default_printstyle('unicode')
- assert_equal(str(p1), "1j + (1+1j)·x¹ - (2-2j)·x² + (3+0j)·x³")
- assert_equal(str(p2), "1j + (1+1j)·x¹ + (-2+2j)·x² + (3+0j)·x³")
+ assert_equal(str(p1), "1j + (1+1j)·x - (2-2j)·x² + (3+0j)·x³")
+ assert_equal(str(p2), "1j + (1+1j)·x + (-2+2j)·x² + (3+0j)·x³")
poly.set_default_printstyle('ascii')
- assert_equal(str(p1), "1j + (1+1j) x**1 - (2-2j) x**2 + (3+0j) x**3")
- assert_equal(str(p2), "1j + (1+1j) x**1 + (-2+2j) x**2 + (3+0j) x**3")
+ assert_equal(str(p1), "1j + (1+1j) x - (2-2j) x**2 + (3+0j) x**3")
+ assert_equal(str(p2), "1j + (1+1j) x + (-2+2j) x**2 + (3+0j) x**3")
@pytest.mark.parametrize(('coefs', 'tgt'), (
(array([Fraction(1, 2), Fraction(3, 4)], dtype=object), (
- "1/2 + 3/4·x¹"
+ "1/2 + 3/4·x"
)),
(array([1, 2, Fraction(5, 7)], dtype=object), (
- "1 + 2·x¹ + 5/7·x²"
+ "1 + 2·x + 5/7·x²"
)),
(array([Decimal('1.00'), Decimal('2.2'), 3], dtype=object), (
- "1.00 + 2.2·x¹ + 3·x²"
+ "1.00 + 2.2·x + 3·x²"
)),
))
def test_numeric_object_coefficients(coefs, tgt):
@@ -272,8 +273,8 @@ def test_numeric_object_coefficients(coefs, tgt):
@pytest.mark.parametrize(('coefs', 'tgt'), (
- (array([1, 2, 'f'], dtype=object), '1 + 2·x¹ + f·x²'),
- (array([1, 2, [3, 4]], dtype=object), '1 + 2·x¹ + [3, 4]·x²'),
+ (array([1, 2, 'f'], dtype=object), '1 + 2·x + f·x²'),
+ (array([1, 2, [3, 4]], dtype=object), '1 + 2·x + [3, 4]·x²'),
))
def test_nonnumeric_object_coefficients(coefs, tgt):
"""
@@ -288,20 +289,20 @@ class TestFormat:
def test_format_unicode(self):
poly.set_default_printstyle('ascii')
p = poly.Polynomial([1, 2, 0, -1])
- assert_equal(format(p, 'unicode'), "1.0 + 2.0·x¹ + 0.0·x² - 1.0·x³")
+ assert_equal(format(p, 'unicode'), "1.0 + 2.0·x + 0.0·x² - 1.0·x³")
def test_format_ascii(self):
poly.set_default_printstyle('unicode')
p = poly.Polynomial([1, 2, 0, -1])
assert_equal(
- format(p, 'ascii'), "1.0 + 2.0 x**1 + 0.0 x**2 - 1.0 x**3"
+ format(p, 'ascii'), "1.0 + 2.0 x + 0.0 x**2 - 1.0 x**3"
)
def test_empty_formatstr(self):
poly.set_default_printstyle('ascii')
p = poly.Polynomial([1, 2, 3])
- assert_equal(format(p), "1.0 + 2.0 x**1 + 3.0 x**2")
- assert_equal(f"{p}", "1.0 + 2.0 x**1 + 3.0 x**2")
+ assert_equal(format(p), "1.0 + 2.0 x + 3.0 x**2")
+ assert_equal(f"{p}", "1.0 + 2.0 x + 3.0 x**2")
def test_bad_formatstr(self):
p = poly.Polynomial([1, 2, 0, -1])
@@ -310,7 +311,7 @@ class TestFormat:
@pytest.mark.parametrize(('poly', 'tgt'), (
- (poly.Polynomial, '1.0 + 2.0·z¹ + 3.0·z²'),
+ (poly.Polynomial, '1.0 + 2.0·z + 3.0·z²'),
(poly.Chebyshev, '1.0 + 2.0·T₁(z) + 3.0·T₂(z)'),
(poly.Hermite, '1.0 + 2.0·H₁(z) + 3.0·H₂(z)'),
(poly.HermiteE, '1.0 + 2.0·He₁(z) + 3.0·He₂(z)'),
@@ -379,7 +380,7 @@ class TestLatexRepr:
# right now we ignore the formatting of scalars in our tests, since
# it makes them too verbose. Ideally, the formatting of scalars will
# be fixed such that tests below continue to pass
- obj._repr_latex_scalar = lambda x: str(x)
+ obj._repr_latex_scalar = lambda x, parens=False: str(x)
try:
return obj._repr_latex_()
finally:
@@ -455,3 +456,71 @@ class TestLatexRepr:
r'\left(1.0 + 2.0z\right)^{2}$'
),
)
+
+
+SWITCH_TO_EXP = (
+ '1.0 + (1.0e-01) x + (1.0e-02) x**2',
+ '1.2 + (1.2e-01) x + (1.2e-02) x**2',
+ '1.23 + 0.12 x + (1.23e-02) x**2 + (1.23e-03) x**3',
+ '1.235 + 0.123 x + (1.235e-02) x**2 + (1.235e-03) x**3',
+ '1.2346 + 0.1235 x + 0.0123 x**2 + (1.2346e-03) x**3 + (1.2346e-04) x**4',
+ '1.23457 + 0.12346 x + 0.01235 x**2 + (1.23457e-03) x**3 + '
+ '(1.23457e-04) x**4',
+ '1.234568 + 0.123457 x + 0.012346 x**2 + 0.001235 x**3 + '
+ '(1.234568e-04) x**4 + (1.234568e-05) x**5',
+ '1.2345679 + 0.1234568 x + 0.0123457 x**2 + 0.0012346 x**3 + '
+ '(1.2345679e-04) x**4 + (1.2345679e-05) x**5')
+
+class TestPrintOptions:
+ """
+ Test that the output is properly configured via printoptions.
+ The exponential notation is enabled automatically when the values
+ are too small or too large.
+ """
+
+ def test_str(self):
+ p = poly.Polynomial([1/2, 1/7, 1/7*10**8, 1/7*10**9])
+ assert_equal(str(p), '0.5 + 0.14285714 x + 14285714.28571429 x**2 '
+ '+ (1.42857143e+08) x**3')
+
+ with printoptions(precision=3):
+ assert_equal(str(p), '0.5 + 0.143 x + 14285714.286 x**2 '
+ '+ (1.429e+08) x**3')
+
+ def test_latex(self):
+ p = poly.Polynomial([1/2, 1/7, 1/7*10**8, 1/7*10**9])
+ assert_equal(p._repr_latex_(),
+ r'$x \mapsto \text{0.5} + \text{0.14285714}\,x + '
+ r'\text{14285714.28571429}\,x^{2} + '
+ r'\text{(1.42857143e+08)}\,x^{3}$')
+
+ with printoptions(precision=3):
+ assert_equal(p._repr_latex_(),
+ r'$x \mapsto \text{0.5} + \text{0.143}\,x + '
+ r'\text{14285714.286}\,x^{2} + \text{(1.429e+08)}\,x^{3}$')
+
+ def test_fixed(self):
+ p = poly.Polynomial([1/2])
+ assert_equal(str(p), '0.5')
+
+ with printoptions(floatmode='fixed'):
+ assert_equal(str(p), '0.50000000')
+
+ with printoptions(floatmode='fixed', precision=4):
+ assert_equal(str(p), '0.5000')
+
+ def test_switch_to_exp(self):
+ for i, s in enumerate(SWITCH_TO_EXP):
+ with printoptions(precision=i):
+ p = poly.Polynomial([1.23456789*10**-i
+ for i in range(i//2+3)])
+ assert str(p).replace('\n', ' ') == s
+
+ def test_non_finite(self):
+ p = poly.Polynomial([nan, inf])
+ assert str(p) == 'nan + inf x'
+ assert p._repr_latex_() == r'$x \mapsto \text{nan} + \text{inf}\,x$'
+ with printoptions(nanstr='NAN', infstr='INF'):
+ assert str(p) == 'NAN + INF x'
+ assert p._repr_latex_() == \
+ r'$x \mapsto \text{NAN} + \text{INF}\,x$'
diff --git a/numpy/random/_generator.pyx b/numpy/random/_generator.pyx
index b54fe3610..0019c4bcd 100644
--- a/numpy/random/_generator.pyx
+++ b/numpy/random/_generator.pyx
@@ -3660,6 +3660,11 @@ cdef class Generator:
# Check preconditions on arguments
mean = np.array(mean)
cov = np.array(cov)
+
+ if (np.issubdtype(mean.dtype, np.complexfloating) or
+ np.issubdtype(cov.dtype, np.complexfloating)):
+ raise TypeError("mean and cov must not be complex")
+
if size is None:
shape = []
elif isinstance(size, (int, long, np.integer)):
diff --git a/numpy/random/tests/test_generator_mt19937.py b/numpy/random/tests/test_generator_mt19937.py
index 3ccb9103c..fa55ac0ee 100644
--- a/numpy/random/tests/test_generator_mt19937.py
+++ b/numpy/random/tests/test_generator_mt19937.py
@@ -1452,6 +1452,12 @@ class TestRandomDist:
mu, np.empty((3, 2)))
assert_raises(ValueError, random.multivariate_normal,
mu, np.eye(3))
+
+ @pytest.mark.parametrize('mean, cov', [([0], [[1+1j]]), ([0j], [[1]])])
+ def test_multivariate_normal_disallow_complex(self, mean, cov):
+ random = Generator(MT19937(self.seed))
+ with pytest.raises(TypeError, match="must not be complex"):
+ random.multivariate_normal(mean, cov)
@pytest.mark.parametrize("method", ["svd", "eigh", "cholesky"])
def test_multivariate_normal_basic_stats(self, method):
diff --git a/numpy/typing/tests/data/pass/arithmetic.py b/numpy/typing/tests/data/pass/arithmetic.py
index 4ed69c923..07a990127 100644
--- a/numpy/typing/tests/data/pass/arithmetic.py
+++ b/numpy/typing/tests/data/pass/arithmetic.py
@@ -2,6 +2,7 @@ from __future__ import annotations
from typing import Any
import numpy as np
+import pytest
c16 = np.complex128(1)
f8 = np.float64(1)
@@ -330,8 +331,9 @@ AR_O **= AR_LIKE_O
-f4
-i8
-i4
--u8
--u4
+with pytest.warns(RuntimeWarning):
+ -u8
+ -u4
-td
-AR_f