author     Sebastian Berg <sebastianb@nvidia.com>    2023-04-26 10:51:21 +0200
committer  GitHub <noreply@github.com>               2023-04-26 10:51:21 +0200
commit     b4f0e4122e83115d99de849fcfc4fd1fc8b8f65d (patch)
tree       0695fcc40428a235161418f2bd288f9a90e3d640 /numpy
parent     c7ff4118a345c966e2a0fc688e054e3fd9191e99 (diff)
parent     5665fdcdf21dec575d204852b208eaecf1a1638d (diff)
download   numpy-b4f0e4122e83115d99de849fcfc4fd1fc8b8f65d.tar.gz
Merge branch 'main' into f2pyFuncFix_23598
Diffstat (limited to 'numpy')
-rw-r--r--  numpy/core/_add_newdocs_scalars.py                        |   6
-rw-r--r--  numpy/core/arrayprint.py                                  |  34
-rw-r--r--  numpy/core/code_generators/generate_umath.py              |   4
-rw-r--r--  numpy/core/code_generators/ufunc_docstrings.py            |   2
-rw-r--r--  numpy/core/function_base.py                               |   2
-rw-r--r--  numpy/core/include/numpy/_dtype_api.h                     |  26
-rw-r--r--  numpy/core/include/numpy/npy_common.h                     |   8
-rw-r--r--  numpy/core/meson.build                                    |   5
-rw-r--r--  numpy/core/setup.py                                       |   8
-rw-r--r--  numpy/core/src/common/common.hpp                          |   3
-rw-r--r--  numpy/core/src/common/float_status.hpp                    | 134
-rw-r--r--  numpy/core/src/common/half.hpp                            | 255
-rw-r--r--  numpy/core/src/common/half_private.hpp                    | 330
-rw-r--r--  numpy/core/src/common/npdef.hpp                           |  28
-rw-r--r--  numpy/core/src/common/npstd.hpp                           |   2
-rw-r--r--  numpy/core/src/common/utils.hpp                           |  51
-rw-r--r--  numpy/core/src/multiarray/array_coercion.c                |  47
-rw-r--r--  numpy/core/src/multiarray/common.c                        |  47
-rw-r--r--  numpy/core/src/multiarray/common.h                        |  10
-rw-r--r--  numpy/core/src/multiarray/convert_datatype.c              |  18
-rw-r--r--  numpy/core/src/multiarray/ctors.c                         |  61
-rw-r--r--  numpy/core/src/multiarray/dtype_traversal.c               |  74
-rw-r--r--  numpy/core/src/multiarray/dtype_traversal.h               |  21
-rw-r--r--  numpy/core/src/multiarray/dtypemeta.c                     |   9
-rw-r--r--  numpy/core/src/multiarray/dtypemeta.h                     |  19
-rw-r--r--  numpy/core/src/multiarray/getset.c                        |  19
-rw-r--r--  numpy/core/src/multiarray/methods.c                       |   4
-rw-r--r--  numpy/core/src/multiarray/number.c                        |   6
-rw-r--r--  numpy/core/src/multiarray/refcount.c                      |   7
-rw-r--r--  numpy/core/src/multiarray/refcount.h                      |   3
-rw-r--r--  numpy/core/src/npymath/halffloat.c                        | 555
-rw-r--r--  numpy/core/src/npymath/halffloat.cpp                      | 238
-rw-r--r--  numpy/core/src/umath/loops.h.src                          |  16
-rw-r--r--  numpy/core/src/umath/loops_trigonometric.dispatch.c.src   | 243
-rw-r--r--  numpy/core/src/umath/loops_umath_fp.dispatch.c.src        |  54
-rw-r--r--  numpy/core/tests/test_array_coercion.py                   |  18
-rw-r--r--  numpy/core/tests/test_arrayprint.py                       |  39
-rw-r--r--  numpy/core/tests/test_deprecations.py                     |  98
-rw-r--r--  numpy/core/tests/test_multiarray.py                       |  25
-rw-r--r--  numpy/core/tests/test_umath.py                            |  37
-rw-r--r--  numpy/distutils/command/build_ext.py                      |   2
-rwxr-xr-x  numpy/f2py/crackfortran.py                                |  40
-rw-r--r--  numpy/f2py/tests/src/string/scalar_string.f90             |   2
-rw-r--r--  numpy/f2py/tests/test_character.py                        |  22
-rw-r--r--  numpy/f2py/tests/test_crackfortran.py                     |  38
-rw-r--r--  numpy/lib/npyio.py                                        |   3
-rw-r--r--  numpy/lib/npyio.pyi                                       |   1
-rw-r--r--  numpy/ma/core.py                                          |  12
-rw-r--r--  numpy/ma/tests/test_core.py                               |  34
49 files changed, 1734 insertions, 986 deletions
diff --git a/numpy/core/_add_newdocs_scalars.py b/numpy/core/_add_newdocs_scalars.py
index 86fe5583c..58661ed09 100644
--- a/numpy/core/_add_newdocs_scalars.py
+++ b/numpy/core/_add_newdocs_scalars.py
@@ -204,7 +204,11 @@ add_newdoc_for_scalar_type('str_', ['unicode_'],
r"""
A unicode string.
- When used in arrays, this type strips trailing null codepoints.
+ This type strips trailing null codepoints.
+
+ >>> s = np.str_("abc\x00")
+ >>> s
+ 'abc'
Unlike the builtin `str`, this supports the :ref:`python:bufferobjects`, exposing its
contents as UCS4:
diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py
index dcfb6e6a8..62cd52707 100644
--- a/numpy/core/arrayprint.py
+++ b/numpy/core/arrayprint.py
@@ -1096,7 +1096,7 @@ def format_float_scientific(x, precision=None, unique=True, trim='k',
identify the value may be printed and rounded unbiased.
.. versionadded:: 1.21.0
-
+
Returns
-------
rep : string
@@ -1181,7 +1181,7 @@ def format_float_positional(x, precision=None, unique=True,
Minimum number of digits to print. Only has an effect if `unique=True`
in which case additional digits past those necessary to uniquely
identify the value may be printed, rounding the last additional digit.
-
+
.. versionadded:: 1.21.0
Returns
@@ -1339,13 +1339,29 @@ class TimedeltaFormat(_TimelikeFormat):
class SubArrayFormat:
- def __init__(self, format_function):
+ def __init__(self, format_function, **options):
self.format_function = format_function
+ self.threshold = options['threshold']
+ self.edge_items = options['edgeitems']
+
+ def __call__(self, a):
+ self.summary_insert = "..." if a.size > self.threshold else ""
+ return self.format_array(a)
+
+ def format_array(self, a):
+ if np.ndim(a) == 0:
+ return self.format_function(a)
+
+ if self.summary_insert and a.shape[0] > 2*self.edge_items:
+ formatted = (
+ [self.format_array(a_) for a_ in a[:self.edge_items]]
+ + [self.summary_insert]
+ + [self.format_array(a_) for a_ in a[-self.edge_items:]]
+ )
+ else:
+ formatted = [self.format_array(a_) for a_ in a]
- def __call__(self, arr):
- if arr.ndim <= 1:
- return "[" + ", ".join(self.format_function(a) for a in arr) + "]"
- return "[" + ", ".join(self.__call__(a) for a in arr) + "]"
+ return "[" + ", ".join(formatted) + "]"
class StructuredVoidFormat:
@@ -1369,7 +1385,7 @@ class StructuredVoidFormat:
for field_name in data.dtype.names:
format_function = _get_format_function(data[field_name], **options)
if data.dtype[field_name].shape != ():
- format_function = SubArrayFormat(format_function)
+ format_function = SubArrayFormat(format_function, **options)
format_functions.append(format_function)
return cls(format_functions)
@@ -1428,7 +1444,7 @@ def dtype_is_implied(dtype):
# not just void types can be structured, and names are not part of the repr
if dtype.names is not None:
return False
-
+
# should care about endianness *unless size is 1* (e.g., int8, bool)
if not dtype.isnative:
return False
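
A quick user-level sketch (not part of the patch): with the SubArrayFormat change above, sub-array fields of structured arrays should honor the `threshold` and `edgeitems` print options instead of always being printed in full. The exact spacing of the output below is approximate:

>>> import numpy as np
>>> np.set_printoptions(threshold=5, edgeitems=2)
>>> a = np.zeros(1, dtype=[('x', 'i4', (20,))])
>>> a
array([([0, 0, ..., 0, 0],)], dtype=[('x', '<i4', (20,))])
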
diff --git a/numpy/core/code_generators/generate_umath.py b/numpy/core/code_generators/generate_umath.py
index 1fe2e43c0..d0306bb72 100644
--- a/numpy/core/code_generators/generate_umath.py
+++ b/numpy/core/code_generators/generate_umath.py
@@ -799,7 +799,7 @@ defdict = {
None,
TD('e', dispatch=[('loops_umath_fp', 'e')]),
TD('f', dispatch=[('loops_trigonometric', 'f')]),
- TD('d', dispatch=[('loops_umath_fp', 'd')]),
+ TD('d', dispatch=[('loops_trigonometric', 'd')]),
TD('g' + cmplx, f='cos'),
TD(P, f='cos'),
),
@@ -809,7 +809,7 @@ defdict = {
None,
TD('e', dispatch=[('loops_umath_fp', 'e')]),
TD('f', dispatch=[('loops_trigonometric', 'f')]),
- TD('d', dispatch=[('loops_umath_fp', 'd')]),
+ TD('d', dispatch=[('loops_trigonometric', 'd')]),
TD('g' + cmplx, f='sin'),
TD(P, f='sin'),
),
diff --git a/numpy/core/code_generators/ufunc_docstrings.py b/numpy/core/code_generators/ufunc_docstrings.py
index 05f947c15..1695b4d27 100644
--- a/numpy/core/code_generators/ufunc_docstrings.py
+++ b/numpy/core/code_generators/ufunc_docstrings.py
@@ -3833,7 +3833,7 @@ add_newdoc('numpy.core.umath', 'sqrt',
--------
emath.sqrt
A version which returns complex numbers when given negative reals.
- Note: 0.0 and -0.0 are handled differently for complex inputs.
+ Note that 0.0 and -0.0 are handled differently for complex inputs.
Notes
-----
diff --git a/numpy/core/function_base.py b/numpy/core/function_base.py
index c82b495b1..00e4e6b0e 100644
--- a/numpy/core/function_base.py
+++ b/numpy/core/function_base.py
@@ -168,7 +168,7 @@ def linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None,
y += start
if endpoint and num > 1:
- y[-1] = stop
+ y[-1, ...] = stop
if axis != 0:
y = _nx.moveaxis(y, 0, axis)
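
For reference (not part of the patch), the `y[-1, ...] = stop` assignment preserves the documented guarantee that with `endpoint=True` the final sample equals `stop` exactly, including for array-valued endpoints:

>>> import numpy as np
>>> np.linspace(0, 10, num=5)[-1]
10.0
>>> np.linspace([0, 0], [1, 100], num=5)[-1]
array([  1., 100.])
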
diff --git a/numpy/core/include/numpy/_dtype_api.h b/numpy/core/include/numpy/_dtype_api.h
index 2f801eace..c397699c7 100644
--- a/numpy/core/include/numpy/_dtype_api.h
+++ b/numpy/core/include/numpy/_dtype_api.h
@@ -5,14 +5,14 @@
#ifndef NUMPY_CORE_INCLUDE_NUMPY___DTYPE_API_H_
#define NUMPY_CORE_INCLUDE_NUMPY___DTYPE_API_H_
-#define __EXPERIMENTAL_DTYPE_API_VERSION 9
+#define __EXPERIMENTAL_DTYPE_API_VERSION 10
struct PyArrayMethodObject_tag;
/*
* Largely opaque struct for DType classes (i.e. metaclass instances).
* The internal definition is currently in `ndarraytypes.h` (export is a bit
- * more complex because `PyArray_Descr` is a DTypeMeta internall but not
+ * more complex because `PyArray_Descr` is a DTypeMeta internally but not
* externally).
*/
#if !(defined(NPY_INTERNAL_BUILD) && NPY_INTERNAL_BUILD)
@@ -199,9 +199,8 @@ typedef int (get_reduction_initial_function)(
PyArrayMethod_Context *context, npy_bool reduction_is_empty,
char *initial);
-
/*
- * The following functions are only used be the wrapping array method defined
+ * The following functions are only used by the wrapping array method defined
* in umath/wrapping_array_method.c
*/
@@ -257,14 +256,13 @@ typedef int translate_loop_descrs_func(int nin, int nout,
* strided-loop function. This is designed for loops that need to visit every
* element of a single array.
*
- * Currently this is only used for array clearing, via the
- * NPY_DT_get_clear_loop, api hook, particularly for arrays storing embedded
- * references to python objects or heap-allocated data. If you define a dtype
- * that uses embedded references, the NPY_ITEM_REFCOUNT flag must be set on the
- * dtype instance.
+ * Currently this is used for array clearing, via the NPY_DT_get_clear_loop
+ * API hook, and zero-filling, via the NPY_DT_get_fill_zero_loop API hook.
+ * These are most useful for handling arrays storing embedded references to
+ * python objects or heap-allocated data.
*
* The `void *traverse_context` is passed in because we may need to pass in
- * Intepreter state or similar in the futurem, but we don't want to pass in
+ * Interpreter state or similar in the future, but we don't want to pass in
* a full context (with pointers to dtypes, method, caller which all make
* no sense for a traverse function).
*
@@ -280,9 +278,10 @@ typedef int (traverse_loop_function)(
/*
* Simplified get_loop function specific to dtype traversal
*
- * Currently this is only used for clearing arrays. It should set the flags
- * needed for the traversal loop and set out_loop to the loop function, which
- * must be a valid traverse_loop_function pointer.
+ * It should set the flags needed for the traversal loop and set out_loop to the
+ * loop function, which must be a valid traverse_loop_function
+ * pointer. Currently this is used for zero-filling and clearing arrays storing
+ * embedded references.
*
*/
typedef int (get_traverse_loop_function)(
@@ -319,6 +318,7 @@ typedef int (get_traverse_loop_function)(
#define NPY_DT_setitem 7
#define NPY_DT_getitem 8
#define NPY_DT_get_clear_loop 9
+#define NPY_DT_get_fill_zero_loop 10
// These PyArray_ArrFunc slots will be deprecated and replaced eventually
// getitem and setitem can be defined as a performance optimization;
diff --git a/numpy/core/include/numpy/npy_common.h b/numpy/core/include/numpy/npy_common.h
index 3b31bcf2d..728cedbf1 100644
--- a/numpy/core/include/numpy/npy_common.h
+++ b/numpy/core/include/numpy/npy_common.h
@@ -105,6 +105,14 @@
#define NPY_FINLINE static
#endif
+#if defined(_MSC_VER)
+ #define NPY_NOINLINE static __declspec(noinline)
+#elif defined(__GNUC__) || defined(__clang__)
+ #define NPY_NOINLINE static __attribute__((noinline))
+#else
+ #define NPY_NOINLINE static
+#endif
+
#ifdef HAVE___THREAD
#define NPY_TLS __thread
#else
diff --git a/numpy/core/meson.build b/numpy/core/meson.build
index e968fb336..e51a7ba3b 100644
--- a/numpy/core/meson.build
+++ b/numpy/core/meson.build
@@ -430,6 +430,7 @@ _numpyconfig_h = configure_file(
# ----------------------------
staticlib_cflags = []
+staticlib_cppflags = []
if cc.get_id() == 'msvc'
# Disable voltbl section for vc142 to allow link using mingw-w64; see:
# https://github.com/matthew-brett/dll_investigation/issues/1#issuecomment-1100468171
@@ -443,6 +444,7 @@ endif
# https://mesonbuild.com/Build-options.html#build-options
if get_option('disable-simd-optimizations')
staticlib_cflags += '-DNPY_DISABLE_OPTIMIZATION'
+ staticlib_cppflags += '-DNPY_DISABLE_OPTIMIZATION'
endif
npy_math_internal_h = custom_target(
@@ -455,12 +457,13 @@ npymath_sources = [
src_file.process('src/npymath/ieee754.c.src'),
src_file.process('src/npymath/npy_math_complex.c.src'),
npy_math_internal_h,
- 'src/npymath/halffloat.c',
+ 'src/npymath/halffloat.cpp',
'src/npymath/npy_math.c',
]
npymath_lib = static_library('npymath',
npymath_sources,
c_args: staticlib_cflags,
+ cpp_args: staticlib_cppflags,
include_directories: ['include', 'src/npymath', 'src/common'],
dependencies: py_dep,
install: true,
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index 680c2a5f6..8ef012f70 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -669,7 +669,7 @@ def configuration(parent_package='',top_path=None):
# join('src', 'npymath', 'ieee754.cpp'),
join('src', 'npymath', 'ieee754.c.src'),
join('src', 'npymath', 'npy_math_complex.c.src'),
- join('src', 'npymath', 'halffloat.c'),
+ join('src', 'npymath', 'halffloat.cpp'),
]
config.add_installed_library('npymath',
@@ -727,7 +727,8 @@ def configuration(parent_package='',top_path=None):
join('src', 'common', 'numpyos.h'),
join('src', 'common', 'npy_cpu_dispatch.h'),
join('src', 'common', 'simd', 'simd.h'),
- ]
+ join('src', 'common', 'common.hpp'),
+ ]
common_src = [
join('src', 'common', 'array_assign.c'),
@@ -1010,9 +1011,6 @@ def configuration(parent_package='',top_path=None):
svml_objs.sort()
config.add_extension('_multiarray_umath',
- # Forcing C language even though we have C++ sources.
- # It forces the C linker and don't link C++ runtime.
- language = 'c',
sources=multiarray_src + umath_src +
common_src +
[generate_config_h,
diff --git a/numpy/core/src/common/common.hpp b/numpy/core/src/common/common.hpp
index 47d790bcf..44ba449d8 100644
--- a/numpy/core/src/common/common.hpp
+++ b/numpy/core/src/common/common.hpp
@@ -4,8 +4,11 @@
* The following C++ headers are safe to be used standalone, however,
* they are gathered to make it easy for us and for the future need to support PCH.
*/
+#include "npdef.hpp"
+#include "utils.hpp"
#include "npstd.hpp"
#include "half.hpp"
#include "meta.hpp"
+#include "float_status.hpp"
#endif // NUMPY_CORE_SRC_COMMON_COMMON_HPP
diff --git a/numpy/core/src/common/float_status.hpp b/numpy/core/src/common/float_status.hpp
new file mode 100644
index 000000000..8e4d5e06a
--- /dev/null
+++ b/numpy/core/src/common/float_status.hpp
@@ -0,0 +1,134 @@
+#ifndef NUMPY_CORE_SRC_COMMON_FLOAT_STATUS_HPP
+#define NUMPY_CORE_SRC_COMMON_FLOAT_STATUS_HPP
+
+#include "npstd.hpp"
+
+#include <fenv.h>
+
+namespace np {
+
+/// @addtogroup cpp_core_utility
+/// @{
+/**
+ * A class that wraps floating-point environment operations and
+ * provides lazy access to its functionality.
+ */
+class FloatStatus {
+ public:
+/*
+ * According to the C99 standard FE_DIVBYZERO, etc. may not be provided when
+ * unsupported. In such cases NumPy will not report these correctly, but we
+ * should still allow compiling (whether tests pass or not).
+ * By defining them as 0 locally, we make them no-ops. Unlike these defines,
+ * for example `musl` still defines all of the functions (as no-ops):
+ * https://git.musl-libc.org/cgit/musl/tree/src/fenv/fenv.c
+ * and does similar replacement in its tests:
+ * http://nsz.repo.hu/git/?p=libc-test;a=blob;f=src/common/mtest.h;h=706c1ba23ea8989b17a2f72ed1a919e187c06b6a;hb=HEAD#l30
+ */
+#ifdef FE_DIVBYZERO
+ static constexpr int kDivideByZero = FE_DIVBYZERO;
+#else
+ static constexpr int kDivideByZero = 0;
+#endif
+#ifdef FE_INVALID
+ static constexpr int kInvalid = FE_INVALID;
+#else
+ static constexpr int kInvalid = 0;
+#endif
+#ifdef FE_INEXACT
+ static constexpr int kInexact = FE_INEXACT;
+#else
+ static constexpr int kInexact = 0;
+#endif
+#ifdef FE_OVERFLOW
+ static constexpr int kOverflow = FE_OVERFLOW;
+#else
+ static constexpr int kOverflow = 0;
+#endif
+#ifdef FE_UNDERFLOW
+ static constexpr int kUnderflow = FE_UNDERFLOW;
+#else
+ static constexpr int kUnderflow = 0;
+#endif
+ static constexpr int kAllExcept = (kDivideByZero | kInvalid | kInexact |
+ kOverflow | kUnderflow);
+
+ FloatStatus(bool clear_on_dst=true)
+ : clear_on_dst_(clear_on_dst)
+ {
+ if constexpr (kAllExcept != 0) {
+ fpstatus_ = fetestexcept(kAllExcept);
+ }
+ else {
+ fpstatus_ = 0;
+ }
+ }
+ ~FloatStatus()
+ {
+ if constexpr (kAllExcept != 0) {
+ if (fpstatus_ != 0 && clear_on_dst_) {
+ feclearexcept(kAllExcept);
+ }
+ }
+ }
+ constexpr bool IsDivideByZero() const
+ {
+ return (fpstatus_ & kDivideByZero) != 0;
+ }
+ constexpr bool IsInexact() const
+ {
+ return (fpstatus_ & kInexact) != 0;
+ }
+ constexpr bool IsInvalid() const
+ {
+ return (fpstatus_ & kInvalid) != 0;
+ }
+ constexpr bool IsOverFlow() const
+ {
+ return (fpstatus_ & kOverflow) != 0;
+ }
+ constexpr bool IsUnderFlow() const
+ {
+ return (fpstatus_ & kUnderflow) != 0;
+ }
+ static void RaiseDivideByZero()
+ {
+ if constexpr (kDivideByZero != 0) {
+ feraiseexcept(kDivideByZero);
+ }
+ }
+ static void RaiseInexact()
+ {
+ if constexpr (kInexact != 0) {
+ feraiseexcept(kInexact);
+ }
+ }
+ static void RaiseInvalid()
+ {
+ if constexpr (kInvalid != 0) {
+ feraiseexcept(kInvalid);
+ }
+ }
+ static void RaiseOverflow()
+ {
+ if constexpr (kOverflow != 0) {
+ feraiseexcept(kOverflow);
+ }
+ }
+ static void RaiseUnderflow()
+ {
+ if constexpr (kUnderflow != 0) {
+ feraiseexcept(kUnderflow);
+ }
+ }
+
+ private:
+ bool clear_on_dst_;
+ int fpstatus_;
+};
+
+/// @} cpp_core_utility
+} // namespace np
+
+#endif // NUMPY_CORE_SRC_COMMON_FLOAT_STATUS_HPP
+
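An illustrative aside (not part of the patch): `np::FloatStatus` wraps the fenv exception flags (divide-by-zero, invalid, inexact, overflow, underflow) that NumPy surfaces at the Python level through `np.errstate`/`np.seterr`, for example:

>>> import numpy as np
>>> with np.errstate(divide='raise'):
...     np.divide(1.0, 0.0)
Traceback (most recent call last):
  ...
FloatingPointError: divide by zero encountered in divide
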
diff --git a/numpy/core/src/common/half.hpp b/numpy/core/src/common/half.hpp
index 399f2fa79..e5f3f7a40 100644
--- a/numpy/core/src/common/half.hpp
+++ b/numpy/core/src/common/half.hpp
@@ -3,11 +3,14 @@
#include "npstd.hpp"
+#include "npy_cpu_dispatch.h" // NPY_HAVE_CPU_FEATURES
+#include "half_private.hpp"
+
// TODO(@seiko2plus):
// - covers half-precision operations that being supported by numpy/halffloat.h
-// - support __fp16
-// - optimize x86 half<->single via cpu_fp16
-// - optimize ppc64 half<->single via cpu_vsx3
+// - add support for arithmetic operations
+// - enabling __fp16 causes massive FP exceptions on aarch64,
+// needs a deep investigation
namespace np {
@@ -16,48 +19,246 @@ namespace np {
/// Provides a type that implements 16-bit floating point (half-precision).
/// This type is ensured to be 16-bit size.
+#if 1 // ndef __ARM_FP16_FORMAT_IEEE
class Half final {
- public:
- /// @name Public Constructors
- /// @{
+ public:
+ /// Whether `Half` has a full native HW support.
+ static constexpr bool kNative = false;
+ /// Whether `Half` has a native HW support for single/double conversion.
+ template<typename T>
+ static constexpr bool kNativeConversion = (
+ (
+ std::is_same_v<T, float> &&
+ #if defined(NPY_HAVE_FP16) || defined(NPY_HAVE_VSX3)
+ true
+ #else
+ false
+ #endif
+ ) || (
+ std::is_same_v<T, double> &&
+ #if defined(NPY_HAVE_AVX512FP16) || defined(NPY_HAVE_VSX3)
+ true
+ #else
+ false
+ #endif
+ )
+ );
/// Default constructor. initialize nothing.
Half() = default;
- /// Copy.
- Half(const Half &r)
+
+ /// Construct from float.
+ /// If no hardware optimization is available, rounding will always
+ /// be set to ties to even.
+ explicit Half(float f)
{
- data_.u = r.data_.u;
+ #if defined(NPY_HAVE_FP16)
+ __m128 mf = _mm_load_ss(&f);
+ bits_ = static_cast<uint16_t>(_mm_cvtsi128_si32(_mm_cvtps_ph(mf, _MM_FROUND_TO_NEAREST_INT)));
+ #elif defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX_ASM)
+ __vector float vf32 = vec_splats(f);
+ __vector unsigned short vf16;
+ __asm__ __volatile__ ("xvcvsphp %x0,%x1" : "=wa" (vf16) : "wa" (vf32));
+ bits_ = vec_extract(vf16, 0);
+ #else
+ bits_ = half_private::FromFloatBits(BitCast<uint32_t>(f));
+ #endif
}
- /// @}
+ /// Construct from double.
+ /// If no hardware optimization is available, rounding will always
+ /// be set to ties to even.
+ explicit Half(double f)
+ {
+ #if defined(NPY_HAVE_AVX512FP16)
+ __m128d md = _mm_load_sd(&f);
+ bits_ = static_cast<uint16_t>(_mm_cvtsi128_si32(_mm_castph_si128(_mm_cvtpd_ph(md))));
+ #elif defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX_ASM)
+ __vector double vf64 = vec_splats(f);
+ __vector unsigned short vf16;
+ __asm__ __volatile__ ("xvcvdphp %x0,%x1" : "=wa" (vf16) : "wa" (vf64));
+ bits_ = vec_extract(vf16, 0);
+ #else
+ bits_ = half_private::FromDoubleBits(BitCast<uint64_t>(f));
+ #endif
+ }
+
+ /// Cast to float
+ explicit operator float() const
+ {
+ #if defined(NPY_HAVE_FP16)
+ float ret;
+ _mm_store_ss(&ret, _mm_cvtph_ps(_mm_cvtsi32_si128(bits_)));
+ return ret;
+ #elif defined(NPY_HAVE_VSX3) && defined(vec_extract_fp_from_shorth)
+ return vec_extract(vec_extract_fp_from_shorth(vec_splats(bits_)), 0);
+ #elif defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX_ASM)
+ __vector float vf32;
+ __asm__ __volatile__("xvcvhpsp %x0,%x1"
+ : "=wa"(vf32)
+ : "wa"(vec_splats(bits_.u)));
+ return vec_extract(vf32, 0);
+ #else
+ return BitCast<float>(half_private::ToFloatBits(bits_));
+ #endif
+ }
+
+ /// Cast to double
+ explicit operator double() const
+ {
+ #if defined(NPY_HAVE_AVX512FP16)
+ double ret;
+ _mm_store_sd(&ret, _mm_cvtph_pd(_mm_castsi128_ph(_mm_cvtsi32_si128(bits_))));
+ return ret;
+ #elif defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX_ASM)
+ __vector double vf64;
+ __asm__ __volatile__("xvcvhpdp %x0,%x1"
+ : "=wa"(vf32)
+ : "wa"(vec_splats(bits_)));
+ return vec_extract(vf64, 0);
+ #else
+ return BitCast<double>(half_private::ToDoubleBits(bits_));
+ #endif
+ }
/// Returns a new Half constracted from the IEEE 754 binary16.
- /// @param b the value of binary16.
- static Half FromBits(uint16_t b)
+ static constexpr Half FromBits(uint16_t bits)
{
- Half f;
- f.data_.u = b;
- return f;
+ Half h{};
+ h.bits_ = bits;
+ return h;
}
/// Returns the IEEE 754 binary16 representation.
- uint16_t Bits() const
+ constexpr uint16_t Bits() const
{
- return data_.u;
+ return bits_;
}
- private:
- union {
- uint16_t u;
-/*
-TODO(@seiko2plus): support __fp16
-#ifdef NPY_HAVE_HW_FP16
- __fp16 f;
-#endif
-*/
- } data_;
+ /// @name Comparison operators (ordered)
+ /// @{
+ constexpr bool operator==(Half r) const
+ {
+ return !(IsNaN() || r.IsNaN()) && Equal(r);
+ }
+ constexpr bool operator<(Half r) const
+ {
+ return !(IsNaN() || r.IsNaN()) && Less(r);
+ }
+ constexpr bool operator<=(Half r) const
+ {
+ return !(IsNaN() || r.IsNaN()) && LessEqual(r);
+ }
+ constexpr bool operator>(Half r) const
+ {
+ return r < *this;
+ }
+ constexpr bool operator>=(Half r) const
+ {
+ return r <= *this;
+ }
+ /// @}
+
+ /// @name Comparison operators (unordered)
+ /// @{
+ constexpr bool operator!=(Half r) const
+ {
+ return !(*this == r);
+ }
+ /// @} Comparison operators
+
+ /// @name Comparison with no guarantee of NaN behavior
+ /// @{
+ constexpr bool Less(Half r) const
+ {
+ uint_fast16_t a = static_cast<uint_fast16_t>(bits_),
+ b = static_cast<uint_fast16_t>(r.bits_);
+ bool sign_a = (a & 0x8000u) == 0x8000u;
+ bool sign_b = (b & 0x8000u) == 0x8000u;
+ // if both `a` and `b` have same sign
+ // Test if `a` > `b` when `a` has the sign
+ // or `a` < `b` when it does not.
+ // And make sure they are not equal to each other
+ // in case both are equal to +-0
+ // else
+ // Test if `a` has the sign.
+ // and `a` != -0.0 and `b` != 0.0
+ return (sign_a == sign_b) ? (sign_a ^ (a < b)) && (a != b)
+ : sign_a && ((a | b) != 0x8000u);
+ }
+ constexpr bool LessEqual(Half r) const
+ {
+ uint_fast16_t a = static_cast<uint_fast16_t>(bits_),
+ b = static_cast<uint_fast16_t>(r.bits_);
+ bool sign_a = (a & 0x8000u) == 0x8000u;
+ bool sign_b = (b & 0x8000u) == 0x8000u;
+ // if both `a` and `b` have same sign
+ // Test if `a` > `b` when `a` has the sign
+ // or `a` < `b` when it does not.
+ // or a == b (needed even if we used <= above instead
+ // since testing +-0 still required)
+ // else
+ // Test if `a` has the sign
+ // or `a` and `b` equal to +-0.0
+ return (sign_a == sign_b) ? (sign_a ^ (a < b)) || (a == b)
+ : sign_a || ((a | b) == 0x8000u);
+ }
+ constexpr bool Equal(Half r) const
+ {
+ // fast16 cast is not worth it, since an unpack op would be involved.
+ uint16_t a = bits_, b = r.bits_;
+ return a == b || ((a | b) == 0x8000u);
+ }
+ /// @} Comparison
+
+ /// @name Properties
+ // @{
+ constexpr bool IsNaN() const
+ {
+ return ((bits_ & 0x7c00u) == 0x7c00u) &&
+ ((bits_ & 0x03ffu) != 0);
+ }
+ /// @} Properties
+
+ private:
+ uint16_t bits_;
+};
+#else // __ARM_FP16_FORMAT_IEEE
+class Half final {
+ public:
+ static constexpr bool kNative = true;
+ template<typename T>
+ static constexpr bool kNativeConversion = (
+ std::is_same_v<T, float> || std::is_same_v<T, double>
+ );
+ Half() = default;
+ constexpr Half(__fp16 h) : half_(h)
+ {}
+ constexpr operator __fp16() const
+ { return half_; }
+ static Half FromBits(uint16_t bits)
+ {
+ Half h;
+ h.half_ = BitCast<__fp16>(bits);
+ return h;
+ }
+ uint16_t Bits() const
+ { return BitCast<uint16_t>(half_); }
+ constexpr bool Less(Half r) const
+ { return half_ < r.half_; }
+ constexpr bool LessEqual(Half r) const
+ { return half_ <= r.half_; }
+ constexpr bool Equal(Half r) const
+ { return half_ == r.half_; }
+ constexpr bool IsNaN() const
+ { return half_ != half_; }
+
+ private:
+ __fp16 half_;
};
+#endif // __ARM_FP16_FORMAT_IEEE
/// @} cpp_core_types
} // namespace np
+
#endif // NUMPY_CORE_SRC_COMMON_HALF_HPP
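
As a side note (not part of the patch), the ordered comparison operators above return false whenever either operand is NaN, while `operator!=` is the unordered negation of `operator==`; this matches the IEEE 754 semantics visible from `np.float16`:

>>> import numpy as np
>>> np.float16('nan') < np.float16(1.0)
False
>>> np.float16('nan') == np.float16('nan')
False
>>> np.float16('nan') != np.float16('nan')
True
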
diff --git a/numpy/core/src/common/half_private.hpp b/numpy/core/src/common/half_private.hpp
new file mode 100644
index 000000000..7a64eb397
--- /dev/null
+++ b/numpy/core/src/common/half_private.hpp
@@ -0,0 +1,330 @@
+#ifndef NUMPY_CORE_SRC_COMMON_HALF_PRIVATE_HPP
+#define NUMPY_CORE_SRC_COMMON_HALF_PRIVATE_HPP
+
+#include "npstd.hpp"
+#include "float_status.hpp"
+
+/*
+ * The following functions, which emulate float/double/half conversions,
+ * are copied from npymath without any changes to their functionality.
+ */
+namespace np { namespace half_private {
+
+template<bool gen_overflow=true, bool gen_underflow=true, bool round_even=true>
+inline uint16_t FromFloatBits(uint32_t f)
+{
+ uint32_t f_exp, f_sig;
+ uint16_t h_sgn, h_exp, h_sig;
+
+ h_sgn = (uint16_t) ((f&0x80000000u) >> 16);
+ f_exp = (f&0x7f800000u);
+
+ /* Exponent overflow/NaN converts to signed inf/NaN */
+ if (f_exp >= 0x47800000u) {
+ if (f_exp == 0x7f800000u) {
+ /* Inf or NaN */
+ f_sig = (f&0x007fffffu);
+ if (f_sig != 0) {
+ /* NaN - propagate the flag in the significand... */
+ uint16_t ret = (uint16_t) (0x7c00u + (f_sig >> 13));
+ /* ...but make sure it stays a NaN */
+ if (ret == 0x7c00u) {
+ ret++;
+ }
+ return h_sgn + ret;
+ } else {
+ /* signed inf */
+ return (uint16_t) (h_sgn + 0x7c00u);
+ }
+ } else {
+ if constexpr (gen_overflow) {
+ /* overflow to signed inf */
+ FloatStatus::RaiseOverflow();
+ }
+ return (uint16_t) (h_sgn + 0x7c00u);
+ }
+ }
+
+ /* Exponent underflow converts to a subnormal half or signed zero */
+ if (f_exp <= 0x38000000u) {
+ /*
+ * Signed zeros, subnormal floats, and floats with small
+ * exponents all convert to signed zero half-floats.
+ */
+ if (f_exp < 0x33000000u) {
+ if constexpr (gen_underflow) {
+ /* If f != 0, it underflowed to 0 */
+ if ((f&0x7fffffff) != 0) {
+ FloatStatus::RaiseUnderflow();
+ }
+ }
+ return h_sgn;
+ }
+ /* Make the subnormal significand */
+ f_exp >>= 23;
+ f_sig = (0x00800000u + (f&0x007fffffu));
+ if constexpr (gen_underflow) {
+ /* If it's not exactly represented, it underflowed */
+ if ((f_sig&(((uint32_t)1 << (126 - f_exp)) - 1)) != 0) {
+ FloatStatus::RaiseUnderflow();
+ }
+ }
+ /*
+ * Usually the significand is shifted by 13. For subnormals an
+ * additional shift needs to occur. This shift is one for the largest
+ * exponent giving a subnormal `f_exp = 0x38000000 >> 23 = 112`, which
+ * offsets the new first bit. At most the shift can be 1+10 bits.
+ */
+ f_sig >>= (113 - f_exp);
+ /* Handle rounding by adding 1 to the bit beyond half precision */
+ if constexpr (round_even) {
+ /*
+ * If the last bit in the half significand is 0 (already even), and
+ * the remaining bit pattern is 1000...0, then we do not add one
+ * to the bit after the half significand. However, the (113 - f_exp)
+ * shift can lose up to 11 bits, so the || checks them in the original.
+ * In all other cases, we can just add one.
+ */
+ if (((f_sig&0x00003fffu) != 0x00001000u) || (f&0x000007ffu)) {
+ f_sig += 0x00001000u;
+ }
+ }
+ else {
+ f_sig += 0x00001000u;
+ }
+ h_sig = (uint16_t) (f_sig >> 13);
+ /*
+ * If the rounding causes a bit to spill into h_exp, it will
+ * increment h_exp from zero to one and h_sig will be zero.
+ * This is the correct result.
+ */
+ return (uint16_t) (h_sgn + h_sig);
+ }
+
+ /* Regular case with no overflow or underflow */
+ h_exp = (uint16_t) ((f_exp - 0x38000000u) >> 13);
+ /* Handle rounding by adding 1 to the bit beyond half precision */
+ f_sig = (f&0x007fffffu);
+ if constexpr (round_even) {
+ /*
+ * If the last bit in the half significand is 0 (already even), and
+ * the remaining bit pattern is 1000...0, then we do not add one
+ * to the bit after the half significand. In all other cases, we do.
+ */
+ if ((f_sig&0x00003fffu) != 0x00001000u) {
+ f_sig += 0x00001000u;
+ }
+ }
+ else {
+ f_sig += 0x00001000u;
+ }
+ h_sig = (uint16_t) (f_sig >> 13);
+ /*
+ * If the rounding causes a bit to spill into h_exp, it will
+ * increment h_exp by one and h_sig will be zero. This is the
+ * correct result. h_exp may increment to 15, at greatest, in
+ * which case the result overflows to a signed inf.
+ */
+ if constexpr (gen_overflow) {
+ h_sig += h_exp;
+ if (h_sig == 0x7c00u) {
+ FloatStatus::RaiseOverflow();
+ }
+ return h_sgn + h_sig;
+ }
+ else {
+ return h_sgn + h_exp + h_sig;
+ }
+}
+
+template<bool gen_overflow=true, bool gen_underflow=true, bool round_even=true>
+inline uint16_t FromDoubleBits(uint64_t d)
+{
+ uint64_t d_exp, d_sig;
+ uint16_t h_sgn, h_exp, h_sig;
+
+ h_sgn = (d&0x8000000000000000ULL) >> 48;
+ d_exp = (d&0x7ff0000000000000ULL);
+
+ /* Exponent overflow/NaN converts to signed inf/NaN */
+ if (d_exp >= 0x40f0000000000000ULL) {
+ if (d_exp == 0x7ff0000000000000ULL) {
+ /* Inf or NaN */
+ d_sig = (d&0x000fffffffffffffULL);
+ if (d_sig != 0) {
+ /* NaN - propagate the flag in the significand... */
+ uint16_t ret = (uint16_t) (0x7c00u + (d_sig >> 42));
+ /* ...but make sure it stays a NaN */
+ if (ret == 0x7c00u) {
+ ret++;
+ }
+ return h_sgn + ret;
+ } else {
+ /* signed inf */
+ return h_sgn + 0x7c00u;
+ }
+ } else {
+ /* overflow to signed inf */
+ if constexpr (gen_overflow) {
+ FloatStatus::RaiseOverflow();
+ }
+ return h_sgn + 0x7c00u;
+ }
+ }
+
+ /* Exponent underflow converts to subnormal half or signed zero */
+ if (d_exp <= 0x3f00000000000000ULL) {
+ /*
+ * Signed zeros, subnormal floats, and floats with small
+ * exponents all convert to signed zero half-floats.
+ */
+ if (d_exp < 0x3e60000000000000ULL) {
+ if constexpr (gen_underflow) {
+ /* If d != 0, it underflowed to 0 */
+ if ((d&0x7fffffffffffffffULL) != 0) {
+ FloatStatus::RaiseUnderflow();
+ }
+ }
+ return h_sgn;
+ }
+ /* Make the subnormal significand */
+ d_exp >>= 52;
+ d_sig = (0x0010000000000000ULL + (d&0x000fffffffffffffULL));
+ if constexpr (gen_underflow) {
+ /* If it's not exactly represented, it underflowed */
+ if ((d_sig&(((uint64_t)1 << (1051 - d_exp)) - 1)) != 0) {
+ FloatStatus::RaiseUnderflow();
+ }
+ }
+ /*
+ * Unlike floats, doubles have enough room to shift left to align
+ * the subnormal significand leading to no loss of the last bits.
+ * The smallest possible exponent giving a subnormal is:
+ * `d_exp = 0x3e60000000000000 >> 52 = 998`. All larger subnormals are
+ * shifted with respect to it. This adds a shift of 10+1 bits to the final
+ * right shift when comparing it to the one in the normal branch.
+ */
+ assert(d_exp - 998 >= 0);
+ d_sig <<= (d_exp - 998);
+ /* Handle rounding by adding 1 to the bit beyond half precision */
+ if constexpr (round_even) {
+ /*
+ * If the last bit in the half significand is 0 (already even), and
+ * the remaining bit pattern is 1000...0, then we do not add one
+ * to the bit after the half significand. In all other cases, we do.
+ */
+ if ((d_sig&0x003fffffffffffffULL) != 0x0010000000000000ULL) {
+ d_sig += 0x0010000000000000ULL;
+ }
+ }
+ else {
+ d_sig += 0x0010000000000000ULL;
+ }
+ h_sig = (uint16_t) (d_sig >> 53);
+ /*
+ * If the rounding causes a bit to spill into h_exp, it will
+ * increment h_exp from zero to one and h_sig will be zero.
+ * This is the correct result.
+ */
+ return h_sgn + h_sig;
+ }
+
+ /* Regular case with no overflow or underflow */
+ h_exp = (uint16_t) ((d_exp - 0x3f00000000000000ULL) >> 42);
+ /* Handle rounding by adding 1 to the bit beyond half precision */
+ d_sig = (d&0x000fffffffffffffULL);
+ if constexpr (round_even) {
+ /*
+ * If the last bit in the half significand is 0 (already even), and
+ * the remaining bit pattern is 1000...0, then we do not add one
+ * to the bit after the half significand. In all other cases, we do.
+ */
+ if ((d_sig&0x000007ffffffffffULL) != 0x0000020000000000ULL) {
+ d_sig += 0x0000020000000000ULL;
+ }
+ }
+ else {
+ d_sig += 0x0000020000000000ULL;
+ }
+ h_sig = (uint16_t) (d_sig >> 42);
+
+ /*
+ * If the rounding causes a bit to spill into h_exp, it will
+ * increment h_exp by one and h_sig will be zero. This is the
+ * correct result. h_exp may increment to 15, at greatest, in
+ * which case the result overflows to a signed inf.
+ */
+ if constexpr (gen_overflow) {
+ h_sig += h_exp;
+ if (h_sig == 0x7c00u) {
+ FloatStatus::RaiseOverflow();
+ }
+ return h_sgn + h_sig;
+ }
+ else {
+ return h_sgn + h_exp + h_sig;
+ }
+}
+
+constexpr uint32_t ToFloatBits(uint16_t h)
+{
+ uint16_t h_exp = (h&0x7c00u);
+ uint32_t f_sgn = ((uint32_t)h&0x8000u) << 16;
+ switch (h_exp) {
+ case 0x0000u: { // 0 or subnormal
+ uint16_t h_sig = (h&0x03ffu);
+ // Signed zero
+ if (h_sig == 0) {
+ return f_sgn;
+ }
+ // Subnormal
+ h_sig <<= 1;
+ while ((h_sig&0x0400u) == 0) {
+ h_sig <<= 1;
+ h_exp++;
+ }
+ uint32_t f_exp = ((uint32_t)(127 - 15 - h_exp)) << 23;
+ uint32_t f_sig = ((uint32_t)(h_sig&0x03ffu)) << 13;
+ return f_sgn + f_exp + f_sig;
+ }
+ case 0x7c00u: // inf or NaN
+ // All-ones exponent and a copy of the significand
+ return f_sgn + 0x7f800000u + (((uint32_t)(h&0x03ffu)) << 13);
+ default: // normalized
+ // Just need to adjust the exponent and shift
+ return f_sgn + (((uint32_t)(h&0x7fffu) + 0x1c000u) << 13);
+ }
+}
+
+constexpr uint64_t ToDoubleBits(uint16_t h)
+{
+ uint16_t h_exp = (h&0x7c00u);
+ uint64_t d_sgn = ((uint64_t)h&0x8000u) << 48;
+ switch (h_exp) {
+ case 0x0000u: { // 0 or subnormal
+ uint16_t h_sig = (h&0x03ffu);
+ // Signed zero
+ if (h_sig == 0) {
+ return d_sgn;
+ }
+ // Subnormal
+ h_sig <<= 1;
+ while ((h_sig&0x0400u) == 0) {
+ h_sig <<= 1;
+ h_exp++;
+ }
+ uint64_t d_exp = ((uint64_t)(1023 - 15 - h_exp)) << 52;
+ uint64_t d_sig = ((uint64_t)(h_sig&0x03ffu)) << 42;
+ return d_sgn + d_exp + d_sig;
+ }
+ case 0x7c00u: // inf or NaN
+ // All-ones exponent and a copy of the significand
+ return d_sgn + 0x7ff0000000000000ULL + (((uint64_t)(h&0x03ffu)) << 42);
+ default: // normalized
+ // Just need to adjust the exponent and shift
+ return d_sgn + (((uint64_t)(h&0x7fffu) + 0xfc000u) << 42);
+ }
+}
+
+}} // namespace np::half_private
+#endif // NUMPY_CORE_SRC_COMMON_HALF_PRIVATE_HPP
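
For context (not part of the patch): `FromFloatBits`/`FromDoubleBits` round ties to even by default, which is the rounding observed when converting to `np.float16`. In the float16 range [2048, 4096) the spacing is 2, so exact halfway cases round to the even neighbor:

>>> import numpy as np
>>> np.float16(2049.0)   # halfway between 2048 and 2050
2048.0
>>> np.float16(2051.0)   # halfway between 2050 and 2052
2052.0
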
diff --git a/numpy/core/src/common/npdef.hpp b/numpy/core/src/common/npdef.hpp
new file mode 100644
index 000000000..56a0df52e
--- /dev/null
+++ b/numpy/core/src/common/npdef.hpp
@@ -0,0 +1,28 @@
+#ifndef NUMPY_CORE_SRC_COMMON_NPDEF_HPP
+#define NUMPY_CORE_SRC_COMMON_NPDEF_HPP
+
+#if !defined(__cplusplus) || __cplusplus < 201703L
+ #error "NumPy requires a compiler with at least C++17 enabled"
+#endif
+
+/// @addtogroup cpp_core_defs
+/// @{
+
+/// Whether compiler supports C++20
+#if __cplusplus > 202002L
+ #define NP_HAS_CPP20 1
+#else
+ #define NP_HAS_CPP20 0
+#endif
+
+/// Wraps `__has_builtin`
+#if defined(__has_builtin)
+ #define NP_HAS_BUILTIN(INTRIN) __has_builtin(INTRIN)
+#else
+ #define NP_HAS_BUILTIN(INTRIN) 0
+#endif
+
+/// @} cpp_core_defs
+
+#endif // NUMPY_CORE_SRC_COMMON_NPDEF_HPP
+
diff --git a/numpy/core/src/common/npstd.hpp b/numpy/core/src/common/npstd.hpp
index 71993bd7c..ca664229a 100644
--- a/numpy/core/src/common/npstd.hpp
+++ b/numpy/core/src/common/npstd.hpp
@@ -31,6 +31,8 @@ using std::int64_t;
using std::uintptr_t;
using std::intptr_t;
using std::complex;
+using std::uint_fast16_t;
+using std::uint_fast32_t;
/** Guard for long double.
*
diff --git a/numpy/core/src/common/utils.hpp b/numpy/core/src/common/utils.hpp
new file mode 100644
index 000000000..f847cab44
--- /dev/null
+++ b/numpy/core/src/common/utils.hpp
@@ -0,0 +1,51 @@
+#ifndef NUMPY_CORE_SRC_COMMON_UTILS_HPP
+#define NUMPY_CORE_SRC_COMMON_UTILS_HPP
+
+#include "npdef.hpp"
+
+#if NP_HAS_CPP20
+ #include <bit>
+#endif
+
+#include <type_traits>
+#include <string.h>
+
+namespace np {
+
+/** Create a value of type `To` from the bits of `from`.
+ *
+ * Similar to `std::bit_cast` but compatible with C++17; it should perform
+ * like `*reinterpret_cast<To*>(&from)` or type punning, without
+ * invoking any undefined behavior.
+ */
+template<typename To, typename From>
+#if NP_HAS_BUILTIN(__builtin_bit_cast) || NP_HAS_CPP20
+[[nodiscard]] constexpr
+#else
+inline
+#endif
+To BitCast(const From &from) noexcept
+{
+ static_assert(
+ sizeof(To) == sizeof(From),
+ "both data types must have the same size");
+
+ static_assert(
+ std::is_trivially_copyable_v<To> &&
+ std::is_trivially_copyable_v<From>,
+ "both data types must be trivially copyable");
+
+#if NP_HAS_CPP20
+ return std::bit_cast<To>(from);
+#elif NP_HAS_BUILTIN(__builtin_bit_cast)
+ return __builtin_bit_cast(To, from);
+#else
+ To to;
+ memcpy(&to, &from, sizeof(from));
+ return to;
+#endif
+}
+
+} // namespace np
+#endif // NUMPY_CORE_SRC_COMMON_UTILS_HPP
+
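For reference (not part of the patch): `BitCast` reinterprets the bits of one trivially copyable type as another, falling back from `std::bit_cast` to `__builtin_bit_cast` to `memcpy`. The closest user-visible analogue is reinterpreting an array buffer with `view`:

>>> import numpy as np
>>> np.array([1.0], dtype=np.float32).view(np.uint32)   # 0x3f800000
array([1065353216], dtype=uint32)
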
diff --git a/numpy/core/src/multiarray/array_coercion.c b/numpy/core/src/multiarray/array_coercion.c
index d55a5752b..ba9118052 100644
--- a/numpy/core/src/multiarray/array_coercion.c
+++ b/numpy/core/src/multiarray/array_coercion.c
@@ -1025,53 +1025,6 @@ PyArray_DiscoverDTypeAndShape_Recursive(
Py_DECREF(arr);
arr = NULL;
}
- else if (curr_dims > 0 && curr_dims != max_dims) {
- /*
- * Deprecated 2020-12-09, NumPy 1.20
- *
- * See https://github.com/numpy/numpy/issues/17965
- * Shapely had objects which are not sequences but did export
- * the array-interface (and so are arguably array-like).
- * Previously numpy would not use array-like information during
- * shape discovery, so that it ended up acting as if this was
- * an (unknown) scalar but with the specified dtype.
- * Thus we ignore "scalars" here, as the value stored in the
- * array should be acceptable.
- */
- if (PyArray_NDIM(arr) > 0 && NPY_UNLIKELY(!PySequence_Check(obj))) {
- if (PyErr_WarnFormat(PyExc_FutureWarning, 1,
- "The input object of type '%s' is an array-like "
- "implementing one of the corresponding protocols "
- "(`__array__`, `__array_interface__` or "
- "`__array_struct__`); but not a sequence (or 0-D). "
- "In the future, this object will be coerced as if it "
- "was first converted using `np.array(obj)`. "
- "To retain the old behaviour, you have to either "
- "modify the type '%s', or assign to an empty array "
- "created with `np.empty(correct_shape, dtype=object)`.",
- Py_TYPE(obj)->tp_name, Py_TYPE(obj)->tp_name) < 0) {
- Py_DECREF(arr);
- return -1;
- }
- /*
- * Strangely enough, even though we threw away the result here,
- * we did use it during descriptor discovery, so promote it:
- */
- if (update_shape(curr_dims, &max_dims, out_shape,
- 0, NULL, NPY_FALSE, flags) < 0) {
- *flags |= FOUND_RAGGED_ARRAY;
- Py_DECREF(arr);
- return max_dims;
- }
- if (!(*flags & DESCRIPTOR_WAS_SET) && handle_promotion(
- out_descr, PyArray_DESCR(arr), fixed_DType, flags) < 0) {
- Py_DECREF(arr);
- return -1;
- }
- Py_DECREF(arr);
- return max_dims;
- }
- }
}
if (arr != NULL) {
/*
diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c
index da8d23a26..573d0d606 100644
--- a/numpy/core/src/multiarray/common.c
+++ b/numpy/core/src/multiarray/common.c
@@ -128,24 +128,6 @@ PyArray_DTypeFromObject(PyObject *obj, int maxdims, PyArray_Descr **out_dtype)
}
-NPY_NO_EXPORT int
-_zerofill(PyArrayObject *ret)
-{
- if (PyDataType_REFCHK(PyArray_DESCR(ret))) {
- PyObject *zero = PyLong_FromLong(0);
- PyArray_FillObjectArray(ret, zero);
- Py_DECREF(zero);
- if (PyErr_Occurred()) {
- return -1;
- }
- }
- else {
- npy_intp n = PyArray_NBYTES(ret);
- memset(PyArray_DATA(ret), 0, n);
- }
- return 0;
-}
-
NPY_NO_EXPORT npy_bool
_IsWriteable(PyArrayObject *ap)
{
@@ -459,3 +441,32 @@ new_array_for_sum(PyArrayObject *ap1, PyArrayObject *ap2, PyArrayObject* out,
}
}
+NPY_NO_EXPORT int
+check_is_convertible_to_scalar(PyArrayObject *v)
+{
+ if (PyArray_NDIM(v) == 0) {
+ return 0;
+ }
+
+ /* Remove this if-else block when the deprecation expires */
+ if (PyArray_SIZE(v) == 1) {
+ /* Numpy 1.25.0, 2023-01-02 */
+ if (DEPRECATE(
+ "Conversion of an array with ndim > 0 to a scalar "
+ "is deprecated, and will error in future. "
+ "Ensure you extract a single element from your array "
+ "before performing this operation. "
+ "(Deprecated NumPy 1.25.)") < 0) {
+ return -1;
+ }
+ return 0;
+ } else {
+ PyErr_SetString(PyExc_TypeError,
+ "only length-1 arrays can be converted to Python scalars");
+ return -1;
+ }
+
+ PyErr_SetString(PyExc_TypeError,
+ "only 0-dimensional arrays can be converted to Python scalars");
+ return -1;
+}
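A brief user-level sketch (not part of the patch): `check_is_convertible_to_scalar` centralizes the NumPy 1.25 deprecation used by scalar conversions such as `float(arr)` and `complex(arr)`. A size-1 array with `ndim > 0` now emits a DeprecationWarning, larger arrays keep raising `TypeError`, and 0-d arrays are unchanged:

>>> import numpy as np
>>> float(np.array(3.0))        # 0-d: fine
3.0
>>> float(np.array([3.0]))      # size-1, ndim > 0: DeprecationWarning
3.0
>>> float(np.array([1.0, 2.0]))
Traceback (most recent call last):
  ...
TypeError: only length-1 arrays can be converted to Python scalars
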
diff --git a/numpy/core/src/multiarray/common.h b/numpy/core/src/multiarray/common.h
index 4e067b22c..cb9fadc4e 100644
--- a/numpy/core/src/multiarray/common.h
+++ b/numpy/core/src/multiarray/common.h
@@ -55,9 +55,6 @@ PyArray_DTypeFromObject(PyObject *obj, int maxdims,
NPY_NO_EXPORT PyArray_Descr *
_array_find_python_scalar_type(PyObject *op);
-NPY_NO_EXPORT int
-_zerofill(PyArrayObject *ret);
-
NPY_NO_EXPORT npy_bool
_IsWriteable(PyArrayObject *ap);
@@ -335,6 +332,13 @@ PyArray_TupleFromItems(int n, PyObject *const *items, int make_null_none)
return tuple;
}
+/*
+ * Returns 0 if the array has rank 0 or, with a deprecation warning, size 1;
+ * returns -1 (with an exception set) otherwise.
+ */
+NPY_NO_EXPORT int
+check_is_convertible_to_scalar(PyArrayObject *v);
+
#include "ucsnarrow.h"
diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c
index 53db5c577..de1cd075d 100644
--- a/numpy/core/src/multiarray/convert_datatype.c
+++ b/numpy/core/src/multiarray/convert_datatype.c
@@ -3913,21 +3913,6 @@ PyArray_InitializeObjectToObjectCast(void)
}
-static int
-PyArray_SetClearFunctions(void)
-{
- PyArray_DTypeMeta *Object = PyArray_DTypeFromTypeNum(NPY_OBJECT);
- NPY_DT_SLOTS(Object)->get_clear_loop = &npy_get_clear_object_strided_loop;
- Py_DECREF(Object); /* use borrowed */
-
- PyArray_DTypeMeta *Void = PyArray_DTypeFromTypeNum(NPY_VOID);
- NPY_DT_SLOTS(Void)->get_clear_loop = &npy_get_clear_void_and_legacy_user_dtype_loop;
- Py_DECREF(Void); /* use borrowed */
- return 0;
-}
-
-
-
NPY_NO_EXPORT int
PyArray_InitializeCasts()
{
@@ -3947,8 +3932,5 @@ PyArray_InitializeCasts()
if (PyArray_InitializeDatetimeCasts() < 0) {
return -1;
}
- if (PyArray_SetClearFunctions() < 0) {
- return -1;
- }
return 0;
}
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index a6335c783..79a1905a7 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -715,6 +715,11 @@ PyArray_NewFromDescr_int(
fa->base = (PyObject *)NULL;
fa->weakreflist = (PyObject *)NULL;
+ /* needed for zero-filling logic below, defined and initialized up here
+ so cleanup logic can go in the fail block */
+ NPY_traverse_info fill_zero_info;
+ NPY_traverse_info_init(&fill_zero_info);
+
if (nd > 0) {
fa->dimensions = npy_alloc_cache_dim(2 * nd);
if (fa->dimensions == NULL) {
@@ -784,6 +789,31 @@ PyArray_NewFromDescr_int(
if (data == NULL) {
+ /* float errors do not matter and we do not release GIL */
+ NPY_ARRAYMETHOD_FLAGS zero_flags;
+ get_traverse_loop_function *get_fill_zero_loop =
+ NPY_DT_SLOTS(NPY_DTYPE(descr))->get_fill_zero_loop;
+ if (get_fill_zero_loop != NULL) {
+ if (get_fill_zero_loop(
+ NULL, descr, 1, descr->elsize, &(fill_zero_info.func),
+ &(fill_zero_info.auxdata), &zero_flags) < 0) {
+ goto fail;
+ }
+ }
+
+ /*
+ * We always want a zero-filled array allocated with calloc if
+ * NPY_NEEDS_INIT is set on the dtype, for safety. We also want a
+ * zero-filled array if zeroed is set and the zero-filling loop isn't
+ * defined, for better performance.
+ *
+ * If the zero-filling loop is defined and zeroed is set, allocate
+ * with malloc and let the zero-filling loop fill the array buffer
+ * with valid zero values for the dtype.
+ */
+ int use_calloc = (PyDataType_FLAGCHK(descr, NPY_NEEDS_INIT) ||
+ (zeroed && (fill_zero_info.func == NULL)));
+
/* Store the handler in case the default is modified */
fa->mem_handler = PyDataMem_GetHandler();
if (fa->mem_handler == NULL) {
@@ -801,11 +831,8 @@ PyArray_NewFromDescr_int(
fa->strides[i] = 0;
}
}
- /*
- * It is bad to have uninitialized OBJECT pointers
- * which could also be sub-fields of a VOID array
- */
- if (zeroed || PyDataType_FLAGCHK(descr, NPY_NEEDS_INIT)) {
+
+ if (use_calloc) {
data = PyDataMem_UserNEW_ZEROED(nbytes, 1, fa->mem_handler);
}
else {
@@ -816,6 +843,18 @@ PyArray_NewFromDescr_int(
goto fail;
}
+ /*
+ * If the array needs special dtype-specific zero-filling logic, do that
+ */
+ if (NPY_UNLIKELY(zeroed && (fill_zero_info.func != NULL))) {
+ npy_intp size = PyArray_MultiplyList(fa->dimensions, fa->nd);
+ if (fill_zero_info.func(
+ NULL, descr, data, size, descr->elsize,
+ fill_zero_info.auxdata) < 0) {
+ goto fail;
+ }
+ }
+
fa->flags |= NPY_ARRAY_OWNDATA;
}
else {
@@ -910,9 +949,11 @@ PyArray_NewFromDescr_int(
}
}
}
+ NPY_traverse_info_xfree(&fill_zero_info);
return (PyObject *)fa;
fail:
+ NPY_traverse_info_xfree(&fill_zero_info);
Py_XDECREF(fa->mem_handler);
Py_DECREF(fa);
return NULL;
@@ -3017,17 +3058,7 @@ PyArray_Zeros(int nd, npy_intp const *dims, PyArray_Descr *type, int is_f_order)
return NULL;
}
- /* handle objects */
- if (PyDataType_REFCHK(PyArray_DESCR(ret))) {
- if (_zerofill(ret) < 0) {
- Py_DECREF(ret);
- return NULL;
- }
- }
-
-
return (PyObject *)ret;
-
}
/*NUMPY_API
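
An illustrative aside (not part of the patch): with the fill-zero traverse loop wired into array allocation, dtypes whose zero is not an all-zero byte pattern (object dtype, and structured dtypes containing objects) get their buffers filled by the loop instead of the old `_zerofill` pass, so the observable result of `np.zeros` stays the same (reprs approximate):

>>> import numpy as np
>>> np.zeros(3, dtype=object)
array([0, 0, 0], dtype=object)
>>> np.zeros(1, dtype=[('a', object), ('b', 'f8')])
array([(0, 0.)], dtype=[('a', 'O'), ('b', '<f8')])
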
diff --git a/numpy/core/src/multiarray/dtype_traversal.c b/numpy/core/src/multiarray/dtype_traversal.c
index cefa7d6e1..769c2e015 100644
--- a/numpy/core/src/multiarray/dtype_traversal.c
+++ b/numpy/core/src/multiarray/dtype_traversal.c
@@ -24,7 +24,7 @@
#include "alloc.h"
#include "array_method.h"
#include "dtypemeta.h"
-
+#include "refcount.h"
#include "dtype_traversal.h"
@@ -124,6 +124,39 @@ npy_get_clear_object_strided_loop(
}
+/**************** Python Object zero fill *********************/
+
+static int
+fill_zero_object_strided_loop(
+ void *NPY_UNUSED(traverse_context), PyArray_Descr *NPY_UNUSED(descr),
+ char *data, npy_intp size, npy_intp stride,
+ NpyAuxData *NPY_UNUSED(auxdata))
+{
+ PyObject *zero = PyLong_FromLong(0);
+ while (size--) {
+ Py_INCREF(zero);
+ // assumes `data` doesn't have a pre-existing object inside it
+ memcpy(data, &zero, sizeof(zero));
+ data += stride;
+ }
+ Py_DECREF(zero);
+ return 0;
+}
+
+NPY_NO_EXPORT int
+npy_object_get_fill_zero_loop(void *NPY_UNUSED(traverse_context),
+ PyArray_Descr *NPY_UNUSED(descr),
+ int NPY_UNUSED(aligned),
+ npy_intp NPY_UNUSED(fixed_stride),
+ traverse_loop_function **out_loop,
+ NpyAuxData **NPY_UNUSED(out_auxdata),
+ NPY_ARRAYMETHOD_FLAGS *flags)
+{
+ *flags = NPY_METH_REQUIRES_PYAPI | NPY_METH_NO_FLOATINGPOINT_ERRORS;
+ *out_loop = &fill_zero_object_strided_loop;
+ return 0;
+}
+
/**************** Structured DType clear funcationality ***************/
/*
@@ -408,7 +441,6 @@ clear_no_op(
return 0;
}
-
NPY_NO_EXPORT int
npy_get_clear_void_and_legacy_user_dtype_loop(
void *traverse_context, PyArray_Descr *dtype, int aligned,
@@ -472,3 +504,41 @@ npy_get_clear_void_and_legacy_user_dtype_loop(
dtype);
return -1;
}
+
+/**************** Structured DType zero fill ***************/
+
+static int
+fill_zero_void_with_objects_strided_loop(
+ void *NPY_UNUSED(traverse_context), PyArray_Descr *descr,
+ char *data, npy_intp size, npy_intp stride,
+ NpyAuxData *NPY_UNUSED(auxdata))
+{
+ PyObject *zero = PyLong_FromLong(0);
+ while (size--) {
+ _fillobject(data, zero, descr);
+ data += stride;
+ }
+ Py_DECREF(zero);
+ return 0;
+}
+
+
+NPY_NO_EXPORT int
+npy_void_get_fill_zero_loop(void *NPY_UNUSED(traverse_context),
+ PyArray_Descr *descr,
+ int NPY_UNUSED(aligned),
+ npy_intp NPY_UNUSED(fixed_stride),
+ traverse_loop_function **out_loop,
+ NpyAuxData **NPY_UNUSED(out_auxdata),
+ NPY_ARRAYMETHOD_FLAGS *flags)
+{
+ *flags = NPY_METH_NO_FLOATINGPOINT_ERRORS;
+ if (PyDataType_REFCHK(descr)) {
+ *flags |= NPY_METH_REQUIRES_PYAPI;
+ *out_loop = &fill_zero_void_with_objects_strided_loop;
+ }
+ else {
+ *out_loop = NULL;
+ }
+ return 0;
+}
diff --git a/numpy/core/src/multiarray/dtype_traversal.h b/numpy/core/src/multiarray/dtype_traversal.h
index fd060a0f0..a9c185382 100644
--- a/numpy/core/src/multiarray/dtype_traversal.h
+++ b/numpy/core/src/multiarray/dtype_traversal.h
@@ -19,6 +19,22 @@ npy_get_clear_void_and_legacy_user_dtype_loop(
traverse_loop_function **out_loop, NpyAuxData **out_traversedata,
NPY_ARRAYMETHOD_FLAGS *flags);
+/* NumPy DType zero-filling implementations */
+
+NPY_NO_EXPORT int
+npy_object_get_fill_zero_loop(
+ void *NPY_UNUSED(traverse_context), PyArray_Descr *NPY_UNUSED(descr),
+ int NPY_UNUSED(aligned), npy_intp NPY_UNUSED(fixed_stride),
+ traverse_loop_function **out_loop, NpyAuxData **NPY_UNUSED(out_auxdata),
+ NPY_ARRAYMETHOD_FLAGS *flags);
+
+NPY_NO_EXPORT int
+npy_void_get_fill_zero_loop(
+ void *NPY_UNUSED(traverse_context), PyArray_Descr *descr,
+ int NPY_UNUSED(aligned), npy_intp NPY_UNUSED(fixed_stride),
+ traverse_loop_function **out_loop, NpyAuxData **NPY_UNUSED(out_auxdata),
+ NPY_ARRAYMETHOD_FLAGS *flags);
+
/* Helper to deal with calling or nesting simple strided loops */
@@ -34,6 +50,7 @@ NPY_traverse_info_init(NPY_traverse_info *cast_info)
{
cast_info->func = NULL; /* mark as uninitialized. */
cast_info->auxdata = NULL; /* allow leaving auxdata untouched */
+ cast_info->descr = NULL; /* mark as uninitialized. */
}
@@ -45,7 +62,7 @@ NPY_traverse_info_xfree(NPY_traverse_info *traverse_info)
}
traverse_info->func = NULL;
NPY_AUXDATA_FREE(traverse_info->auxdata);
- Py_DECREF(traverse_info->descr);
+ Py_XDECREF(traverse_info->descr);
}
@@ -79,4 +96,4 @@ PyArray_GetClearFunction(
NPY_traverse_info *clear_info, NPY_ARRAYMETHOD_FLAGS *flags);
-#endif /* NUMPY_CORE_SRC_MULTIARRAY_DTYPE_TRAVERSAL_H_ */ \ No newline at end of file
+#endif /* NUMPY_CORE_SRC_MULTIARRAY_DTYPE_TRAVERSAL_H_ */
diff --git a/numpy/core/src/multiarray/dtypemeta.c b/numpy/core/src/multiarray/dtypemeta.c
index f268ba2cb..f8c1b6617 100644
--- a/numpy/core/src/multiarray/dtypemeta.c
+++ b/numpy/core/src/multiarray/dtypemeta.c
@@ -20,6 +20,8 @@
#include "usertypes.h"
#include "conversion_utils.h"
#include "templ_common.h"
+#include "refcount.h"
+#include "dtype_traversal.h"
#include <assert.h>
@@ -524,6 +526,7 @@ void_common_instance(PyArray_Descr *descr1, PyArray_Descr *descr2)
return NULL;
}
+
NPY_NO_EXPORT int
python_builtins_are_known_scalar_types(
PyArray_DTypeMeta *NPY_UNUSED(cls), PyTypeObject *pytype)
@@ -855,6 +858,7 @@ dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr)
dt_slots->common_dtype = default_builtin_common_dtype;
dt_slots->common_instance = NULL;
dt_slots->ensure_canonical = ensure_native_byteorder;
+ dt_slots->get_fill_zero_loop = NULL;
if (PyTypeNum_ISSIGNED(dtype_class->type_num)) {
/* Convert our scalars (raise on too large unsigned and NaN, etc.) */
@@ -866,6 +870,8 @@ dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr)
}
else if (descr->type_num == NPY_OBJECT) {
dt_slots->common_dtype = object_common_dtype;
+ dt_slots->get_fill_zero_loop = npy_object_get_fill_zero_loop;
+ dt_slots->get_clear_loop = npy_get_clear_object_strided_loop;
}
else if (PyTypeNum_ISDATETIME(descr->type_num)) {
/* Datetimes are flexible, but were not considered previously */
@@ -887,6 +893,9 @@ dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr)
void_discover_descr_from_pyobject);
dt_slots->common_instance = void_common_instance;
dt_slots->ensure_canonical = void_ensure_canonical;
+ dt_slots->get_fill_zero_loop = npy_void_get_fill_zero_loop;
+ dt_slots->get_clear_loop =
+ npy_get_clear_void_and_legacy_user_dtype_loop;
}
else {
dt_slots->default_descr = string_and_unicode_default_descr;
diff --git a/numpy/core/src/multiarray/dtypemeta.h b/numpy/core/src/multiarray/dtypemeta.h
index 3b4dbad24..6dbfd1549 100644
--- a/numpy/core/src/multiarray/dtypemeta.h
+++ b/numpy/core/src/multiarray/dtypemeta.h
@@ -42,6 +42,23 @@ typedef struct {
*/
get_traverse_loop_function *get_clear_loop;
/*
+ Either NULL or a function that sets a function pointer to a traversal
+ loop that fills an array with zero values appropriate for the dtype. If
+ get_fill_zero_loop is undefined or the function pointer set by it is
+ NULL, the array buffer is allocated with calloc. If this function is
+ defined and it sets a non-NULL function pointer, the array buffer is
+ allocated with malloc and the zero-filling loop function pointer is
+ called to fill the buffer. For the best performance, avoid using this
+ function if a zero-filled array buffer allocated with calloc makes sense
+ for the dtype.
+
+ Note that this is currently used only for zero-filling a newly allocated
+ array. While it can be used to zero-fill an already-filled buffer, that
+ will not work correctly for arrays holding references. If you need to do
+ that, clear the array first.
+ */
+ get_traverse_loop_function *get_fill_zero_loop;
+ /*
* The casting implementation (ArrayMethod) to convert between two
* instances of this DType, stored explicitly for fast access:
*/
@@ -63,7 +80,7 @@ typedef struct {
// This must be updated if new slots before within_dtype_castingimpl
// are added
-#define NPY_NUM_DTYPE_SLOTS 9
+#define NPY_NUM_DTYPE_SLOTS 10
#define NPY_NUM_DTYPE_PYARRAY_ARRFUNCS_SLOTS 22
#define NPY_DT_MAX_ARRFUNCS_SLOT \
NPY_NUM_DTYPE_PYARRAY_ARRFUNCS_SLOTS + _NPY_DT_ARRFUNCS_OFFSET
diff --git a/numpy/core/src/multiarray/getset.c b/numpy/core/src/multiarray/getset.c
index ab35548ed..d019acbb5 100644
--- a/numpy/core/src/multiarray/getset.c
+++ b/numpy/core/src/multiarray/getset.c
@@ -797,20 +797,17 @@ array_imag_get(PyArrayObject *self, void *NPY_UNUSED(ignored))
}
else {
Py_INCREF(PyArray_DESCR(self));
- ret = (PyArrayObject *)PyArray_NewFromDescr(Py_TYPE(self),
- PyArray_DESCR(self),
- PyArray_NDIM(self),
- PyArray_DIMS(self),
- NULL, NULL,
- PyArray_ISFORTRAN(self),
- (PyObject *)self);
+ ret = (PyArrayObject *)PyArray_NewFromDescr_int(
+ Py_TYPE(self),
+ PyArray_DESCR(self),
+ PyArray_NDIM(self),
+ PyArray_DIMS(self),
+ NULL, NULL,
+ PyArray_ISFORTRAN(self),
+ (PyObject *)self, NULL, 1, 0);
if (ret == NULL) {
return NULL;
}
- if (_zerofill(ret) < 0) {
- Py_DECREF(ret);
- return NULL;
- }
PyArray_CLEARFLAGS(ret, NPY_ARRAY_WRITEABLE);
}
return (PyObject *) ret;
diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c
index 93b290020..3b2e72820 100644
--- a/numpy/core/src/multiarray/methods.c
+++ b/numpy/core/src/multiarray/methods.c
@@ -2804,9 +2804,7 @@ array_complex(PyArrayObject *self, PyObject *NPY_UNUSED(args))
PyArray_Descr *dtype;
PyObject *c;
- if (PyArray_SIZE(self) != 1) {
- PyErr_SetString(PyExc_TypeError,
- "only length-1 arrays can be converted to Python scalars");
+ if (check_is_convertible_to_scalar(self) < 0) {
return NULL;
}
diff --git a/numpy/core/src/multiarray/number.c b/numpy/core/src/multiarray/number.c
index c208fb203..35e4af79c 100644
--- a/numpy/core/src/multiarray/number.c
+++ b/numpy/core/src/multiarray/number.c
@@ -869,13 +869,11 @@ array_scalar_forward(PyArrayObject *v,
PyObject *(*builtin_func)(PyObject *),
const char *where)
{
- PyObject *scalar;
- if (PyArray_SIZE(v) != 1) {
- PyErr_SetString(PyExc_TypeError, "only size-1 arrays can be"\
- " converted to Python scalars");
+ if (check_is_convertible_to_scalar(v) < 0) {
return NULL;
}
+ PyObject *scalar;
scalar = PyArray_GETITEM(v, PyArray_DATA(v));
if (scalar == NULL) {
return NULL;
diff --git a/numpy/core/src/multiarray/refcount.c b/numpy/core/src/multiarray/refcount.c
index 20527f7af..d200957c3 100644
--- a/numpy/core/src/multiarray/refcount.c
+++ b/numpy/core/src/multiarray/refcount.c
@@ -17,15 +17,12 @@
#include "numpy/arrayscalars.h"
#include "iterators.h"
#include "dtypemeta.h"
+#include "refcount.h"
#include "npy_config.h"
#include "npy_pycompat.h"
-static void
-_fillobject(char *optr, PyObject *obj, PyArray_Descr *dtype);
-
-
/*
* Helper function to clear a strided memory (normally or always contiguous)
* from all Python (or other) references. The function does nothing if the
@@ -395,7 +392,7 @@ PyArray_FillObjectArray(PyArrayObject *arr, PyObject *obj)
}
}
-static void
+NPY_NO_EXPORT void
_fillobject(char *optr, PyObject *obj, PyArray_Descr *dtype)
{
if (!PyDataType_FLAGCHK(dtype, NPY_ITEM_REFCOUNT)) {
diff --git a/numpy/core/src/multiarray/refcount.h b/numpy/core/src/multiarray/refcount.h
index 16d34e292..7f39b9ca4 100644
--- a/numpy/core/src/multiarray/refcount.h
+++ b/numpy/core/src/multiarray/refcount.h
@@ -24,4 +24,7 @@ PyArray_XDECREF(PyArrayObject *mp);
NPY_NO_EXPORT void
PyArray_FillObjectArray(PyArrayObject *arr, PyObject *obj);
+NPY_NO_EXPORT void
+_fillobject(char *optr, PyObject *obj, PyArray_Descr *dtype);
+
#endif /* NUMPY_CORE_SRC_MULTIARRAY_REFCOUNT_H_ */
diff --git a/numpy/core/src/npymath/halffloat.c b/numpy/core/src/npymath/halffloat.c
deleted file mode 100644
index 51948c736..000000000
--- a/numpy/core/src/npymath/halffloat.c
+++ /dev/null
@@ -1,555 +0,0 @@
-#define NPY_NO_DEPRECATED_API NPY_API_VERSION
-
-#include "numpy/halffloat.h"
-
-/*
- * This chooses between 'ties to even' and 'ties away from zero'.
- */
-#define NPY_HALF_ROUND_TIES_TO_EVEN 1
-/*
- * If these are 1, the conversions try to trigger underflow,
- * overflow, and invalid exceptions in the FP system when needed.
- */
-#define NPY_HALF_GENERATE_OVERFLOW 1
-#define NPY_HALF_GENERATE_UNDERFLOW 1
-#define NPY_HALF_GENERATE_INVALID 1
-
-/*
- ********************************************************************
- * HALF-PRECISION ROUTINES *
- ********************************************************************
- */
-
-float npy_half_to_float(npy_half h)
-{
- union { float ret; npy_uint32 retbits; } conv;
- conv.retbits = npy_halfbits_to_floatbits(h);
- return conv.ret;
-}
-
-double npy_half_to_double(npy_half h)
-{
- union { double ret; npy_uint64 retbits; } conv;
- conv.retbits = npy_halfbits_to_doublebits(h);
- return conv.ret;
-}
-
-npy_half npy_float_to_half(float f)
-{
- union { float f; npy_uint32 fbits; } conv;
- conv.f = f;
- return npy_floatbits_to_halfbits(conv.fbits);
-}
-
-npy_half npy_double_to_half(double d)
-{
- union { double d; npy_uint64 dbits; } conv;
- conv.d = d;
- return npy_doublebits_to_halfbits(conv.dbits);
-}
-
-int npy_half_iszero(npy_half h)
-{
- return (h&0x7fff) == 0;
-}
-
-int npy_half_isnan(npy_half h)
-{
- return ((h&0x7c00u) == 0x7c00u) && ((h&0x03ffu) != 0x0000u);
-}
-
-int npy_half_isinf(npy_half h)
-{
- return ((h&0x7fffu) == 0x7c00u);
-}
-
-int npy_half_isfinite(npy_half h)
-{
- return ((h&0x7c00u) != 0x7c00u);
-}
-
-int npy_half_signbit(npy_half h)
-{
- return (h&0x8000u) != 0;
-}
-
-npy_half npy_half_spacing(npy_half h)
-{
- npy_half ret;
- npy_uint16 h_exp = h&0x7c00u;
- npy_uint16 h_sig = h&0x03ffu;
- if (h_exp == 0x7c00u) {
-#if NPY_HALF_GENERATE_INVALID
- npy_set_floatstatus_invalid();
-#endif
- ret = NPY_HALF_NAN;
- } else if (h == 0x7bffu) {
-#if NPY_HALF_GENERATE_OVERFLOW
- npy_set_floatstatus_overflow();
-#endif
- ret = NPY_HALF_PINF;
- } else if ((h&0x8000u) && h_sig == 0) { /* Negative boundary case */
- if (h_exp > 0x2c00u) { /* If result is normalized */
- ret = h_exp - 0x2c00u;
- } else if(h_exp > 0x0400u) { /* The result is a subnormal, but not the smallest */
- ret = 1 << ((h_exp >> 10) - 2);
- } else {
- ret = 0x0001u; /* Smallest subnormal half */
- }
- } else if (h_exp > 0x2800u) { /* If result is still normalized */
- ret = h_exp - 0x2800u;
- } else if (h_exp > 0x0400u) { /* The result is a subnormal, but not the smallest */
- ret = 1 << ((h_exp >> 10) - 1);
- } else {
- ret = 0x0001u;
- }
-
- return ret;
-}
-
-npy_half npy_half_copysign(npy_half x, npy_half y)
-{
- return (x&0x7fffu) | (y&0x8000u);
-}
-
-npy_half npy_half_nextafter(npy_half x, npy_half y)
-{
- npy_half ret;
-
- if (npy_half_isnan(x) || npy_half_isnan(y)) {
- ret = NPY_HALF_NAN;
- } else if (npy_half_eq_nonan(x, y)) {
- ret = x;
- } else if (npy_half_iszero(x)) {
- ret = (y&0x8000u) + 1; /* Smallest subnormal half */
- } else if (!(x&0x8000u)) { /* x > 0 */
- if ((npy_int16)x > (npy_int16)y) { /* x > y */
- ret = x-1;
- } else {
- ret = x+1;
- }
- } else {
- if (!(y&0x8000u) || (x&0x7fffu) > (y&0x7fffu)) { /* x < y */
- ret = x-1;
- } else {
- ret = x+1;
- }
- }
-#if NPY_HALF_GENERATE_OVERFLOW
- if (npy_half_isinf(ret) && npy_half_isfinite(x)) {
- npy_set_floatstatus_overflow();
- }
-#endif
-
- return ret;
-}
-
-int npy_half_eq_nonan(npy_half h1, npy_half h2)
-{
- return (h1 == h2 || ((h1 | h2) & 0x7fff) == 0);
-}
-
-int npy_half_eq(npy_half h1, npy_half h2)
-{
- /*
- * The equality cases are as follows:
- * - If either value is NaN, never equal.
- * - If the values are equal, equal.
- * - If the values are both signed zeros, equal.
- */
- return (!npy_half_isnan(h1) && !npy_half_isnan(h2)) &&
- (h1 == h2 || ((h1 | h2) & 0x7fff) == 0);
-}
-
-int npy_half_ne(npy_half h1, npy_half h2)
-{
- return !npy_half_eq(h1, h2);
-}
-
-int npy_half_lt_nonan(npy_half h1, npy_half h2)
-{
- if (h1&0x8000u) {
- if (h2&0x8000u) {
- return (h1&0x7fffu) > (h2&0x7fffu);
- } else {
- /* Signed zeros are equal, have to check for it */
- return (h1 != 0x8000u) || (h2 != 0x0000u);
- }
- } else {
- if (h2&0x8000u) {
- return 0;
- } else {
- return (h1&0x7fffu) < (h2&0x7fffu);
- }
- }
-}
-
-int npy_half_lt(npy_half h1, npy_half h2)
-{
- return (!npy_half_isnan(h1) && !npy_half_isnan(h2)) && npy_half_lt_nonan(h1, h2);
-}
-
-int npy_half_gt(npy_half h1, npy_half h2)
-{
- return npy_half_lt(h2, h1);
-}
-
-int npy_half_le_nonan(npy_half h1, npy_half h2)
-{
- if (h1&0x8000u) {
- if (h2&0x8000u) {
- return (h1&0x7fffu) >= (h2&0x7fffu);
- } else {
- return 1;
- }
- } else {
- if (h2&0x8000u) {
- /* Signed zeros are equal, have to check for it */
- return (h1 == 0x0000u) && (h2 == 0x8000u);
- } else {
- return (h1&0x7fffu) <= (h2&0x7fffu);
- }
- }
-}
-
-int npy_half_le(npy_half h1, npy_half h2)
-{
- return (!npy_half_isnan(h1) && !npy_half_isnan(h2)) && npy_half_le_nonan(h1, h2);
-}
-
-int npy_half_ge(npy_half h1, npy_half h2)
-{
- return npy_half_le(h2, h1);
-}
-
-npy_half npy_half_divmod(npy_half h1, npy_half h2, npy_half *modulus)
-{
- float fh1 = npy_half_to_float(h1);
- float fh2 = npy_half_to_float(h2);
- float div, mod;
-
- div = npy_divmodf(fh1, fh2, &mod);
- *modulus = npy_float_to_half(mod);
- return npy_float_to_half(div);
-}
-
-
-
-/*
- ********************************************************************
- * BIT-LEVEL CONVERSIONS *
- ********************************************************************
- */
-
-npy_uint16 npy_floatbits_to_halfbits(npy_uint32 f)
-{
- npy_uint32 f_exp, f_sig;
- npy_uint16 h_sgn, h_exp, h_sig;
-
- h_sgn = (npy_uint16) ((f&0x80000000u) >> 16);
- f_exp = (f&0x7f800000u);
-
- /* Exponent overflow/NaN converts to signed inf/NaN */
- if (f_exp >= 0x47800000u) {
- if (f_exp == 0x7f800000u) {
- /* Inf or NaN */
- f_sig = (f&0x007fffffu);
- if (f_sig != 0) {
- /* NaN - propagate the flag in the significand... */
- npy_uint16 ret = (npy_uint16) (0x7c00u + (f_sig >> 13));
- /* ...but make sure it stays a NaN */
- if (ret == 0x7c00u) {
- ret++;
- }
- return h_sgn + ret;
- } else {
- /* signed inf */
- return (npy_uint16) (h_sgn + 0x7c00u);
- }
- } else {
- /* overflow to signed inf */
-#if NPY_HALF_GENERATE_OVERFLOW
- npy_set_floatstatus_overflow();
-#endif
- return (npy_uint16) (h_sgn + 0x7c00u);
- }
- }
-
- /* Exponent underflow converts to a subnormal half or signed zero */
- if (f_exp <= 0x38000000u) {
- /*
- * Signed zeros, subnormal floats, and floats with small
- * exponents all convert to signed zero half-floats.
- */
- if (f_exp < 0x33000000u) {
-#if NPY_HALF_GENERATE_UNDERFLOW
- /* If f != 0, it underflowed to 0 */
- if ((f&0x7fffffff) != 0) {
- npy_set_floatstatus_underflow();
- }
-#endif
- return h_sgn;
- }
- /* Make the subnormal significand */
- f_exp >>= 23;
- f_sig = (0x00800000u + (f&0x007fffffu));
-#if NPY_HALF_GENERATE_UNDERFLOW
- /* If it's not exactly represented, it underflowed */
- if ((f_sig&(((npy_uint32)1 << (126 - f_exp)) - 1)) != 0) {
- npy_set_floatstatus_underflow();
- }
-#endif
- /*
- * Usually the significand is shifted by 13. For subnormals an
- * additional shift needs to occur. This shift is one for the largest
- * exponent giving a subnormal `f_exp = 0x38000000 >> 23 = 112`, which
- * offsets the new first bit. At most the shift can be 1+10 bits.
- */
- f_sig >>= (113 - f_exp);
- /* Handle rounding by adding 1 to the bit beyond half precision */
-#if NPY_HALF_ROUND_TIES_TO_EVEN
- /*
- * If the last bit in the half significand is 0 (already even), and
- * the remaining bit pattern is 1000...0, then we do not add one
- * to the bit after the half significand. However, the (113 - f_exp)
- * shift can lose up to 11 bits, so the || checks them in the original.
- * In all other cases, we can just add one.
- */
- if (((f_sig&0x00003fffu) != 0x00001000u) || (f&0x000007ffu)) {
- f_sig += 0x00001000u;
- }
-#else
- f_sig += 0x00001000u;
-#endif
- h_sig = (npy_uint16) (f_sig >> 13);
- /*
- * If the rounding causes a bit to spill into h_exp, it will
- * increment h_exp from zero to one and h_sig will be zero.
- * This is the correct result.
- */
- return (npy_uint16) (h_sgn + h_sig);
- }
-
- /* Regular case with no overflow or underflow */
- h_exp = (npy_uint16) ((f_exp - 0x38000000u) >> 13);
- /* Handle rounding by adding 1 to the bit beyond half precision */
- f_sig = (f&0x007fffffu);
-#if NPY_HALF_ROUND_TIES_TO_EVEN
- /*
- * If the last bit in the half significand is 0 (already even), and
- * the remaining bit pattern is 1000...0, then we do not add one
- * to the bit after the half significand. In all other cases, we do.
- */
- if ((f_sig&0x00003fffu) != 0x00001000u) {
- f_sig += 0x00001000u;
- }
-#else
- f_sig += 0x00001000u;
-#endif
- h_sig = (npy_uint16) (f_sig >> 13);
- /*
- * If the rounding causes a bit to spill into h_exp, it will
- * increment h_exp by one and h_sig will be zero. This is the
- * correct result. h_exp may increment to 15, at greatest, in
- * which case the result overflows to a signed inf.
- */
-#if NPY_HALF_GENERATE_OVERFLOW
- h_sig += h_exp;
- if (h_sig == 0x7c00u) {
- npy_set_floatstatus_overflow();
- }
- return h_sgn + h_sig;
-#else
- return h_sgn + h_exp + h_sig;
-#endif
-}
-
-npy_uint16 npy_doublebits_to_halfbits(npy_uint64 d)
-{
- npy_uint64 d_exp, d_sig;
- npy_uint16 h_sgn, h_exp, h_sig;
-
- h_sgn = (d&0x8000000000000000ULL) >> 48;
- d_exp = (d&0x7ff0000000000000ULL);
-
- /* Exponent overflow/NaN converts to signed inf/NaN */
- if (d_exp >= 0x40f0000000000000ULL) {
- if (d_exp == 0x7ff0000000000000ULL) {
- /* Inf or NaN */
- d_sig = (d&0x000fffffffffffffULL);
- if (d_sig != 0) {
- /* NaN - propagate the flag in the significand... */
- npy_uint16 ret = (npy_uint16) (0x7c00u + (d_sig >> 42));
- /* ...but make sure it stays a NaN */
- if (ret == 0x7c00u) {
- ret++;
- }
- return h_sgn + ret;
- } else {
- /* signed inf */
- return h_sgn + 0x7c00u;
- }
- } else {
- /* overflow to signed inf */
-#if NPY_HALF_GENERATE_OVERFLOW
- npy_set_floatstatus_overflow();
-#endif
- return h_sgn + 0x7c00u;
- }
- }
-
- /* Exponent underflow converts to subnormal half or signed zero */
- if (d_exp <= 0x3f00000000000000ULL) {
- /*
- * Signed zeros, subnormal floats, and floats with small
- * exponents all convert to signed zero half-floats.
- */
- if (d_exp < 0x3e60000000000000ULL) {
-#if NPY_HALF_GENERATE_UNDERFLOW
- /* If d != 0, it underflowed to 0 */
- if ((d&0x7fffffffffffffffULL) != 0) {
- npy_set_floatstatus_underflow();
- }
-#endif
- return h_sgn;
- }
- /* Make the subnormal significand */
- d_exp >>= 52;
- d_sig = (0x0010000000000000ULL + (d&0x000fffffffffffffULL));
-#if NPY_HALF_GENERATE_UNDERFLOW
- /* If it's not exactly represented, it underflowed */
- if ((d_sig&(((npy_uint64)1 << (1051 - d_exp)) - 1)) != 0) {
- npy_set_floatstatus_underflow();
- }
-#endif
- /*
- * Unlike floats, doubles have enough room to shift left to align
- * the subnormal significand leading to no loss of the last bits.
- * The smallest possible exponent giving a subnormal is:
- * `d_exp = 0x3e60000000000000 >> 52 = 998`. All larger subnormals are
- * shifted with respect to it. This adds a shift of 10+1 bits the final
- * right shift when comparing it to the one in the normal branch.
- */
- assert(d_exp - 998 >= 0);
- d_sig <<= (d_exp - 998);
- /* Handle rounding by adding 1 to the bit beyond half precision */
-#if NPY_HALF_ROUND_TIES_TO_EVEN
- /*
- * If the last bit in the half significand is 0 (already even), and
- * the remaining bit pattern is 1000...0, then we do not add one
- * to the bit after the half significand. In all other cases, we do.
- */
- if ((d_sig&0x003fffffffffffffULL) != 0x0010000000000000ULL) {
- d_sig += 0x0010000000000000ULL;
- }
-#else
- d_sig += 0x0010000000000000ULL;
-#endif
- h_sig = (npy_uint16) (d_sig >> 53);
- /*
- * If the rounding causes a bit to spill into h_exp, it will
- * increment h_exp from zero to one and h_sig will be zero.
- * This is the correct result.
- */
- return h_sgn + h_sig;
- }
-
- /* Regular case with no overflow or underflow */
- h_exp = (npy_uint16) ((d_exp - 0x3f00000000000000ULL) >> 42);
- /* Handle rounding by adding 1 to the bit beyond half precision */
- d_sig = (d&0x000fffffffffffffULL);
-#if NPY_HALF_ROUND_TIES_TO_EVEN
- /*
- * If the last bit in the half significand is 0 (already even), and
- * the remaining bit pattern is 1000...0, then we do not add one
- * to the bit after the half significand. In all other cases, we do.
- */
- if ((d_sig&0x000007ffffffffffULL) != 0x0000020000000000ULL) {
- d_sig += 0x0000020000000000ULL;
- }
-#else
- d_sig += 0x0000020000000000ULL;
-#endif
- h_sig = (npy_uint16) (d_sig >> 42);
-
- /*
- * If the rounding causes a bit to spill into h_exp, it will
- * increment h_exp by one and h_sig will be zero. This is the
- * correct result. h_exp may increment to 15, at greatest, in
- * which case the result overflows to a signed inf.
- */
-#if NPY_HALF_GENERATE_OVERFLOW
- h_sig += h_exp;
- if (h_sig == 0x7c00u) {
- npy_set_floatstatus_overflow();
- }
- return h_sgn + h_sig;
-#else
- return h_sgn + h_exp + h_sig;
-#endif
-}
-
-npy_uint32 npy_halfbits_to_floatbits(npy_uint16 h)
-{
- npy_uint16 h_exp, h_sig;
- npy_uint32 f_sgn, f_exp, f_sig;
-
- h_exp = (h&0x7c00u);
- f_sgn = ((npy_uint32)h&0x8000u) << 16;
- switch (h_exp) {
- case 0x0000u: /* 0 or subnormal */
- h_sig = (h&0x03ffu);
- /* Signed zero */
- if (h_sig == 0) {
- return f_sgn;
- }
- /* Subnormal */
- h_sig <<= 1;
- while ((h_sig&0x0400u) == 0) {
- h_sig <<= 1;
- h_exp++;
- }
- f_exp = ((npy_uint32)(127 - 15 - h_exp)) << 23;
- f_sig = ((npy_uint32)(h_sig&0x03ffu)) << 13;
- return f_sgn + f_exp + f_sig;
- case 0x7c00u: /* inf or NaN */
- /* All-ones exponent and a copy of the significand */
- return f_sgn + 0x7f800000u + (((npy_uint32)(h&0x03ffu)) << 13);
- default: /* normalized */
- /* Just need to adjust the exponent and shift */
- return f_sgn + (((npy_uint32)(h&0x7fffu) + 0x1c000u) << 13);
- }
-}
-
-npy_uint64 npy_halfbits_to_doublebits(npy_uint16 h)
-{
- npy_uint16 h_exp, h_sig;
- npy_uint64 d_sgn, d_exp, d_sig;
-
- h_exp = (h&0x7c00u);
- d_sgn = ((npy_uint64)h&0x8000u) << 48;
- switch (h_exp) {
- case 0x0000u: /* 0 or subnormal */
- h_sig = (h&0x03ffu);
- /* Signed zero */
- if (h_sig == 0) {
- return d_sgn;
- }
- /* Subnormal */
- h_sig <<= 1;
- while ((h_sig&0x0400u) == 0) {
- h_sig <<= 1;
- h_exp++;
- }
- d_exp = ((npy_uint64)(1023 - 15 - h_exp)) << 52;
- d_sig = ((npy_uint64)(h_sig&0x03ffu)) << 42;
- return d_sgn + d_exp + d_sig;
- case 0x7c00u: /* inf or NaN */
- /* All-ones exponent and a copy of the significand */
- return d_sgn + 0x7ff0000000000000ULL +
- (((npy_uint64)(h&0x03ffu)) << 42);
- default: /* normalized */
- /* Just need to adjust the exponent and shift */
- return d_sgn + (((npy_uint64)(h&0x7fffu) + 0xfc000u) << 42);
- }
-}
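
The removed file is the reference round-to-nearest-even bit algorithm; the replacement C++ `Half` keeps the same semantics, falling back to `half_private` ports of these routines when no native conversion exists. A pure-Python transcription of the deleted `npy_floatbits_to_halfbits`, checked against NumPy's own float32 to float16 cast (illustrative only; the floating-point status flags are omitted):

import struct
import numpy as np

def floatbits_to_halfbits(fbits):
    # Direct Python port of the deleted npy_floatbits_to_halfbits
    # (round to nearest, ties to even), minus the FP status side effects.
    h_sgn = (fbits & 0x80000000) >> 16
    f_exp = fbits & 0x7f800000
    if f_exp >= 0x47800000:                      # inf, NaN, or overflow
        f_sig = fbits & 0x007fffff
        if f_exp == 0x7f800000 and f_sig:        # NaN: make sure it stays a NaN
            return h_sgn + max(0x7c00 + (f_sig >> 13), 0x7c01)
        return h_sgn + 0x7c00                    # signed infinity
    if f_exp <= 0x38000000:                      # subnormal half or signed zero
        if f_exp < 0x33000000:
            return h_sgn                         # underflows to signed zero
        f_exp >>= 23
        f_sig = 0x00800000 + (fbits & 0x007fffff)
        f_sig >>= 113 - f_exp
        if (f_sig & 0x00003fff) != 0x00001000 or (fbits & 0x000007ff):
            f_sig += 0x00001000                  # round up unless exactly tied to even
        return h_sgn + (f_sig >> 13)
    h_exp = (f_exp - 0x38000000) >> 13           # regular, normalized case
    f_sig = fbits & 0x007fffff
    if (f_sig & 0x00003fff) != 0x00001000:
        f_sig += 0x00001000
    return h_sgn + h_exp + (f_sig >> 13)         # rounding may spill into the exponent

rng = np.random.default_rng(0)
vals = rng.standard_normal(1000) * rng.choice([1e-8, 1.0, 1e4], 1000)
vals = np.concatenate([vals, [0.0, -0.0, np.inf, -np.inf, np.nan,
                              65504.0, 65520.0, 6.1e-5]]).astype(np.float32)
for v in vals:
    fbits = struct.unpack("<I", struct.pack("<f", v))[0]
    expected = int(np.array(v, dtype=np.float16).view(np.uint16))
    assert floatbits_to_halfbits(fbits) == expected
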
diff --git a/numpy/core/src/npymath/halffloat.cpp b/numpy/core/src/npymath/halffloat.cpp
new file mode 100644
index 000000000..aa582c1b9
--- /dev/null
+++ b/numpy/core/src/npymath/halffloat.cpp
@@ -0,0 +1,238 @@
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+
+/*
+ * If these are 1, the conversions try to trigger underflow,
+ * overflow, and invalid exceptions in the FP system when needed.
+ */
+#define NPY_HALF_GENERATE_OVERFLOW 1
+#define NPY_HALF_GENERATE_INVALID 1
+
+#include "numpy/halffloat.h"
+
+#include "common.hpp"
+/*
+ ********************************************************************
+ * HALF-PRECISION ROUTINES *
+ ********************************************************************
+ */
+using namespace np;
+
+float npy_half_to_float(npy_half h)
+{
+ return static_cast<float>(Half::FromBits(h));
+}
+
+double npy_half_to_double(npy_half h)
+{
+ return static_cast<double>(Half::FromBits(h));
+}
+
+npy_half npy_float_to_half(float f)
+{
+ return Half(f).Bits();
+}
+
+npy_half npy_double_to_half(double d)
+{
+ return Half(d).Bits();
+}
+
+int npy_half_iszero(npy_half h)
+{
+ return (h&0x7fff) == 0;
+}
+
+int npy_half_isnan(npy_half h)
+{
+ return Half::FromBits(h).IsNaN();
+}
+
+int npy_half_isinf(npy_half h)
+{
+ return ((h&0x7fffu) == 0x7c00u);
+}
+
+int npy_half_isfinite(npy_half h)
+{
+ return ((h&0x7c00u) != 0x7c00u);
+}
+
+int npy_half_signbit(npy_half h)
+{
+ return (h&0x8000u) != 0;
+}
+
+npy_half npy_half_spacing(npy_half h)
+{
+ npy_half ret;
+ npy_uint16 h_exp = h&0x7c00u;
+ npy_uint16 h_sig = h&0x03ffu;
+ if (h_exp == 0x7c00u) {
+#if NPY_HALF_GENERATE_INVALID
+ npy_set_floatstatus_invalid();
+#endif
+ ret = NPY_HALF_NAN;
+ } else if (h == 0x7bffu) {
+#if NPY_HALF_GENERATE_OVERFLOW
+ npy_set_floatstatus_overflow();
+#endif
+ ret = NPY_HALF_PINF;
+ } else if ((h&0x8000u) && h_sig == 0) { /* Negative boundary case */
+ if (h_exp > 0x2c00u) { /* If result is normalized */
+ ret = h_exp - 0x2c00u;
+ } else if(h_exp > 0x0400u) { /* The result is a subnormal, but not the smallest */
+ ret = 1 << ((h_exp >> 10) - 2);
+ } else {
+ ret = 0x0001u; /* Smallest subnormal half */
+ }
+ } else if (h_exp > 0x2800u) { /* If result is still normalized */
+ ret = h_exp - 0x2800u;
+ } else if (h_exp > 0x0400u) { /* The result is a subnormal, but not the smallest */
+ ret = 1 << ((h_exp >> 10) - 1);
+ } else {
+ ret = 0x0001u;
+ }
+
+ return ret;
+}
+
+npy_half npy_half_copysign(npy_half x, npy_half y)
+{
+ return (x&0x7fffu) | (y&0x8000u);
+}
+
+npy_half npy_half_nextafter(npy_half x, npy_half y)
+{
+ npy_half ret;
+
+ if (npy_half_isnan(x) || npy_half_isnan(y)) {
+ ret = NPY_HALF_NAN;
+ } else if (npy_half_eq_nonan(x, y)) {
+ ret = x;
+ } else if (npy_half_iszero(x)) {
+ ret = (y&0x8000u) + 1; /* Smallest subnormal half */
+ } else if (!(x&0x8000u)) { /* x > 0 */
+ if ((npy_int16)x > (npy_int16)y) { /* x > y */
+ ret = x-1;
+ } else {
+ ret = x+1;
+ }
+ } else {
+ if (!(y&0x8000u) || (x&0x7fffu) > (y&0x7fffu)) { /* x < y */
+ ret = x-1;
+ } else {
+ ret = x+1;
+ }
+ }
+#if NPY_HALF_GENERATE_OVERFLOW
+ if (npy_half_isinf(ret) && npy_half_isfinite(x)) {
+ npy_set_floatstatus_overflow();
+ }
+#endif
+
+ return ret;
+}
+
+int npy_half_eq_nonan(npy_half h1, npy_half h2)
+{
+ return Half::FromBits(h1).Equal(Half::FromBits(h2));
+}
+
+int npy_half_eq(npy_half h1, npy_half h2)
+{
+ return Half::FromBits(h1) == Half::FromBits(h2);
+}
+
+int npy_half_ne(npy_half h1, npy_half h2)
+{
+ return Half::FromBits(h1) != Half::FromBits(h2);
+}
+
+int npy_half_lt_nonan(npy_half h1, npy_half h2)
+{
+ return Half::FromBits(h1).Less(Half::FromBits(h2));
+}
+
+int npy_half_lt(npy_half h1, npy_half h2)
+{
+ return Half::FromBits(h1) < Half::FromBits(h2);
+}
+
+int npy_half_gt(npy_half h1, npy_half h2)
+{
+ return npy_half_lt(h2, h1);
+}
+
+int npy_half_le_nonan(npy_half h1, npy_half h2)
+{
+ return Half::FromBits(h1).LessEqual(Half::FromBits(h2));
+}
+
+int npy_half_le(npy_half h1, npy_half h2)
+{
+ return Half::FromBits(h1) <= Half::FromBits(h2);
+}
+
+int npy_half_ge(npy_half h1, npy_half h2)
+{
+ return npy_half_le(h2, h1);
+}
+
+npy_half npy_half_divmod(npy_half h1, npy_half h2, npy_half *modulus)
+{
+ float fh1 = npy_half_to_float(h1);
+ float fh2 = npy_half_to_float(h2);
+ float div, mod;
+
+ div = npy_divmodf(fh1, fh2, &mod);
+ *modulus = npy_float_to_half(mod);
+ return npy_float_to_half(div);
+}
+
+
+/*
+ ********************************************************************
+ * BIT-LEVEL CONVERSIONS *
+ ********************************************************************
+ */
+
+npy_uint16 npy_floatbits_to_halfbits(npy_uint32 f)
+{
+ if constexpr (Half::kNativeConversion<float>) {
+ return BitCast<uint16_t>(Half(BitCast<float>(f)));
+ }
+ else {
+ return half_private::FromFloatBits(f);
+ }
+}
+
+npy_uint16 npy_doublebits_to_halfbits(npy_uint64 d)
+{
+ if constexpr (Half::kNativeConversion<double>) {
+ return BitCast<uint16_t>(Half(BitCast<double>(d)));
+ }
+ else {
+ return half_private::FromDoubleBits(d);
+ }
+}
+
+npy_uint32 npy_halfbits_to_floatbits(npy_uint16 h)
+{
+ if constexpr (Half::kNativeConversion<float>) {
+ return BitCast<uint32_t>(static_cast<float>(Half::FromBits(h)));
+ }
+ else {
+ return half_private::ToFloatBits(h);
+ }
+}
+
+npy_uint64 npy_halfbits_to_doublebits(npy_uint16 h)
+{
+ if constexpr (Half::kNativeConversion<double>) {
+ return BitCast<uint64_t>(static_cast<double>(Half::FromBits(h)));
+ }
+ else {
+ return half_private::ToDoubleBits(h);
+ }
+}
+
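
Throughout both the old C and the new C++ code, `npy_half` is simply the IEEE 754 binary16 bit pattern stored in an `npy_uint16`, which is why predicates such as `npy_half_isinf` and `npy_half_iszero` remain plain mask tests. The constants used above are easy to confirm from Python (illustrative):

import numpy as np

def half_bits(x):
    return int(np.array(x, dtype=np.float16).view(np.uint16))

assert half_bits(1.0) == 0x3c00
assert half_bits(np.inf) == 0x7c00      # NPY_HALF_PINF
assert half_bits(-np.inf) == 0xfc00
assert half_bits(np.nan) == 0x7e00      # NPY_HALF_NAN (default quiet NaN)
assert half_bits(-0.0) == 0x8000        # only the sign bit: npy_half_iszero() is true
assert half_bits(65504.0) == 0x7bff     # largest finite half (the h == 0x7bffu case above)
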
diff --git a/numpy/core/src/umath/loops.h.src b/numpy/core/src/umath/loops.h.src
index e36067822..ab54c1966 100644
--- a/numpy/core/src/umath/loops.h.src
+++ b/numpy/core/src/umath/loops.h.src
@@ -336,15 +336,6 @@ NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void HALF_@func@,
/**end repeat**/
/**begin repeat
- * #func = sin, cos#
- */
-
-NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void DOUBLE_@func@,
- (char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)))
-
-/**end repeat**/
-
-/**begin repeat
* #TYPE = FLOAT, DOUBLE#
*/
/**begin repeat1
@@ -360,12 +351,17 @@ NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_@func@,
#ifndef NPY_DISABLE_OPTIMIZATION
#include "loops_trigonometric.dispatch.h"
#endif
+
/**begin repeat
+ * #TYPE = FLOAT, DOUBLE#
+ */
+/**begin repeat1
* #func = sin, cos#
*/
-NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void FLOAT_@func@, (
+NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_@func@, (
char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)
))
+/**end repeat1**/
/**end repeat**/
#ifndef NPY_DISABLE_OPTIMIZATION
diff --git a/numpy/core/src/umath/loops_trigonometric.dispatch.c.src b/numpy/core/src/umath/loops_trigonometric.dispatch.c.src
index 9f9978e66..28af04f6b 100644
--- a/numpy/core/src/umath/loops_trigonometric.dispatch.c.src
+++ b/numpy/core/src/umath/loops_trigonometric.dispatch.c.src
@@ -9,12 +9,18 @@
#include "simd/simd.h"
#include "loops_utils.h"
#include "loops.h"
+#include "fast_loop_macros.h"
/*
* TODO:
* - use vectorized version of Payne-Hanek style reduction for large elements or
* when there's no native FUSED support instead of fallback to libc
*/
-#if NPY_SIMD_F32 && NPY_SIMD_FMA3 // native support
+#if NPY_SIMD_FMA3 // native support
+/**begin repeat
+ * #check = F64, F32#
+ * #sfx = f64, f32#
+ */
+#if NPY_SIMD_@check@
/*
* Vectorized Cody-Waite range reduction technique
* Performs the reduction step x* = x - y*C in three steps:
@@ -23,14 +29,189 @@
* 3) x* = x - y*c3
* c1, c2 are exact floating points, c3 = C - c1 - c2 simulates higher precision
*/
-NPY_FINLINE npyv_f32
-simd_range_reduction_f32(npyv_f32 x, npyv_f32 y, npyv_f32 c1, npyv_f32 c2, npyv_f32 c3)
+NPY_FINLINE npyv_@sfx@
+simd_range_reduction_@sfx@(npyv_@sfx@ x, npyv_@sfx@ y, npyv_@sfx@ c1, npyv_@sfx@ c2, npyv_@sfx@ c3)
{
- npyv_f32 reduced_x = npyv_muladd_f32(y, c1, x);
- reduced_x = npyv_muladd_f32(y, c2, reduced_x);
- reduced_x = npyv_muladd_f32(y, c3, reduced_x);
+ npyv_@sfx@ reduced_x = npyv_muladd_@sfx@(y, c1, x);
+ reduced_x = npyv_muladd_@sfx@(y, c2, reduced_x);
+ reduced_x = npyv_muladd_@sfx@(y, c3, reduced_x);
return reduced_x;
}
+#endif
+/**end repeat**/
+
+#if NPY_SIMD_F64
+/**begin repeat
+ * #op = cos, sin#
+ */
+#if defined(NPY_OS_WIN32) || defined(NPY_OS_CYGWIN)
+NPY_FINLINE npyv_f64
+#else
+NPY_NOINLINE npyv_f64
+#endif
+simd_@op@_scalar_f64(npyv_f64 out, npy_uint64 cmp_bits)
+{
+ // MSVC doesn't compile with direct vector access, so we copy it here
+ // as we have no npyv_get_lane/npyv_set_lane intrinsics
+ npy_double NPY_DECL_ALIGNED(NPY_SIMD_WIDTH) out_copy[npyv_nlanes_f64];
+ npyv_storea_f64(out_copy, out);
+
+ for (unsigned i = 0; i < npyv_nlanes_f64; ++i) {
+ if (cmp_bits & (1 << i)) {
+ out_copy[i] = npy_@op@(out_copy[i]);
+ }
+ }
+
+ return npyv_loada_f64(out_copy);
+}
+/**end repeat**/
+
+/*
+ * Approximate sine algorithm for x \in [-pi/2, pi/2]
+ * worst-case error is 3.5 ulp.
+ * abs error: 0x1.be222a58p-53 in [-pi/2, pi/2].
+ */
+NPY_FINLINE npyv_f64
+simd_approx_sine_poly_f64(npyv_f64 r)
+{
+ const npyv_f64 poly1 = npyv_setall_f64(-0x1.9f4a9c8b21dc9p-41);
+ const npyv_f64 poly2 = npyv_setall_f64(0x1.60e88a10163f2p-33);
+ const npyv_f64 poly3 = npyv_setall_f64(-0x1.ae6361b7254e7p-26);
+ const npyv_f64 poly4 = npyv_setall_f64(0x1.71de382e8d62bp-19);
+ const npyv_f64 poly5 = npyv_setall_f64(-0x1.a01a019aeb4ffp-13);
+ const npyv_f64 poly6 = npyv_setall_f64(0x1.111111110b25ep-7);
+ const npyv_f64 poly7 = npyv_setall_f64(-0x1.55555555554c3p-3);
+
+ npyv_f64 r2 = npyv_mul_f64(r, r);
+ npyv_f64 y = npyv_muladd_f64(poly1, r2, poly2);
+ y = npyv_muladd_f64(y, r2, poly3);
+ y = npyv_muladd_f64(y, r2, poly4);
+ y = npyv_muladd_f64(y, r2, poly5);
+ y = npyv_muladd_f64(y, r2, poly6);
+ y = npyv_muladd_f64(y, r2, poly7);
+ y = npyv_muladd_f64(npyv_mul_f64(y, r2), r, r);
+
+ return y;
+}
+
+/* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */
+NPY_FINLINE npyv_f64
+simd_range_reduction_pi2(npyv_f64 r, npyv_f64 n) {
+ const npyv_f64 pi1 = npyv_setall_f64(-0x1.921fb54442d18p+1);
+ const npyv_f64 pi2 = npyv_setall_f64(-0x1.1a62633145c06p-53);
+ const npyv_f64 pi3 = npyv_setall_f64(-0x1.c1cd129024e09p-106);
+
+ return simd_range_reduction_f64(r, n, pi1, pi2, pi3);
+}
+
+NPY_FINLINE npyv_b64 simd_cos_range_check_f64(npyv_u64 ir) {
+ const npyv_u64 tiny_bound = npyv_setall_u64(0x202); /* top12 (asuint64 (0x1p-509)). */
+ const npyv_u64 simd_thresh = npyv_setall_u64(0x214); /* top12 (asuint64 (RangeVal)) - SIMD_TINY_BOUND. */
+
+ return npyv_cmpge_u64(npyv_sub_u64(npyv_shri_u64(ir, 52), tiny_bound), simd_thresh);
+}
+
+NPY_FINLINE npyv_b64 simd_sin_range_check_f64(npyv_u64 ir) {
+ const npyv_f64 range_val = npyv_setall_f64(0x1p23);
+
+ return npyv_cmpge_u64(ir, npyv_reinterpret_u64_f64(range_val));
+}
+
+NPY_FINLINE npyv_f64
+simd_cos_poly_f64(npyv_f64 r, npyv_u64 ir, npyv_u64 sign)
+{
+ const npyv_f64 inv_pi = npyv_setall_f64(0x1.45f306dc9c883p-2);
+ const npyv_f64 half_pi = npyv_setall_f64(0x1.921fb54442d18p+0);
+ const npyv_f64 shift = npyv_setall_f64(0x1.8p52);
+
+ /* n = rint((|x|+pi/2)/pi) - 0.5. */
+ npyv_f64 n = npyv_muladd_f64(inv_pi, npyv_add_f64(r, half_pi), shift);
+ npyv_u64 odd = npyv_shli_u64(npyv_reinterpret_u64_f64(n), 63);
+ n = npyv_sub_f64(n, shift);
+ n = npyv_sub_f64(n, npyv_setall_f64(0.5));
+
+ /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */
+ r = simd_range_reduction_pi2(r, n);
+
+ /* sin(r) poly approx. */
+ npyv_f64 y = simd_approx_sine_poly_f64(r);
+
+ /* sign. */
+ return npyv_reinterpret_f64_u64(npyv_xor_u64(npyv_reinterpret_u64_f64(y), odd));
+}
+
+NPY_FINLINE npyv_f64
+simd_sin_poly_f64(npyv_f64 r, npyv_u64 ir, npyv_u64 sign)
+{
+ const npyv_f64 inv_pi = npyv_setall_f64(0x1.45f306dc9c883p-2);
+ const npyv_f64 shift = npyv_setall_f64(0x1.8p52);
+
+ /* n = rint(|x|/pi). */
+ npyv_f64 n = npyv_muladd_f64(inv_pi, r, shift);
+ npyv_u64 odd = npyv_shli_u64(npyv_reinterpret_u64_f64(n), 63);
+ n = npyv_sub_f64(n, shift);
+
+ /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */
+ r = simd_range_reduction_pi2(r, n);
+
+ /* sin(r) poly approx. */
+ npyv_f64 y = simd_approx_sine_poly_f64(r);
+
+ /* sign. */
+ return npyv_reinterpret_f64_u64(npyv_xor_u64(npyv_xor_u64(npyv_reinterpret_u64_f64(y), sign), odd));
+}
+
+/**begin repeat
+ * #op = cos, sin#
+ */
+NPY_FINLINE void
+simd_@op@_f64(const double *src, npy_intp ssrc, double *dst, npy_intp sdst, npy_intp len)
+{
+ const npyv_u64 abs_mask = npyv_setall_u64(0x7fffffffffffffff);
+ const int vstep = npyv_nlanes_f64;
+
+ npyv_f64 out = npyv_zero_f64();
+ npyv_f64 x_in;
+
+ for (; len > 0; len -= vstep, src += ssrc*vstep, dst += sdst*vstep) {
+ if (ssrc == 1) {
+ x_in = npyv_load_tillz_f64(src, len);
+ } else {
+ x_in = npyv_loadn_tillz_f64(src, ssrc, len);
+ }
+
+ npyv_u64 ir = npyv_and_u64(npyv_reinterpret_u64_f64(x_in), abs_mask);
+ npyv_f64 r = npyv_reinterpret_f64_u64(ir);
+ npyv_u64 sign = npyv_and_u64(npyv_reinterpret_u64_f64(x_in), npyv_not_u64(abs_mask));
+
+ npyv_b64 cmp = simd_@op@_range_check_f64(ir);
+        /* If fenv exceptions are to be triggered correctly, set any special lanes
+           to 1 (which is neutral w.r.t. fenv). These lanes will be fixed up by
+           the scalar loop later. */

+ r = npyv_select_f64(cmp, npyv_setall_f64(1.0), r);
+
+        // Some lanes are in range, so at least one SIMD computation is useful
+ if (!npyv_all_b64(cmp)) {
+ out = simd_@op@_poly_f64(r, ir, sign);
+ }
+
+ if (npyv_any_b64(cmp)) {
+ out = npyv_select_f64(cmp, x_in, out);
+ out = simd_@op@_scalar_f64(out, npyv_tobits_b64(cmp));
+ }
+
+ if (sdst == 1) {
+ npyv_store_till_f64(dst, len, out);
+ } else {
+ npyv_storen_till_f64(dst, sdst, len, out);
+ }
+ }
+ npyv_cleanup();
+}
+/**end repeat**/
+#endif // NPY_SIMD_F64
+
+#if NPY_SIMD_F32
/*
* Approximate cosine algorithm for x \in [-PI/4, PI/4]
* Maximum ULP across all 32-bit floats = 0.875
@@ -198,24 +379,58 @@ simd_sincos_f32(const float *src, npy_intp ssrc, float *dst, npy_intp sdst,
}
npyv_cleanup();
}
-#endif // NPY_SIMD_FMA3
+#endif // NPY_SIMD_F32
+#endif // NPY_SIMD_FMA3
/**begin repeat
* #func = cos, sin#
- * #enum = SIMD_COMPUTE_COS, SIMD_COMPUTE_SIN#
+ */
+NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(DOUBLE_@func@)
+(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
+{
+#if NPY_SIMD_F64 && NPY_SIMD_FMA3
+ const double *src = (double*)args[0];
+ double *dst = (double*)args[1];
+ const int lsize = sizeof(src[0]);
+ const npy_intp ssrc = steps[0] / lsize;
+ const npy_intp sdst = steps[1] / lsize;
+ npy_intp len = dimensions[0];
+ assert(len <= 1 || (steps[0] % lsize == 0 && steps[1] % lsize == 0));
+
+ if (is_mem_overlap(src, steps[0], dst, steps[1], len) ||
+ !npyv_loadable_stride_f64(ssrc) || !npyv_storable_stride_f64(sdst)
+ ) {
+ for (; len > 0; --len, src += ssrc, dst += sdst) {
+ simd_@func@_f64(src, 1, dst, 1, 1);
+ }
+ } else {
+ simd_@func@_f64(src, ssrc, dst, sdst, len);
+ }
+#else
+ UNARY_LOOP {
+ const npy_double in1 = *(npy_double *)ip1;
+ *(npy_double *)op1 = npy_@func@(in1);
+ }
+#endif
+}
+/**end repeat**/
+
+/**begin repeat
+ * #func = sin, cos#
+ * #enum = SIMD_COMPUTE_SIN, SIMD_COMPUTE_COS#
*/
NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(FLOAT_@func@)
(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
{
- const float *src = (float*)args[0];
- float *dst = (float*)args[1];
+#if NPY_SIMD_F32 && NPY_SIMD_FMA3
+ const npy_float *src = (npy_float*)args[0];
+ npy_float *dst = (npy_float*)args[1];
const int lsize = sizeof(src[0]);
const npy_intp ssrc = steps[0] / lsize;
const npy_intp sdst = steps[1] / lsize;
npy_intp len = dimensions[0];
assert(len <= 1 || (steps[0] % lsize == 0 && steps[1] % lsize == 0));
-#if NPY_SIMD_F32 && NPY_SIMD_FMA3
if (is_mem_overlap(src, steps[0], dst, steps[1], len) ||
!npyv_loadable_stride_f32(ssrc) || !npyv_storable_stride_f32(sdst)
) {
@@ -226,9 +441,9 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(FLOAT_@func@)
simd_sincos_f32(src, ssrc, dst, sdst, len, @enum@);
}
#else
- for (; len > 0; --len, src += ssrc, dst += sdst) {
- const float src0 = *src;
- *dst = npy_@func@f(src0);
+ UNARY_LOOP {
+ const npy_float in1 = *(npy_float *)ip1;
+ *(npy_float *)op1 = npy_@func@f(in1);
}
#endif
}
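
The new double-precision kernel works in four steps: n = rint(|x|/pi) via the 0x1.8p52 shift trick, a three-step Cody-Waite reduction of |x| into roughly [-pi/2, pi/2], a degree-15 odd polynomial for sin(r), and a sign fix-up from the parity of n (cos first shifts the argument by pi/2). A scalar Python rendering of the sin path using the constants from this diff (an illustrative sketch without FMA, so slightly less accurate than the SIMD kernel):

import math
import numpy as np

# Constants copied from the diff (negated there because they are FMA addends).
PI1 = float.fromhex("0x1.921fb54442d18p+1")
PI2 = float.fromhex("0x1.1a62633145c06p-53")
PI3 = float.fromhex("0x1.c1cd129024e09p-106")
COEFFS = [float.fromhex(c) for c in (
    "-0x1.9f4a9c8b21dc9p-41", "0x1.60e88a10163f2p-33", "-0x1.ae6361b7254e7p-26",
    "0x1.71de382e8d62bp-19", "-0x1.a01a019aeb4ffp-13", "0x1.111111110b25ep-7",
    "-0x1.55555555554c3p-3")]

def sin_poly(r):
    # Horner evaluation of the odd polynomial: sin(r) ~ r + r**3 * P(r**2)
    r2 = r * r
    y = COEFFS[0]
    for c in COEFFS[1:]:
        y = y * r2 + c
    return y * r2 * r + r

def scalar_sin(x):
    ax = abs(x)
    n = np.rint(ax / math.pi)       # nearest multiple of pi
    r = ax - n * PI1                # Cody-Waite: r = |x| - n*pi in three steps
    r = r - n * PI2
    r = r - n * PI3
    y = sin_poly(r)
    if int(n) & 1:                  # sin(n*pi + r) = (-1)**n * sin(r)
        y = -y
    return math.copysign(1.0, x) * y

x = np.linspace(-50.0, 50.0, 1001)
approx = np.array([scalar_sin(v) for v in x])
assert np.allclose(approx, np.sin(x), atol=1e-12, rtol=0)
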
diff --git a/numpy/core/src/umath/loops_umath_fp.dispatch.c.src b/numpy/core/src/umath/loops_umath_fp.dispatch.c.src
index d6703bfb9..89999e879 100644
--- a/numpy/core/src/umath/loops_umath_fp.dispatch.c.src
+++ b/numpy/core/src/umath/loops_umath_fp.dispatch.c.src
@@ -60,32 +60,6 @@ simd_@func@_@sfx@(const npyv_lanetype_@sfx@ *src, npy_intp ssrc,
/**end repeat**/
/**begin repeat
- * #func = sin, cos#
- */
-static void
-simd_@func@_f64(const double *src, npy_intp ssrc,
- double *dst, npy_intp sdst, npy_intp len)
-{
- const int vstep = npyv_nlanes_f64;
- for (; len > 0; len -= vstep, src += ssrc*vstep, dst += sdst*vstep) {
- npyv_f64 x;
- if (ssrc == 1) {
- x = npyv_load_tillz_f64(src, len);
- } else {
- x = npyv_loadn_tillz_f64(src, ssrc, len);
- }
- npyv_f64 out = __svml_@func@8(x);
- if (sdst == 1) {
- npyv_store_till_f64(dst, len, out);
- } else {
- npyv_storen_till_f64(dst, sdst, len, out);
- }
- }
- npyv_cleanup();
-}
-/**end repeat**/
-
-/**begin repeat
* #sfx = f32, f64#
* #func_suffix = f16, 8#
*/
@@ -277,31 +251,3 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_@func@)
}
/**end repeat1**/
/**end repeat**/
-
-/**begin repeat
- * #func = sin, cos#
- */
-NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(DOUBLE_@func@)
-(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
-{
-#if NPY_SIMD && defined(NPY_HAVE_AVX512_SKX) && defined(NPY_CAN_LINK_SVML)
- const double *src = (double*)args[0];
- double *dst = (double*)args[1];
- const int lsize = sizeof(src[0]);
- const npy_intp ssrc = steps[0] / lsize;
- const npy_intp sdst = steps[1] / lsize;
- const npy_intp len = dimensions[0];
- assert(len <= 1 || (steps[0] % lsize == 0 && steps[1] % lsize == 0));
- if (!is_mem_overlap(src, steps[0], dst, steps[1], len) &&
- npyv_loadable_stride_f64(ssrc) &&
- npyv_storable_stride_f64(sdst)) {
- simd_@func@_f64(src, ssrc, dst, sdst, len);
- return;
- }
-#endif
- UNARY_LOOP {
- const npy_double in1 = *(npy_double *)ip1;
- *(npy_double *)op1 = npy_@func@(in1);
- }
-}
-/**end repeat**/
diff --git a/numpy/core/tests/test_array_coercion.py b/numpy/core/tests/test_array_coercion.py
index aeddac585..baca076ae 100644
--- a/numpy/core/tests/test_array_coercion.py
+++ b/numpy/core/tests/test_array_coercion.py
@@ -39,9 +39,9 @@ def arraylikes():
yield subclass
class _SequenceLike():
- # We are giving a warning that array-like's were also expected to be
- # sequence-like in `np.array([array_like])`. This can be removed
- # when the deprecation expired (started NumPy 1.20).
+    # Older NumPy versions sometimes cared whether a protocol array was
+    # also sequence-like. This shouldn't matter, but we keep it for now
+    # for __array__ (and not for the other protocols).
def __len__(self):
raise TypeError
@@ -62,7 +62,7 @@ def arraylikes():
yield param(memoryview, id="memoryview")
# Array-interface
- class ArrayInterface(_SequenceLike):
+ class ArrayInterface:
def __init__(self, a):
self.a = a # need to hold on to keep interface valid
self.__array_interface__ = a.__array_interface__
@@ -70,7 +70,7 @@ def arraylikes():
yield param(ArrayInterface, id="__array_interface__")
# Array-Struct
- class ArrayStruct(_SequenceLike):
+ class ArrayStruct:
def __init__(self, a):
self.a = a # need to hold on to keep struct valid
self.__array_struct__ = a.__array_struct__
@@ -654,6 +654,14 @@ class TestArrayLikes:
res = np.array([obj], dtype=object)
assert res[0] is obj
+ @pytest.mark.parametrize("arraylike", arraylikes())
+ @pytest.mark.parametrize("arr", [np.array(0.), np.arange(4)])
+ def test_object_assignment_special_case(self, arraylike, arr):
+ obj = arraylike(arr)
+ empty = np.arange(1, dtype=object)
+ empty[:] = [obj]
+ assert empty[0] is obj
+
def test_0d_generic_special_case(self):
class ArraySubclass(np.ndarray):
def __float__(self):
diff --git a/numpy/core/tests/test_arrayprint.py b/numpy/core/tests/test_arrayprint.py
index 372cb8d41..b92c8ae8c 100644
--- a/numpy/core/tests/test_arrayprint.py
+++ b/numpy/core/tests/test_arrayprint.py
@@ -353,6 +353,33 @@ class TestArray2String:
' [ 501, 502, 503, ..., 999, 1000, 1001]])'
assert_equal(repr(A), reprA)
+ def test_summarize_structure(self):
+ A = (np.arange(2002, dtype="<i8").reshape(2, 1001)
+ .view([('i', "<i8", (1001,))]))
+ strA = ("[[([ 0, 1, 2, ..., 998, 999, 1000],)]\n"
+ " [([1001, 1002, 1003, ..., 1999, 2000, 2001],)]]")
+ assert_equal(str(A), strA)
+
+ reprA = ("array([[([ 0, 1, 2, ..., 998, 999, 1000],)],\n"
+ " [([1001, 1002, 1003, ..., 1999, 2000, 2001],)]],\n"
+ " dtype=[('i', '<i8', (1001,))])")
+ assert_equal(repr(A), reprA)
+
+ B = np.ones(2002, dtype=">i8").view([('i', ">i8", (2, 1001))])
+ strB = "[([[1, 1, 1, ..., 1, 1, 1], [1, 1, 1, ..., 1, 1, 1]],)]"
+ assert_equal(str(B), strB)
+
+ reprB = (
+ "array([([[1, 1, 1, ..., 1, 1, 1], [1, 1, 1, ..., 1, 1, 1]],)],\n"
+ " dtype=[('i', '>i8', (2, 1001))])"
+ )
+ assert_equal(repr(B), reprB)
+
+ C = (np.arange(22, dtype="<i8").reshape(2, 11)
+ .view([('i1', "<i8"), ('i10', "<i8", (10,))]))
+ strC = "[[( 0, [ 1, ..., 10])]\n [(11, [12, ..., 21])]]"
+ assert_equal(np.array2string(C, threshold=1, edgeitems=1), strC)
+
def test_linewidth(self):
a = np.full(6, 1)
@@ -817,7 +844,7 @@ class TestPrintOptions:
)
def test_dtype_endianness_repr(self, native):
'''
- there was an issue where
+ there was an issue where
repr(array([0], dtype='<u2')) and repr(array([0], dtype='>u2'))
both returned the same thing:
array([0], dtype=uint16)
@@ -963,6 +990,16 @@ class TestPrintOptions:
[[ 0.]]]])""")
)
+ def test_edgeitems_structured(self):
+ np.set_printoptions(edgeitems=1, threshold=1)
+ A = np.arange(5*2*3, dtype="<i8").view([('i', "<i8", (5, 2, 3))])
+ reprA = (
+ "array([([[[ 0, ..., 2], [ 3, ..., 5]], ..., "
+ "[[24, ..., 26], [27, ..., 29]]],)],\n"
+ " dtype=[('i', '<i8', (5, 2, 3))])"
+ )
+ assert_equal(repr(A), reprA)
+
def test_bad_args(self):
assert_raises(ValueError, np.set_printoptions, threshold=float('nan'))
assert_raises(TypeError, np.set_printoptions, threshold='1')
diff --git a/numpy/core/tests/test_deprecations.py b/numpy/core/tests/test_deprecations.py
index b92a20a12..0449b3257 100644
--- a/numpy/core/tests/test_deprecations.py
+++ b/numpy/core/tests/test_deprecations.py
@@ -580,92 +580,6 @@ class TestDeprecateSubarrayDTypeDuringArrayCoercion(_DeprecationTestCase):
self.assert_deprecated(check)
-class TestFutureWarningArrayLikeNotIterable(_DeprecationTestCase):
- # Deprecated 2020-12-09, NumPy 1.20
- warning_cls = FutureWarning
- message = "The input object of type.*but not a sequence"
-
- @pytest.mark.parametrize("protocol",
- ["__array__", "__array_interface__", "__array_struct__"])
- def test_deprecated(self, protocol):
- """Test that these objects give a warning since they are not 0-D,
- not coerced at the top level `np.array(obj)`, but nested, and do
- *not* define the sequence protocol.
-
- NOTE: Tests for the versions including __len__ and __getitem__ exist
- in `test_array_coercion.py` and they can be modified or amended
- when this deprecation expired.
- """
- blueprint = np.arange(10)
- MyArr = type("MyArr", (), {protocol: getattr(blueprint, protocol)})
- self.assert_deprecated(lambda: np.array([MyArr()], dtype=object))
-
- @pytest.mark.parametrize("protocol",
- ["__array__", "__array_interface__", "__array_struct__"])
- def test_0d_not_deprecated(self, protocol):
- # 0-D always worked (albeit it would use __float__ or similar for the
- # conversion, which may not happen anymore)
- blueprint = np.array(1.)
- MyArr = type("MyArr", (), {protocol: getattr(blueprint, protocol)})
- myarr = MyArr()
-
- self.assert_not_deprecated(lambda: np.array([myarr], dtype=object))
- res = np.array([myarr], dtype=object)
- expected = np.empty(1, dtype=object)
- expected[0] = myarr
- assert_array_equal(res, expected)
-
- @pytest.mark.parametrize("protocol",
- ["__array__", "__array_interface__", "__array_struct__"])
- def test_unnested_not_deprecated(self, protocol):
- blueprint = np.arange(10)
- MyArr = type("MyArr", (), {protocol: getattr(blueprint, protocol)})
- myarr = MyArr()
-
- self.assert_not_deprecated(lambda: np.array(myarr))
- res = np.array(myarr)
- assert_array_equal(res, blueprint)
-
- @pytest.mark.parametrize("protocol",
- ["__array__", "__array_interface__", "__array_struct__"])
- def test_strange_dtype_handling(self, protocol):
- """The old code would actually use the dtype from the array, but
- then end up not using the array (for dimension discovery)
- """
- blueprint = np.arange(10).astype("f4")
- MyArr = type("MyArr", (), {protocol: getattr(blueprint, protocol),
- "__float__": lambda _: 0.5})
- myarr = MyArr()
-
- # Make sure we warn (and capture the FutureWarning)
- with pytest.warns(FutureWarning, match=self.message):
- res = np.array([[myarr]])
-
- assert res.shape == (1, 1)
- assert res.dtype == "f4"
- assert res[0, 0] == 0.5
-
- @pytest.mark.parametrize("protocol",
- ["__array__", "__array_interface__", "__array_struct__"])
- def test_assignment_not_deprecated(self, protocol):
- # If the result is dtype=object we do not unpack a nested array or
- # array-like, if it is nested at exactly the right depth.
- # NOTE: We actually do still call __array__, etc. but ignore the result
- # in the end. For `dtype=object` we could optimize that away.
- blueprint = np.arange(10).astype("f4")
- MyArr = type("MyArr", (), {protocol: getattr(blueprint, protocol),
- "__float__": lambda _: 0.5})
- myarr = MyArr()
-
- res = np.empty(3, dtype=object)
- def set():
- res[:] = [myarr, myarr, myarr]
- self.assert_not_deprecated(set)
- assert res[0] is myarr
- assert res[1] is myarr
- assert res[2] is myarr
-
-
class TestDeprecatedUnpickleObjectScalar(_DeprecationTestCase):
# Deprecated 2020-11-24, NumPy 1.20
"""
@@ -822,6 +736,18 @@ class TestLoadtxtParseIntsViaFloat(_DeprecationTestCase):
assert isinstance(e.__cause__, DeprecationWarning)
+class TestScalarConversion(_DeprecationTestCase):
+ # 2023-01-02, 1.25.0
+ def test_float_conversion(self):
+ self.assert_deprecated(float, args=(np.array([3.14]),))
+
+ def test_behaviour(self):
+ b = np.array([[3.14]])
+ c = np.zeros(5)
+ with pytest.warns(DeprecationWarning):
+ c[0] = b
+
+
class TestPyIntConversion(_DeprecationTestCase):
message = r".*stop allowing conversion of out-of-bound.*"
diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py
index 4a064827d..984047c87 100644
--- a/numpy/core/tests/test_multiarray.py
+++ b/numpy/core/tests/test_multiarray.py
@@ -3644,9 +3644,13 @@ class TestMethods:
msg = 'dtype: {0}'.format(dt)
ap = complex(a)
assert_equal(ap, a, msg)
- bp = complex(b)
+
+ with assert_warns(DeprecationWarning):
+ bp = complex(b)
assert_equal(bp, b, msg)
- cp = complex(c)
+
+ with assert_warns(DeprecationWarning):
+ cp = complex(c)
assert_equal(cp, c, msg)
def test__complex__should_not_work(self):
@@ -3669,7 +3673,8 @@ class TestMethods:
assert_raises(TypeError, complex, d)
e = np.array(['1+1j'], 'U')
- assert_raises(TypeError, complex, e)
+ with assert_warns(DeprecationWarning):
+ assert_raises(TypeError, complex, e)
class TestCequenceMethods:
def test_array_contains(self):
@@ -8756,8 +8761,10 @@ class TestConversion:
int_funcs = (int, lambda x: x.__int__())
for int_func in int_funcs:
assert_equal(int_func(np.array(0)), 0)
- assert_equal(int_func(np.array([1])), 1)
- assert_equal(int_func(np.array([[42]])), 42)
+ with assert_warns(DeprecationWarning):
+ assert_equal(int_func(np.array([1])), 1)
+ with assert_warns(DeprecationWarning):
+ assert_equal(int_func(np.array([[42]])), 42)
assert_raises(TypeError, int_func, np.array([1, 2]))
# gh-9972
@@ -8772,7 +8779,8 @@ class TestConversion:
def __trunc__(self):
return 3
assert_equal(3, int_func(np.array(HasTrunc())))
- assert_equal(3, int_func(np.array([HasTrunc()])))
+ with assert_warns(DeprecationWarning):
+ assert_equal(3, int_func(np.array([HasTrunc()])))
else:
pass
@@ -8781,8 +8789,9 @@ class TestConversion:
raise NotImplementedError
assert_raises(NotImplementedError,
int_func, np.array(NotConvertible()))
- assert_raises(NotImplementedError,
- int_func, np.array([NotConvertible()]))
+ with assert_warns(DeprecationWarning):
+ assert_raises(NotImplementedError,
+ int_func, np.array([NotConvertible()]))
class TestWhere:
diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py
index 0ed64d72a..438b5600a 100644
--- a/numpy/core/tests/test_umath.py
+++ b/numpy/core/tests/test_umath.py
@@ -1426,23 +1426,36 @@ class TestSpecialFloats:
np.log(a)
@pytest.mark.skipif(IS_WASM, reason="fp errors don't work in wasm")
- def test_sincos_values(self):
+ @pytest.mark.parametrize('dtype', ['e', 'f', 'd', 'g'])
+ def test_sincos_values(self, dtype):
with np.errstate(all='ignore'):
x = [np.nan, np.nan, np.nan, np.nan]
y = [np.nan, -np.nan, np.inf, -np.inf]
- for dt in ['e', 'f', 'd', 'g']:
- xf = np.array(x, dtype=dt)
- yf = np.array(y, dtype=dt)
- assert_equal(np.sin(yf), xf)
- assert_equal(np.cos(yf), xf)
-
+ xf = np.array(x, dtype=dtype)
+ yf = np.array(y, dtype=dtype)
+ assert_equal(np.sin(yf), xf)
+ assert_equal(np.cos(yf), xf)
+ @pytest.mark.skipif(IS_WASM, reason="fp errors don't work in wasm")
+ @pytest.mark.parametrize('callable', [np.sin, np.cos])
+ @pytest.mark.parametrize('dtype', ['e', 'f', 'd'])
+ @pytest.mark.parametrize('value', [np.inf, -np.inf])
+ def test_sincos_errors(self, callable, dtype, value):
with np.errstate(invalid='raise'):
- for callable in [np.sin, np.cos]:
- for value in [np.inf, -np.inf]:
- for dt in ['e', 'f', 'd']:
- assert_raises(FloatingPointError, callable,
- np.array([value], dtype=dt))
+ assert_raises(FloatingPointError, callable,
+ np.array([value], dtype=dtype))
+
+ @pytest.mark.parametrize('callable', [np.sin, np.cos])
+ @pytest.mark.parametrize('dtype', ['f', 'd'])
+ @pytest.mark.parametrize('stride', [-1, 1, 2, 4, 5])
+ def test_sincos_overlaps(self, callable, dtype, stride):
+ N = 100
+ M = N // abs(stride)
+ rng = np.random.default_rng(42)
+ x = rng.standard_normal(N, dtype)
+ y = callable(x[::stride])
+ callable(x[::stride], out=x[:M])
+ assert_equal(x[:M], y)
@pytest.mark.parametrize('dt', ['e', 'f', 'd', 'g'])
def test_sqrt_values(self, dt):
diff --git a/numpy/distutils/command/build_ext.py b/numpy/distutils/command/build_ext.py
index d24162a42..871aa1099 100644
--- a/numpy/distutils/command/build_ext.py
+++ b/numpy/distutils/command/build_ext.py
@@ -259,7 +259,7 @@ class build_ext (old_build_ext):
log.warn('resetting extension %r language from %r to %r.' %
(ext.name, l, ext_language))
- ext.language = ext_language
+ ext.language = ext_language
# global language
all_languages.update(ext_languages)
diff --git a/numpy/f2py/crackfortran.py b/numpy/f2py/crackfortran.py
index c085baf12..c0a79bcae 100755
--- a/numpy/f2py/crackfortran.py
+++ b/numpy/f2py/crackfortran.py
@@ -1740,6 +1740,28 @@ def updatevars(typespec, selector, attrspec, entitydecl):
d1[k] = unmarkouterparen(d1[k])
else:
del d1[k]
+
+ if 'len' in d1:
+ if typespec in ['complex', 'integer', 'logical', 'real']:
+ if ('kindselector' not in edecl) or (not edecl['kindselector']):
+ edecl['kindselector'] = {}
+ edecl['kindselector']['*'] = d1['len']
+ del d1['len']
+ elif typespec == 'character':
+ if ('charselector' not in edecl) or (not edecl['charselector']):
+ edecl['charselector'] = {}
+ if 'len' in edecl['charselector']:
+ del edecl['charselector']['len']
+ edecl['charselector']['*'] = d1['len']
+ del d1['len']
+
+ if 'init' in d1:
+ if '=' in edecl and (not edecl['='] == d1['init']):
+ outmess('updatevars: attempt to change the init expression of "%s" ("%s") to "%s". Ignoring.\n' % (
+ ename, edecl['='], d1['init']))
+ else:
+ edecl['='] = d1['init']
+
if 'len' in d1 and 'array' in d1:
if d1['len'] == '':
d1['len'] = d1['array']
@@ -1749,6 +1771,7 @@ def updatevars(typespec, selector, attrspec, entitydecl):
del d1['len']
errmess('updatevars: "%s %s" is mapped to "%s %s(%s)"\n' % (
typespec, e, typespec, ename, d1['array']))
+
if 'array' in d1:
dm = 'dimension(%s)' % d1['array']
if 'attrspec' not in edecl or (not edecl['attrspec']):
@@ -1762,23 +1785,6 @@ def updatevars(typespec, selector, attrspec, entitydecl):
% (ename, dm1, dm))
break
- if 'len' in d1:
- if typespec in ['complex', 'integer', 'logical', 'real']:
- if ('kindselector' not in edecl) or (not edecl['kindselector']):
- edecl['kindselector'] = {}
- edecl['kindselector']['*'] = d1['len']
- elif typespec == 'character':
- if ('charselector' not in edecl) or (not edecl['charselector']):
- edecl['charselector'] = {}
- if 'len' in edecl['charselector']:
- del edecl['charselector']['len']
- edecl['charselector']['*'] = d1['len']
- if 'init' in d1:
- if '=' in edecl and (not edecl['='] == d1['init']):
- outmess('updatevars: attempt to change the init expression of "%s" ("%s") to "%s". Ignoring.\n' % (
- ename, edecl['='], d1['init']))
- else:
- edecl['='] = d1['init']
else:
outmess('updatevars: could not crack entity declaration "%s". Ignoring.\n' % (
ename + m.group('after')))
diff --git a/numpy/f2py/tests/src/string/scalar_string.f90 b/numpy/f2py/tests/src/string/scalar_string.f90
index d847668bb..f8f076172 100644
--- a/numpy/f2py/tests/src/string/scalar_string.f90
+++ b/numpy/f2py/tests/src/string/scalar_string.f90
@@ -1,7 +1,9 @@
MODULE string_test
character(len=8) :: string
+ character string77 * 8
character(len=12), dimension(5,7) :: strarr
+ character strarr77(5,7) * 12
END MODULE string_test
diff --git a/numpy/f2py/tests/test_character.py b/numpy/f2py/tests/test_character.py
index 5f9805158..0bb0f4290 100644
--- a/numpy/f2py/tests/test_character.py
+++ b/numpy/f2py/tests/test_character.py
@@ -576,16 +576,18 @@ class TestStringScalarArr(util.F2PyTest):
@pytest.mark.slow
def test_char(self):
- out = self.module.string_test.string
- expected = ()
- assert out.shape == expected
- expected = '|S8'
- assert out.dtype == expected
+ for out in (self.module.string_test.string,
+ self.module.string_test.string77):
+ expected = ()
+ assert out.shape == expected
+ expected = '|S8'
+ assert out.dtype == expected
@pytest.mark.slow
def test_char_arr(self):
- out = self.module.string_test.strarr
- expected = (5,7)
- assert out.shape == expected
- expected = '|S12'
- assert out.dtype == expected
+ for out in (self.module.string_test.strarr,
+ self.module.string_test.strarr77):
+ expected = (5,7)
+ assert out.shape == expected
+ expected = '|S12'
+ assert out.dtype == expected
diff --git a/numpy/f2py/tests/test_crackfortran.py b/numpy/f2py/tests/test_crackfortran.py
index 886fc596e..dc0f7e27a 100644
--- a/numpy/f2py/tests/test_crackfortran.py
+++ b/numpy/f2py/tests/test_crackfortran.py
@@ -290,39 +290,29 @@ class TestNameArgsPatternBacktracking:
def test_nameargspattern_backtracking(self, adversary):
'''address ReDOS vulnerability:
https://github.com/numpy/numpy/issues/23338'''
- last_median = 0.
- trials_per_count = 128
+ trials_per_batch = 12
+ batches_per_regex = 4
start_reps, end_reps = 15, 25
- times_median_doubled = 0
for ii in range(start_reps, end_reps):
repeated_adversary = adversary * ii
- times = []
- for _ in range(trials_per_count):
- t0 = time.perf_counter()
- mtch = nameargspattern.search(repeated_adversary)
- times.append(time.perf_counter() - t0)
- # We should use a measure of time that's resilient to outliers.
- # Times jump around a lot due to the CPU's scheduler.
- median = np.median(times)
+            # Test times in small batches: this gives us more chances to
+            # catch a bad regex while still failing reasonably quickly
+            # when one shows up.
+ for _ in range(batches_per_regex):
+ times = []
+ for _ in range(trials_per_batch):
+ t0 = time.perf_counter()
+ mtch = nameargspattern.search(repeated_adversary)
+ times.append(time.perf_counter() - t0)
+ # our pattern should be much faster than 0.2s per search
+ # it's unlikely that a bad regex will pass even on fast CPUs
+ assert np.median(times) < 0.2
assert not mtch
# if the adversary is capped with @)@, it becomes acceptable
# according to the old version of the regex.
# that should still be true.
good_version_of_adversary = repeated_adversary + '@)@'
assert nameargspattern.search(good_version_of_adversary)
- if ii > start_reps:
- # the hallmark of exponentially catastrophic backtracking
- # is that runtime doubles for every added instance of
- # the problematic pattern.
- times_median_doubled += median > 2 * last_median
- # also try to rule out non-exponential but still bad cases
- # arbitrarily, we should set a hard limit of 10ms as too slow
- assert median < trials_per_count * 0.01
- last_median = median
- # we accept that maybe the median might double once, due to
- # the CPU scheduler acting weird or whatever. More than that
- # seems suspicious.
- assert times_median_doubled < 2
class TestFunctionReturn(util.F2PyTest):
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index f8f2ab7a2..22fb0eb7d 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -262,6 +262,9 @@ class NpzFile(Mapping):
else:
raise KeyError("%s is not a file in the archive" % key)
+ def __contains__(self, key):
+ return (key in self._files or key in self.files)
+
def __repr__(self):
# Get filename or default to `object`
if isinstance(self.fid, str):
diff --git a/numpy/lib/npyio.pyi b/numpy/lib/npyio.pyi
index 9dd3d6809..cc81e82b7 100644
--- a/numpy/lib/npyio.pyi
+++ b/numpy/lib/npyio.pyi
@@ -98,6 +98,7 @@ class NpzFile(Mapping[str, NDArray[Any]]):
def __iter__(self) -> Iterator[str]: ...
def __len__(self) -> int: ...
def __getitem__(self, key: str) -> NDArray[Any]: ...
+ def __contains__(self, key: str) -> bool: ...
def __repr__(self) -> str: ...
# NOTE: Returns a `NpzFile` if file is a zip file;
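
`NpzFile.__contains__` (plus the matching stub) lets `key in np.load(...)` answer membership from the stored name lists instead of going through `__getitem__` and decompressing the member. A usage sketch (the temporary file is only for illustration):

import os
import tempfile
import numpy as np

with tempfile.TemporaryDirectory() as tmp:
    path = os.path.join(tmp, "data.npz")
    np.savez(path, a=np.arange(3), b=np.zeros(2))
    with np.load(path) as npz:
        assert "a" in npz              # plain key
        assert "a.npy" in npz          # the name as stored in the zip also matches
        assert "missing" not in npz
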
diff --git a/numpy/ma/core.py b/numpy/ma/core.py
index d2bd3d47e..0cf748dfd 100644
--- a/numpy/ma/core.py
+++ b/numpy/ma/core.py
@@ -3340,6 +3340,10 @@ class MaskedArray(ndarray):
# Note: Don't try to check for m.any(), that'll take too long
return dout
+    # setitem may put NaNs into integer arrays or occasionally overflow a
+    # float. But this may happen for masked values, so suppress the
+    # otherwise correct warnings (as is typical in masked calculations).
+ @np.errstate(over='ignore', invalid='ignore')
def __setitem__(self, indx, value):
"""
x.__setitem__(i, y) <==> x[i]=y
@@ -4631,6 +4635,7 @@ class MaskedArray(ndarray):
otherwise. 'K' means to read the elements in the order they occur
in memory, except for reversing the data when strides are negative.
By default, 'C' index order is used.
+ (Masked arrays currently use 'A' on the data when 'K' is passed.)
Returns
-------
@@ -4657,6 +4662,13 @@ class MaskedArray(ndarray):
fill_value=999999)
"""
+        # The order of _data and _mask could be different (it shouldn't be
+        # normally). Passing order `K` or `A` would be incorrect.
+        # So we ignore the mask memory order.
+        # TODO: We don't actually support K, so use A instead. We could
+        # try to guess this correctly by sorting strides or deprecate.
+        if order in "kKaA":
+            order = "F" if self._data.flags.fnc else "C"
r = ndarray.ravel(self._data, order=order).view(type(self))
r._update_from(self)
if self._mask is not nomask:
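
Because `_data` and `_mask` may have different memory layouts, letting each resolve 'K'/'A' on its own could ravel them in different orders and misalign masked entries. A small reproduction of the scenario the test added below exercises (illustrative only):

import numpy as np

arr = np.ones((5, 10), order="C")
arr[0, :] = 0
mask = np.ones((10, 5), dtype=bool, order="C").T   # Fortran-layout mask, C-layout data
mask[0, :] = False
x = np.ma.array(arr, mask=mask)
assert x._data.flags.fnc != x._mask.flags.fnc
raveled = x.ravel("K")          # must ravel data and mask in one consistent order
assert (raveled.filled(0) == 0).all()
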
diff --git a/numpy/ma/tests/test_core.py b/numpy/ma/tests/test_core.py
index 5db01b74a..9a4b74997 100644
--- a/numpy/ma/tests/test_core.py
+++ b/numpy/ma/tests/test_core.py
@@ -377,6 +377,24 @@ class TestMaskedArray:
assert_equal(s1, s2)
assert_(x1[1:1].shape == (0,))
+ def test_setitem_no_warning(self):
+ # Setitem shouldn't warn, because the assignment might be masked
+ # and warning for a masked assignment is weird (see gh-23000)
+        # (When the value is masked no warning makes sense; otherwise a
+        # warning would be acceptable, but it is not given currently.)
+ x = np.ma.arange(60).reshape((6, 10))
+ index = (slice(1, 5, 2), [7, 5])
+ value = np.ma.masked_all((2, 2))
+ value._data[...] = np.inf # not a valid integer...
+ x[index] = value
+ # The masked scalar is special cased, but test anyway (it's NaN):
+ x[...] = np.ma.masked
+ # Finally, a large value that cannot be cast to the float32 `x`
+ x = np.ma.arange(3., dtype=np.float32)
+ value = np.ma.array([2e234, 1, 1], mask=[True, False, False])
+ x[...] = value
+ x[[0, 1, 2]] = value
+
@suppress_copy_mask_on_assignment
def test_copy(self):
# Tests of some subtle points of copying and sizing.
@@ -3411,6 +3429,22 @@ class TestMaskedArrayMethods:
assert_equal(a.ravel(order='C'), [1, 2, 3, 4])
assert_equal(a.ravel(order='F'), [1, 3, 2, 4])
+ @pytest.mark.parametrize("order", "AKCF")
+ @pytest.mark.parametrize("data_order", "CF")
+ def test_ravel_order(self, order, data_order):
+ # Ravelling must ravel mask and data in the same order always to avoid
+ # misaligning the two in the ravel result.
+ arr = np.ones((5, 10), order=data_order)
+ arr[0, :] = 0
+ mask = np.ones((10, 5), dtype=bool, order=data_order).T
+ mask[0, :] = False
+ x = array(arr, mask=mask)
+ assert x._data.flags.fnc != x._mask.flags.fnc
+ assert (x.filled(0) == 0).all()
+ raveled = x.ravel(order)
+ assert (raveled.filled(0) == 0).all()
+
+
def test_reshape(self):
# Tests reshape
x = arange(4)