author    | Sebastian Berg <sebastianb@nvidia.com>          | 2023-04-26 10:51:21 +0200
committer | GitHub <noreply@github.com>                     | 2023-04-26 10:51:21 +0200
commit    | b4f0e4122e83115d99de849fcfc4fd1fc8b8f65d (patch)
tree      | 0695fcc40428a235161418f2bd288f9a90e3d640 /numpy
parent    | c7ff4118a345c966e2a0fc688e054e3fd9191e99 (diff)
parent    | 5665fdcdf21dec575d204852b208eaecf1a1638d (diff)
download  | numpy-b4f0e4122e83115d99de849fcfc4fd1fc8b8f65d.tar.gz
Merge branch 'main' into f2pyFuncFix_23598
Diffstat (limited to 'numpy')
49 files changed, 1734 insertions, 986 deletions
diff --git a/numpy/core/_add_newdocs_scalars.py b/numpy/core/_add_newdocs_scalars.py
index 86fe5583c..58661ed09 100644
--- a/numpy/core/_add_newdocs_scalars.py
+++ b/numpy/core/_add_newdocs_scalars.py
@@ -204,7 +204,11 @@ add_newdoc_for_scalar_type('str_', ['unicode_'],
     r"""
     A unicode string.
 
-    When used in arrays, this type strips trailing null codepoints.
+    This type strips trailing null codepoints.
+
+    >>> s = np.str_("abc\x00")
+    >>> s
+    'abc'
 
     Unlike the builtin `str`, this supports the
     :ref:`python:bufferobjects`, exposing its contents as UCS4:
diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py
index dcfb6e6a8..62cd52707 100644
--- a/numpy/core/arrayprint.py
+++ b/numpy/core/arrayprint.py
@@ -1096,7 +1096,7 @@ def format_float_scientific(x, precision=None, unique=True, trim='k',
         identify the value may be printed and rounded unbiased.
 
         -- versionadded:: 1.21.0
-    
+
     Returns
     -------
     rep : string
@@ -1181,7 +1181,7 @@ def format_float_positional(x, precision=None, unique=True,
         Minimum number of digits to print. Only has an effect if `unique=True`
         in which case additional digits past those necessary to uniquely
         identify the value may be printed, rounding the last additional digit.
-    
+
         -- versionadded:: 1.21.0
 
     Returns
@@ -1339,13 +1339,29 @@ class TimedeltaFormat(_TimelikeFormat):
 
 class SubArrayFormat:
-    def __init__(self, format_function):
+    def __init__(self, format_function, **options):
         self.format_function = format_function
+        self.threshold = options['threshold']
+        self.edge_items = options['edgeitems']
+
+    def __call__(self, a):
+        self.summary_insert = "..." if a.size > self.threshold else ""
+        return self.format_array(a)
+
+    def format_array(self, a):
+        if np.ndim(a) == 0:
+            return self.format_function(a)
+
+        if self.summary_insert and a.shape[0] > 2*self.edge_items:
+            formatted = (
+                [self.format_array(a_) for a_ in a[:self.edge_items]]
+                + [self.summary_insert]
+                + [self.format_array(a_) for a_ in a[-self.edge_items:]]
+            )
+        else:
+            formatted = [self.format_array(a_) for a_ in a]
 
-    def __call__(self, arr):
-        if arr.ndim <= 1:
-            return "[" + ", ".join(self.format_function(a) for a in arr) + "]"
-        return "[" + ", ".join(self.__call__(a) for a in arr) + "]"
+        return "[" + ", ".join(formatted) + "]"
 
 
 class StructuredVoidFormat:
@@ -1369,7 +1385,7 @@ class StructuredVoidFormat:
         for field_name in data.dtype.names:
             format_function = _get_format_function(data[field_name], **options)
             if data.dtype[field_name].shape != ():
-                format_function = SubArrayFormat(format_function)
+                format_function = SubArrayFormat(format_function, **options)
             format_functions.append(format_function)
         return cls(format_functions)
 
@@ -1428,7 +1444,7 @@ def dtype_is_implied(dtype):
     # not just void types can be structured, and names are not part of the repr
     if dtype.names is not None:
         return False
-    
+
     # should care about endianness *unless size is 1* (e.g., int8, bool)
     if not dtype.isnative:
         return False
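The SubArrayFormat rewrite above makes subarray fields of structured arrays honor the `threshold` and `edgeitems` print options instead of always printing every element. A quick user-level sketch of the intended effect (not part of the patch; assumes the default print options, threshold=1000 and edgeitems=3):

    import numpy as np

    # A structured dtype with a large subarray field: with this change the
    # 1001-element field is summarized instead of printed in full.
    dt = np.dtype([("big", np.int64, (1001,))])
    a = np.zeros(1, dtype=dt)
    print(a)   # expected: [([0, 0, 0, ..., 0, 0, 0],)]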
diff --git a/numpy/core/code_generators/generate_umath.py b/numpy/core/code_generators/generate_umath.py
index 1fe2e43c0..d0306bb72 100644
--- a/numpy/core/code_generators/generate_umath.py
+++ b/numpy/core/code_generators/generate_umath.py
@@ -799,7 +799,7 @@ defdict = {
           None,
           TD('e', dispatch=[('loops_umath_fp', 'e')]),
           TD('f', dispatch=[('loops_trigonometric', 'f')]),
-          TD('d', dispatch=[('loops_umath_fp', 'd')]),
+          TD('d', dispatch=[('loops_trigonometric', 'd')]),
           TD('g' + cmplx, f='cos'),
           TD(P, f='cos'),
           ),
@@ -809,7 +809,7 @@ defdict = {
           None,
           TD('e', dispatch=[('loops_umath_fp', 'e')]),
           TD('f', dispatch=[('loops_trigonometric', 'f')]),
-          TD('d', dispatch=[('loops_umath_fp', 'd')]),
+          TD('d', dispatch=[('loops_trigonometric', 'd')]),
           TD('g' + cmplx, f='sin'),
           TD(P, f='sin'),
           ),
diff --git a/numpy/core/code_generators/ufunc_docstrings.py b/numpy/core/code_generators/ufunc_docstrings.py
index 05f947c15..1695b4d27 100644
--- a/numpy/core/code_generators/ufunc_docstrings.py
+++ b/numpy/core/code_generators/ufunc_docstrings.py
@@ -3833,7 +3833,7 @@ add_newdoc('numpy.core.umath', 'sqrt',
     --------
     emath.sqrt
         A version which returns complex numbers when given negative reals.
-        Note: 0.0 and -0.0 are handled differently for complex inputs.
+        Note that 0.0 and -0.0 are handled differently for complex inputs.
 
     Notes
     -----
diff --git a/numpy/core/function_base.py b/numpy/core/function_base.py
index c82b495b1..00e4e6b0e 100644
--- a/numpy/core/function_base.py
+++ b/numpy/core/function_base.py
@@ -168,7 +168,7 @@ def linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None,
         y += start
 
     if endpoint and num > 1:
-        y[-1] = stop
+        y[-1, ...] = stop
 
     if axis != 0:
         y = _nx.moveaxis(y, 0, axis)
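The `y[-1, ...] = stop` change in linspace assigns the final sample as a full slice rather than a single element, which matters when `start`/`stop` are arrays broadcast along the sampling axis. An illustration (not part of the patch):

    import numpy as np

    # Array-valued endpoints: samples stack along a new first axis, and the
    # last row must equal `stop` exactly when endpoint=True.
    y = np.linspace(np.array([0.0, 10.0]), np.array([1.0, 20.0]), num=5)
    print(y[-1])   # [ 1. 20.]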
diff --git a/numpy/core/include/numpy/_dtype_api.h b/numpy/core/include/numpy/_dtype_api.h
index 2f801eace..c397699c7 100644
--- a/numpy/core/include/numpy/_dtype_api.h
+++ b/numpy/core/include/numpy/_dtype_api.h
@@ -5,14 +5,14 @@
 #ifndef NUMPY_CORE_INCLUDE_NUMPY___DTYPE_API_H_
 #define NUMPY_CORE_INCLUDE_NUMPY___DTYPE_API_H_
 
-#define __EXPERIMENTAL_DTYPE_API_VERSION 9
+#define __EXPERIMENTAL_DTYPE_API_VERSION 10
 
 struct PyArrayMethodObject_tag;
 
 /*
  * Largely opaque struct for DType classes (i.e. metaclass instances).
  * The internal definition is currently in `ndarraytypes.h` (export is a bit
- * more complex because `PyArray_Descr` is a DTypeMeta internall but not
+ * more complex because `PyArray_Descr` is a DTypeMeta internally but not
  * externally).
  */
 #if !(defined(NPY_INTERNAL_BUILD) && NPY_INTERNAL_BUILD)
@@ -199,9 +199,8 @@ typedef int (get_reduction_initial_function)(
         PyArrayMethod_Context *context, npy_bool reduction_is_empty,
         char *initial);
 
-
 /*
- * The following functions are only used be the wrapping array method defined
+ * The following functions are only used by the wrapping array method defined
  * in umath/wrapping_array_method.c
  */
 
@@ -257,14 +256,13 @@ typedef int translate_loop_descrs_func(int nin, int nout,
  * strided-loop function. This is designed for loops that need to visit every
  * element of a single array.
  *
- * Currently this is only used for array clearing, via the
- * NPY_DT_get_clear_loop, api hook, particularly for arrays storing embedded
- * references to python objects or heap-allocated data. If you define a dtype
- * that uses embedded references, the NPY_ITEM_REFCOUNT flag must be set on the
- * dtype instance.
+ * Currently this is used for array clearing, via the NPY_DT_get_clear_loop
+ * API hook, and zero-filling, via the NPY_DT_get_fill_zero_loop API hook.
+ * These are most useful for handling arrays storing embedded references to
+ * python objects or heap-allocated data.
  *
  * The `void *traverse_context` is passed in because we may need to pass in
- * Intepreter state or similar in the futurem, but we don't want to pass in
+ * Interpreter state or similar in the future, but we don't want to pass in
  * a full context (with pointers to dtypes, method, caller which all make
  * no sense for a traverse function).
  */
@@ -280,9 +278,10 @@ typedef int (traverse_loop_function)(
 /*
  * Simplified get_loop function specific to dtype traversal
  *
- * Currently this is only used for clearing arrays. It should set the flags
- * needed for the traversal loop and set out_loop to the loop function, which
- * must be a valid traverse_loop_function pointer.
+ * It should set the flags needed for the traversal loop and set out_loop to
+ * the loop function, which must be a valid traverse_loop_function pointer.
+ * Currently this is used for zero-filling and clearing arrays storing
+ * embedded references.
  *
  */
 typedef int (get_traverse_loop_function)(
@@ -319,6 +318,7 @@ typedef int (get_traverse_loop_function)(
 #define NPY_DT_setitem 7
 #define NPY_DT_getitem 8
 #define NPY_DT_get_clear_loop 9
+#define NPY_DT_get_fill_zero_loop 10
 
 // These PyArray_ArrFunc slots will be deprecated and replaced eventually
 // getitem and setitem can be defined as a performance optimization;
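The new NPY_DT_get_fill_zero_loop slot exists for dtypes where an all-zero buffer is not a valid "zero" value. The long-standing object-dtype behavior it generalizes is visible from Python (illustration only, not part of the patch):

    import numpy as np

    # For object dtype, calloc'd NULL pointers would be invalid, so
    # zero-filling must write real Python `int` 0 objects into the buffer.
    a = np.zeros(3, dtype=object)
    print(a)           # [0 0 0]
    print(type(a[0]))  # <class 'int'>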
diff --git a/numpy/core/include/numpy/npy_common.h b/numpy/core/include/numpy/npy_common.h
index 3b31bcf2d..728cedbf1 100644
--- a/numpy/core/include/numpy/npy_common.h
+++ b/numpy/core/include/numpy/npy_common.h
@@ -105,6 +105,14 @@
     #define NPY_FINLINE static
 #endif
 
+#if defined(_MSC_VER)
+    #define NPY_NOINLINE static __declspec(noinline)
+#elif defined(__GNUC__) || defined(__clang__)
+    #define NPY_NOINLINE static __attribute__((noinline))
+#else
+    #define NPY_NOINLINE static
+#endif
+
 #ifdef HAVE___THREAD
     #define NPY_TLS __thread
 #else
diff --git a/numpy/core/meson.build b/numpy/core/meson.build
index e968fb336..e51a7ba3b 100644
--- a/numpy/core/meson.build
+++ b/numpy/core/meson.build
@@ -430,6 +430,7 @@ _numpyconfig_h = configure_file(
 # ----------------------------
 
 staticlib_cflags = []
+staticlib_cppflags = []
 if cc.get_id() == 'msvc'
   # Disable voltbl section for vc142 to allow link using mingw-w64; see:
   # https://github.com/matthew-brett/dll_investigation/issues/1#issuecomment-1100468171
@@ -443,6 +444,7 @@ endif
 # https://mesonbuild.com/Build-options.html#build-options
 if get_option('disable-simd-optimizations')
   staticlib_cflags += '-DNPY_DISABLE_OPTIMIZATION'
+  staticlib_cppflags += '-DNPY_DISABLE_OPTIMIZATION'
 endif
 
 npy_math_internal_h = custom_target(
@@ -455,12 +457,13 @@ npymath_sources = [
   src_file.process('src/npymath/ieee754.c.src'),
   src_file.process('src/npymath/npy_math_complex.c.src'),
   npy_math_internal_h,
-  'src/npymath/halffloat.c',
+  'src/npymath/halffloat.cpp',
   'src/npymath/npy_math.c',
 ]
 npymath_lib = static_library('npymath',
   npymath_sources,
   c_args: staticlib_cflags,
+  cpp_args: staticlib_cppflags,
   include_directories: ['include', 'src/npymath', 'src/common'],
   dependencies: py_dep,
   install: true,
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index 680c2a5f6..8ef012f70 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -669,7 +669,7 @@ def configuration(parent_package='',top_path=None):
             # join('src', 'npymath', 'ieee754.cpp'),
             join('src', 'npymath', 'ieee754.c.src'),
             join('src', 'npymath', 'npy_math_complex.c.src'),
-            join('src', 'npymath', 'halffloat.c'),
+            join('src', 'npymath', 'halffloat.cpp'),
             ]
 
     config.add_installed_library('npymath',
@@ -727,7 +727,8 @@ def configuration(parent_package='',top_path=None):
             join('src', 'common', 'numpyos.h'),
             join('src', 'common', 'npy_cpu_dispatch.h'),
             join('src', 'common', 'simd', 'simd.h'),
-            ]
+            join('src', 'common', 'common.hpp'),
+            ]
 
     common_src = [
         join('src', 'common', 'array_assign.c'),
@@ -1010,9 +1011,6 @@ def configuration(parent_package='',top_path=None):
             svml_objs.sort()
 
     config.add_extension('_multiarray_umath',
-                         # Forcing C language even though we have C++ sources.
-                         # It forces the C linker and don't link C++ runtime.
-                         language = 'c',
                          sources=multiarray_src + umath_src +
                                  common_src +
                                  [generate_config_h,
diff --git a/numpy/core/src/common/common.hpp b/numpy/core/src/common/common.hpp
index 47d790bcf..44ba449d8 100644
--- a/numpy/core/src/common/common.hpp
+++ b/numpy/core/src/common/common.hpp
@@ -4,8 +4,11 @@
  * The following C++ headers are safe to be used standalone, however,
  * they are gathered to make it easy for us and for the future need to support PCH.
  */
+#include "npdef.hpp"
+#include "utils.hpp"
 #include "npstd.hpp"
 #include "half.hpp"
 #include "meta.hpp"
+#include "float_status.hpp"
 
 #endif // NUMPY_CORE_SRC_COMMON_COMMON_HPP
diff --git a/numpy/core/src/common/float_status.hpp b/numpy/core/src/common/float_status.hpp
new file mode 100644
index 000000000..8e4d5e06a
--- /dev/null
+++ b/numpy/core/src/common/float_status.hpp
@@ -0,0 +1,134 @@
+#ifndef NUMPY_CORE_SRC_COMMON_FLOAT_STATUS_HPP
+#define NUMPY_CORE_SRC_COMMON_FLOAT_STATUS_HPP
+
+#include "npstd.hpp"
+
+#include <fenv.h>
+
+namespace np {
+
+/// @addtogroup cpp_core_utility
+/// @{
+/**
+ * A class that wraps floating-point environment operations and
+ * provides lazy access to its functionality.
+ */
+class FloatStatus {
+  public:
+/*
+ * According to the C99 standard FE_DIVBYZERO, etc. may not be provided when
+ * unsupported. In such cases NumPy will not report these correctly, but we
+ * should still allow compiling (whether tests pass or not).
+ * By defining them as 0 locally, we make them no-ops. Unlike these defines,
+ * `musl`, for example, still defines all of the functions (as no-ops):
+ * https://git.musl-libc.org/cgit/musl/tree/src/fenv/fenv.c
+ * and does similar replacement in its tests:
+ * http://nsz.repo.hu/git/?p=libc-test;a=blob;f=src/common/mtest.h;h=706c1ba23ea8989b17a2f72ed1a919e187c06b6a;hb=HEAD#l30
+ */
+#ifdef FE_DIVBYZERO
+    static constexpr int kDivideByZero = FE_DIVBYZERO;
+#else
+    static constexpr int kDivideByZero = 0;
+#endif
+#ifdef FE_INVALID
+    static constexpr int kInvalid = FE_INVALID;
+#else
+    static constexpr int kInvalid = 0;
+#endif
+#ifdef FE_INEXACT
+    static constexpr int kInexact = FE_INEXACT;
+#else
+    static constexpr int kInexact = 0;
+#endif
+#ifdef FE_OVERFLOW
+    static constexpr int kOverflow = FE_OVERFLOW;
+#else
+    static constexpr int kOverflow = 0;
+#endif
+#ifdef FE_UNDERFLOW
+    static constexpr int kUnderflow = FE_UNDERFLOW;
+#else
+    static constexpr int kUnderflow = 0;
+#endif
+    static constexpr int kAllExcept = (kDivideByZero | kInvalid | kInexact |
+                                       kOverflow | kUnderflow);
+
+    FloatStatus(bool clear_on_dst=true)
+        : clear_on_dst_(clear_on_dst)
+    {
+        if constexpr (kAllExcept != 0) {
+            fpstatus_ = fetestexcept(kAllExcept);
+        }
+        else {
+            fpstatus_ = 0;
+        }
+    }
+    ~FloatStatus()
+    {
+        if constexpr (kAllExcept != 0) {
+            if (fpstatus_ != 0 && clear_on_dst_) {
+                feclearexcept(kAllExcept);
+            }
+        }
+    }
+    constexpr bool IsDivideByZero() const
+    {
+        return (fpstatus_ & kDivideByZero) != 0;
+    }
+    constexpr bool IsInexact() const
+    {
+        return (fpstatus_ & kInexact) != 0;
+    }
+    constexpr bool IsInvalid() const
+    {
+        return (fpstatus_ & kInvalid) != 0;
+    }
+    constexpr bool IsOverFlow() const
+    {
+        return (fpstatus_ & kOverflow) != 0;
+    }
+    constexpr bool IsUnderFlow() const
+    {
+        return (fpstatus_ & kUnderflow) != 0;
+    }
+    static void RaiseDivideByZero()
+    {
+        if constexpr (kDivideByZero != 0) {
+            feraiseexcept(kDivideByZero);
+        }
+    }
+    static void RaiseInexact()
+    {
+        if constexpr (kInexact != 0) {
+            feraiseexcept(kInexact);
+        }
+    }
+    static void RaiseInvalid()
+    {
+        if constexpr (kInvalid != 0) {
+            feraiseexcept(kInvalid);
+        }
+    }
+    static void RaiseOverflow()
+    {
+        if constexpr (kOverflow != 0) {
+            feraiseexcept(kOverflow);
+        }
+    }
+    static void RaiseUnderflow()
+    {
+        if constexpr (kUnderflow != 0) {
+            feraiseexcept(kUnderflow);
+        }
+    }
+
+  private:
+    bool clear_on_dst_;
+    int fpstatus_;
+};
+
+/// @} cpp_core_utility
+} // namespace np
+
+#endif // NUMPY_CORE_SRC_COMMON_FLOAT_STATUS_HPP
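FloatStatus wraps the same fenv.h flags that npy_set_floatstatus_* raises; at the Python level those flags surface through np.errstate. A sketch of the observable behavior (not part of the patch; assumes default NumPy scalar math):

    import numpy as np

    # float16 arithmetic that exceeds the type's maximum (65504) raises the
    # FP overflow flag, which errstate can turn into an exception.
    with np.errstate(over="raise"):
        try:
            np.float16(500) * np.float16(500)
        except FloatingPointError as e:
            print("caught:", e)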
diff --git a/numpy/core/src/common/half.hpp b/numpy/core/src/common/half.hpp
index 399f2fa79..e5f3f7a40 100644
--- a/numpy/core/src/common/half.hpp
+++ b/numpy/core/src/common/half.hpp
@@ -3,11 +3,14 @@
 
 #include "npstd.hpp"
 
+#include "npy_cpu_dispatch.h" // NPY_HAVE_CPU_FEATURES
+#include "half_private.hpp"
+
 // TODO(@seiko2plus):
 // - covers half-precision operations that are supported by numpy/halffloat.h
-// - support __fp16
-// - optimize x86 half<->single via cpu_fp16
-// - optimize ppc64 half<->single via cpu_vsx3
+// - add support for arithmetic operations
+// - enabling __fp16 causes massive FP exceptions on aarch64,
+//   needs a deep investigation
 
 namespace np {
 
@@ -16,48 +19,246 @@ namespace np {
 /// Provides a type that implements 16-bit floating point (half-precision).
 /// This type is ensured to be 16-bit size.
+#if 1 // ndef __ARM_FP16_FORMAT_IEEE
 class Half final {
-  public:
-    /// @name Public Constructors
-    /// @{
+  public:
+    /// Whether `Half` has full native HW support.
+    static constexpr bool kNative = false;
+    /// Whether `Half` has native HW support for single/double conversion.
+    template<typename T>
+    static constexpr bool kNativeConversion = (
+        (
+            std::is_same_v<T, float> &&
+        #if defined(NPY_HAVE_FP16) || defined(NPY_HAVE_VSX3)
+            true
+        #else
+            false
+        #endif
+        ) || (
+            std::is_same_v<T, double> &&
+        #if defined(NPY_HAVE_AVX512FP16) || defined(NPY_HAVE_VSX3)
+            true
+        #else
+            false
+        #endif
+        )
+    );
 
     /// Default constructor. initialize nothing.
     Half() = default;
-    /// Copy.
-    Half(const Half &r)
+
+    /// Construct from float.
+    /// If no hardware optimization is available, rounding will always
+    /// be ties to even.
+    explicit Half(float f)
     {
-        data_.u = r.data_.u;
+    #if defined(NPY_HAVE_FP16)
+        __m128 mf = _mm_load_ss(&f);
+        bits_ = static_cast<uint16_t>(_mm_cvtsi128_si32(_mm_cvtps_ph(mf, _MM_FROUND_TO_NEAREST_INT)));
+    #elif defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX_ASM)
+        __vector float vf32 = vec_splats(f);
+        __vector unsigned short vf16;
+        __asm__ __volatile__ ("xvcvsphp %x0,%x1" : "=wa" (vf16) : "wa" (vf32));
+        bits_ = vec_extract(vf16, 0);
+    #else
+        bits_ = half_private::FromFloatBits(BitCast<uint32_t>(f));
+    #endif
     }
-    /// @}
 
+    /// Construct from double.
+    /// If no hardware optimization is available, rounding will always
+    /// be ties to even.
+    explicit Half(double f)
+    {
+    #if defined(NPY_HAVE_AVX512FP16)
+        __m128d md = _mm_load_sd(&f);
+        bits_ = static_cast<uint16_t>(_mm_cvtsi128_si32(_mm_castph_si128(_mm_cvtpd_ph(md))));
+    #elif defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX_ASM)
+        __vector double vf64 = vec_splats(f);
+        __vector unsigned short vf16;
+        __asm__ __volatile__ ("xvcvdphp %x0,%x1" : "=wa" (vf16) : "wa" (vf64));
+        bits_ = vec_extract(vf16, 0);
+    #else
+        bits_ = half_private::FromDoubleBits(BitCast<uint64_t>(f));
+    #endif
+    }
+
+    /// Cast to float
+    explicit operator float() const
+    {
+    #if defined(NPY_HAVE_FP16)
+        float ret;
+        _mm_store_ss(&ret, _mm_cvtph_ps(_mm_cvtsi32_si128(bits_)));
+        return ret;
+    #elif defined(NPY_HAVE_VSX3) && defined(vec_extract_fp_from_shorth)
+        return vec_extract(vec_extract_fp_from_shorth(vec_splats(bits_)), 0);
+    #elif defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX_ASM)
+        __vector float vf32;
+        __asm__ __volatile__("xvcvhpsp %x0,%x1"
+                             : "=wa"(vf32)
+                             : "wa"(vec_splats(bits_)));
+        return vec_extract(vf32, 0);
+    #else
+        return BitCast<float>(half_private::ToFloatBits(bits_));
+    #endif
+    }
+
+    /// Cast to double
+    explicit operator double() const
+    {
+    #if defined(NPY_HAVE_AVX512FP16)
+        double ret;
+        _mm_store_sd(&ret, _mm_cvtph_pd(_mm_castsi128_ph(_mm_cvtsi32_si128(bits_))));
+        return ret;
+    #elif defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX_ASM)
+        __vector double vf64;
+        __asm__ __volatile__("xvcvhpdp %x0,%x1"
+                             : "=wa"(vf64)
+                             : "wa"(vec_splats(bits_)));
+        return vec_extract(vf64, 0);
+    #else
+        return BitCast<double>(half_private::ToDoubleBits(bits_));
+    #endif
+    }
 
     /// Returns a new Half constructed from the IEEE 754 binary16.
-    /// @param b the value of binary16.
-    static Half FromBits(uint16_t b)
+    static constexpr Half FromBits(uint16_t bits)
     {
-        Half f;
-        f.data_.u = b;
-        return f;
+        Half h{};
+        h.bits_ = bits;
+        return h;
     }
     /// Returns the IEEE 754 binary16 representation.
-    uint16_t Bits() const
+    constexpr uint16_t Bits() const
     {
-        return data_.u;
+        return bits_;
     }
 
-  private:
-    union {
-        uint16_t u;
-/*
-TODO(@seiko2plus): support __fp16
-#ifdef NPY_HAVE_HW_FP16
-        __fp16 f;
-#endif
-*/
-    } data_;
+    /// @name Comparison operators (ordered)
+    /// @{
+    constexpr bool operator==(Half r) const
+    {
+        return !(IsNaN() || r.IsNaN()) && Equal(r);
+    }
+    constexpr bool operator<(Half r) const
+    {
+        return !(IsNaN() || r.IsNaN()) && Less(r);
+    }
+    constexpr bool operator<=(Half r) const
+    {
+        return !(IsNaN() || r.IsNaN()) && LessEqual(r);
+    }
+    constexpr bool operator>(Half r) const
+    {
+        return r < *this;
+    }
+    constexpr bool operator>=(Half r) const
+    {
+        return r <= *this;
+    }
+    /// @}
+
+    /// @name Comparison operators (unordered)
+    /// @{
+    constexpr bool operator!=(Half r) const
+    {
+        return !(*this == r);
+    }
+    /// @} Comparison operators
+
+    /// @name Comparison with no guarantee of NaN behavior
+    /// @{
+    constexpr bool Less(Half r) const
+    {
+        uint_fast16_t a = static_cast<uint_fast16_t>(bits_),
+                      b = static_cast<uint_fast16_t>(r.bits_);
+        bool sign_a = (a & 0x8000u) == 0x8000u;
+        bool sign_b = (b & 0x8000u) == 0x8000u;
+        // if both `a` and `b` have the same sign
+        //   Test if `a` > `b` when `a` has the sign
+        //   or `a` < `b` when it does not,
+        //   and make sure they are not equal to each other
+        //   in case both are equal to +-0
+        // else
+        //   Test if `a` has the sign,
+        //   and `a` != -0.0 and `b` != 0.0
+        return (sign_a == sign_b) ? (sign_a ^ (a < b)) && (a != b)
+                                  : sign_a && ((a | b) != 0x8000u);
+    }
+    constexpr bool LessEqual(Half r) const
+    {
+        uint_fast16_t a = static_cast<uint_fast16_t>(bits_),
+                      b = static_cast<uint_fast16_t>(r.bits_);
+        bool sign_a = (a & 0x8000u) == 0x8000u;
+        bool sign_b = (b & 0x8000u) == 0x8000u;
+        // if both `a` and `b` have the same sign
+        //   Test if `a` > `b` when `a` has the sign
+        //   or `a` < `b` when it does not,
+        //   or a == b (needed even if we used <= above instead,
+        //   since testing +-0 is still required)
+        // else
+        //   Test if `a` has the sign
+        //   or `a` and `b` are equal to +-0.0
+        return (sign_a == sign_b) ? (sign_a ^ (a < b)) || (a == b)
+                                  : sign_a || ((a | b) == 0x8000u);
+    }
+    constexpr bool Equal(Half r) const
+    {
+        // a fast16 cast is not worth it, since an unpack op would be involved.
+        uint16_t a = bits_, b = r.bits_;
+        return a == b || ((a | b) == 0x8000u);
+    }
+    /// @} Comparison
+
+    /// @name Properties
+    // @{
+    constexpr bool IsNaN() const
+    {
+        return ((bits_ & 0x7c00u) == 0x7c00u) &&
+               ((bits_ & 0x03ffu) != 0);
+    }
+    /// @} Properties
+
+  private:
+    uint16_t bits_;
+};
+#else // __ARM_FP16_FORMAT_IEEE
+class Half final {
+  public:
+    static constexpr bool kNative = true;
+    template<typename T>
+    static constexpr bool kNativeConversion = (
+        std::is_same_v<T, float> || std::is_same_v<T, double>
+    );
+    Half() = default;
+    constexpr Half(__fp16 h) : half_(h)
+    {}
+    constexpr operator __fp16() const
+    { return half_; }
+    static Half FromBits(uint16_t bits)
+    {
+        Half h;
+        h.half_ = BitCast<__fp16>(bits);
+        return h;
+    }
+    uint16_t Bits() const
+    { return BitCast<uint16_t>(half_); }
+    constexpr bool Less(Half r) const
+    { return half_ < r.half_; }
+    constexpr bool LessEqual(Half r) const
+    { return half_ <= r.half_; }
+    constexpr bool Equal(Half r) const
+    { return half_ == r.half_; }
+    constexpr bool IsNaN() const
+    { return half_ != half_; }
+
+  private:
+    __fp16 half_;
 };
+#endif // __ARM_FP16_FORMAT_IEEE
 
 /// @} cpp_core_types
 
 } // namespace np
+
 #endif // NUMPY_CORE_SRC_COMMON_HALF_HPP
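The branchy Less/LessEqual/Equal helpers encode IEEE 754 ordering at the bit level, including the two tricky cases the comments call out; np.float16 shows the same semantics from Python (illustration only):

    import numpy as np

    # Signed zeros have different bit patterns (0x0000 vs 0x8000) but compare
    # equal -- the `(a | b) == 0x8000u` case in Equal().
    print(np.float16(0.0) == np.float16(-0.0))   # True

    # NaN is unordered: every ordered comparison is False, `!=` is True.
    nan = np.float16("nan")
    print(nan == nan, nan < nan, nan != nan)     # False False True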
diff --git a/numpy/core/src/common/half_private.hpp b/numpy/core/src/common/half_private.hpp
new file mode 100644
index 000000000..7a64eb397
--- /dev/null
+++ b/numpy/core/src/common/half_private.hpp
@@ -0,0 +1,330 @@
+#ifndef NUMPY_CORE_SRC_COMMON_HALF_PRIVATE_HPP
+#define NUMPY_CORE_SRC_COMMON_HALF_PRIVATE_HPP
+
+#include "npstd.hpp"
+#include "float_status.hpp"
+
+/*
+ * The following functions, which emulate float/double/half conversions,
+ * are copied from npymath without any changes to their functionality.
+ */
+namespace np { namespace half_private {
+
+template<bool gen_overflow=true, bool gen_underflow=true, bool round_even=true>
+inline uint16_t FromFloatBits(uint32_t f)
+{
+    uint32_t f_exp, f_sig;
+    uint16_t h_sgn, h_exp, h_sig;
+
+    h_sgn = (uint16_t) ((f&0x80000000u) >> 16);
+    f_exp = (f&0x7f800000u);
+
+    /* Exponent overflow/NaN converts to signed inf/NaN */
+    if (f_exp >= 0x47800000u) {
+        if (f_exp == 0x7f800000u) {
+            /* Inf or NaN */
+            f_sig = (f&0x007fffffu);
+            if (f_sig != 0) {
+                /* NaN - propagate the flag in the significand... */
+                uint16_t ret = (uint16_t) (0x7c00u + (f_sig >> 13));
+                /* ...but make sure it stays a NaN */
+                if (ret == 0x7c00u) {
+                    ret++;
+                }
+                return h_sgn + ret;
+            } else {
+                /* signed inf */
+                return (uint16_t) (h_sgn + 0x7c00u);
+            }
+        } else {
+            if constexpr (gen_overflow) {
+                /* overflow to signed inf */
+                FloatStatus::RaiseOverflow();
+            }
+            return (uint16_t) (h_sgn + 0x7c00u);
+        }
+    }
+
+    /* Exponent underflow converts to a subnormal half or signed zero */
+    if (f_exp <= 0x38000000u) {
+        /*
+         * Signed zeros, subnormal floats, and floats with small
+         * exponents all convert to signed zero half-floats.
+         */
+        if (f_exp < 0x33000000u) {
+            if constexpr (gen_underflow) {
+                /* If f != 0, it underflowed to 0 */
+                if ((f&0x7fffffff) != 0) {
+                    FloatStatus::RaiseUnderflow();
+                }
+            }
+            return h_sgn;
+        }
+        /* Make the subnormal significand */
+        f_exp >>= 23;
+        f_sig = (0x00800000u + (f&0x007fffffu));
+        if constexpr (gen_underflow) {
+            /* If it's not exactly represented, it underflowed */
+            if ((f_sig&(((uint32_t)1 << (126 - f_exp)) - 1)) != 0) {
+                FloatStatus::RaiseUnderflow();
+            }
+        }
+        /*
+         * Usually the significand is shifted by 13. For subnormals an
+         * additional shift needs to occur. This shift is one for the largest
+         * exponent giving a subnormal `f_exp = 0x38000000 >> 23 = 112`, which
+         * offsets the new first bit. At most the shift can be 1+10 bits.
+         */
+        f_sig >>= (113 - f_exp);
+        /* Handle rounding by adding 1 to the bit beyond half precision */
+        if constexpr (round_even) {
+            /*
+             * If the last bit in the half significand is 0 (already even), and
+             * the remaining bit pattern is 1000...0, then we do not add one
+             * to the bit after the half significand. However, the (113 - f_exp)
+             * shift can lose up to 11 bits, so the || checks them in the original.
+             * In all other cases, we can just add one.
+             */
+            if (((f_sig&0x00003fffu) != 0x00001000u) || (f&0x000007ffu)) {
+                f_sig += 0x00001000u;
+            }
+        }
+        else {
+            f_sig += 0x00001000u;
+        }
+        h_sig = (uint16_t) (f_sig >> 13);
+        /*
+         * If the rounding causes a bit to spill into h_exp, it will
+         * increment h_exp from zero to one and h_sig will be zero.
+         * This is the correct result.
+         */
+        return (uint16_t) (h_sgn + h_sig);
+    }
+
+    /* Regular case with no overflow or underflow */
+    h_exp = (uint16_t) ((f_exp - 0x38000000u) >> 13);
+    /* Handle rounding by adding 1 to the bit beyond half precision */
+    f_sig = (f&0x007fffffu);
+    if constexpr (round_even) {
+        /*
+         * If the last bit in the half significand is 0 (already even), and
+         * the remaining bit pattern is 1000...0, then we do not add one
+         * to the bit after the half significand. In all other cases, we do.
+         */
+        if ((f_sig&0x00003fffu) != 0x00001000u) {
+            f_sig += 0x00001000u;
+        }
+    }
+    else {
+        f_sig += 0x00001000u;
+    }
+    h_sig = (uint16_t) (f_sig >> 13);
+    /*
+     * If the rounding causes a bit to spill into h_exp, it will
+     * increment h_exp by one and h_sig will be zero. This is the
+     * correct result. h_exp may increment to 15, at greatest, in
+     * which case the result overflows to a signed inf.
+     */
+    if constexpr (gen_overflow) {
+        h_sig += h_exp;
+        if (h_sig == 0x7c00u) {
+            FloatStatus::RaiseOverflow();
+        }
+        return h_sgn + h_sig;
+    }
+    else {
+        return h_sgn + h_exp + h_sig;
+    }
+}
+
+template<bool gen_overflow=true, bool gen_underflow=true, bool round_even=true>
+inline uint16_t FromDoubleBits(uint64_t d)
+{
+    uint64_t d_exp, d_sig;
+    uint16_t h_sgn, h_exp, h_sig;
+
+    h_sgn = (d&0x8000000000000000ULL) >> 48;
+    d_exp = (d&0x7ff0000000000000ULL);
+
+    /* Exponent overflow/NaN converts to signed inf/NaN */
+    if (d_exp >= 0x40f0000000000000ULL) {
+        if (d_exp == 0x7ff0000000000000ULL) {
+            /* Inf or NaN */
+            d_sig = (d&0x000fffffffffffffULL);
+            if (d_sig != 0) {
+                /* NaN - propagate the flag in the significand... */
+                uint16_t ret = (uint16_t) (0x7c00u + (d_sig >> 42));
+                /* ...but make sure it stays a NaN */
+                if (ret == 0x7c00u) {
+                    ret++;
+                }
+                return h_sgn + ret;
+            } else {
+                /* signed inf */
+                return h_sgn + 0x7c00u;
+            }
+        } else {
+            /* overflow to signed inf */
+            if constexpr (gen_overflow) {
+                FloatStatus::RaiseOverflow();
+            }
+            return h_sgn + 0x7c00u;
+        }
+    }
+
+    /* Exponent underflow converts to subnormal half or signed zero */
+    if (d_exp <= 0x3f00000000000000ULL) {
+        /*
+         * Signed zeros, subnormal floats, and floats with small
+         * exponents all convert to signed zero half-floats.
+         */
+        if (d_exp < 0x3e60000000000000ULL) {
+            if constexpr (gen_underflow) {
+                /* If d != 0, it underflowed to 0 */
+                if ((d&0x7fffffffffffffffULL) != 0) {
+                    FloatStatus::RaiseUnderflow();
+                }
+            }
+            return h_sgn;
+        }
+        /* Make the subnormal significand */
+        d_exp >>= 52;
+        d_sig = (0x0010000000000000ULL + (d&0x000fffffffffffffULL));
+        if constexpr (gen_underflow) {
+            /* If it's not exactly represented, it underflowed */
+            if ((d_sig&(((uint64_t)1 << (1051 - d_exp)) - 1)) != 0) {
+                FloatStatus::RaiseUnderflow();
+            }
+        }
+        /*
+         * Unlike floats, doubles have enough room to shift left to align
+         * the subnormal significand leading to no loss of the last bits.
+         * The smallest possible exponent giving a subnormal is:
+         * `d_exp = 0x3e60000000000000 >> 52 = 998`. All larger subnormals are
+         * shifted with respect to it. This adds 10+1 bits to the final right
+         * shift compared to the one in the normal branch.
+         */
+        assert(d_exp - 998 >= 0);
+        d_sig <<= (d_exp - 998);
+        /* Handle rounding by adding 1 to the bit beyond half precision */
+        if constexpr (round_even) {
+            /*
+             * If the last bit in the half significand is 0 (already even), and
+             * the remaining bit pattern is 1000...0, then we do not add one
+             * to the bit after the half significand. In all other cases, we do.
+             */
+            if ((d_sig&0x003fffffffffffffULL) != 0x0010000000000000ULL) {
+                d_sig += 0x0010000000000000ULL;
+            }
+        }
+        else {
+            d_sig += 0x0010000000000000ULL;
+        }
+        h_sig = (uint16_t) (d_sig >> 53);
+        /*
+         * If the rounding causes a bit to spill into h_exp, it will
+         * increment h_exp from zero to one and h_sig will be zero.
+         * This is the correct result.
+         */
+        return h_sgn + h_sig;
+    }
+
+    /* Regular case with no overflow or underflow */
+    h_exp = (uint16_t) ((d_exp - 0x3f00000000000000ULL) >> 42);
+    /* Handle rounding by adding 1 to the bit beyond half precision */
+    d_sig = (d&0x000fffffffffffffULL);
+    if constexpr (round_even) {
+        /*
+         * If the last bit in the half significand is 0 (already even), and
+         * the remaining bit pattern is 1000...0, then we do not add one
+         * to the bit after the half significand. In all other cases, we do.
+         */
+        if ((d_sig&0x000007ffffffffffULL) != 0x0000020000000000ULL) {
+            d_sig += 0x0000020000000000ULL;
+        }
+    }
+    else {
+        d_sig += 0x0000020000000000ULL;
+    }
+    h_sig = (uint16_t) (d_sig >> 42);
+
+    /*
+     * If the rounding causes a bit to spill into h_exp, it will
+     * increment h_exp by one and h_sig will be zero. This is the
+     * correct result. h_exp may increment to 15, at greatest, in
+     * which case the result overflows to a signed inf.
+     */
+    if constexpr (gen_overflow) {
+        h_sig += h_exp;
+        if (h_sig == 0x7c00u) {
+            FloatStatus::RaiseOverflow();
+        }
+        return h_sgn + h_sig;
+    }
+    else {
+        return h_sgn + h_exp + h_sig;
+    }
+}
+
+constexpr uint32_t ToFloatBits(uint16_t h)
+{
+    uint16_t h_exp = (h&0x7c00u);
+    uint32_t f_sgn = ((uint32_t)h&0x8000u) << 16;
+    switch (h_exp) {
+        case 0x0000u: { // 0 or subnormal
+            uint16_t h_sig = (h&0x03ffu);
+            // Signed zero
+            if (h_sig == 0) {
+                return f_sgn;
+            }
+            // Subnormal
+            h_sig <<= 1;
+            while ((h_sig&0x0400u) == 0) {
+                h_sig <<= 1;
+                h_exp++;
+            }
+            uint32_t f_exp = ((uint32_t)(127 - 15 - h_exp)) << 23;
+            uint32_t f_sig = ((uint32_t)(h_sig&0x03ffu)) << 13;
+            return f_sgn + f_exp + f_sig;
+        }
+        case 0x7c00u: // inf or NaN
+            // All-ones exponent and a copy of the significand
+            return f_sgn + 0x7f800000u + (((uint32_t)(h&0x03ffu)) << 13);
+        default: // normalized
+            // Just need to adjust the exponent and shift
+            return f_sgn + (((uint32_t)(h&0x7fffu) + 0x1c000u) << 13);
+    }
+}
+
+constexpr uint64_t ToDoubleBits(uint16_t h)
+{
+    uint16_t h_exp = (h&0x7c00u);
+    uint64_t d_sgn = ((uint64_t)h&0x8000u) << 48;
+    switch (h_exp) {
+        case 0x0000u: { // 0 or subnormal
+            uint16_t h_sig = (h&0x03ffu);
+            // Signed zero
+            if (h_sig == 0) {
+                return d_sgn;
+            }
+            // Subnormal
+            h_sig <<= 1;
+            while ((h_sig&0x0400u) == 0) {
+                h_sig <<= 1;
+                h_exp++;
+            }
+            uint64_t d_exp = ((uint64_t)(1023 - 15 - h_exp)) << 52;
+            uint64_t d_sig = ((uint64_t)(h_sig&0x03ffu)) << 42;
+            return d_sgn + d_exp + d_sig;
+        }
+        case 0x7c00u: // inf or NaN
+            // All-ones exponent and a copy of the significand
+            return d_sgn + 0x7ff0000000000000ULL + (((uint64_t)(h&0x03ffu)) << 42);
+        default: // normalized
+            // Just need to adjust the exponent and shift
+            return d_sgn + (((uint64_t)(h&0x7fffu) + 0xfc000u) << 42);
+    }
+}
+
+}} // namespace np::half_private
+#endif // NUMPY_CORE_SRC_COMMON_HALF_PRIVATE_HPP
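FromFloatBits/FromDoubleBits round ties to even by default. Near 2048 the float16 spacing is 2.0, so odd integers fall exactly halfway between representable values, which makes the rule easy to see from Python (illustration only):

    import numpy as np

    print(np.float16(2049.0))  # 2048.0 -- halfway, rounds to the even neighbor below
    print(np.float16(2051.0))  # 2052.0 -- halfway, rounds to the even neighbor above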
diff --git a/numpy/core/src/common/npdef.hpp b/numpy/core/src/common/npdef.hpp
new file mode 100644
index 000000000..56a0df52e
--- /dev/null
+++ b/numpy/core/src/common/npdef.hpp
@@ -0,0 +1,28 @@
+#ifndef NUMPY_CORE_SRC_COMMON_NPDEF_HPP
+#define NUMPY_CORE_SRC_COMMON_NPDEF_HPP
+
+#if !defined(__cplusplus) || __cplusplus < 201703L
+    #error "NumPy requires a compiler with at least C++17 enabled"
+#endif
+
+/// @addtogroup cpp_core_defs
+/// @{
+
+/// Whether the compiler supports C++20
+#if __cplusplus >= 202002L
+    #define NP_HAS_CPP20 1
+#else
+    #define NP_HAS_CPP20 0
+#endif
+
+/// Wraps `__has_builtin`
+#if defined(__has_builtin)
+    #define NP_HAS_BUILTIN(INTRIN) __has_builtin(INTRIN)
+#else
+    #define NP_HAS_BUILTIN(INTRIN) 0
+#endif
+
+/// @} cpp_core_defs
+
+#endif // NUMPY_CORE_SRC_COMMON_NPDEF_HPP
diff --git a/numpy/core/src/common/npstd.hpp b/numpy/core/src/common/npstd.hpp
index 71993bd7c..ca664229a 100644
--- a/numpy/core/src/common/npstd.hpp
+++ b/numpy/core/src/common/npstd.hpp
@@ -31,6 +31,8 @@
 using std::int64_t;
 using std::uintptr_t;
 using std::intptr_t;
 using std::complex;
+using std::uint_fast16_t;
+using std::uint_fast32_t;
 
 /** Guard for long double.
  *
diff --git a/numpy/core/src/common/utils.hpp b/numpy/core/src/common/utils.hpp
new file mode 100644
index 000000000..f847cab44
--- /dev/null
+++ b/numpy/core/src/common/utils.hpp
@@ -0,0 +1,51 @@
+#ifndef NUMPY_CORE_SRC_COMMON_UTILS_HPP
+#define NUMPY_CORE_SRC_COMMON_UTILS_HPP
+
+#include "npdef.hpp"
+
+#if NP_HAS_CPP20
+    #include <bit>
+#endif
+
+#include <type_traits>
+#include <string.h>
+
+namespace np {
+
+/** Create a value of type `To` from the bits of `from`.
+ *
+ * Similar to `std::bit_cast`, but compatible with C++17; it should behave
+ * like `*reinterpret_cast<To*>(&from)` or type punning, without invoking
+ * any undefined behavior.
+ */
+template<typename To, typename From>
+#if NP_HAS_BUILTIN(__builtin_bit_cast) || NP_HAS_CPP20
+[[nodiscard]] constexpr
+#else
+inline
+#endif
+To BitCast(const From &from) noexcept
+{
+    static_assert(
+        sizeof(To) == sizeof(From),
+        "both data types must have the same size");
+
+    static_assert(
+        std::is_trivially_copyable_v<To> &&
+        std::is_trivially_copyable_v<From>,
+        "both data types must be trivially copyable");
+
+#if NP_HAS_CPP20
+    return std::bit_cast<To>(from);
+#elif NP_HAS_BUILTIN(__builtin_bit_cast)
+    return __builtin_bit_cast(To, from);
+#else
+    To to;
+    memcpy(&to, &from, sizeof(from));
+    return to;
+#endif
+}
+
+} // namespace np
+#endif // NUMPY_CORE_SRC_COMMON_UTILS_HPP
diff --git a/numpy/core/src/multiarray/array_coercion.c b/numpy/core/src/multiarray/array_coercion.c
index d55a5752b..ba9118052 100644
--- a/numpy/core/src/multiarray/array_coercion.c
+++ b/numpy/core/src/multiarray/array_coercion.c
@@ -1025,53 +1025,6 @@ PyArray_DiscoverDTypeAndShape_Recursive(
             Py_DECREF(arr);
             arr = NULL;
         }
-        else if (curr_dims > 0 && curr_dims != max_dims) {
-            /*
-             * Deprecated 2020-12-09, NumPy 1.20
-             *
-             * See https://github.com/numpy/numpy/issues/17965
-             * Shapely had objects which are not sequences but did export
-             * the array-interface (and so are arguably array-like).
-             * Previously numpy would not use array-like information during
-             * shape discovery, so that it ended up acting as if this was
-             * an (unknown) scalar but with the specified dtype.
-             * Thus we ignore "scalars" here, as the value stored in the
-             * array should be acceptable.
-             */
-            if (PyArray_NDIM(arr) > 0 && NPY_UNLIKELY(!PySequence_Check(obj))) {
-                if (PyErr_WarnFormat(PyExc_FutureWarning, 1,
-                        "The input object of type '%s' is an array-like "
-                        "implementing one of the corresponding protocols "
-                        "(`__array__`, `__array_interface__` or "
-                        "`__array_struct__`); but not a sequence (or 0-D). "
-                        "In the future, this object will be coerced as if it "
-                        "was first converted using `np.array(obj)`. "
-                        "To retain the old behaviour, you have to either "
-                        "modify the type '%s', or assign to an empty array "
-                        "created with `np.empty(correct_shape, dtype=object)`.",
-                        Py_TYPE(obj)->tp_name, Py_TYPE(obj)->tp_name) < 0) {
-                    Py_DECREF(arr);
-                    return -1;
-                }
-                /*
-                 * Strangely enough, even though we threw away the result here,
-                 * we did use it during descriptor discovery, so promote it:
-                 */
-                if (update_shape(curr_dims, &max_dims, out_shape,
-                        0, NULL, NPY_FALSE, flags) < 0) {
-                    *flags |= FOUND_RAGGED_ARRAY;
-                    Py_DECREF(arr);
-                    return max_dims;
-                }
-                if (!(*flags & DESCRIPTOR_WAS_SET) && handle_promotion(
-                        out_descr, PyArray_DESCR(arr), fixed_DType, flags) < 0) {
-                    Py_DECREF(arr);
-                    return -1;
-                }
-                Py_DECREF(arr);
-                return max_dims;
-            }
-        }
     }
     if (arr != NULL) {
         /*
diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c
index da8d23a26..573d0d606 100644
--- a/numpy/core/src/multiarray/common.c
+++ b/numpy/core/src/multiarray/common.c
@@ -128,24 +128,6 @@ PyArray_DTypeFromObject(PyObject *obj, int maxdims, PyArray_Descr **out_dtype)
 }
 
 
-NPY_NO_EXPORT int
-_zerofill(PyArrayObject *ret)
-{
-    if (PyDataType_REFCHK(PyArray_DESCR(ret))) {
-        PyObject *zero = PyLong_FromLong(0);
-        PyArray_FillObjectArray(ret, zero);
-        Py_DECREF(zero);
-        if (PyErr_Occurred()) {
-            return -1;
-        }
-    }
-    else {
-        npy_intp n = PyArray_NBYTES(ret);
-        memset(PyArray_DATA(ret), 0, n);
-    }
-    return 0;
-}
-
 NPY_NO_EXPORT npy_bool
 _IsWriteable(PyArrayObject *ap)
 {
@@ -459,3 +441,32 @@ new_array_for_sum(PyArrayObject *ap1, PyArrayObject *ap2, PyArrayObject* out,
     }
 }
 
+NPY_NO_EXPORT int
+check_is_convertible_to_scalar(PyArrayObject *v)
+{
+    if (PyArray_NDIM(v) == 0) {
+        return 0;
+    }
+
+    /* Remove this if-else block when the deprecation expires */
+    if (PyArray_SIZE(v) == 1) {
+        /* Numpy 1.25.0, 2023-01-02 */
+        if (DEPRECATE(
+                "Conversion of an array with ndim > 0 to a scalar "
+                "is deprecated, and will error in future. "
+                "Ensure you extract a single element from your array "
+                "before performing this operation. "
+                "(Deprecated NumPy 1.25.)") < 0) {
+            return -1;
+        }
+        return 0;
+    } else {
+        PyErr_SetString(PyExc_TypeError,
+                "only length-1 arrays can be converted to Python scalars");
+        return -1;
+    }
+
+    PyErr_SetString(PyExc_TypeError,
+            "only 0-dimensional arrays can be converted to Python scalars");
+    return -1;
+}
diff --git a/numpy/core/src/multiarray/common.h b/numpy/core/src/multiarray/common.h
index 4e067b22c..cb9fadc4e 100644
--- a/numpy/core/src/multiarray/common.h
+++ b/numpy/core/src/multiarray/common.h
@@ -55,9 +55,6 @@ PyArray_DTypeFromObject(PyObject *obj, int maxdims,
 NPY_NO_EXPORT PyArray_Descr *
 _array_find_python_scalar_type(PyObject *op);
 
-NPY_NO_EXPORT int
-_zerofill(PyArrayObject *ret);
-
 NPY_NO_EXPORT npy_bool
 _IsWriteable(PyArrayObject *ap);
 
@@ -335,6 +332,13 @@ PyArray_TupleFromItems(int n, PyObject *const *items, int make_null_none)
     return tuple;
 }
 
+/*
+ * Returns 0 if the array can be converted to a scalar: that is, if it has
+ * rank 0, or (with a deprecation warning) size 1. Returns -1 otherwise.
+ */
+NPY_NO_EXPORT int
+check_is_convertible_to_scalar(PyArrayObject *v);
+
 
 #include "ucsnarrow.h"
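check_is_convertible_to_scalar centralizes the new NumPy 1.25 deprecation used by methods.c and number.c further down in this diff; from Python the behavior looks like this (illustration only):

    import numpy as np
    import warnings

    with warnings.catch_warnings():
        warnings.simplefilter("error", DeprecationWarning)
        try:
            float(np.array([1.0]))       # size 1 but ndim > 0: deprecated
        except DeprecationWarning as w:
            print("deprecated:", w)

    try:
        float(np.ones(2))                # size > 1: still a TypeError
    except TypeError as e:
        print("error:", e)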
diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c
index 53db5c577..de1cd075d 100644
--- a/numpy/core/src/multiarray/convert_datatype.c
+++ b/numpy/core/src/multiarray/convert_datatype.c
@@ -3913,21 +3913,6 @@ PyArray_InitializeObjectToObjectCast(void)
 }
 
 
-static int
-PyArray_SetClearFunctions(void)
-{
-    PyArray_DTypeMeta *Object = PyArray_DTypeFromTypeNum(NPY_OBJECT);
-    NPY_DT_SLOTS(Object)->get_clear_loop = &npy_get_clear_object_strided_loop;
-    Py_DECREF(Object);  /* use borrowed */
-
-    PyArray_DTypeMeta *Void = PyArray_DTypeFromTypeNum(NPY_VOID);
-    NPY_DT_SLOTS(Void)->get_clear_loop = &npy_get_clear_void_and_legacy_user_dtype_loop;
-    Py_DECREF(Void);  /* use borrowed */
-    return 0;
-}
-
-
 NPY_NO_EXPORT int
 PyArray_InitializeCasts()
 {
@@ -3947,8 +3932,5 @@ PyArray_InitializeCasts()
     if (PyArray_InitializeDatetimeCasts() < 0) {
         return -1;
     }
-    if (PyArray_SetClearFunctions() < 0) {
-        return -1;
-    }
     return 0;
 }
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index a6335c783..79a1905a7 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -715,6 +715,11 @@ PyArray_NewFromDescr_int(
     fa->base = (PyObject *)NULL;
     fa->weakreflist = (PyObject *)NULL;
 
+    /* needed for zero-filling logic below, defined and initialized up here
+       so cleanup logic can go in the fail block */
+    NPY_traverse_info fill_zero_info;
+    NPY_traverse_info_init(&fill_zero_info);
+
     if (nd > 0) {
         fa->dimensions = npy_alloc_cache_dim(2 * nd);
         if (fa->dimensions == NULL) {
@@ -784,6 +789,31 @@ PyArray_NewFromDescr_int(
 
     if (data == NULL) {
+
+        /* float errors do not matter and we do not release GIL */
+        NPY_ARRAYMETHOD_FLAGS zero_flags;
+        get_traverse_loop_function *get_fill_zero_loop =
+            NPY_DT_SLOTS(NPY_DTYPE(descr))->get_fill_zero_loop;
+        if (get_fill_zero_loop != NULL) {
+            if (get_fill_zero_loop(
+                    NULL, descr, 1, descr->elsize, &(fill_zero_info.func),
+                    &(fill_zero_info.auxdata), &zero_flags) < 0) {
+                goto fail;
+            }
+        }
+
+        /*
+         * We always want a zero-filled array allocated with calloc if
+         * NPY_NEEDS_INIT is set on the dtype, for safety.  We also want a
+         * zero-filled array if zeroed is set and the zero-filling loop isn't
+         * defined, for better performance.
+         *
+         * If the zero-filling loop is defined and zeroed is set, allocate
+         * with malloc and let the zero-filling loop fill the array buffer
+         * with valid zero values for the dtype.
+         */
+        int use_calloc = (PyDataType_FLAGCHK(descr, NPY_NEEDS_INIT) ||
+                          (zeroed && (fill_zero_info.func == NULL)));
+
         /* Store the handler in case the default is modified */
         fa->mem_handler = PyDataMem_GetHandler();
         if (fa->mem_handler == NULL) {
@@ -801,11 +831,8 @@ PyArray_NewFromDescr_int(
                 fa->strides[i] = 0;
             }
         }
-        /*
-         * It is bad to have uninitialized OBJECT pointers
-         * which could also be sub-fields of a VOID array
-         */
-        if (zeroed || PyDataType_FLAGCHK(descr, NPY_NEEDS_INIT)) {
+
+        if (use_calloc) {
             data = PyDataMem_UserNEW_ZEROED(nbytes, 1, fa->mem_handler);
         }
         else {
@@ -816,6 +843,18 @@ PyArray_NewFromDescr_int(
             goto fail;
         }
 
+        /*
+         * If the array needs special dtype-specific zero-filling logic, do that
+         */
+        if (NPY_UNLIKELY(zeroed && (fill_zero_info.func != NULL))) {
+            npy_intp size = PyArray_MultiplyList(fa->dimensions, fa->nd);
+            if (fill_zero_info.func(
+                    NULL, descr, data, size, descr->elsize,
+                    fill_zero_info.auxdata) < 0) {
+                goto fail;
+            }
+        }
+
         fa->flags |= NPY_ARRAY_OWNDATA;
     }
     else {
@@ -910,9 +949,11 @@ PyArray_NewFromDescr_int(
             }
         }
     }
+    NPY_traverse_info_xfree(&fill_zero_info);
     return (PyObject *)fa;
 
 fail:
+    NPY_traverse_info_xfree(&fill_zero_info);
     Py_XDECREF(fa->mem_handler);
     Py_DECREF(fa);
     return NULL;
@@ -3017,17 +3058,7 @@ PyArray_Zeros(int nd, npy_intp const *dims, PyArray_Descr *type, int is_f_order)
         return NULL;
     }
 
-    /* handle objects */
-    if (PyDataType_REFCHK(PyArray_DESCR(ret))) {
-        if (_zerofill(ret) < 0) {
-            Py_DECREF(ret);
-            return NULL;
-        }
-    }
-
     return (PyObject *)ret;
-
 }
 
 /*NUMPY_API
diff --git a/numpy/core/src/multiarray/dtype_traversal.c b/numpy/core/src/multiarray/dtype_traversal.c
index cefa7d6e1..769c2e015 100644
--- a/numpy/core/src/multiarray/dtype_traversal.c
+++ b/numpy/core/src/multiarray/dtype_traversal.c
@@ -24,7 +24,7 @@
 #include "alloc.h"
 #include "array_method.h"
 #include "dtypemeta.h"
-
+#include "refcount.h"
 #include "dtype_traversal.h"
 
@@ -124,6 +124,39 @@ npy_get_clear_object_strided_loop(
 }
 
 
+/**************** Python Object zero fill *********************/
+
+static int
+fill_zero_object_strided_loop(
+        void *NPY_UNUSED(traverse_context), PyArray_Descr *NPY_UNUSED(descr),
+        char *data, npy_intp size, npy_intp stride,
+        NpyAuxData *NPY_UNUSED(auxdata))
+{
+    PyObject *zero = PyLong_FromLong(0);
+    while (size--) {
+        Py_INCREF(zero);
+        // assumes `data` doesn't have a pre-existing object inside it
+        memcpy(data, &zero, sizeof(zero));
+        data += stride;
+    }
+    Py_DECREF(zero);
+    return 0;
+}
+
+NPY_NO_EXPORT int
+npy_object_get_fill_zero_loop(void *NPY_UNUSED(traverse_context),
+                              PyArray_Descr *NPY_UNUSED(descr),
+                              int NPY_UNUSED(aligned),
+                              npy_intp NPY_UNUSED(fixed_stride),
+                              traverse_loop_function **out_loop,
+                              NpyAuxData **NPY_UNUSED(out_auxdata),
+                              NPY_ARRAYMETHOD_FLAGS *flags)
+{
+    *flags = NPY_METH_REQUIRES_PYAPI | NPY_METH_NO_FLOATINGPOINT_ERRORS;
+    *out_loop = &fill_zero_object_strided_loop;
+    return 0;
+}
+
 /**************** Structured DType clear functionality ***************/
 
 /*
@@ -408,7 +441,6 @@ clear_no_op(
     return 0;
 }
 
-
 NPY_NO_EXPORT int
 npy_get_clear_void_and_legacy_user_dtype_loop(
         void *traverse_context, PyArray_Descr *dtype, int aligned,
@@ -472,3 +504,41 @@ npy_get_clear_void_and_legacy_user_dtype_loop(
             dtype);
     return -1;
 }
+
+/**************** Structured DType zero fill ***************/
+
+
+static int
+fill_zero_void_with_objects_strided_loop(
+        void *NPY_UNUSED(traverse_context), PyArray_Descr *descr,
+        char *data, npy_intp size, npy_intp stride,
+        NpyAuxData *NPY_UNUSED(auxdata))
+{
+    PyObject *zero = PyLong_FromLong(0);
+    while (size--) {
+        _fillobject(data, zero, descr);
+        data += stride;
+    }
+    Py_DECREF(zero);
+    return 0;
+}
+
+
+NPY_NO_EXPORT int
+npy_void_get_fill_zero_loop(void *NPY_UNUSED(traverse_context),
+                            PyArray_Descr *descr,
+                            int NPY_UNUSED(aligned),
+                            npy_intp NPY_UNUSED(fixed_stride),
+                            traverse_loop_function **out_loop,
+                            NpyAuxData **NPY_UNUSED(out_auxdata),
+                            NPY_ARRAYMETHOD_FLAGS *flags)
+{
+    *flags = NPY_METH_NO_FLOATINGPOINT_ERRORS;
+    if (PyDataType_REFCHK(descr)) {
+        *flags |= NPY_METH_REQUIRES_PYAPI;
+        *out_loop = &fill_zero_void_with_objects_strided_loop;
+    }
+    else {
+        *out_loop = NULL;
+    }
+    return 0;
+}
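npy_void_get_fill_zero_loop only installs a loop when the void dtype actually holds references; otherwise it leaves out_loop NULL so the buffer can simply be calloc'd. The user-visible effect for a structured dtype with an object field (illustration only):

    import numpy as np

    dt = np.dtype([("tag", object), ("x", np.int32)])
    a = np.zeros(2, dtype=dt)
    print(a[0]["tag"], type(a[0]["tag"]))  # 0 <class 'int'> -- a real object
    print(a[0]["x"])                       # 0 -- plain zeroed bytes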
diff --git a/numpy/core/src/multiarray/dtype_traversal.h b/numpy/core/src/multiarray/dtype_traversal.h
index fd060a0f0..a9c185382 100644
--- a/numpy/core/src/multiarray/dtype_traversal.h
+++ b/numpy/core/src/multiarray/dtype_traversal.h
@@ -19,6 +19,22 @@ npy_get_clear_void_and_legacy_user_dtype_loop(
         traverse_loop_function **out_loop, NpyAuxData **out_traversedata,
         NPY_ARRAYMETHOD_FLAGS *flags);
 
+/* NumPy DType zero-filling implementations */
+
+NPY_NO_EXPORT int
+npy_object_get_fill_zero_loop(
+        void *NPY_UNUSED(traverse_context), PyArray_Descr *NPY_UNUSED(descr),
+        int NPY_UNUSED(aligned), npy_intp NPY_UNUSED(fixed_stride),
+        traverse_loop_function **out_loop, NpyAuxData **NPY_UNUSED(out_auxdata),
+        NPY_ARRAYMETHOD_FLAGS *flags);
+
+NPY_NO_EXPORT int
+npy_void_get_fill_zero_loop(
+        void *NPY_UNUSED(traverse_context), PyArray_Descr *descr,
+        int NPY_UNUSED(aligned), npy_intp NPY_UNUSED(fixed_stride),
+        traverse_loop_function **out_loop, NpyAuxData **NPY_UNUSED(out_auxdata),
+        NPY_ARRAYMETHOD_FLAGS *flags);
+
 
 /* Helper to deal with calling or nesting simple strided loops */
 
@@ -34,6 +50,7 @@ NPY_traverse_info_init(NPY_traverse_info *cast_info)
 {
     cast_info->func = NULL;  /* mark as uninitialized. */
     cast_info->auxdata = NULL;  /* allow leaving auxdata untouched */
+    cast_info->descr = NULL;  /* mark as uninitialized. */
 }
 
 
@@ -45,7 +62,7 @@ NPY_traverse_info_xfree(NPY_traverse_info *traverse_info)
     }
     traverse_info->func = NULL;
     NPY_AUXDATA_FREE(traverse_info->auxdata);
-    Py_DECREF(traverse_info->descr);
+    Py_XDECREF(traverse_info->descr);
 }
 
 
@@ -79,4 +96,4 @@ PyArray_GetClearFunction(
         NPY_traverse_info *clear_info, NPY_ARRAYMETHOD_FLAGS *flags);
 
-#endif  /* NUMPY_CORE_SRC_MULTIARRAY_DTYPE_TRAVERSAL_H_ */
\ No newline at end of file
+#endif  /* NUMPY_CORE_SRC_MULTIARRAY_DTYPE_TRAVERSAL_H_ */
diff --git a/numpy/core/src/multiarray/dtypemeta.c b/numpy/core/src/multiarray/dtypemeta.c
index f268ba2cb..f8c1b6617 100644
--- a/numpy/core/src/multiarray/dtypemeta.c
+++ b/numpy/core/src/multiarray/dtypemeta.c
@@ -20,6 +20,8 @@
 #include "usertypes.h"
 #include "conversion_utils.h"
 #include "templ_common.h"
+#include "refcount.h"
+#include "dtype_traversal.h"
 
 #include <assert.h>
 
@@ -524,6 +526,7 @@ void_common_instance(PyArray_Descr *descr1, PyArray_Descr *descr2)
     return NULL;
 }
 
+
 NPY_NO_EXPORT int
 python_builtins_are_known_scalar_types(
         PyArray_DTypeMeta *NPY_UNUSED(cls), PyTypeObject *pytype)
@@ -855,6 +858,7 @@ dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr)
     dt_slots->common_dtype = default_builtin_common_dtype;
     dt_slots->common_instance = NULL;
     dt_slots->ensure_canonical = ensure_native_byteorder;
+    dt_slots->get_fill_zero_loop = NULL;
 
     if (PyTypeNum_ISSIGNED(dtype_class->type_num)) {
         /* Convert our scalars (raise on too large unsigned and NaN, etc.) */
@@ -866,6 +870,8 @@ dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr)
     }
     else if (descr->type_num == NPY_OBJECT) {
         dt_slots->common_dtype = object_common_dtype;
+        dt_slots->get_fill_zero_loop = npy_object_get_fill_zero_loop;
+        dt_slots->get_clear_loop = npy_get_clear_object_strided_loop;
     }
     else if (PyTypeNum_ISDATETIME(descr->type_num)) {
         /* Datetimes are flexible, but were not considered previously */
@@ -887,6 +893,9 @@ dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr)
                 void_discover_descr_from_pyobject);
         dt_slots->common_instance = void_common_instance;
         dt_slots->ensure_canonical = void_ensure_canonical;
+        dt_slots->get_fill_zero_loop = npy_void_get_fill_zero_loop;
+        dt_slots->get_clear_loop =
+                npy_get_clear_void_and_legacy_user_dtype_loop;
     }
     else {
         dt_slots->default_descr = string_and_unicode_default_descr;
diff --git a/numpy/core/src/multiarray/dtypemeta.h b/numpy/core/src/multiarray/dtypemeta.h
index 3b4dbad24..6dbfd1549 100644
--- a/numpy/core/src/multiarray/dtypemeta.h
+++ b/numpy/core/src/multiarray/dtypemeta.h
@@ -42,6 +42,23 @@ typedef struct {
      */
     get_traverse_loop_function *get_clear_loop;
     /*
+     Either NULL or a function that sets a function pointer to a traversal
+     loop that fills an array with zero values appropriate for the dtype. If
+     get_fill_zero_loop is undefined or the function pointer set by it is
+     NULL, the array buffer is allocated with calloc. If this function is
+     defined and it sets a non-NULL function pointer, the array buffer is
+     allocated with malloc and the zero-filling loop function pointer is
+     called to fill the buffer. For the best performance, avoid using this
+     function if a zero-filled array buffer allocated with calloc makes sense
+     for the dtype.
+
+     Note that this is currently used only for zero-filling a newly allocated
+     array. While it can be used to zero-fill an already-filled buffer, that
+     will not work correctly for arrays holding references. If you need to do
+     that, clear the array first.
+     */
+    get_traverse_loop_function *get_fill_zero_loop;
+    /*
      * The casting implementation (ArrayMethod) to convert between two
      * instances of this DType, stored explicitly for fast access:
      */
@@ -63,7 +80,7 @@ typedef struct {
 
 // This must be updated if new slots before within_dtype_castingimpl
 // are added
-#define NPY_NUM_DTYPE_SLOTS 9
+#define NPY_NUM_DTYPE_SLOTS 10
 
 #define NPY_NUM_DTYPE_PYARRAY_ARRFUNCS_SLOTS 22
 #define NPY_DT_MAX_ARRFUNCS_SLOT \
   NPY_NUM_DTYPE_PYARRAY_ARRFUNCS_SLOTS + _NPY_DT_ARRFUNCS_OFFSET
diff --git a/numpy/core/src/multiarray/getset.c b/numpy/core/src/multiarray/getset.c
index ab35548ed..d019acbb5 100644
--- a/numpy/core/src/multiarray/getset.c
+++ b/numpy/core/src/multiarray/getset.c
@@ -797,20 +797,17 @@ array_imag_get(PyArrayObject *self, void *NPY_UNUSED(ignored))
     }
     else {
         Py_INCREF(PyArray_DESCR(self));
-        ret = (PyArrayObject *)PyArray_NewFromDescr(Py_TYPE(self),
-                                                    PyArray_DESCR(self),
-                                                    PyArray_NDIM(self),
-                                                    PyArray_DIMS(self),
-                                                    NULL, NULL,
-                                                    PyArray_ISFORTRAN(self),
-                                                    (PyObject *)self);
+        ret = (PyArrayObject *)PyArray_NewFromDescr_int(
+                Py_TYPE(self),
+                PyArray_DESCR(self),
+                PyArray_NDIM(self),
+                PyArray_DIMS(self),
+                NULL, NULL,
+                PyArray_ISFORTRAN(self),
+                (PyObject *)self, NULL, 1, 0);
         if (ret == NULL) {
             return NULL;
         }
-        if (_zerofill(ret) < 0) {
-            Py_DECREF(ret);
-            return NULL;
-        }
         PyArray_CLEARFLAGS(ret, NPY_ARRAY_WRITEABLE);
     }
     return (PyObject *) ret;
diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c
index 93b290020..3b2e72820 100644
--- a/numpy/core/src/multiarray/methods.c
+++ b/numpy/core/src/multiarray/methods.c
@@ -2804,9 +2804,7 @@ array_complex(PyArrayObject *self, PyObject *NPY_UNUSED(args))
     PyArray_Descr *dtype;
     PyObject *c;
 
-    if (PyArray_SIZE(self) != 1) {
-        PyErr_SetString(PyExc_TypeError,
-                "only length-1 arrays can be converted to Python scalars");
+    if (check_is_convertible_to_scalar(self) < 0) {
         return NULL;
     }
 
diff --git a/numpy/core/src/multiarray/number.c b/numpy/core/src/multiarray/number.c
index c208fb203..35e4af79c 100644
--- a/numpy/core/src/multiarray/number.c
+++ b/numpy/core/src/multiarray/number.c
@@ -869,13 +869,11 @@ array_scalar_forward(PyArrayObject *v,
                      PyObject *(*builtin_func)(PyObject *),
                      const char *where)
 {
-    PyObject *scalar;
-    if (PyArray_SIZE(v) != 1) {
-        PyErr_SetString(PyExc_TypeError, "only size-1 arrays can be"\
-                " converted to Python scalars");
+    if (check_is_convertible_to_scalar(v) < 0) {
         return NULL;
     }
 
+    PyObject *scalar;
     scalar = PyArray_GETITEM(v, PyArray_DATA(v));
     if (scalar == NULL) {
         return NULL;
     }
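With the getset.c change, array_imag_get requests a zeroed array straight from PyArray_NewFromDescr_int instead of calling _zerofill afterwards; the Python-level result is unchanged (illustration only):

    import numpy as np

    im = np.arange(3).imag       # imaginary part of a real-valued array
    print(im)                    # [0 0 0]
    print(im.flags.writeable)    # False -- returned read-only, as before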
diff --git a/numpy/core/src/multiarray/refcount.c b/numpy/core/src/multiarray/refcount.c
index 20527f7af..d200957c3 100644
--- a/numpy/core/src/multiarray/refcount.c
+++ b/numpy/core/src/multiarray/refcount.c
@@ -17,15 +17,12 @@
 #include "numpy/arrayscalars.h"
 #include "iterators.h"
 #include "dtypemeta.h"
+#include "refcount.h"
 
 #include "npy_config.h"
 
 #include "npy_pycompat.h"
 
-static void
-_fillobject(char *optr, PyObject *obj, PyArray_Descr *dtype);
-
-
 /*
  * Helper function to clear a strided memory (normally or always contiguous)
  * from all Python (or other) references.  The function does nothing if the
@@ -395,7 +392,7 @@ PyArray_FillObjectArray(PyArrayObject *arr, PyObject *obj)
     }
 }
 
-static void
+NPY_NO_EXPORT void
 _fillobject(char *optr, PyObject *obj, PyArray_Descr *dtype)
 {
     if (!PyDataType_FLAGCHK(dtype, NPY_ITEM_REFCOUNT)) {
diff --git a/numpy/core/src/multiarray/refcount.h b/numpy/core/src/multiarray/refcount.h
index 16d34e292..7f39b9ca4 100644
--- a/numpy/core/src/multiarray/refcount.h
+++ b/numpy/core/src/multiarray/refcount.h
@@ -24,4 +24,7 @@ PyArray_XDECREF(PyArrayObject *mp);
 NPY_NO_EXPORT void
 PyArray_FillObjectArray(PyArrayObject *arr, PyObject *obj);
 
+NPY_NO_EXPORT void
+_fillobject(char *optr, PyObject *obj, PyArray_Descr *dtype);
+
 #endif  /* NUMPY_CORE_SRC_MULTIARRAY_REFCOUNT_H_ */
diff --git a/numpy/core/src/npymath/halffloat.c b/numpy/core/src/npymath/halffloat.c
deleted file mode 100644
index 51948c736..000000000
--- a/numpy/core/src/npymath/halffloat.c
+++ /dev/null
@@ -1,555 +0,0 @@
-#define NPY_NO_DEPRECATED_API NPY_API_VERSION
-
-#include "numpy/halffloat.h"
-
-/*
- * This chooses between 'ties to even' and 'ties away from zero'.
- */
-#define NPY_HALF_ROUND_TIES_TO_EVEN 1
-/*
- * If these are 1, the conversions try to trigger underflow,
- * overflow, and invalid exceptions in the FP system when needed.
- */
-#define NPY_HALF_GENERATE_OVERFLOW 1
-#define NPY_HALF_GENERATE_UNDERFLOW 1
-#define NPY_HALF_GENERATE_INVALID 1
-
-/*
- ********************************************************************
- *                   HALF-PRECISION ROUTINES                        *
- ********************************************************************
- */
-
-float npy_half_to_float(npy_half h)
-{
-    union { float ret; npy_uint32 retbits; } conv;
-    conv.retbits = npy_halfbits_to_floatbits(h);
-    return conv.ret;
-}
-
-double npy_half_to_double(npy_half h)
-{
-    union { double ret; npy_uint64 retbits; } conv;
-    conv.retbits = npy_halfbits_to_doublebits(h);
-    return conv.ret;
-}
-
-npy_half npy_float_to_half(float f)
-{
-    union { float f; npy_uint32 fbits; } conv;
-    conv.f = f;
-    return npy_floatbits_to_halfbits(conv.fbits);
-}
-
-npy_half npy_double_to_half(double d)
-{
-    union { double d; npy_uint64 dbits; } conv;
-    conv.d = d;
-    return npy_doublebits_to_halfbits(conv.dbits);
-}
-
-int npy_half_iszero(npy_half h)
-{
-    return (h&0x7fff) == 0;
-}
-
-int npy_half_isnan(npy_half h)
-{
-    return ((h&0x7c00u) == 0x7c00u) && ((h&0x03ffu) != 0x0000u);
-}
-
-int npy_half_isinf(npy_half h)
-{
-    return ((h&0x7fffu) == 0x7c00u);
-}
-
-int npy_half_isfinite(npy_half h)
-{
-    return ((h&0x7c00u) != 0x7c00u);
-}
-
-int npy_half_signbit(npy_half h)
-{
-    return (h&0x8000u) != 0;
-}
-
-npy_half npy_half_spacing(npy_half h)
-{
-    npy_half ret;
-    npy_uint16 h_exp = h&0x7c00u;
-    npy_uint16 h_sig = h&0x03ffu;
-    if (h_exp == 0x7c00u) {
-#if NPY_HALF_GENERATE_INVALID
-        npy_set_floatstatus_invalid();
-#endif
-        ret = NPY_HALF_NAN;
-    } else if (h == 0x7bffu) {
-#if NPY_HALF_GENERATE_OVERFLOW
-        npy_set_floatstatus_overflow();
-#endif
-        ret = NPY_HALF_PINF;
-    } else if ((h&0x8000u) && h_sig == 0) { /* Negative boundary case */
-        if (h_exp > 0x2c00u) { /* If result is normalized */
-            ret = h_exp - 0x2c00u;
-        } else if(h_exp > 0x0400u) { /* The result is a subnormal, but not the smallest */
-            ret = 1 << ((h_exp >> 10) - 2);
-        } else {
-            ret = 0x0001u; /* Smallest subnormal half */
-        }
-    } else if (h_exp > 0x2800u) { /* If result is still normalized */
-        ret = h_exp - 0x2800u;
-    } else if (h_exp > 0x0400u) { /* The result is a subnormal, but not the smallest */
-        ret = 1 << ((h_exp >> 10) - 1);
-    } else {
-        ret = 0x0001u;
-    }
-
-    return ret;
-}
-
-npy_half npy_half_copysign(npy_half x, npy_half y)
-{
-    return (x&0x7fffu) | (y&0x8000u);
-}
-
-npy_half npy_half_nextafter(npy_half x, npy_half y)
-{
-    npy_half ret;
-
-    if (npy_half_isnan(x) || npy_half_isnan(y)) {
-        ret = NPY_HALF_NAN;
-    } else if (npy_half_eq_nonan(x, y)) {
-        ret = x;
-    } else if (npy_half_iszero(x)) {
-        ret = (y&0x8000u) + 1; /* Smallest subnormal half */
-    } else if (!(x&0x8000u)) { /* x > 0 */
-        if ((npy_int16)x > (npy_int16)y) { /* x > y */
-            ret = x-1;
-        } else {
-            ret = x+1;
-        }
-    } else {
-        if (!(y&0x8000u) || (x&0x7fffu) > (y&0x7fffu)) { /* x < y */
-            ret = x-1;
-        } else {
-            ret = x+1;
-        }
-    }
-#if NPY_HALF_GENERATE_OVERFLOW
-    if (npy_half_isinf(ret) && npy_half_isfinite(x)) {
-        npy_set_floatstatus_overflow();
-    }
-#endif
-
-    return ret;
-}
-
-int npy_half_eq_nonan(npy_half h1, npy_half h2)
-{
-    return (h1 == h2 || ((h1 | h2) & 0x7fff) == 0);
-}
-
-int npy_half_eq(npy_half h1, npy_half h2)
-{
-    /*
-     * The equality cases are as follows:
-     *   - If either value is NaN, never equal.
-     *   - If the values are equal, equal.
-     *   - If the values are both signed zeros, equal.
-     */
-    return (!npy_half_isnan(h1) && !npy_half_isnan(h2)) &&
-           (h1 == h2 || ((h1 | h2) & 0x7fff) == 0);
-}
-
-int npy_half_ne(npy_half h1, npy_half h2)
-{
-    return !npy_half_eq(h1, h2);
-}
-
-int npy_half_lt_nonan(npy_half h1, npy_half h2)
-{
-    if (h1&0x8000u) {
-        if (h2&0x8000u) {
-            return (h1&0x7fffu) > (h2&0x7fffu);
-        } else {
-            /* Signed zeros are equal, have to check for it */
-            return (h1 != 0x8000u) || (h2 != 0x0000u);
-        }
-    } else {
-        if (h2&0x8000u) {
-            return 0;
-        } else {
-            return (h1&0x7fffu) < (h2&0x7fffu);
-        }
-    }
-}
-
-int npy_half_lt(npy_half h1, npy_half h2)
-{
-    return (!npy_half_isnan(h1) && !npy_half_isnan(h2)) && npy_half_lt_nonan(h1, h2);
-}
-
-int npy_half_gt(npy_half h1, npy_half h2)
-{
-    return npy_half_lt(h2, h1);
-}
-
-int npy_half_le_nonan(npy_half h1, npy_half h2)
-{
-    if (h1&0x8000u) {
-        if (h2&0x8000u) {
-            return (h1&0x7fffu) >= (h2&0x7fffu);
-        } else {
-            return 1;
-        }
-    } else {
-        if (h2&0x8000u) {
-            /* Signed zeros are equal, have to check for it */
-            return (h1 == 0x0000u) && (h2 == 0x8000u);
-        } else {
-            return (h1&0x7fffu) <= (h2&0x7fffu);
-        }
-    }
-}
-
-int npy_half_le(npy_half h1, npy_half h2)
-{
-    return (!npy_half_isnan(h1) && !npy_half_isnan(h2)) && npy_half_le_nonan(h1, h2);
-}
-
-int npy_half_ge(npy_half h1, npy_half h2)
-{
-    return npy_half_le(h2, h1);
-}
-
-npy_half npy_half_divmod(npy_half h1, npy_half h2, npy_half *modulus)
-{
-    float fh1 = npy_half_to_float(h1);
-    float fh2 = npy_half_to_float(h2);
-    float div, mod;
-
-    div = npy_divmodf(fh1, fh2, &mod);
-    *modulus = npy_float_to_half(mod);
-    return npy_float_to_half(div);
-}
-
-
-
-/*
- ********************************************************************
- *                     BIT-LEVEL CONVERSIONS                        *
- ********************************************************************
- */
-
-npy_uint16 npy_floatbits_to_halfbits(npy_uint32 f)
-{
-    npy_uint32 f_exp, f_sig;
-    npy_uint16 h_sgn, h_exp, h_sig;
-
-    h_sgn = (npy_uint16) ((f&0x80000000u) >> 16);
-    f_exp = (f&0x7f800000u);
-
-    /* Exponent overflow/NaN converts to signed inf/NaN */
-    if (f_exp >= 0x47800000u) {
-        if (f_exp == 0x7f800000u) {
-            /* Inf or NaN */
-            f_sig = (f&0x007fffffu);
-            if (f_sig != 0) {
-                /* NaN - propagate the flag in the significand... */
-                npy_uint16 ret = (npy_uint16) (0x7c00u + (f_sig >> 13));
-                /* ...but make sure it stays a NaN */
-                if (ret == 0x7c00u) {
-                    ret++;
-                }
-                return h_sgn + ret;
-            } else {
-                /* signed inf */
-                return (npy_uint16) (h_sgn + 0x7c00u);
-            }
-        } else {
-            /* overflow to signed inf */
-#if NPY_HALF_GENERATE_OVERFLOW
-            npy_set_floatstatus_overflow();
-#endif
-            return (npy_uint16) (h_sgn + 0x7c00u);
-        }
-    }
-
-    /* Exponent underflow converts to a subnormal half or signed zero */
-    if (f_exp <= 0x38000000u) {
-        /*
-         * Signed zeros, subnormal floats, and floats with small
-         * exponents all convert to signed zero half-floats.
-         */
-        if (f_exp < 0x33000000u) {
-#if NPY_HALF_GENERATE_UNDERFLOW
-            /* If f != 0, it underflowed to 0 */
-            if ((f&0x7fffffff) != 0) {
-                npy_set_floatstatus_underflow();
-            }
-#endif
-            return h_sgn;
-        }
-        /* Make the subnormal significand */
-        f_exp >>= 23;
-        f_sig = (0x00800000u + (f&0x007fffffu));
-#if NPY_HALF_GENERATE_UNDERFLOW
-        /* If it's not exactly represented, it underflowed */
-        if ((f_sig&(((npy_uint32)1 << (126 - f_exp)) - 1)) != 0) {
-            npy_set_floatstatus_underflow();
-        }
-#endif
-        /*
-         * Usually the significand is shifted by 13. For subnormals an
-         * additional shift needs to occur. This shift is one for the largest
-         * exponent giving a subnormal `f_exp = 0x38000000 >> 23 = 112`, which
-         * offsets the new first bit. At most the shift can be 1+10 bits.
-         */
-        f_sig >>= (113 - f_exp);
-        /* Handle rounding by adding 1 to the bit beyond half precision */
-#if NPY_HALF_ROUND_TIES_TO_EVEN
-        /*
-         * If the last bit in the half significand is 0 (already even), and
-         * the remaining bit pattern is 1000...0, then we do not add one
-         * to the bit after the half significand. However, the (113 - f_exp)
-         * shift can lose up to 11 bits, so the || checks them in the original.
-         * In all other cases, we can just add one.
-         */
-        if (((f_sig&0x00003fffu) != 0x00001000u) || (f&0x000007ffu)) {
-            f_sig += 0x00001000u;
-        }
-#else
-        f_sig += 0x00001000u;
-#endif
-        h_sig = (npy_uint16) (f_sig >> 13);
-        /*
-         * If the rounding causes a bit to spill into h_exp, it will
-         * increment h_exp from zero to one and h_sig will be zero.
-         * This is the correct result.
-         */
-        return (npy_uint16) (h_sgn + h_sig);
-    }
-
-    /* Regular case with no overflow or underflow */
-    h_exp = (npy_uint16) ((f_exp - 0x38000000u) >> 13);
-    /* Handle rounding by adding 1 to the bit beyond half precision */
-    f_sig = (f&0x007fffffu);
-#if NPY_HALF_ROUND_TIES_TO_EVEN
-    /*
-     * If the last bit in the half significand is 0 (already even), and
-     * the remaining bit pattern is 1000...0, then we do not add one
-     * to the bit after the half significand. In all other cases, we do.
-     */
-    if ((f_sig&0x00003fffu) != 0x00001000u) {
-        f_sig += 0x00001000u;
-    }
-#else
-    f_sig += 0x00001000u;
-#endif
-    h_sig = (npy_uint16) (f_sig >> 13);
-    /*
-     * If the rounding causes a bit to spill into h_exp, it will
-     * increment h_exp by one and h_sig will be zero. This is the
-     * correct result. h_exp may increment to 15, at greatest, in
-     * which case the result overflows to a signed inf.
- */ -#if NPY_HALF_GENERATE_OVERFLOW - h_sig += h_exp; - if (h_sig == 0x7c00u) { - npy_set_floatstatus_overflow(); - } - return h_sgn + h_sig; -#else - return h_sgn + h_exp + h_sig; -#endif -} - -npy_uint16 npy_doublebits_to_halfbits(npy_uint64 d) -{ - npy_uint64 d_exp, d_sig; - npy_uint16 h_sgn, h_exp, h_sig; - - h_sgn = (d&0x8000000000000000ULL) >> 48; - d_exp = (d&0x7ff0000000000000ULL); - - /* Exponent overflow/NaN converts to signed inf/NaN */ - if (d_exp >= 0x40f0000000000000ULL) { - if (d_exp == 0x7ff0000000000000ULL) { - /* Inf or NaN */ - d_sig = (d&0x000fffffffffffffULL); - if (d_sig != 0) { - /* NaN - propagate the flag in the significand... */ - npy_uint16 ret = (npy_uint16) (0x7c00u + (d_sig >> 42)); - /* ...but make sure it stays a NaN */ - if (ret == 0x7c00u) { - ret++; - } - return h_sgn + ret; - } else { - /* signed inf */ - return h_sgn + 0x7c00u; - } - } else { - /* overflow to signed inf */ -#if NPY_HALF_GENERATE_OVERFLOW - npy_set_floatstatus_overflow(); -#endif - return h_sgn + 0x7c00u; - } - } - - /* Exponent underflow converts to subnormal half or signed zero */ - if (d_exp <= 0x3f00000000000000ULL) { - /* - * Signed zeros, subnormal floats, and floats with small - * exponents all convert to signed zero half-floats. - */ - if (d_exp < 0x3e60000000000000ULL) { -#if NPY_HALF_GENERATE_UNDERFLOW - /* If d != 0, it underflowed to 0 */ - if ((d&0x7fffffffffffffffULL) != 0) { - npy_set_floatstatus_underflow(); - } -#endif - return h_sgn; - } - /* Make the subnormal significand */ - d_exp >>= 52; - d_sig = (0x0010000000000000ULL + (d&0x000fffffffffffffULL)); -#if NPY_HALF_GENERATE_UNDERFLOW - /* If it's not exactly represented, it underflowed */ - if ((d_sig&(((npy_uint64)1 << (1051 - d_exp)) - 1)) != 0) { - npy_set_floatstatus_underflow(); - } -#endif - /* - * Unlike floats, doubles have enough room to shift left to align - * the subnormal significand leading to no loss of the last bits. - * The smallest possible exponent giving a subnormal is: - * `d_exp = 0x3e60000000000000 >> 52 = 998`. All larger subnormals are - * shifted with respect to it. This adds a shift of 10+1 bits the final - * right shift when comparing it to the one in the normal branch. - */ - assert(d_exp - 998 >= 0); - d_sig <<= (d_exp - 998); - /* Handle rounding by adding 1 to the bit beyond half precision */ -#if NPY_HALF_ROUND_TIES_TO_EVEN - /* - * If the last bit in the half significand is 0 (already even), and - * the remaining bit pattern is 1000...0, then we do not add one - * to the bit after the half significand. In all other cases, we do. - */ - if ((d_sig&0x003fffffffffffffULL) != 0x0010000000000000ULL) { - d_sig += 0x0010000000000000ULL; - } -#else - d_sig += 0x0010000000000000ULL; -#endif - h_sig = (npy_uint16) (d_sig >> 53); - /* - * If the rounding causes a bit to spill into h_exp, it will - * increment h_exp from zero to one and h_sig will be zero. - * This is the correct result. - */ - return h_sgn + h_sig; - } - - /* Regular case with no overflow or underflow */ - h_exp = (npy_uint16) ((d_exp - 0x3f00000000000000ULL) >> 42); - /* Handle rounding by adding 1 to the bit beyond half precision */ - d_sig = (d&0x000fffffffffffffULL); -#if NPY_HALF_ROUND_TIES_TO_EVEN - /* - * If the last bit in the half significand is 0 (already even), and - * the remaining bit pattern is 1000...0, then we do not add one - * to the bit after the half significand. In all other cases, we do. 
- */ - if ((d_sig&0x000007ffffffffffULL) != 0x0000020000000000ULL) { - d_sig += 0x0000020000000000ULL; - } -#else - d_sig += 0x0000020000000000ULL; -#endif - h_sig = (npy_uint16) (d_sig >> 42); - - /* - * If the rounding causes a bit to spill into h_exp, it will - * increment h_exp by one and h_sig will be zero. This is the - * correct result. h_exp may increment to 15, at greatest, in - * which case the result overflows to a signed inf. - */ -#if NPY_HALF_GENERATE_OVERFLOW - h_sig += h_exp; - if (h_sig == 0x7c00u) { - npy_set_floatstatus_overflow(); - } - return h_sgn + h_sig; -#else - return h_sgn + h_exp + h_sig; -#endif -} - -npy_uint32 npy_halfbits_to_floatbits(npy_uint16 h) -{ - npy_uint16 h_exp, h_sig; - npy_uint32 f_sgn, f_exp, f_sig; - - h_exp = (h&0x7c00u); - f_sgn = ((npy_uint32)h&0x8000u) << 16; - switch (h_exp) { - case 0x0000u: /* 0 or subnormal */ - h_sig = (h&0x03ffu); - /* Signed zero */ - if (h_sig == 0) { - return f_sgn; - } - /* Subnormal */ - h_sig <<= 1; - while ((h_sig&0x0400u) == 0) { - h_sig <<= 1; - h_exp++; - } - f_exp = ((npy_uint32)(127 - 15 - h_exp)) << 23; - f_sig = ((npy_uint32)(h_sig&0x03ffu)) << 13; - return f_sgn + f_exp + f_sig; - case 0x7c00u: /* inf or NaN */ - /* All-ones exponent and a copy of the significand */ - return f_sgn + 0x7f800000u + (((npy_uint32)(h&0x03ffu)) << 13); - default: /* normalized */ - /* Just need to adjust the exponent and shift */ - return f_sgn + (((npy_uint32)(h&0x7fffu) + 0x1c000u) << 13); - } -} - -npy_uint64 npy_halfbits_to_doublebits(npy_uint16 h) -{ - npy_uint16 h_exp, h_sig; - npy_uint64 d_sgn, d_exp, d_sig; - - h_exp = (h&0x7c00u); - d_sgn = ((npy_uint64)h&0x8000u) << 48; - switch (h_exp) { - case 0x0000u: /* 0 or subnormal */ - h_sig = (h&0x03ffu); - /* Signed zero */ - if (h_sig == 0) { - return d_sgn; - } - /* Subnormal */ - h_sig <<= 1; - while ((h_sig&0x0400u) == 0) { - h_sig <<= 1; - h_exp++; - } - d_exp = ((npy_uint64)(1023 - 15 - h_exp)) << 52; - d_sig = ((npy_uint64)(h_sig&0x03ffu)) << 42; - return d_sgn + d_exp + d_sig; - case 0x7c00u: /* inf or NaN */ - /* All-ones exponent and a copy of the significand */ - return d_sgn + 0x7ff0000000000000ULL + - (((npy_uint64)(h&0x03ffu)) << 42); - default: /* normalized */ - /* Just need to adjust the exponent and shift */ - return d_sgn + (((npy_uint64)(h&0x7fffu) + 0xfc000u) << 42); - } -} diff --git a/numpy/core/src/npymath/halffloat.cpp b/numpy/core/src/npymath/halffloat.cpp new file mode 100644 index 000000000..aa582c1b9 --- /dev/null +++ b/numpy/core/src/npymath/halffloat.cpp @@ -0,0 +1,238 @@ +#define NPY_NO_DEPRECATED_API NPY_API_VERSION + +/* + * If these are 1, the conversions try to trigger underflow, + * overflow, and invalid exceptions in the FP system when needed. 
+ */ +#define NPY_HALF_GENERATE_OVERFLOW 1 +#define NPY_HALF_GENERATE_INVALID 1 + +#include "numpy/halffloat.h" + +#include "common.hpp" +/* + ******************************************************************** + * HALF-PRECISION ROUTINES * + ******************************************************************** + */ +using namespace np; + +float npy_half_to_float(npy_half h) +{ + return static_cast<float>(Half::FromBits(h)); +} + +double npy_half_to_double(npy_half h) +{ + return static_cast<double>(Half::FromBits(h)); +} + +npy_half npy_float_to_half(float f) +{ + return Half(f).Bits(); +} + +npy_half npy_double_to_half(double d) +{ + return Half(d).Bits(); +} + +int npy_half_iszero(npy_half h) +{ + return (h&0x7fff) == 0; +} + +int npy_half_isnan(npy_half h) +{ + return Half::FromBits(h).IsNaN(); +} + +int npy_half_isinf(npy_half h) +{ + return ((h&0x7fffu) == 0x7c00u); +} + +int npy_half_isfinite(npy_half h) +{ + return ((h&0x7c00u) != 0x7c00u); +} + +int npy_half_signbit(npy_half h) +{ + return (h&0x8000u) != 0; +} + +npy_half npy_half_spacing(npy_half h) +{ + npy_half ret; + npy_uint16 h_exp = h&0x7c00u; + npy_uint16 h_sig = h&0x03ffu; + if (h_exp == 0x7c00u) { +#if NPY_HALF_GENERATE_INVALID + npy_set_floatstatus_invalid(); +#endif + ret = NPY_HALF_NAN; + } else if (h == 0x7bffu) { +#if NPY_HALF_GENERATE_OVERFLOW + npy_set_floatstatus_overflow(); +#endif + ret = NPY_HALF_PINF; + } else if ((h&0x8000u) && h_sig == 0) { /* Negative boundary case */ + if (h_exp > 0x2c00u) { /* If result is normalized */ + ret = h_exp - 0x2c00u; + } else if(h_exp > 0x0400u) { /* The result is a subnormal, but not the smallest */ + ret = 1 << ((h_exp >> 10) - 2); + } else { + ret = 0x0001u; /* Smallest subnormal half */ + } + } else if (h_exp > 0x2800u) { /* If result is still normalized */ + ret = h_exp - 0x2800u; + } else if (h_exp > 0x0400u) { /* The result is a subnormal, but not the smallest */ + ret = 1 << ((h_exp >> 10) - 1); + } else { + ret = 0x0001u; + } + + return ret; +} + +npy_half npy_half_copysign(npy_half x, npy_half y) +{ + return (x&0x7fffu) | (y&0x8000u); +} + +npy_half npy_half_nextafter(npy_half x, npy_half y) +{ + npy_half ret; + + if (npy_half_isnan(x) || npy_half_isnan(y)) { + ret = NPY_HALF_NAN; + } else if (npy_half_eq_nonan(x, y)) { + ret = x; + } else if (npy_half_iszero(x)) { + ret = (y&0x8000u) + 1; /* Smallest subnormal half */ + } else if (!(x&0x8000u)) { /* x > 0 */ + if ((npy_int16)x > (npy_int16)y) { /* x > y */ + ret = x-1; + } else { + ret = x+1; + } + } else { + if (!(y&0x8000u) || (x&0x7fffu) > (y&0x7fffu)) { /* x < y */ + ret = x-1; + } else { + ret = x+1; + } + } +#if NPY_HALF_GENERATE_OVERFLOW + if (npy_half_isinf(ret) && npy_half_isfinite(x)) { + npy_set_floatstatus_overflow(); + } +#endif + + return ret; +} + +int npy_half_eq_nonan(npy_half h1, npy_half h2) +{ + return Half::FromBits(h1).Equal(Half::FromBits(h2)); +} + +int npy_half_eq(npy_half h1, npy_half h2) +{ + return Half::FromBits(h1) == Half::FromBits(h2); +} + +int npy_half_ne(npy_half h1, npy_half h2) +{ + return Half::FromBits(h1) != Half::FromBits(h2); +} + +int npy_half_lt_nonan(npy_half h1, npy_half h2) +{ + return Half::FromBits(h1).Less(Half::FromBits(h2)); +} + +int npy_half_lt(npy_half h1, npy_half h2) +{ + return Half::FromBits(h1) < Half::FromBits(h2); +} + +int npy_half_gt(npy_half h1, npy_half h2) +{ + return npy_half_lt(h2, h1); +} + +int npy_half_le_nonan(npy_half h1, npy_half h2) +{ + return Half::FromBits(h1).LessEqual(Half::FromBits(h2)); +} + +int npy_half_le(npy_half h1, npy_half h2) 
+{ + return Half::FromBits(h1) <= Half::FromBits(h2); +} + +int npy_half_ge(npy_half h1, npy_half h2) +{ + return npy_half_le(h2, h1); +} + +npy_half npy_half_divmod(npy_half h1, npy_half h2, npy_half *modulus) +{ + float fh1 = npy_half_to_float(h1); + float fh2 = npy_half_to_float(h2); + float div, mod; + + div = npy_divmodf(fh1, fh2, &mod); + *modulus = npy_float_to_half(mod); + return npy_float_to_half(div); +} + + +/* + ******************************************************************** + * BIT-LEVEL CONVERSIONS * + ******************************************************************** + */ + +npy_uint16 npy_floatbits_to_halfbits(npy_uint32 f) +{ + if constexpr (Half::kNativeConversion<float>) { + return BitCast<uint16_t>(Half(BitCast<float>(f))); + } + else { + return half_private::FromFloatBits(f); + } +} + +npy_uint16 npy_doublebits_to_halfbits(npy_uint64 d) +{ + if constexpr (Half::kNativeConversion<double>) { + return BitCast<uint16_t>(Half(BitCast<double>(d))); + } + else { + return half_private::FromDoubleBits(d); + } +} + +npy_uint32 npy_halfbits_to_floatbits(npy_uint16 h) +{ + if constexpr (Half::kNativeConversion<float>) { + return BitCast<uint32_t>(static_cast<float>(Half::FromBits(h))); + } + else { + return half_private::ToFloatBits(h); + } +} + +npy_uint64 npy_halfbits_to_doublebits(npy_uint16 h) +{ + if constexpr (Half::kNativeConversion<double>) { + return BitCast<uint64_t>(static_cast<double>(Half::FromBits(h))); + } + else { + return half_private::ToDoubleBits(h); + } +} + diff --git a/numpy/core/src/umath/loops.h.src b/numpy/core/src/umath/loops.h.src index e36067822..ab54c1966 100644 --- a/numpy/core/src/umath/loops.h.src +++ b/numpy/core/src/umath/loops.h.src @@ -336,15 +336,6 @@ NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void HALF_@func@, /**end repeat**/ /**begin repeat - * #func = sin, cos# - */ - -NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void DOUBLE_@func@, - (char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))) - -/**end repeat**/ - -/**begin repeat * #TYPE = FLOAT, DOUBLE# */ /**begin repeat1 @@ -360,12 +351,17 @@ NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_@func@, #ifndef NPY_DISABLE_OPTIMIZATION #include "loops_trigonometric.dispatch.h" #endif + /**begin repeat + * #TYPE = FLOAT, DOUBLE# + */ +/**begin repeat1 * #func = sin, cos# */ -NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void FLOAT_@func@, ( +NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_@func@, ( char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func) )) +/**end repeat1**/ /**end repeat**/ #ifndef NPY_DISABLE_OPTIMIZATION diff --git a/numpy/core/src/umath/loops_trigonometric.dispatch.c.src b/numpy/core/src/umath/loops_trigonometric.dispatch.c.src index 9f9978e66..28af04f6b 100644 --- a/numpy/core/src/umath/loops_trigonometric.dispatch.c.src +++ b/numpy/core/src/umath/loops_trigonometric.dispatch.c.src @@ -9,12 +9,18 @@ #include "simd/simd.h" #include "loops_utils.h" #include "loops.h" +#include "fast_loop_macros.h" /* * TODO: * - use vectorized version of Payne-Hanek style reduction for large elements or * when there's no native FUSED support instead of fallback to libc */ -#if NPY_SIMD_F32 && NPY_SIMD_FMA3 // native support +#if NPY_SIMD_FMA3 // native support +/**begin repeat + * #check = F64, F32# + * #sfx = f64, f32# + */ +#if NPY_SIMD_@check@ /* * Vectorized Cody-Waite range reduction technique * Performs the reduction step x* = x - y*C in three steps: @@ -23,14 +29,189 @@ * 3) x* = x - y*c3 * c1, c2 are exact floating points, c3 
= C - c1 - c2 simulates higher precision */ -NPY_FINLINE npyv_f32 -simd_range_reduction_f32(npyv_f32 x, npyv_f32 y, npyv_f32 c1, npyv_f32 c2, npyv_f32 c3) +NPY_FINLINE npyv_@sfx@ +simd_range_reduction_@sfx@(npyv_@sfx@ x, npyv_@sfx@ y, npyv_@sfx@ c1, npyv_@sfx@ c2, npyv_@sfx@ c3) { - npyv_f32 reduced_x = npyv_muladd_f32(y, c1, x); - reduced_x = npyv_muladd_f32(y, c2, reduced_x); - reduced_x = npyv_muladd_f32(y, c3, reduced_x); + npyv_@sfx@ reduced_x = npyv_muladd_@sfx@(y, c1, x); + reduced_x = npyv_muladd_@sfx@(y, c2, reduced_x); + reduced_x = npyv_muladd_@sfx@(y, c3, reduced_x); return reduced_x; } +#endif +/**end repeat**/ + +#if NPY_SIMD_F64 +/**begin repeat + * #op = cos, sin# + */ +#if defined(NPY_OS_WIN32) || defined(NPY_OS_CYGWIN) +NPY_FINLINE npyv_f64 +#else +NPY_NOINLINE npyv_f64 +#endif +simd_@op@_scalar_f64(npyv_f64 out, npy_uint64 cmp_bits) +{ + // MSVC doesn't compile with direct vector access, so we copy it here + // as we have no npyv_get_lane/npyv_set_lane intrinsics + npy_double NPY_DECL_ALIGNED(NPY_SIMD_WIDTH) out_copy[npyv_nlanes_f64]; + npyv_storea_f64(out_copy, out); + + for (unsigned i = 0; i < npyv_nlanes_f64; ++i) { + if (cmp_bits & (1 << i)) { + out_copy[i] = npy_@op@(out_copy[i]); + } + } + + return npyv_loada_f64(out_copy); +} +/**end repeat**/ + +/* + * Approximate sine algorithm for x \in [-pi/2, pi/2] + * worst-case error is 3.5 ulp. + * abs error: 0x1.be222a58p-53 in [-pi/2, pi/2]. + */ +NPY_FINLINE npyv_f64 +simd_approx_sine_poly_f64(npyv_f64 r) +{ + const npyv_f64 poly1 = npyv_setall_f64(-0x1.9f4a9c8b21dc9p-41); + const npyv_f64 poly2 = npyv_setall_f64(0x1.60e88a10163f2p-33); + const npyv_f64 poly3 = npyv_setall_f64(-0x1.ae6361b7254e7p-26); + const npyv_f64 poly4 = npyv_setall_f64(0x1.71de382e8d62bp-19); + const npyv_f64 poly5 = npyv_setall_f64(-0x1.a01a019aeb4ffp-13); + const npyv_f64 poly6 = npyv_setall_f64(0x1.111111110b25ep-7); + const npyv_f64 poly7 = npyv_setall_f64(-0x1.55555555554c3p-3); + + npyv_f64 r2 = npyv_mul_f64(r, r); + npyv_f64 y = npyv_muladd_f64(poly1, r2, poly2); + y = npyv_muladd_f64(y, r2, poly3); + y = npyv_muladd_f64(y, r2, poly4); + y = npyv_muladd_f64(y, r2, poly5); + y = npyv_muladd_f64(y, r2, poly6); + y = npyv_muladd_f64(y, r2, poly7); + y = npyv_muladd_f64(npyv_mul_f64(y, r2), r, r); + + return y; +} + +/* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */ +NPY_FINLINE npyv_f64 +simd_range_reduction_pi2(npyv_f64 r, npyv_f64 n) { + const npyv_f64 pi1 = npyv_setall_f64(-0x1.921fb54442d18p+1); + const npyv_f64 pi2 = npyv_setall_f64(-0x1.1a62633145c06p-53); + const npyv_f64 pi3 = npyv_setall_f64(-0x1.c1cd129024e09p-106); + + return simd_range_reduction_f64(r, n, pi1, pi2, pi3); +} + +NPY_FINLINE npyv_b64 simd_cos_range_check_f64(npyv_u64 ir) { + const npyv_u64 tiny_bound = npyv_setall_u64(0x202); /* top12 (asuint64 (0x1p-509)). */ + const npyv_u64 simd_thresh = npyv_setall_u64(0x214); /* top12 (asuint64 (RangeVal)) - SIMD_TINY_BOUND. */ + + return npyv_cmpge_u64(npyv_sub_u64(npyv_shri_u64(ir, 52), tiny_bound), simd_thresh); +} + +NPY_FINLINE npyv_b64 simd_sin_range_check_f64(npyv_u64 ir) { + const npyv_f64 range_val = npyv_setall_f64(0x1p23); + + return npyv_cmpge_u64(ir, npyv_reinterpret_u64_f64(range_val)); +} + +NPY_FINLINE npyv_f64 +simd_cos_poly_f64(npyv_f64 r, npyv_u64 ir, npyv_u64 sign) +{ + const npyv_f64 inv_pi = npyv_setall_f64(0x1.45f306dc9c883p-2); + const npyv_f64 half_pi = npyv_setall_f64(0x1.921fb54442d18p+0); + const npyv_f64 shift = npyv_setall_f64(0x1.8p52); + + /* n = rint((|x|+pi/2)/pi) - 0.5. 
*/ + npyv_f64 n = npyv_muladd_f64(inv_pi, npyv_add_f64(r, half_pi), shift); + npyv_u64 odd = npyv_shli_u64(npyv_reinterpret_u64_f64(n), 63); + n = npyv_sub_f64(n, shift); + n = npyv_sub_f64(n, npyv_setall_f64(0.5)); + + /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */ + r = simd_range_reduction_pi2(r, n); + + /* sin(r) poly approx. */ + npyv_f64 y = simd_approx_sine_poly_f64(r); + + /* sign. */ + return npyv_reinterpret_f64_u64(npyv_xor_u64(npyv_reinterpret_u64_f64(y), odd)); +} + +NPY_FINLINE npyv_f64 +simd_sin_poly_f64(npyv_f64 r, npyv_u64 ir, npyv_u64 sign) +{ + const npyv_f64 inv_pi = npyv_setall_f64(0x1.45f306dc9c883p-2); + const npyv_f64 shift = npyv_setall_f64(0x1.8p52); + + /* n = rint(|x|/pi). */ + npyv_f64 n = npyv_muladd_f64(inv_pi, r, shift); + npyv_u64 odd = npyv_shli_u64(npyv_reinterpret_u64_f64(n), 63); + n = npyv_sub_f64(n, shift); + + /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */ + r = simd_range_reduction_pi2(r, n); + + /* sin(r) poly approx. */ + npyv_f64 y = simd_approx_sine_poly_f64(r); + + /* sign. */ + return npyv_reinterpret_f64_u64(npyv_xor_u64(npyv_xor_u64(npyv_reinterpret_u64_f64(y), sign), odd)); +} + +/**begin repeat + * #op = cos, sin# + */ +NPY_FINLINE void +simd_@op@_f64(const double *src, npy_intp ssrc, double *dst, npy_intp sdst, npy_intp len) +{ + const npyv_u64 abs_mask = npyv_setall_u64(0x7fffffffffffffff); + const int vstep = npyv_nlanes_f64; + + npyv_f64 out = npyv_zero_f64(); + npyv_f64 x_in; + + for (; len > 0; len -= vstep, src += ssrc*vstep, dst += sdst*vstep) { + if (ssrc == 1) { + x_in = npyv_load_tillz_f64(src, len); + } else { + x_in = npyv_loadn_tillz_f64(src, ssrc, len); + } + + npyv_u64 ir = npyv_and_u64(npyv_reinterpret_u64_f64(x_in), abs_mask); + npyv_f64 r = npyv_reinterpret_f64_u64(ir); + npyv_u64 sign = npyv_and_u64(npyv_reinterpret_u64_f64(x_in), npyv_not_u64(abs_mask)); + + npyv_b64 cmp = simd_@op@_range_check_f64(ir); + /* If fenv exceptions are to be triggered correctly, set any special lanes + to 1 (which is neutral w.r.t. fenv). These lanes will be fixed by + scalar loop later. 
+    */
+        r = npyv_select_f64(cmp, npyv_setall_f64(1.0), r);
+
+        // Some in range, at least one calculation is useful
+        if (!npyv_all_b64(cmp)) {
+            out = simd_@op@_poly_f64(r, ir, sign);
+        }
+
+        if (npyv_any_b64(cmp)) {
+            out = npyv_select_f64(cmp, x_in, out);
+            out = simd_@op@_scalar_f64(out, npyv_tobits_b64(cmp));
+        }
+
+        if (sdst == 1) {
+            npyv_store_till_f64(dst, len, out);
+        } else {
+            npyv_storen_till_f64(dst, sdst, len, out);
+        }
+    }
+    npyv_cleanup();
+}
+/**end repeat**/
+#endif // NPY_SIMD_F64
+
+#if NPY_SIMD_F32
 /*
  * Approximate cosine algorithm for x \in [-PI/4, PI/4]
  * Maximum ULP across all 32-bit floats = 0.875
@@ -198,24 +379,58 @@ simd_sincos_f32(const float *src, npy_intp ssrc, float *dst, npy_intp sdst,
     }
     npyv_cleanup();
 }
-#endif // NPY_SIMD_FMA3
+#endif // NPY_SIMD_F32
+#endif // NPY_SIMD_FMA3
 
 /**begin repeat
  * #func = cos, sin#
- * #enum = SIMD_COMPUTE_COS, SIMD_COMPUTE_SIN#
+ */
+NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(DOUBLE_@func@)
+(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
+{
+#if NPY_SIMD_F64 && NPY_SIMD_FMA3
+    const double *src = (double*)args[0];
+    double *dst = (double*)args[1];
+    const int lsize = sizeof(src[0]);
+    const npy_intp ssrc = steps[0] / lsize;
+    const npy_intp sdst = steps[1] / lsize;
+    npy_intp len = dimensions[0];
+    assert(len <= 1 || (steps[0] % lsize == 0 && steps[1] % lsize == 0));
+
+    if (is_mem_overlap(src, steps[0], dst, steps[1], len) ||
+        !npyv_loadable_stride_f64(ssrc) || !npyv_storable_stride_f64(sdst)
+    ) {
+        for (; len > 0; --len, src += ssrc, dst += sdst) {
+            simd_@func@_f64(src, 1, dst, 1, 1);
+        }
+    } else {
+        simd_@func@_f64(src, ssrc, dst, sdst, len);
+    }
+#else
+    UNARY_LOOP {
+        const npy_double in1 = *(npy_double *)ip1;
+        *(npy_double *)op1 = npy_@func@(in1);
+    }
+#endif
+}
+/**end repeat**/
+
+/**begin repeat
+ * #func = sin, cos#
+ * #enum = SIMD_COMPUTE_SIN, SIMD_COMPUTE_COS#
  */
 NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(FLOAT_@func@)
 (char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
 {
-    const float *src = (float*)args[0];
-    float *dst = (float*)args[1];
+#if NPY_SIMD_F32 && NPY_SIMD_FMA3
+    const npy_float *src = (npy_float*)args[0];
+    npy_float *dst = (npy_float*)args[1];
     const int lsize = sizeof(src[0]);
     const npy_intp ssrc = steps[0] / lsize;
     const npy_intp sdst = steps[1] / lsize;
     npy_intp len = dimensions[0];
     assert(len <= 1 || (steps[0] % lsize == 0 && steps[1] % lsize == 0));
-#if NPY_SIMD_F32 && NPY_SIMD_FMA3
     if (is_mem_overlap(src, steps[0], dst, steps[1], len) ||
         !npyv_loadable_stride_f32(ssrc) || !npyv_storable_stride_f32(sdst)
     ) {
@@ -226,9 +441,9 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(FLOAT_@func@)
         simd_sincos_f32(src, ssrc, dst, sdst, len, @enum@);
     }
 #else
-    for (; len > 0; --len, src += ssrc, dst += sdst) {
-        const float src0 = *src;
-        *dst = npy_@func@f(src0);
+    UNARY_LOOP {
+        const npy_float in1 = *(npy_float *)ip1;
+        *(npy_float *)op1 = npy_@func@f(in1);
     }
 #endif
 }
diff --git a/numpy/core/src/umath/loops_umath_fp.dispatch.c.src b/numpy/core/src/umath/loops_umath_fp.dispatch.c.src
index d6703bfb9..89999e879 100644
--- a/numpy/core/src/umath/loops_umath_fp.dispatch.c.src
+++ b/numpy/core/src/umath/loops_umath_fp.dispatch.c.src
@@ -60,32 +60,6 @@ simd_@func@_@sfx@(const npyv_lanetype_@sfx@ *src, npy_intp ssrc,
 /**end repeat**/
 
 /**begin repeat
- * #func = sin, cos#
- */
-static void
-simd_@func@_f64(const double *src, npy_intp ssrc,
-                double *dst, npy_intp sdst, npy_intp len)
-{
-    const int vstep = npyv_nlanes_f64;
-    for (; len > 0; len -= vstep, src += ssrc*vstep, dst += sdst*vstep) {
-        npyv_f64 x;
-        if (ssrc == 1) {
-            x = npyv_load_tillz_f64(src, len);
-        } else {
-            x = npyv_loadn_tillz_f64(src, ssrc, len);
-        }
-        npyv_f64 out = __svml_@func@8(x);
-        if (sdst == 1) {
-            npyv_store_till_f64(dst, len, out);
-        } else {
-            npyv_storen_till_f64(dst, sdst, len, out);
-        }
-    }
-    npyv_cleanup();
-}
-/**end repeat**/
-
-/**begin repeat
  * #sfx = f32, f64#
  * #func_suffix = f16, 8#
  */
@@ -277,31 +251,3 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_@func@)
 }
 /**end repeat1**/
 /**end repeat**/
-
-/**begin repeat
- * #func = sin, cos#
- */
-NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(DOUBLE_@func@)
-(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
-{
-#if NPY_SIMD && defined(NPY_HAVE_AVX512_SKX) && defined(NPY_CAN_LINK_SVML)
-    const double *src = (double*)args[0];
-    double *dst = (double*)args[1];
-    const int lsize = sizeof(src[0]);
-    const npy_intp ssrc = steps[0] / lsize;
-    const npy_intp sdst = steps[1] / lsize;
-    const npy_intp len = dimensions[0];
-    assert(len <= 1 || (steps[0] % lsize == 0 && steps[1] % lsize == 0));
-    if (!is_mem_overlap(src, steps[0], dst, steps[1], len) &&
-        npyv_loadable_stride_f64(ssrc) &&
-        npyv_storable_stride_f64(sdst)) {
-        simd_@func@_f64(src, ssrc, dst, sdst, len);
-        return;
-    }
-#endif
-    UNARY_LOOP {
-        const npy_double in1 = *(npy_double *)ip1;
-        *(npy_double *)op1 = npy_@func@(in1);
-    }
-}
-/**end repeat**/
diff --git a/numpy/core/tests/test_array_coercion.py b/numpy/core/tests/test_array_coercion.py
index aeddac585..baca076ae 100644
--- a/numpy/core/tests/test_array_coercion.py
+++ b/numpy/core/tests/test_array_coercion.py
@@ -39,9 +39,9 @@ def arraylikes():
     yield subclass
 
     class _SequenceLike():
-        # We are giving a warning that array-like's were also expected to be
-        # sequence-like in `np.array([array_like])`. This can be removed
-        # when the deprecation expired (started NumPy 1.20).
+        # Older NumPy versions sometimes cared whether a protocol array was
+        # also _SequenceLike.  This shouldn't matter, but we keep it for now
+        # for `__array__` only, not for the other protocols.
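A minimal, runnable sketch of the kind of protocol-only array-like these fixtures build (the class name `ProtocolOnly` is hypothetical, used here for illustration only), showing the coercion behaviour the surrounding tests exercise:

    import numpy as np

    class ProtocolOnly:
        # Exposes only __array_interface__; no __len__/__getitem__.
        def __init__(self, a):
            self.a = a  # keep a reference so the interface stays valid
            self.__array_interface__ = a.__array_interface__

    obj = ProtocolOnly(np.arange(3))
    assert (np.array(obj) == np.arange(3)).all()  # unnested: coerced via the protocol
    res = np.array([obj], dtype=object)           # nested with dtype=object:
    assert res[0] is obj                          # the object itself is stored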
def __len__(self): raise TypeError @@ -62,7 +62,7 @@ def arraylikes(): yield param(memoryview, id="memoryview") # Array-interface - class ArrayInterface(_SequenceLike): + class ArrayInterface: def __init__(self, a): self.a = a # need to hold on to keep interface valid self.__array_interface__ = a.__array_interface__ @@ -70,7 +70,7 @@ def arraylikes(): yield param(ArrayInterface, id="__array_interface__") # Array-Struct - class ArrayStruct(_SequenceLike): + class ArrayStruct: def __init__(self, a): self.a = a # need to hold on to keep struct valid self.__array_struct__ = a.__array_struct__ @@ -654,6 +654,14 @@ class TestArrayLikes: res = np.array([obj], dtype=object) assert res[0] is obj + @pytest.mark.parametrize("arraylike", arraylikes()) + @pytest.mark.parametrize("arr", [np.array(0.), np.arange(4)]) + def test_object_assignment_special_case(self, arraylike, arr): + obj = arraylike(arr) + empty = np.arange(1, dtype=object) + empty[:] = [obj] + assert empty[0] is obj + def test_0d_generic_special_case(self): class ArraySubclass(np.ndarray): def __float__(self): diff --git a/numpy/core/tests/test_arrayprint.py b/numpy/core/tests/test_arrayprint.py index 372cb8d41..b92c8ae8c 100644 --- a/numpy/core/tests/test_arrayprint.py +++ b/numpy/core/tests/test_arrayprint.py @@ -353,6 +353,33 @@ class TestArray2String: ' [ 501, 502, 503, ..., 999, 1000, 1001]])' assert_equal(repr(A), reprA) + def test_summarize_structure(self): + A = (np.arange(2002, dtype="<i8").reshape(2, 1001) + .view([('i', "<i8", (1001,))])) + strA = ("[[([ 0, 1, 2, ..., 998, 999, 1000],)]\n" + " [([1001, 1002, 1003, ..., 1999, 2000, 2001],)]]") + assert_equal(str(A), strA) + + reprA = ("array([[([ 0, 1, 2, ..., 998, 999, 1000],)],\n" + " [([1001, 1002, 1003, ..., 1999, 2000, 2001],)]],\n" + " dtype=[('i', '<i8', (1001,))])") + assert_equal(repr(A), reprA) + + B = np.ones(2002, dtype=">i8").view([('i', ">i8", (2, 1001))]) + strB = "[([[1, 1, 1, ..., 1, 1, 1], [1, 1, 1, ..., 1, 1, 1]],)]" + assert_equal(str(B), strB) + + reprB = ( + "array([([[1, 1, 1, ..., 1, 1, 1], [1, 1, 1, ..., 1, 1, 1]],)],\n" + " dtype=[('i', '>i8', (2, 1001))])" + ) + assert_equal(repr(B), reprB) + + C = (np.arange(22, dtype="<i8").reshape(2, 11) + .view([('i1', "<i8"), ('i10', "<i8", (10,))])) + strC = "[[( 0, [ 1, ..., 10])]\n [(11, [12, ..., 21])]]" + assert_equal(np.array2string(C, threshold=1, edgeitems=1), strC) + def test_linewidth(self): a = np.full(6, 1) @@ -817,7 +844,7 @@ class TestPrintOptions: ) def test_dtype_endianness_repr(self, native): ''' - there was an issue where + there was an issue where repr(array([0], dtype='<u2')) and repr(array([0], dtype='>u2')) both returned the same thing: array([0], dtype=uint16) @@ -963,6 +990,16 @@ class TestPrintOptions: [[ 0.]]]])""") ) + def test_edgeitems_structured(self): + np.set_printoptions(edgeitems=1, threshold=1) + A = np.arange(5*2*3, dtype="<i8").view([('i', "<i8", (5, 2, 3))]) + reprA = ( + "array([([[[ 0, ..., 2], [ 3, ..., 5]], ..., " + "[[24, ..., 26], [27, ..., 29]]],)],\n" + " dtype=[('i', '<i8', (5, 2, 3))])" + ) + assert_equal(repr(A), reprA) + def test_bad_args(self): assert_raises(ValueError, np.set_printoptions, threshold=float('nan')) assert_raises(TypeError, np.set_printoptions, threshold='1') diff --git a/numpy/core/tests/test_deprecations.py b/numpy/core/tests/test_deprecations.py index b92a20a12..0449b3257 100644 --- a/numpy/core/tests/test_deprecations.py +++ b/numpy/core/tests/test_deprecations.py @@ -580,92 +580,6 @@ class 
TestDeprecateSubarrayDTypeDuringArrayCoercion(_DeprecationTestCase): self.assert_deprecated(check) -class TestFutureWarningArrayLikeNotIterable(_DeprecationTestCase): - # Deprecated 2020-12-09, NumPy 1.20 - warning_cls = FutureWarning - message = "The input object of type.*but not a sequence" - - @pytest.mark.parametrize("protocol", - ["__array__", "__array_interface__", "__array_struct__"]) - def test_deprecated(self, protocol): - """Test that these objects give a warning since they are not 0-D, - not coerced at the top level `np.array(obj)`, but nested, and do - *not* define the sequence protocol. - - NOTE: Tests for the versions including __len__ and __getitem__ exist - in `test_array_coercion.py` and they can be modified or amended - when this deprecation expired. - """ - blueprint = np.arange(10) - MyArr = type("MyArr", (), {protocol: getattr(blueprint, protocol)}) - self.assert_deprecated(lambda: np.array([MyArr()], dtype=object)) - - @pytest.mark.parametrize("protocol", - ["__array__", "__array_interface__", "__array_struct__"]) - def test_0d_not_deprecated(self, protocol): - # 0-D always worked (albeit it would use __float__ or similar for the - # conversion, which may not happen anymore) - blueprint = np.array(1.) - MyArr = type("MyArr", (), {protocol: getattr(blueprint, protocol)}) - myarr = MyArr() - - self.assert_not_deprecated(lambda: np.array([myarr], dtype=object)) - res = np.array([myarr], dtype=object) - expected = np.empty(1, dtype=object) - expected[0] = myarr - assert_array_equal(res, expected) - - @pytest.mark.parametrize("protocol", - ["__array__", "__array_interface__", "__array_struct__"]) - def test_unnested_not_deprecated(self, protocol): - blueprint = np.arange(10) - MyArr = type("MyArr", (), {protocol: getattr(blueprint, protocol)}) - myarr = MyArr() - - self.assert_not_deprecated(lambda: np.array(myarr)) - res = np.array(myarr) - assert_array_equal(res, blueprint) - - @pytest.mark.parametrize("protocol", - ["__array__", "__array_interface__", "__array_struct__"]) - def test_strange_dtype_handling(self, protocol): - """The old code would actually use the dtype from the array, but - then end up not using the array (for dimension discovery) - """ - blueprint = np.arange(10).astype("f4") - MyArr = type("MyArr", (), {protocol: getattr(blueprint, protocol), - "__float__": lambda _: 0.5}) - myarr = MyArr() - - # Make sure we warn (and capture the FutureWarning) - with pytest.warns(FutureWarning, match=self.message): - res = np.array([[myarr]]) - - assert res.shape == (1, 1) - assert res.dtype == "f4" - assert res[0, 0] == 0.5 - - @pytest.mark.parametrize("protocol", - ["__array__", "__array_interface__", "__array_struct__"]) - def test_assignment_not_deprecated(self, protocol): - # If the result is dtype=object we do not unpack a nested array or - # array-like, if it is nested at exactly the right depth. - # NOTE: We actually do still call __array__, etc. but ignore the result - # in the end. For `dtype=object` we could optimize that away. 
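The dtype=object assignment path described in the NOTE above is what the new `test_object_assignment_special_case` in test_array_coercion.py now covers; a condensed, runnable sketch of it, reusing the removed test's hypothetical `MyArr` construction:

    import numpy as np

    blueprint = np.arange(10).astype("f4")
    MyArr = type("MyArr", (), {
        "__array_interface__": blueprint.__array_interface__,
        "__float__": lambda self: 0.5,
    })
    myarr = MyArr()

    res = np.empty(3, dtype=object)
    res[:] = [myarr, myarr, myarr]  # nested at exactly the right depth
    assert res[0] is myarr          # stored as-is, the elements are not coerced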
- blueprint = np.arange(10).astype("f4") - MyArr = type("MyArr", (), {protocol: getattr(blueprint, protocol), - "__float__": lambda _: 0.5}) - myarr = MyArr() - - res = np.empty(3, dtype=object) - def set(): - res[:] = [myarr, myarr, myarr] - self.assert_not_deprecated(set) - assert res[0] is myarr - assert res[1] is myarr - assert res[2] is myarr - - class TestDeprecatedUnpickleObjectScalar(_DeprecationTestCase): # Deprecated 2020-11-24, NumPy 1.20 """ @@ -822,6 +736,18 @@ class TestLoadtxtParseIntsViaFloat(_DeprecationTestCase): assert isinstance(e.__cause__, DeprecationWarning) +class TestScalarConversion(_DeprecationTestCase): + # 2023-01-02, 1.25.0 + def test_float_conversion(self): + self.assert_deprecated(float, args=(np.array([3.14]),)) + + def test_behaviour(self): + b = np.array([[3.14]]) + c = np.zeros(5) + with pytest.warns(DeprecationWarning): + c[0] = b + + class TestPyIntConversion(_DeprecationTestCase): message = r".*stop allowing conversion of out-of-bound.*" diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index 4a064827d..984047c87 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -3644,9 +3644,13 @@ class TestMethods: msg = 'dtype: {0}'.format(dt) ap = complex(a) assert_equal(ap, a, msg) - bp = complex(b) + + with assert_warns(DeprecationWarning): + bp = complex(b) assert_equal(bp, b, msg) - cp = complex(c) + + with assert_warns(DeprecationWarning): + cp = complex(c) assert_equal(cp, c, msg) def test__complex__should_not_work(self): @@ -3669,7 +3673,8 @@ class TestMethods: assert_raises(TypeError, complex, d) e = np.array(['1+1j'], 'U') - assert_raises(TypeError, complex, e) + with assert_warns(DeprecationWarning): + assert_raises(TypeError, complex, e) class TestCequenceMethods: def test_array_contains(self): @@ -8756,8 +8761,10 @@ class TestConversion: int_funcs = (int, lambda x: x.__int__()) for int_func in int_funcs: assert_equal(int_func(np.array(0)), 0) - assert_equal(int_func(np.array([1])), 1) - assert_equal(int_func(np.array([[42]])), 42) + with assert_warns(DeprecationWarning): + assert_equal(int_func(np.array([1])), 1) + with assert_warns(DeprecationWarning): + assert_equal(int_func(np.array([[42]])), 42) assert_raises(TypeError, int_func, np.array([1, 2])) # gh-9972 @@ -8772,7 +8779,8 @@ class TestConversion: def __trunc__(self): return 3 assert_equal(3, int_func(np.array(HasTrunc()))) - assert_equal(3, int_func(np.array([HasTrunc()]))) + with assert_warns(DeprecationWarning): + assert_equal(3, int_func(np.array([HasTrunc()]))) else: pass @@ -8781,8 +8789,9 @@ class TestConversion: raise NotImplementedError assert_raises(NotImplementedError, int_func, np.array(NotConvertible())) - assert_raises(NotImplementedError, - int_func, np.array([NotConvertible()])) + with assert_warns(DeprecationWarning): + assert_raises(NotImplementedError, + int_func, np.array([NotConvertible()])) class TestWhere: diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py index 0ed64d72a..438b5600a 100644 --- a/numpy/core/tests/test_umath.py +++ b/numpy/core/tests/test_umath.py @@ -1426,23 +1426,36 @@ class TestSpecialFloats: np.log(a) @pytest.mark.skipif(IS_WASM, reason="fp errors don't work in wasm") - def test_sincos_values(self): + @pytest.mark.parametrize('dtype', ['e', 'f', 'd', 'g']) + def test_sincos_values(self, dtype): with np.errstate(all='ignore'): x = [np.nan, np.nan, np.nan, np.nan] y = [np.nan, -np.nan, np.inf, -np.inf] - for dt in ['e', 'f', 'd', 'g']: - xf = 
np.array(x, dtype=dt) - yf = np.array(y, dtype=dt) - assert_equal(np.sin(yf), xf) - assert_equal(np.cos(yf), xf) - + xf = np.array(x, dtype=dtype) + yf = np.array(y, dtype=dtype) + assert_equal(np.sin(yf), xf) + assert_equal(np.cos(yf), xf) + @pytest.mark.skipif(IS_WASM, reason="fp errors don't work in wasm") + @pytest.mark.parametrize('callable', [np.sin, np.cos]) + @pytest.mark.parametrize('dtype', ['e', 'f', 'd']) + @pytest.mark.parametrize('value', [np.inf, -np.inf]) + def test_sincos_errors(self, callable, dtype, value): with np.errstate(invalid='raise'): - for callable in [np.sin, np.cos]: - for value in [np.inf, -np.inf]: - for dt in ['e', 'f', 'd']: - assert_raises(FloatingPointError, callable, - np.array([value], dtype=dt)) + assert_raises(FloatingPointError, callable, + np.array([value], dtype=dtype)) + + @pytest.mark.parametrize('callable', [np.sin, np.cos]) + @pytest.mark.parametrize('dtype', ['f', 'd']) + @pytest.mark.parametrize('stride', [-1, 1, 2, 4, 5]) + def test_sincos_overlaps(self, callable, dtype, stride): + N = 100 + M = N // abs(stride) + rng = np.random.default_rng(42) + x = rng.standard_normal(N, dtype) + y = callable(x[::stride]) + callable(x[::stride], out=x[:M]) + assert_equal(x[:M], y) @pytest.mark.parametrize('dt', ['e', 'f', 'd', 'g']) def test_sqrt_values(self, dt): diff --git a/numpy/distutils/command/build_ext.py b/numpy/distutils/command/build_ext.py index d24162a42..871aa1099 100644 --- a/numpy/distutils/command/build_ext.py +++ b/numpy/distutils/command/build_ext.py @@ -259,7 +259,7 @@ class build_ext (old_build_ext): log.warn('resetting extension %r language from %r to %r.' % (ext.name, l, ext_language)) - ext.language = ext_language + ext.language = ext_language # global language all_languages.update(ext_languages) diff --git a/numpy/f2py/crackfortran.py b/numpy/f2py/crackfortran.py index c085baf12..c0a79bcae 100755 --- a/numpy/f2py/crackfortran.py +++ b/numpy/f2py/crackfortran.py @@ -1740,6 +1740,28 @@ def updatevars(typespec, selector, attrspec, entitydecl): d1[k] = unmarkouterparen(d1[k]) else: del d1[k] + + if 'len' in d1: + if typespec in ['complex', 'integer', 'logical', 'real']: + if ('kindselector' not in edecl) or (not edecl['kindselector']): + edecl['kindselector'] = {} + edecl['kindselector']['*'] = d1['len'] + del d1['len'] + elif typespec == 'character': + if ('charselector' not in edecl) or (not edecl['charselector']): + edecl['charselector'] = {} + if 'len' in edecl['charselector']: + del edecl['charselector']['len'] + edecl['charselector']['*'] = d1['len'] + del d1['len'] + + if 'init' in d1: + if '=' in edecl and (not edecl['='] == d1['init']): + outmess('updatevars: attempt to change the init expression of "%s" ("%s") to "%s". 
Ignoring.\n' % ( + ename, edecl['='], d1['init'])) + else: + edecl['='] = d1['init'] + if 'len' in d1 and 'array' in d1: if d1['len'] == '': d1['len'] = d1['array'] @@ -1749,6 +1771,7 @@ def updatevars(typespec, selector, attrspec, entitydecl): del d1['len'] errmess('updatevars: "%s %s" is mapped to "%s %s(%s)"\n' % ( typespec, e, typespec, ename, d1['array'])) + if 'array' in d1: dm = 'dimension(%s)' % d1['array'] if 'attrspec' not in edecl or (not edecl['attrspec']): @@ -1762,23 +1785,6 @@ def updatevars(typespec, selector, attrspec, entitydecl): % (ename, dm1, dm)) break - if 'len' in d1: - if typespec in ['complex', 'integer', 'logical', 'real']: - if ('kindselector' not in edecl) or (not edecl['kindselector']): - edecl['kindselector'] = {} - edecl['kindselector']['*'] = d1['len'] - elif typespec == 'character': - if ('charselector' not in edecl) or (not edecl['charselector']): - edecl['charselector'] = {} - if 'len' in edecl['charselector']: - del edecl['charselector']['len'] - edecl['charselector']['*'] = d1['len'] - if 'init' in d1: - if '=' in edecl and (not edecl['='] == d1['init']): - outmess('updatevars: attempt to change the init expression of "%s" ("%s") to "%s". Ignoring.\n' % ( - ename, edecl['='], d1['init'])) - else: - edecl['='] = d1['init'] else: outmess('updatevars: could not crack entity declaration "%s". Ignoring.\n' % ( ename + m.group('after'))) diff --git a/numpy/f2py/tests/src/string/scalar_string.f90 b/numpy/f2py/tests/src/string/scalar_string.f90 index d847668bb..f8f076172 100644 --- a/numpy/f2py/tests/src/string/scalar_string.f90 +++ b/numpy/f2py/tests/src/string/scalar_string.f90 @@ -1,7 +1,9 @@ MODULE string_test character(len=8) :: string + character string77 * 8 character(len=12), dimension(5,7) :: strarr + character strarr77(5,7) * 12 END MODULE string_test diff --git a/numpy/f2py/tests/test_character.py b/numpy/f2py/tests/test_character.py index 5f9805158..0bb0f4290 100644 --- a/numpy/f2py/tests/test_character.py +++ b/numpy/f2py/tests/test_character.py @@ -576,16 +576,18 @@ class TestStringScalarArr(util.F2PyTest): @pytest.mark.slow def test_char(self): - out = self.module.string_test.string - expected = () - assert out.shape == expected - expected = '|S8' - assert out.dtype == expected + for out in (self.module.string_test.string, + self.module.string_test.string77): + expected = () + assert out.shape == expected + expected = '|S8' + assert out.dtype == expected @pytest.mark.slow def test_char_arr(self): - out = self.module.string_test.strarr - expected = (5,7) - assert out.shape == expected - expected = '|S12' - assert out.dtype == expected + for out in (self.module.string_test.strarr, + self.module.string_test.strarr77): + expected = (5,7) + assert out.shape == expected + expected = '|S12' + assert out.dtype == expected diff --git a/numpy/f2py/tests/test_crackfortran.py b/numpy/f2py/tests/test_crackfortran.py index 886fc596e..dc0f7e27a 100644 --- a/numpy/f2py/tests/test_crackfortran.py +++ b/numpy/f2py/tests/test_crackfortran.py @@ -290,39 +290,29 @@ class TestNameArgsPatternBacktracking: def test_nameargspattern_backtracking(self, adversary): '''address ReDOS vulnerability: https://github.com/numpy/numpy/issues/23338''' - last_median = 0. 
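The rewritten backtracking test below replaces the old median-doubling heuristic with fixed timing batches; a self-contained sketch of that scheme (the pattern and adversary here are stand-ins, not f2py's real `nameargspattern`):

    import re
    import time
    import numpy as np

    pattern = re.compile(r"@\)@")  # stand-in for nameargspattern
    adversary = "@)@" * 20

    for _ in range(4):             # batches_per_regex
        times = []
        for _ in range(12):        # trials_per_batch
            t0 = time.perf_counter()
            pattern.search(adversary)
            times.append(time.perf_counter() - t0)
        # Medians resist scheduler-induced outliers; a catastrophically
        # backtracking regex blows far past this bound.
        assert np.median(times) < 0.2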
-        trials_per_count = 128
+        trials_per_batch = 12
+        batches_per_regex = 4
         start_reps, end_reps = 15, 25
-        times_median_doubled = 0
         for ii in range(start_reps, end_reps):
             repeated_adversary = adversary * ii
-            times = []
-            for _ in range(trials_per_count):
-                t0 = time.perf_counter()
-                mtch = nameargspattern.search(repeated_adversary)
-                times.append(time.perf_counter() - t0)
-            # We should use a measure of time that's resilient to outliers.
-            # Times jump around a lot due to the CPU's scheduler.
-            median = np.median(times)
+            # Test times in small batches.  This gives us more chances to
+            # catch a bad regex, while still catching it before too long
+            # if it is bad.
+            for _ in range(batches_per_regex):
+                times = []
+                for _ in range(trials_per_batch):
+                    t0 = time.perf_counter()
+                    mtch = nameargspattern.search(repeated_adversary)
+                    times.append(time.perf_counter() - t0)
+                # Our pattern should be much faster than 0.2s per search;
+                # it's unlikely that a bad regex will pass even on fast CPUs.
+                assert np.median(times) < 0.2
             assert not mtch
             # if the adversary is capped with @)@, it becomes acceptable
             # according to the old version of the regex.
             # that should still be true.
             good_version_of_adversary = repeated_adversary + '@)@'
             assert nameargspattern.search(good_version_of_adversary)
-            if ii > start_reps:
-                # the hallmark of exponentially catastrophic backtracking
-                # is that runtime doubles for every added instance of
-                # the problematic pattern.
-                times_median_doubled += median > 2 * last_median
-            # also try to rule out non-exponential but still bad cases
-            # arbitrarily, we should set a hard limit of 10ms as too slow
-            assert median < trials_per_count * 0.01
-            last_median = median
-        # we accept that maybe the median might double once, due to
-        # the CPU scheduler acting weird or whatever. More than that
-        # seems suspicious.
-        assert times_median_doubled < 2
 
 class TestFunctionReturn(util.F2PyTest):
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index f8f2ab7a2..22fb0eb7d 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -262,6 +262,9 @@ class NpzFile(Mapping):
         else:
             raise KeyError("%s is not a file in the archive" % key)
 
+    def __contains__(self, key):
+        return (key in self._files or key in self.files)
+
     def __repr__(self):
         # Get filename or default to `object`
         if isinstance(self.fid, str):
diff --git a/numpy/lib/npyio.pyi b/numpy/lib/npyio.pyi
index 9dd3d6809..cc81e82b7 100644
--- a/numpy/lib/npyio.pyi
+++ b/numpy/lib/npyio.pyi
@@ -98,6 +98,7 @@ class NpzFile(Mapping[str, NDArray[Any]]):
     def __iter__(self) -> Iterator[str]: ...
     def __len__(self) -> int: ...
     def __getitem__(self, key: str) -> NDArray[Any]: ...
+    def __contains__(self, key: str) -> bool: ...
     def __repr__(self) -> str: ...
 
 # NOTE: Returns a `NpzFile` if file is a zip file;
diff --git a/numpy/ma/core.py b/numpy/ma/core.py
index d2bd3d47e..0cf748dfd 100644
--- a/numpy/ma/core.py
+++ b/numpy/ma/core.py
@@ -3340,6 +3340,10 @@ class MaskedArray(ndarray):
         #    Note: Don't try to check for m.any(), that'll take too long
         return dout
 
+    # setitem may put NaNs into integer arrays or occasionally overflow a
+    # float.  But this may happen for masked values, so avoid otherwise
+    # correct warnings (as is typical also in masked calculations).
+    @np.errstate(over='ignore', invalid='ignore')
     def __setitem__(self, indx, value):
         """
         x.__setitem__(i, y) <==> x[i]=y
@@ -4631,6 +4635,7 @@ class MaskedArray(ndarray):
             otherwise.  'K' means to read the elements in the order they occur
             in memory, except for reversing the data when strides are negative.
             By default, 'C' index order is used.
+            (Masked arrays currently use 'A' on the data when 'K' is passed.)
 
         Returns
         -------
@@ -4657,6 +4662,13 @@
                 fill_value=999999)
 
         """
+        # The order of _data and _mask could be different (it shouldn't be
+        # normally).  Passing order `K` or `A` would be incorrect.
+        # So we ignore the mask memory order.
+        # TODO: We don't actually support K, so use A instead.  We could
+        #       try to guess this correctly by sorting strides, or deprecate it.
+        if order in "kKaA":
+            order = "C" if self._data.flags.fnc else "F"
         r = ndarray.ravel(self._data, order=order).view(type(self))
         r._update_from(self)
         if self._mask is not nomask:
diff --git a/numpy/ma/tests/test_core.py b/numpy/ma/tests/test_core.py
index 5db01b74a..9a4b74997 100644
--- a/numpy/ma/tests/test_core.py
+++ b/numpy/ma/tests/test_core.py
@@ -377,6 +377,24 @@ class TestMaskedArray:
         assert_equal(s1, s2)
         assert_(x1[1:1].shape == (0,))
 
+    def test_setitem_no_warning(self):
+        # Setitem shouldn't warn, because the assignment might be masked
+        # and warning for a masked assignment is weird (see gh-23000)
+        # (For unmasked values a warning would be acceptable,
+        # but none is given currently.)
+        x = np.ma.arange(60).reshape((6, 10))
+        index = (slice(1, 5, 2), [7, 5])
+        value = np.ma.masked_all((2, 2))
+        value._data[...] = np.inf  # not a valid integer...
+        x[index] = value
+        # The masked scalar is special cased, but test anyway (it's NaN):
+        x[...] = np.ma.masked
+        # Finally, a large value that cannot be cast to the float32 `x`
+        x = np.ma.arange(3., dtype=np.float32)
+        value = np.ma.array([2e234, 1, 1], mask=[True, False, False])
+        x[...] = value
+        x[[0, 1, 2]] = value
+
     @suppress_copy_mask_on_assignment
     def test_copy(self):
         # Tests of some subtle points of copying and sizing.
@@ -3411,6 +3429,22 @@ class TestMaskedArrayMethods:
         assert_equal(a.ravel(order='C'), [1, 2, 3, 4])
         assert_equal(a.ravel(order='F'), [1, 3, 2, 4])
 
+    @pytest.mark.parametrize("order", "AKCF")
+    @pytest.mark.parametrize("data_order", "CF")
+    def test_ravel_order(self, order, data_order):
+        # Ravelling must always ravel the mask and data in the same order
+        # to avoid misaligning the two in the ravel result.
+        arr = np.ones((5, 10), order=data_order)
+        arr[0, :] = 0
+        mask = np.ones((10, 5), dtype=bool, order=data_order).T
+        mask[0, :] = False
+        x = array(arr, mask=mask)
+        assert x._data.flags.fnc != x._mask.flags.fnc
+        assert (x.filled(0) == 0).all()
+        raveled = x.ravel(order)
+        assert (raveled.filled(0) == 0).all()
+
+
     def test_reshape(self):
         # Tests reshape
         x = arange(4)
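To make the ravel fix concrete, a condensed, runnable version of the scenario `test_ravel_order` covers (assuming a NumPy build that carries this change), where data and mask have different memory orders:

    import numpy as np

    arr = np.ones((5, 10), order="C")
    arr[0, :] = 0
    mask = np.ones((10, 5), dtype=bool, order="C").T  # Fortran-ordered view
    mask[0, :] = False                                # only row 0 unmasked
    x = np.ma.array(arr, mask=mask)
    assert x._data.flags.fnc != x._mask.flags.fnc     # memory orders disagree
    raveled = x.ravel("K")  # internally falls back to a concrete order
    assert (raveled.filled(0) == 0).all()             # mask stays aligned with data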