author    | Sebastian Berg <sebastianb@nvidia.com>          | 2023-04-26 10:51:21 +0200
committer | GitHub <noreply@github.com>                     | 2023-04-26 10:51:21 +0200
commit    | b4f0e4122e83115d99de849fcfc4fd1fc8b8f65d (patch)
tree      | 0695fcc40428a235161418f2bd288f9a90e3d640 /numpy
parent    | c7ff4118a345c966e2a0fc688e054e3fd9191e99 (diff)
parent    | 5665fdcdf21dec575d204852b208eaecf1a1638d (diff)
download  | numpy-b4f0e4122e83115d99de849fcfc4fd1fc8b8f65d.tar.gz
Merge branch 'main' into f2pyFuncFix_23598
Diffstat (limited to 'numpy')
49 files changed, 1734 insertions, 986 deletions
diff --git a/numpy/core/_add_newdocs_scalars.py b/numpy/core/_add_newdocs_scalars.py
index 86fe5583c..58661ed09 100644
--- a/numpy/core/_add_newdocs_scalars.py
+++ b/numpy/core/_add_newdocs_scalars.py
@@ -204,7 +204,11 @@ add_newdoc_for_scalar_type('str_', ['unicode_'],
     r"""
     A unicode string.
 
-    When used in arrays, this type strips trailing null codepoints.
+    This type strips trailing null codepoints.
+
+    >>> s = np.str_("abc\x00")
+    >>> s
+    'abc'
 
     Unlike the builtin `str`, this supports the
     :ref:`python:bufferobjects`, exposing its contents as UCS4:
diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py
index dcfb6e6a8..62cd52707 100644
--- a/numpy/core/arrayprint.py
+++ b/numpy/core/arrayprint.py
@@ -1096,7 +1096,7 @@ def format_float_scientific(x, precision=None, unique=True, trim='k',
         identify the value may be printed and rounded unbiased.
 
         -- versionadded:: 1.21.0
-    
+
     Returns
     -------
     rep : string
@@ -1181,7 +1181,7 @@ def format_float_positional(x, precision=None, unique=True,
         Minimum number of digits to print. Only has an effect if `unique=True`
         in which case additional digits past those necessary to uniquely
         identify the value may be printed, rounding the last additional digit.
-    
+
         -- versionadded:: 1.21.0
 
     Returns
@@ -1339,13 +1339,29 @@ class TimedeltaFormat(_TimelikeFormat):
 
 class SubArrayFormat:
-    def __init__(self, format_function):
+    def __init__(self, format_function, **options):
         self.format_function = format_function
+        self.threshold = options['threshold']
+        self.edge_items = options['edgeitems']
+
+    def __call__(self, a):
+        self.summary_insert = "..." if a.size > self.threshold else ""
+        return self.format_array(a)
+
+    def format_array(self, a):
+        if np.ndim(a) == 0:
+            return self.format_function(a)
+
+        if self.summary_insert and a.shape[0] > 2*self.edge_items:
+            formatted = (
+                [self.format_array(a_) for a_ in a[:self.edge_items]]
+                + [self.summary_insert]
+                + [self.format_array(a_) for a_ in a[-self.edge_items:]]
+            )
+        else:
+            formatted = [self.format_array(a_) for a_ in a]
 
-    def __call__(self, arr):
-        if arr.ndim <= 1:
-            return "[" + ", ".join(self.format_function(a) for a in arr) + "]"
-        return "[" + ", ".join(self.__call__(a) for a in arr) + "]"
+        return "[" + ", ".join(formatted) + "]"
 
 
 class StructuredVoidFormat:
@@ -1369,7 +1385,7 @@ class StructuredVoidFormat:
         for field_name in data.dtype.names:
             format_function = _get_format_function(data[field_name], **options)
             if data.dtype[field_name].shape != ():
-                format_function = SubArrayFormat(format_function)
+                format_function = SubArrayFormat(format_function, **options)
             format_functions.append(format_function)
         return cls(format_functions)
 
@@ -1428,7 +1444,7 @@ def dtype_is_implied(dtype):
     # not just void types can be structured, and names are not part of the repr
     if dtype.names is not None:
         return False
-    
+
     # should care about endianness *unless size is 1* (e.g., int8, bool)
     if not dtype.isnative:
         return False
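The SubArrayFormat rewrite above makes subarray fields of structured arrays honor the `threshold` and `edgeitems` print options instead of always printing every element. A quick user-level sketch of the intended effect (not part of the patch; assumes the default print options, threshold=1000 and edgeitems=3):

    import numpy as np

    # A structured dtype with a large subarray field: with this change the
    # 1001-element field is summarized instead of printed in full.
    dt = np.dtype([("big", np.int64, (1001,))])
    a = np.zeros(1, dtype=dt)
    print(a)   # expected: [([0, 0, 0, ..., 0, 0, 0],)]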
diff --git a/numpy/core/code_generators/generate_umath.py b/numpy/core/code_generators/generate_umath.py
index 1fe2e43c0..d0306bb72 100644
--- a/numpy/core/code_generators/generate_umath.py
+++ b/numpy/core/code_generators/generate_umath.py
@@ -799,7 +799,7 @@ defdict = {
           None,
           TD('e', dispatch=[('loops_umath_fp', 'e')]),
           TD('f', dispatch=[('loops_trigonometric', 'f')]),
-          TD('d', dispatch=[('loops_umath_fp', 'd')]),
+          TD('d', dispatch=[('loops_trigonometric', 'd')]),
           TD('g' + cmplx, f='cos'),
           TD(P, f='cos'),
           ),
@@ -809,7 +809,7 @@ defdict = {
           None,
           TD('e', dispatch=[('loops_umath_fp', 'e')]),
           TD('f', dispatch=[('loops_trigonometric', 'f')]),
-          TD('d', dispatch=[('loops_umath_fp', 'd')]),
+          TD('d', dispatch=[('loops_trigonometric', 'd')]),
           TD('g' + cmplx, f='sin'),
           TD(P, f='sin'),
           ),
diff --git a/numpy/core/code_generators/ufunc_docstrings.py b/numpy/core/code_generators/ufunc_docstrings.py
index 05f947c15..1695b4d27 100644
--- a/numpy/core/code_generators/ufunc_docstrings.py
+++ b/numpy/core/code_generators/ufunc_docstrings.py
@@ -3833,7 +3833,7 @@ add_newdoc('numpy.core.umath', 'sqrt',
     --------
     emath.sqrt
         A version which returns complex numbers when given negative reals.
-        Note: 0.0 and -0.0 are handled differently for complex inputs.
+        Note that 0.0 and -0.0 are handled differently for complex inputs.
 
     Notes
     -----
diff --git a/numpy/core/function_base.py b/numpy/core/function_base.py
index c82b495b1..00e4e6b0e 100644
--- a/numpy/core/function_base.py
+++ b/numpy/core/function_base.py
@@ -168,7 +168,7 @@ def linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None,
         y += start
 
     if endpoint and num > 1:
-        y[-1] = stop
+        y[-1, ...] = stop
 
     if axis != 0:
         y = _nx.moveaxis(y, 0, axis)
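The `y[-1, ...] = stop` change in linspace assigns the final sample as a full slice rather than a single element, which matters when `start`/`stop` are arrays broadcast along the sampling axis. An illustration (not part of the patch):

    import numpy as np

    # Array-valued endpoints: samples stack along a new first axis, and the
    # last row must equal `stop` exactly when endpoint=True.
    y = np.linspace(np.array([0.0, 10.0]), np.array([1.0, 20.0]), num=5)
    print(y[-1])   # [ 1. 20.]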
diff --git a/numpy/core/include/numpy/_dtype_api.h b/numpy/core/include/numpy/_dtype_api.h
index 2f801eace..c397699c7 100644
--- a/numpy/core/include/numpy/_dtype_api.h
+++ b/numpy/core/include/numpy/_dtype_api.h
@@ -5,14 +5,14 @@
 #ifndef NUMPY_CORE_INCLUDE_NUMPY___DTYPE_API_H_
 #define NUMPY_CORE_INCLUDE_NUMPY___DTYPE_API_H_
 
-#define __EXPERIMENTAL_DTYPE_API_VERSION 9
+#define __EXPERIMENTAL_DTYPE_API_VERSION 10
 
 struct PyArrayMethodObject_tag;
 
 /*
  * Largely opaque struct for DType classes (i.e. metaclass instances).
  * The internal definition is currently in `ndarraytypes.h` (export is a bit
- * more complex because `PyArray_Descr` is a DTypeMeta internall but not
+ * more complex because `PyArray_Descr` is a DTypeMeta internally but not
  * externally).
  */
 #if !(defined(NPY_INTERNAL_BUILD) && NPY_INTERNAL_BUILD)
@@ -199,9 +199,8 @@ typedef int (get_reduction_initial_function)(
         PyArrayMethod_Context *context, npy_bool reduction_is_empty,
         char *initial);
 
-
 /*
- * The following functions are only used be the wrapping array method defined
+ * The following functions are only used by the wrapping array method defined
  * in umath/wrapping_array_method.c
  */
 
@@ -257,14 +256,13 @@ typedef int translate_loop_descrs_func(int nin, int nout,
  * strided-loop function. This is designed for loops that need to visit every
  * element of a single array.
  *
- * Currently this is only used for array clearing, via the
- * NPY_DT_get_clear_loop, api hook, particularly for arrays storing embedded
- * references to python objects or heap-allocated data. If you define a dtype
- * that uses embedded references, the NPY_ITEM_REFCOUNT flag must be set on the
- * dtype instance.
+ * Currently this is used for array clearing, via the NPY_DT_get_clear_loop
+ * API hook, and zero-filling, via the NPY_DT_get_fill_zero_loop API hook.
+ * These are most useful for handling arrays storing embedded references to
+ * python objects or heap-allocated data.
  *
  * The `void *traverse_context` is passed in because we may need to pass in
- * Intepreter state or similar in the futurem, but we don't want to pass in
+ * Interpreter state or similar in the future, but we don't want to pass in
  * a full context (with pointers to dtypes, method, caller which all make
  * no sense for a traverse function).
  */
@@ -280,9 +278,10 @@ typedef int (traverse_loop_function)(
 /*
  * Simplified get_loop function specific to dtype traversal
  *
- * Currently this is only used for clearing arrays. It should set the flags
- * needed for the traversal loop and set out_loop to the loop function, which
- * must be a valid traverse_loop_function pointer.
+ * It should set the flags needed for the traversal loop and set out_loop to
+ * the loop function, which must be a valid traverse_loop_function pointer.
+ * Currently this is used for zero-filling and clearing arrays storing
+ * embedded references.
  *
  */
 typedef int (get_traverse_loop_function)(
@@ -319,6 +318,7 @@ typedef int (get_traverse_loop_function)(
 #define NPY_DT_setitem 7
 #define NPY_DT_getitem 8
 #define NPY_DT_get_clear_loop 9
+#define NPY_DT_get_fill_zero_loop 10
 
 // These PyArray_ArrFunc slots will be deprecated and replaced eventually
 // getitem and setitem can be defined as a performance optimization;
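The new NPY_DT_get_fill_zero_loop slot exists for dtypes where an all-zero buffer is not a valid "zero" value. The long-standing object-dtype behavior it generalizes is visible from Python (illustration only, not part of the patch):

    import numpy as np

    # For object dtype, calloc'd NULL pointers would be invalid, so
    # zero-filling must write real Python `int` 0 objects into the buffer.
    a = np.zeros(3, dtype=object)
    print(a)           # [0 0 0]
    print(type(a[0]))  # <class 'int'>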
diff --git a/numpy/core/include/numpy/npy_common.h b/numpy/core/include/numpy/npy_common.h
index 3b31bcf2d..728cedbf1 100644
--- a/numpy/core/include/numpy/npy_common.h
+++ b/numpy/core/include/numpy/npy_common.h
@@ -105,6 +105,14 @@
     #define NPY_FINLINE static
 #endif
 
+#if defined(_MSC_VER)
+    #define NPY_NOINLINE static __declspec(noinline)
+#elif defined(__GNUC__) || defined(__clang__)
+    #define NPY_NOINLINE static __attribute__((noinline))
+#else
+    #define NPY_NOINLINE static
+#endif
+
 #ifdef HAVE___THREAD
     #define NPY_TLS __thread
 #else
diff --git a/numpy/core/meson.build b/numpy/core/meson.build
index e968fb336..e51a7ba3b 100644
--- a/numpy/core/meson.build
+++ b/numpy/core/meson.build
@@ -430,6 +430,7 @@ _numpyconfig_h = configure_file(
 # ----------------------------
 
 staticlib_cflags = []
+staticlib_cppflags = []
 if cc.get_id() == 'msvc'
   # Disable voltbl section for vc142 to allow link using mingw-w64; see:
   # https://github.com/matthew-brett/dll_investigation/issues/1#issuecomment-1100468171
@@ -443,6 +444,7 @@ endif
 # https://mesonbuild.com/Build-options.html#build-options
 if get_option('disable-simd-optimizations')
   staticlib_cflags += '-DNPY_DISABLE_OPTIMIZATION'
+  staticlib_cppflags += '-DNPY_DISABLE_OPTIMIZATION'
 endif
 
 npy_math_internal_h = custom_target(
@@ -455,12 +457,13 @@ npymath_sources = [
   src_file.process('src/npymath/ieee754.c.src'),
   src_file.process('src/npymath/npy_math_complex.c.src'),
   npy_math_internal_h,
-  'src/npymath/halffloat.c',
+  'src/npymath/halffloat.cpp',
   'src/npymath/npy_math.c',
 ]
 npymath_lib = static_library('npymath',
   npymath_sources,
   c_args: staticlib_cflags,
+  cpp_args: staticlib_cppflags,
   include_directories: ['include', 'src/npymath', 'src/common'],
   dependencies: py_dep,
   install: true,
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index 680c2a5f6..8ef012f70 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -669,7 +669,7 @@ def configuration(parent_package='',top_path=None):
             # join('src', 'npymath', 'ieee754.cpp'),
             join('src', 'npymath', 'ieee754.c.src'),
             join('src', 'npymath', 'npy_math_complex.c.src'),
-            join('src', 'npymath', 'halffloat.c'),
+            join('src', 'npymath', 'halffloat.cpp'),
             ]
 
     config.add_installed_library('npymath',
@@ -727,7 +727,8 @@ def configuration(parent_package='',top_path=None):
             join('src', 'common', 'numpyos.h'),
             join('src', 'common', 'npy_cpu_dispatch.h'),
             join('src', 'common', 'simd', 'simd.h'),
-            ]
+            join('src', 'common', 'common.hpp'),
+            ]
 
     common_src = [
         join('src', 'common', 'array_assign.c'),
@@ -1010,9 +1011,6 @@ def configuration(parent_package='',top_path=None):
             svml_objs.sort()
 
     config.add_extension('_multiarray_umath',
-                         # Forcing C language even though we have C++ sources.
-                         # It forces the C linker and don't link C++ runtime.
-                         language = 'c',
                          sources=multiarray_src + umath_src +
                                  common_src +
                                  [generate_config_h,
diff --git a/numpy/core/src/common/common.hpp b/numpy/core/src/common/common.hpp
index 47d790bcf..44ba449d8 100644
--- a/numpy/core/src/common/common.hpp
+++ b/numpy/core/src/common/common.hpp
@@ -4,8 +4,11 @@
  * The following C++ headers are safe to be used standalone, however,
  * they are gathered to make it easy for us and for the future need to support PCH.
  */
+#include "npdef.hpp"
+#include "utils.hpp"
 #include "npstd.hpp"
 #include "half.hpp"
 #include "meta.hpp"
+#include "float_status.hpp"
 
 #endif // NUMPY_CORE_SRC_COMMON_COMMON_HPP
diff --git a/numpy/core/src/common/float_status.hpp b/numpy/core/src/common/float_status.hpp
new file mode 100644
index 000000000..8e4d5e06a
--- /dev/null
+++ b/numpy/core/src/common/float_status.hpp
@@ -0,0 +1,134 @@
+#ifndef NUMPY_CORE_SRC_COMMON_FLOAT_STATUS_HPP
+#define NUMPY_CORE_SRC_COMMON_FLOAT_STATUS_HPP
+
+#include "npstd.hpp"
+
+#include <fenv.h>
+
+namespace np {
+
+/// @addtogroup cpp_core_utility
+/// @{
+/**
+ * A class that wraps floating-point environment operations and
+ * provides lazy access to its functionality.
+ */
+class FloatStatus {
+  public:
+/*
+ * According to the C99 standard FE_DIVBYZERO, etc. may not be provided when
+ * unsupported. In such cases NumPy will not report these correctly, but we
+ * should still allow compiling (whether tests pass or not).
+ * By defining them as 0 locally, we make them no-ops. Unlike these defines,
+ * `musl`, for example, still defines all of the functions (as no-ops):
+ * https://git.musl-libc.org/cgit/musl/tree/src/fenv/fenv.c
+ * and does similar replacement in its tests:
+ * http://nsz.repo.hu/git/?p=libc-test;a=blob;f=src/common/mtest.h;h=706c1ba23ea8989b17a2f72ed1a919e187c06b6a;hb=HEAD#l30
+ */
+#ifdef FE_DIVBYZERO
+    static constexpr int kDivideByZero = FE_DIVBYZERO;
+#else
+    static constexpr int kDivideByZero = 0;
+#endif
+#ifdef FE_INVALID
+    static constexpr int kInvalid = FE_INVALID;
+#else
+    static constexpr int kInvalid = 0;
+#endif
+#ifdef FE_INEXACT
+    static constexpr int kInexact = FE_INEXACT;
+#else
+    static constexpr int kInexact = 0;
+#endif
+#ifdef FE_OVERFLOW
+    static constexpr int kOverflow = FE_OVERFLOW;
+#else
+    static constexpr int kOverflow = 0;
+#endif
+#ifdef FE_UNDERFLOW
+    static constexpr int kUnderflow = FE_UNDERFLOW;
+#else
+    static constexpr int kUnderflow = 0;
+#endif
+    static constexpr int kAllExcept = (kDivideByZero | kInvalid | kInexact |
+                                       kOverflow | kUnderflow);
+
+    FloatStatus(bool clear_on_dst=true)
+        : clear_on_dst_(clear_on_dst)
+    {
+        if constexpr (kAllExcept != 0) {
+            fpstatus_ = fetestexcept(kAllExcept);
+        }
+        else {
+            fpstatus_ = 0;
+        }
+    }
+    ~FloatStatus()
+    {
+        if constexpr (kAllExcept != 0) {
+            if (fpstatus_ != 0 && clear_on_dst_) {
+                feclearexcept(kAllExcept);
+            }
+        }
+    }
+    constexpr bool IsDivideByZero() const
+    {
+        return (fpstatus_ & kDivideByZero) != 0;
+    }
+    constexpr bool IsInexact() const
+    {
+        return (fpstatus_ & kInexact) != 0;
+    }
+    constexpr bool IsInvalid() const
+    {
+        return (fpstatus_ & kInvalid) != 0;
+    }
+    constexpr bool IsOverFlow() const
+    {
+        return (fpstatus_ & kOverflow) != 0;
+    }
+    constexpr bool IsUnderFlow() const
+    {
+        return (fpstatus_ & kUnderflow) != 0;
+    }
+    static void RaiseDivideByZero()
+    {
+        if constexpr (kDivideByZero != 0) {
+            feraiseexcept(kDivideByZero);
+        }
+    }
+    static void RaiseInexact()
+    {
+        if constexpr (kInexact != 0) {
+            feraiseexcept(kInexact);
+        }
+    }
+    static void RaiseInvalid()
+    {
+        if constexpr (kInvalid != 0) {
+            feraiseexcept(kInvalid);
+        }
+    }
+    static void RaiseOverflow()
+    {
+        if constexpr (kOverflow != 0) {
+            feraiseexcept(kOverflow);
+        }
+    }
+    static void RaiseUnderflow()
+    {
+        if constexpr (kUnderflow != 0) {
+            feraiseexcept(kUnderflow);
+        }
+    }
+
+  private:
+    bool clear_on_dst_;
+    int fpstatus_;
+};
+
+/// @} cpp_core_utility
+} // namespace np
+
+#endif // NUMPY_CORE_SRC_COMMON_FLOAT_STATUS_HPP
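FloatStatus wraps the same fenv.h flags that npy_set_floatstatus_* raises; at the Python level those flags surface through np.errstate. A sketch of the observable behavior (not part of the patch; assumes default NumPy scalar math):

    import numpy as np

    # float16 arithmetic that exceeds the type's maximum (65504) raises the
    # FP overflow flag, which errstate can turn into an exception.
    with np.errstate(over="raise"):
        try:
            np.float16(500) * np.float16(500)
        except FloatingPointError as e:
            print("caught:", e)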
diff --git a/numpy/core/src/common/half.hpp b/numpy/core/src/common/half.hpp
index 399f2fa79..e5f3f7a40 100644
--- a/numpy/core/src/common/half.hpp
+++ b/numpy/core/src/common/half.hpp
@@ -3,11 +3,14 @@
 
 #include "npstd.hpp"
 
+#include "npy_cpu_dispatch.h" // NPY_HAVE_CPU_FEATURES
+#include "half_private.hpp"
+
 // TODO(@seiko2plus):
 // - covers half-precision operations that are supported by numpy/halffloat.h
-// - support __fp16
-// - optimize x86 half<->single via cpu_fp16
-// - optimize ppc64 half<->single via cpu_vsx3
+// - add support for arithmetic operations
+// - enabling __fp16 causes massive FP exceptions on aarch64,
+//   needs a deep investigation
 
 namespace np {
 
@@ -16,48 +19,246 @@ namespace np {
 /// Provides a type that implements 16-bit floating point (half-precision).
 /// This type is ensured to be 16-bit size.
+#if 1 // ndef __ARM_FP16_FORMAT_IEEE
 class Half final {
-  public:
-    /// @name Public Constructors
-    /// @{
+  public:
+    /// Whether `Half` has full native HW support.
+    static constexpr bool kNative = false;
+    /// Whether `Half` has native HW support for single/double conversion.
+    template<typename T>
+    static constexpr bool kNativeConversion = (
+        (
+            std::is_same_v<T, float> &&
+        #if defined(NPY_HAVE_FP16) || defined(NPY_HAVE_VSX3)
+            true
+        #else
+            false
+        #endif
+        ) || (
+            std::is_same_v<T, double> &&
+        #if defined(NPY_HAVE_AVX512FP16) || defined(NPY_HAVE_VSX3)
+            true
+        #else
+            false
+        #endif
+        )
+    );
 
     /// Default constructor. initialize nothing.
     Half() = default;
-    /// Copy.
-    Half(const Half &r)
+
+    /// Construct from float.
+    /// If no hardware optimization is available, rounding will always
+    /// be ties to even.
+    explicit Half(float f)
     {
-        data_.u = r.data_.u;
+    #if defined(NPY_HAVE_FP16)
+        __m128 mf = _mm_load_ss(&f);
+        bits_ = static_cast<uint16_t>(_mm_cvtsi128_si32(_mm_cvtps_ph(mf, _MM_FROUND_TO_NEAREST_INT)));
+    #elif defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX_ASM)
+        __vector float vf32 = vec_splats(f);
+        __vector unsigned short vf16;
+        __asm__ __volatile__ ("xvcvsphp %x0,%x1" : "=wa" (vf16) : "wa" (vf32));
+        bits_ = vec_extract(vf16, 0);
+    #else
+        bits_ = half_private::FromFloatBits(BitCast<uint32_t>(f));
+    #endif
     }
-    /// @}
 
+    /// Construct from double.
+    /// If no hardware optimization is available, rounding will always
+    /// be ties to even.
+    explicit Half(double f)
+    {
+    #if defined(NPY_HAVE_AVX512FP16)
+        __m128d md = _mm_load_sd(&f);
+        bits_ = static_cast<uint16_t>(_mm_cvtsi128_si32(_mm_castph_si128(_mm_cvtpd_ph(md))));
+    #elif defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX_ASM)
+        __vector double vf64 = vec_splats(f);
+        __vector unsigned short vf16;
+        __asm__ __volatile__ ("xvcvdphp %x0,%x1" : "=wa" (vf16) : "wa" (vf64));
+        bits_ = vec_extract(vf16, 0);
+    #else
+        bits_ = half_private::FromDoubleBits(BitCast<uint64_t>(f));
+    #endif
+    }
+
+    /// Cast to float
+    explicit operator float() const
+    {
+    #if defined(NPY_HAVE_FP16)
+        float ret;
+        _mm_store_ss(&ret, _mm_cvtph_ps(_mm_cvtsi32_si128(bits_)));
+        return ret;
+    #elif defined(NPY_HAVE_VSX3) && defined(vec_extract_fp_from_shorth)
+        return vec_extract(vec_extract_fp_from_shorth(vec_splats(bits_)), 0);
+    #elif defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX_ASM)
+        __vector float vf32;
+        __asm__ __volatile__("xvcvhpsp %x0,%x1"
+                             : "=wa"(vf32)
+                             : "wa"(vec_splats(bits_)));
+        return vec_extract(vf32, 0);
+    #else
+        return BitCast<float>(half_private::ToFloatBits(bits_));
+    #endif
+    }
+
+    /// Cast to double
+    explicit operator double() const
+    {
+    #if defined(NPY_HAVE_AVX512FP16)
+        double ret;
+        _mm_store_sd(&ret, _mm_cvtph_pd(_mm_castsi128_ph(_mm_cvtsi32_si128(bits_))));
+        return ret;
+    #elif defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX_ASM)
+        __vector double vf64;
+        __asm__ __volatile__("xvcvhpdp %x0,%x1"
+                             : "=wa"(vf64)
+                             : "wa"(vec_splats(bits_)));
+        return vec_extract(vf64, 0);
+    #else
+        return BitCast<double>(half_private::ToDoubleBits(bits_));
+    #endif
+    }
 
     /// Returns a new Half constructed from the IEEE 754 binary16.
-    /// @param b the value of binary16.
-    static Half FromBits(uint16_t b)
+    static constexpr Half FromBits(uint16_t bits)
     {
-        Half f;
-        f.data_.u = b;
-        return f;
+        Half h{};
+        h.bits_ = bits;
+        return h;
     }
     /// Returns the IEEE 754 binary16 representation.
-    uint16_t Bits() const
+    constexpr uint16_t Bits() const
     {
-        return data_.u;
+        return bits_;
     }
 
-  private:
-    union {
-        uint16_t u;
-/*
-TODO(@seiko2plus): support __fp16
-#ifdef NPY_HAVE_HW_FP16
-        __fp16 f;
-#endif
-*/
-    } data_;
+    /// @name Comparison operators (ordered)
+    /// @{
+    constexpr bool operator==(Half r) const
+    {
+        return !(IsNaN() || r.IsNaN()) && Equal(r);
+    }
+    constexpr bool operator<(Half r) const
+    {
+        return !(IsNaN() || r.IsNaN()) && Less(r);
+    }
+    constexpr bool operator<=(Half r) const
+    {
+        return !(IsNaN() || r.IsNaN()) && LessEqual(r);
+    }
+    constexpr bool operator>(Half r) const
+    {
+        return r < *this;
+    }
+    constexpr bool operator>=(Half r) const
+    {
+        return r <= *this;
+    }
+    /// @}
+
+    /// @name Comparison operators (unordered)
+    /// @{
+    constexpr bool operator!=(Half r) const
+    {
+        return !(*this == r);
+    }
+    /// @} Comparison operators
+
+    /// @name Comparison with no guarantee of NaN behavior
+    /// @{
+    constexpr bool Less(Half r) const
+    {
+        uint_fast16_t a = static_cast<uint_fast16_t>(bits_),
+                      b = static_cast<uint_fast16_t>(r.bits_);
+        bool sign_a = (a & 0x8000u) == 0x8000u;
+        bool sign_b = (b & 0x8000u) == 0x8000u;
+        // if both `a` and `b` have the same sign
+        //   Test if `a` > `b` when `a` has the sign
+        //   or `a` < `b` when it does not,
+        //   and make sure they are not equal to each other
+        //   in case both are equal to +-0
+        // else
+        //   Test if `a` has the sign,
+        //   and `a` != -0.0 and `b` != 0.0
+        return (sign_a == sign_b) ? (sign_a ^ (a < b)) && (a != b)
+                                  : sign_a && ((a | b) != 0x8000u);
+    }
+    constexpr bool LessEqual(Half r) const
+    {
+        uint_fast16_t a = static_cast<uint_fast16_t>(bits_),
+                      b = static_cast<uint_fast16_t>(r.bits_);
+        bool sign_a = (a & 0x8000u) == 0x8000u;
+        bool sign_b = (b & 0x8000u) == 0x8000u;
+        // if both `a` and `b` have the same sign
+        //   Test if `a` > `b` when `a` has the sign
+        //   or `a` < `b` when it does not,
+        //   or a == b (needed even if we used <= above instead,
+        //   since testing +-0 is still required)
+        // else
+        //   Test if `a` has the sign
+        //   or `a` and `b` are equal to +-0.0
+        return (sign_a == sign_b) ? (sign_a ^ (a < b)) || (a == b)
+                                  : sign_a || ((a | b) == 0x8000u);
+    }
+    constexpr bool Equal(Half r) const
+    {
+        // a fast16 cast is not worth it, since an unpack op would be involved.
+        uint16_t a = bits_, b = r.bits_;
+        return a == b || ((a | b) == 0x8000u);
+    }
+    /// @} Comparison
+
+    /// @name Properties
+    // @{
+    constexpr bool IsNaN() const
+    {
+        return ((bits_ & 0x7c00u) == 0x7c00u) &&
+               ((bits_ & 0x03ffu) != 0);
+    }
+    /// @} Properties
+
+  private:
+    uint16_t bits_;
+};
+#else // __ARM_FP16_FORMAT_IEEE
+class Half final {
+  public:
+    static constexpr bool kNative = true;
+    template<typename T>
+    static constexpr bool kNativeConversion = (
+        std::is_same_v<T, float> || std::is_same_v<T, double>
+    );
+    Half() = default;
+    constexpr Half(__fp16 h) : half_(h)
+    {}
+    constexpr operator __fp16() const
+    { return half_; }
+    static Half FromBits(uint16_t bits)
+    {
+        Half h;
+        h.half_ = BitCast<__fp16>(bits);
+        return h;
+    }
+    uint16_t Bits() const
+    { return BitCast<uint16_t>(half_); }
+    constexpr bool Less(Half r) const
+    { return half_ < r.half_; }
+    constexpr bool LessEqual(Half r) const
+    { return half_ <= r.half_; }
+    constexpr bool Equal(Half r) const
+    { return half_ == r.half_; }
+    constexpr bool IsNaN() const
+    { return half_ != half_; }
+
+  private:
+    __fp16 half_;
 };
+#endif // __ARM_FP16_FORMAT_IEEE
 
 /// @} cpp_core_types
 
 } // namespace np
+
 #endif // NUMPY_CORE_SRC_COMMON_HALF_HPP
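The branchy Less/LessEqual/Equal helpers encode IEEE 754 ordering at the bit level, including the two tricky cases the comments call out; np.float16 shows the same semantics from Python (illustration only):

    import numpy as np

    # Signed zeros have different bit patterns (0x0000 vs 0x8000) but compare
    # equal -- the `(a | b) == 0x8000u` case in Equal().
    print(np.float16(0.0) == np.float16(-0.0))   # True

    # NaN is unordered: every ordered comparison is False, `!=` is True.
    nan = np.float16("nan")
    print(nan == nan, nan < nan, nan != nan)     # False False True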
diff --git a/numpy/core/src/common/half_private.hpp b/numpy/core/src/common/half_private.hpp
new file mode 100644
index 000000000..7a64eb397
--- /dev/null
+++ b/numpy/core/src/common/half_private.hpp
@@ -0,0 +1,330 @@
+#ifndef NUMPY_CORE_SRC_COMMON_HALF_PRIVATE_HPP
+#define NUMPY_CORE_SRC_COMMON_HALF_PRIVATE_HPP
+
+#include "npstd.hpp"
+#include "float_status.hpp"
+
+/*
+ * The following functions, which emulate float/double/half conversions,
+ * are copied from npymath without any changes to their functionality.
+ */
+namespace np { namespace half_private {
+
+template<bool gen_overflow=true, bool gen_underflow=true, bool round_even=true>
+inline uint16_t FromFloatBits(uint32_t f)
+{
+    uint32_t f_exp, f_sig;
+    uint16_t h_sgn, h_exp, h_sig;
+
+    h_sgn = (uint16_t) ((f&0x80000000u) >> 16);
+    f_exp = (f&0x7f800000u);
+
+    /* Exponent overflow/NaN converts to signed inf/NaN */
+    if (f_exp >= 0x47800000u) {
+        if (f_exp == 0x7f800000u) {
+            /* Inf or NaN */
+            f_sig = (f&0x007fffffu);
+            if (f_sig != 0) {
+                /* NaN - propagate the flag in the significand... */
+                uint16_t ret = (uint16_t) (0x7c00u + (f_sig >> 13));
+                /* ...but make sure it stays a NaN */
+                if (ret == 0x7c00u) {
+                    ret++;
+                }
+                return h_sgn + ret;
+            } else {
+                /* signed inf */
+                return (uint16_t) (h_sgn + 0x7c00u);
+            }
+        } else {
+            if constexpr (gen_overflow) {
+                /* overflow to signed inf */
+                FloatStatus::RaiseOverflow();
+            }
+            return (uint16_t) (h_sgn + 0x7c00u);
+        }
+    }
+
+    /* Exponent underflow converts to a subnormal half or signed zero */
+    if (f_exp <= 0x38000000u) {
+        /*
+         * Signed zeros, subnormal floats, and floats with small
+         * exponents all convert to signed zero half-floats.
+         */
+        if (f_exp < 0x33000000u) {
+            if constexpr (gen_underflow) {
+                /* If f != 0, it underflowed to 0 */
+                if ((f&0x7fffffff) != 0) {
+                    FloatStatus::RaiseUnderflow();
+                }
+            }
+            return h_sgn;
+        }
+        /* Make the subnormal significand */
+        f_exp >>= 23;
+        f_sig = (0x00800000u + (f&0x007fffffu));
+        if constexpr (gen_underflow) {
+            /* If it's not exactly represented, it underflowed */
+            if ((f_sig&(((uint32_t)1 << (126 - f_exp)) - 1)) != 0) {
+                FloatStatus::RaiseUnderflow();
+            }
+        }
+        /*
+         * Usually the significand is shifted by 13. For subnormals an
+         * additional shift needs to occur. This shift is one for the largest
+         * exponent giving a subnormal `f_exp = 0x38000000 >> 23 = 112`, which
+         * offsets the new first bit. At most the shift can be 1+10 bits.
+         */
+        f_sig >>= (113 - f_exp);
+        /* Handle rounding by adding 1 to the bit beyond half precision */
+        if constexpr (round_even) {
+            /*
+             * If the last bit in the half significand is 0 (already even), and
+             * the remaining bit pattern is 1000...0, then we do not add one
+             * to the bit after the half significand. However, the (113 - f_exp)
+             * shift can lose up to 11 bits, so the || checks them in the original.
+             * In all other cases, we can just add one.
+             */
+            if (((f_sig&0x00003fffu) != 0x00001000u) || (f&0x000007ffu)) {
+                f_sig += 0x00001000u;
+            }
+        }
+        else {
+            f_sig += 0x00001000u;
+        }
+        h_sig = (uint16_t) (f_sig >> 13);
+        /*
+         * If the rounding causes a bit to spill into h_exp, it will
+         * increment h_exp from zero to one and h_sig will be zero.
+         * This is the correct result.
+         */
+        return (uint16_t) (h_sgn + h_sig);
+    }
+
+    /* Regular case with no overflow or underflow */
+    h_exp = (uint16_t) ((f_exp - 0x38000000u) >> 13);
+    /* Handle rounding by adding 1 to the bit beyond half precision */
+    f_sig = (f&0x007fffffu);
+    if constexpr (round_even) {
+        /*
+         * If the last bit in the half significand is 0 (already even), and
+         * the remaining bit pattern is 1000...0, then we do not add one
+         * to the bit after the half significand. In all other cases, we do.
+         */
+        if ((f_sig&0x00003fffu) != 0x00001000u) {
+            f_sig += 0x00001000u;
+        }
+    }
+    else {
+        f_sig += 0x00001000u;
+    }
+    h_sig = (uint16_t) (f_sig >> 13);
+    /*
+     * If the rounding causes a bit to spill into h_exp, it will
+     * increment h_exp by one and h_sig will be zero. This is the
+     * correct result. h_exp may increment to 15, at greatest, in
+     * which case the result overflows to a signed inf.
+     */
+    if constexpr (gen_overflow) {
+        h_sig += h_exp;
+        if (h_sig == 0x7c00u) {
+            FloatStatus::RaiseOverflow();
+        }
+        return h_sgn + h_sig;
+    }
+    else {
+        return h_sgn + h_exp + h_sig;
+    }
+}
+
+template<bool gen_overflow=true, bool gen_underflow=true, bool round_even=true>
+inline uint16_t FromDoubleBits(uint64_t d)
+{
+    uint64_t d_exp, d_sig;
+    uint16_t h_sgn, h_exp, h_sig;
+
+    h_sgn = (d&0x8000000000000000ULL) >> 48;
+    d_exp = (d&0x7ff0000000000000ULL);
+
+    /* Exponent overflow/NaN converts to signed inf/NaN */
+    if (d_exp >= 0x40f0000000000000ULL) {
+        if (d_exp == 0x7ff0000000000000ULL) {
+            /* Inf or NaN */
+            d_sig = (d&0x000fffffffffffffULL);
+            if (d_sig != 0) {
+                /* NaN - propagate the flag in the significand... */
+                uint16_t ret = (uint16_t) (0x7c00u + (d_sig >> 42));
+                /* ...but make sure it stays a NaN */
+                if (ret == 0x7c00u) {
+                    ret++;
+                }
+                return h_sgn + ret;
+            } else {
+                /* signed inf */
+                return h_sgn + 0x7c00u;
+            }
+        } else {
+            /* overflow to signed inf */
+            if constexpr (gen_overflow) {
+                FloatStatus::RaiseOverflow();
+            }
+            return h_sgn + 0x7c00u;
+        }
+    }
+
+    /* Exponent underflow converts to subnormal half or signed zero */
+    if (d_exp <= 0x3f00000000000000ULL) {
+        /*
+         * Signed zeros, subnormal floats, and floats with small
+         * exponents all convert to signed zero half-floats.
+         */
+        if (d_exp < 0x3e60000000000000ULL) {
+            if constexpr (gen_underflow) {
+                /* If d != 0, it underflowed to 0 */
+                if ((d&0x7fffffffffffffffULL) != 0) {
+                    FloatStatus::RaiseUnderflow();
+                }
+            }
+            return h_sgn;
+        }
+        /* Make the subnormal significand */
+        d_exp >>= 52;
+        d_sig = (0x0010000000000000ULL + (d&0x000fffffffffffffULL));
+        if constexpr (gen_underflow) {
+            /* If it's not exactly represented, it underflowed */
+            if ((d_sig&(((uint64_t)1 << (1051 - d_exp)) - 1)) != 0) {
+                FloatStatus::RaiseUnderflow();
+            }
+        }
+        /*
+         * Unlike floats, doubles have enough room to shift left to align
+         * the subnormal significand leading to no loss of the last bits.
+         * The smallest possible exponent giving a subnormal is:
+         * `d_exp = 0x3e60000000000000 >> 52 = 998`. All larger subnormals are
+         * shifted with respect to it. This adds 10+1 bits to the final right
+         * shift compared to the one in the normal branch.
+         */
+        assert(d_exp - 998 >= 0);
+        d_sig <<= (d_exp - 998);
+        /* Handle rounding by adding 1 to the bit beyond half precision */
+        if constexpr (round_even) {
+            /*
+             * If the last bit in the half significand is 0 (already even), and
+             * the remaining bit pattern is 1000...0, then we do not add one
+             * to the bit after the half significand. In all other cases, we do.
+             */
+            if ((d_sig&0x003fffffffffffffULL) != 0x0010000000000000ULL) {
+                d_sig += 0x0010000000000000ULL;
+            }
+        }
+        else {
+            d_sig += 0x0010000000000000ULL;
+        }
+        h_sig = (uint16_t) (d_sig >> 53);
+        /*
+         * If the rounding causes a bit to spill into h_exp, it will
+         * increment h_exp from zero to one and h_sig will be zero.
+         * This is the correct result.
+         */
+        return h_sgn + h_sig;
+    }
+
+    /* Regular case with no overflow or underflow */
+    h_exp = (uint16_t) ((d_exp - 0x3f00000000000000ULL) >> 42);
+    /* Handle rounding by adding 1 to the bit beyond half precision */
+    d_sig = (d&0x000fffffffffffffULL);
+    if constexpr (round_even) {
+        /*
+         * If the last bit in the half significand is 0 (already even), and
+         * the remaining bit pattern is 1000...0, then we do not add one
+         * to the bit after the half significand. In all other cases, we do.
+         */
+        if ((d_sig&0x000007ffffffffffULL) != 0x0000020000000000ULL) {
+            d_sig += 0x0000020000000000ULL;
+        }
+    }
+    else {
+        d_sig += 0x0000020000000000ULL;
+    }
+    h_sig = (uint16_t) (d_sig >> 42);
+
+    /*
+     * If the rounding causes a bit to spill into h_exp, it will
+     * increment h_exp by one and h_sig will be zero. This is the
+     * correct result. h_exp may increment to 15, at greatest, in
+     * which case the result overflows to a signed inf.
+     */
+    if constexpr (gen_overflow) {
+        h_sig += h_exp;
+        if (h_sig == 0x7c00u) {
+            FloatStatus::RaiseOverflow();
+        }
+        return h_sgn + h_sig;
+    }
+    else {
+        return h_sgn + h_exp + h_sig;
+    }
+}
+
+constexpr uint32_t ToFloatBits(uint16_t h)
+{
+    uint16_t h_exp = (h&0x7c00u);
+    uint32_t f_sgn = ((uint32_t)h&0x8000u) << 16;
+    switch (h_exp) {
+        case 0x0000u: { // 0 or subnormal
+            uint16_t h_sig = (h&0x03ffu);
+            // Signed zero
+            if (h_sig == 0) {
+                return f_sgn;
+            }
+            // Subnormal
+            h_sig <<= 1;
+            while ((h_sig&0x0400u) == 0) {
+                h_sig <<= 1;
+                h_exp++;
+            }
+            uint32_t f_exp = ((uint32_t)(127 - 15 - h_exp)) << 23;
+            uint32_t f_sig = ((uint32_t)(h_sig&0x03ffu)) << 13;
+            return f_sgn + f_exp + f_sig;
+        }
+        case 0x7c00u: // inf or NaN
+            // All-ones exponent and a copy of the significand
+            return f_sgn + 0x7f800000u + (((uint32_t)(h&0x03ffu)) << 13);
+        default: // normalized
+            // Just need to adjust the exponent and shift
+            return f_sgn + (((uint32_t)(h&0x7fffu) + 0x1c000u) << 13);
+    }
+}
+
+constexpr uint64_t ToDoubleBits(uint16_t h)
+{
+    uint16_t h_exp = (h&0x7c00u);
+    uint64_t d_sgn = ((uint64_t)h&0x8000u) << 48;
+    switch (h_exp) {
+        case 0x0000u: { // 0 or subnormal
+            uint16_t h_sig = (h&0x03ffu);
+            // Signed zero
+            if (h_sig == 0) {
+                return d_sgn;
+            }
+            // Subnormal
+            h_sig <<= 1;
+            while ((h_sig&0x0400u) == 0) {
+                h_sig <<= 1;
+                h_exp++;
+            }
+            uint64_t d_exp = ((uint64_t)(1023 - 15 - h_exp)) << 52;
+            uint64_t d_sig = ((uint64_t)(h_sig&0x03ffu)) << 42;
+            return d_sgn + d_exp + d_sig;
+        }
+        case 0x7c00u: // inf or NaN
+            // All-ones exponent and a copy of the significand
+            return d_sgn + 0x7ff0000000000000ULL + (((uint64_t)(h&0x03ffu)) << 42);
+        default: // normalized
+            // Just need to adjust the exponent and shift
+            return d_sgn + (((uint64_t)(h&0x7fffu) + 0xfc000u) << 42);
+    }
+}
+
+}} // namespace np::half_private
+#endif // NUMPY_CORE_SRC_COMMON_HALF_PRIVATE_HPP
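FromFloatBits/FromDoubleBits round ties to even by default. Near 2048 the float16 spacing is 2.0, so odd integers fall exactly halfway between representable values, which makes the rule easy to see from Python (illustration only):

    import numpy as np

    print(np.float16(2049.0))  # 2048.0 -- halfway, rounds to the even neighbor below
    print(np.float16(2051.0))  # 2052.0 -- halfway, rounds to the even neighbor above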
diff --git a/numpy/core/src/common/npdef.hpp b/numpy/core/src/common/npdef.hpp
new file mode 100644
index 000000000..56a0df52e
--- /dev/null
+++ b/numpy/core/src/common/npdef.hpp
@@ -0,0 +1,28 @@
+#ifndef NUMPY_CORE_SRC_COMMON_NPDEF_HPP
+#define NUMPY_CORE_SRC_COMMON_NPDEF_HPP
+
+#if !defined(__cplusplus) || __cplusplus < 201703L
+    #error "NumPy requires a compiler with at least C++17 enabled"
+#endif
+
+/// @addtogroup cpp_core_defs
+/// @{
+
+/// Whether the compiler supports C++20
+#if __cplusplus >= 202002L
+    #define NP_HAS_CPP20 1
+#else
+    #define NP_HAS_CPP20 0
+#endif
+
+/// Wraps `__has_builtin`
+#if defined(__has_builtin)
+    #define NP_HAS_BUILTIN(INTRIN) __has_builtin(INTRIN)
+#else
+    #define NP_HAS_BUILTIN(INTRIN) 0
+#endif
+
+/// @} cpp_core_defs
+
+#endif // NUMPY_CORE_SRC_COMMON_NPDEF_HPP
diff --git a/numpy/core/src/common/npstd.hpp b/numpy/core/src/common/npstd.hpp
index 71993bd7c..ca664229a 100644
--- a/numpy/core/src/common/npstd.hpp
+++ b/numpy/core/src/common/npstd.hpp
@@ -31,6 +31,8 @@
 using std::int64_t;
 using std::uintptr_t;
 using std::intptr_t;
 using std::complex;
+using std::uint_fast16_t;
+using std::uint_fast32_t;
 
 /** Guard for long double.
  *
diff --git a/numpy/core/src/common/utils.hpp b/numpy/core/src/common/utils.hpp
new file mode 100644
index 000000000..f847cab44
--- /dev/null
+++ b/numpy/core/src/common/utils.hpp
@@ -0,0 +1,51 @@
+#ifndef NUMPY_CORE_SRC_COMMON_UTILS_HPP
+#define NUMPY_CORE_SRC_COMMON_UTILS_HPP
+
+#include "npdef.hpp"
+
+#if NP_HAS_CPP20
+    #include <bit>
+#endif
+
+#include <type_traits>
+#include <string.h>
+
+namespace np {
+
+/** Create a value of type `To` from the bits of `from`.
+ *
+ * Similar to `std::bit_cast`, but compatible with C++17; it should behave
+ * like `*reinterpret_cast<To*>(&from)` or type punning, without invoking
+ * any undefined behavior.
+ */
+template<typename To, typename From>
+#if NP_HAS_BUILTIN(__builtin_bit_cast) || NP_HAS_CPP20
+[[nodiscard]] constexpr
+#else
+inline
+#endif
+To BitCast(const From &from) noexcept
+{
+    static_assert(
+        sizeof(To) == sizeof(From),
+        "both data types must have the same size");
+
+    static_assert(
+        std::is_trivially_copyable_v<To> &&
+        std::is_trivially_copyable_v<From>,
+        "both data types must be trivially copyable");
+
+#if NP_HAS_CPP20
+    return std::bit_cast<To>(from);
+#elif NP_HAS_BUILTIN(__builtin_bit_cast)
+    return __builtin_bit_cast(To, from);
+#else
+    To to;
+    memcpy(&to, &from, sizeof(from));
+    return to;
+#endif
+}
+
+} // namespace np
+#endif // NUMPY_CORE_SRC_COMMON_UTILS_HPP
diff --git a/numpy/core/src/multiarray/array_coercion.c b/numpy/core/src/multiarray/array_coercion.c
index d55a5752b..ba9118052 100644
--- a/numpy/core/src/multiarray/array_coercion.c
+++ b/numpy/core/src/multiarray/array_coercion.c
@@ -1025,53 +1025,6 @@ PyArray_DiscoverDTypeAndShape_Recursive(
             Py_DECREF(arr);
             arr = NULL;
         }
-        else if (curr_dims > 0 && curr_dims != max_dims) {
-            /*
-             * Deprecated 2020-12-09, NumPy 1.20
-             *
-             * See https://github.com/numpy/numpy/issues/17965
-             * Shapely had objects which are not sequences but did export
-             * the array-interface (and so are arguably array-like).
-             * Previously numpy would not use array-like information during
-             * shape discovery, so that it ended up acting as if this was
-             * an (unknown) scalar but with the specified dtype.
-             * Thus we ignore "scalars" here, as the value stored in the
-             * array should be acceptable.
-             */
-            if (PyArray_NDIM(arr) > 0 && NPY_UNLIKELY(!PySequence_Check(obj))) {
-                if (PyErr_WarnFormat(PyExc_FutureWarning, 1,
-                        "The input object of type '%s' is an array-like "
-                        "implementing one of the corresponding protocols "
-                        "(`__array__`, `__array_interface__` or "
-                        "`__array_struct__`); but not a sequence (or 0-D). "
-                        "In the future, this object will be coerced as if it "
-                        "was first converted using `np.array(obj)`. "
-                        "To retain the old behaviour, you have to either "
-                        "modify the type '%s', or assign to an empty array "
-                        "created with `np.empty(correct_shape, dtype=object)`.",
-                        Py_TYPE(obj)->tp_name, Py_TYPE(obj)->tp_name) < 0) {
-                    Py_DECREF(arr);
-                    return -1;
-                }
-                /*
-                 * Strangely enough, even though we threw away the result here,
-                 * we did use it during descriptor discovery, so promote it:
-                 */
-                if (update_shape(curr_dims, &max_dims, out_shape,
-                        0, NULL, NPY_FALSE, flags) < 0) {
-                    *flags |= FOUND_RAGGED_ARRAY;
-                    Py_DECREF(arr);
-                    return max_dims;
-                }
-                if (!(*flags & DESCRIPTOR_WAS_SET) && handle_promotion(
-                        out_descr, PyArray_DESCR(arr), fixed_DType, flags) < 0) {
-                    Py_DECREF(arr);
-                    return -1;
-                }
-                Py_DECREF(arr);
-                return max_dims;
-            }
-        }
     }
     if (arr != NULL) {
         /*
diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c
index da8d23a26..573d0d606 100644
--- a/numpy/core/src/multiarray/common.c
+++ b/numpy/core/src/multiarray/common.c
@@ -128,24 +128,6 @@ PyArray_DTypeFromObject(PyObject *obj, int maxdims, PyArray_Descr **out_dtype)
 }
 
 
-NPY_NO_EXPORT int
-_zerofill(PyArrayObject *ret)
-{
-    if (PyDataType_REFCHK(PyArray_DESCR(ret))) {
-        PyObject *zero = PyLong_FromLong(0);
-        PyArray_FillObjectArray(ret, zero);
-        Py_DECREF(zero);
-        if (PyErr_Occurred()) {
-            return -1;
-        }
-    }
-    else {
-        npy_intp n = PyArray_NBYTES(ret);
-        memset(PyArray_DATA(ret), 0, n);
-    }
-    return 0;
-}
-
 NPY_NO_EXPORT npy_bool
 _IsWriteable(PyArrayObject *ap)
 {
@@ -459,3 +441,32 @@ new_array_for_sum(PyArrayObject *ap1, PyArrayObject *ap2, PyArrayObject* out,
     }
 }
 
+NPY_NO_EXPORT int
+check_is_convertible_to_scalar(PyArrayObject *v)
+{
+    if (PyArray_NDIM(v) == 0) {
+        return 0;
+    }
+
+    /* Remove this if-else block when the deprecation expires */
+    if (PyArray_SIZE(v) == 1) {
+        /* Numpy 1.25.0, 2023-01-02 */
+        if (DEPRECATE(
+                "Conversion of an array with ndim > 0 to a scalar "
+                "is deprecated, and will error in future. "
+                "Ensure you extract a single element from your array "
+                "before performing this operation. "
+                "(Deprecated NumPy 1.25.)") < 0) {
+            return -1;
+        }
+        return 0;
+    } else {
+        PyErr_SetString(PyExc_TypeError,
+                "only length-1 arrays can be converted to Python scalars");
+        return -1;
+    }
+
+    PyErr_SetString(PyExc_TypeError,
+            "only 0-dimensional arrays can be converted to Python scalars");
+    return -1;
+}
diff --git a/numpy/core/src/multiarray/common.h b/numpy/core/src/multiarray/common.h
index 4e067b22c..cb9fadc4e 100644
--- a/numpy/core/src/multiarray/common.h
+++ b/numpy/core/src/multiarray/common.h
@@ -55,9 +55,6 @@ PyArray_DTypeFromObject(PyObject *obj, int maxdims,
 NPY_NO_EXPORT PyArray_Descr *
 _array_find_python_scalar_type(PyObject *op);
 
-NPY_NO_EXPORT int
-_zerofill(PyArrayObject *ret);
-
 NPY_NO_EXPORT npy_bool
 _IsWriteable(PyArrayObject *ap);
 
@@ -335,6 +332,13 @@ PyArray_TupleFromItems(int n, PyObject *const *items, int make_null_none)
     return tuple;
 }
 
+/*
+ * Returns 0 if the array can be converted to a scalar: that is, if it has
+ * rank 0, or (with a deprecation warning) size 1. Returns -1 otherwise.
+ */
+NPY_NO_EXPORT int
+check_is_convertible_to_scalar(PyArrayObject *v);
+
 
 #include "ucsnarrow.h"
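check_is_convertible_to_scalar centralizes the new NumPy 1.25 deprecation used by methods.c and number.c further down in this diff; from Python the behavior looks like this (illustration only):

    import numpy as np
    import warnings

    with warnings.catch_warnings():
        warnings.simplefilter("error", DeprecationWarning)
        try:
            float(np.array([1.0]))       # size 1 but ndim > 0: deprecated
        except DeprecationWarning as w:
            print("deprecated:", w)

    try:
        float(np.ones(2))                # size > 1: still a TypeError
    except TypeError as e:
        print("error:", e)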
diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c
index 53db5c577..de1cd075d 100644
--- a/numpy/core/src/multiarray/convert_datatype.c
+++ b/numpy/core/src/multiarray/convert_datatype.c
@@ -3913,21 +3913,6 @@ PyArray_InitializeObjectToObjectCast(void)
 }
 
 
-static int
-PyArray_SetClearFunctions(void)
-{
-    PyArray_DTypeMeta *Object = PyArray_DTypeFromTypeNum(NPY_OBJECT);
-    NPY_DT_SLOTS(Object)->get_clear_loop = &npy_get_clear_object_strided_loop;
-    Py_DECREF(Object);  /* use borrowed */
-
-    PyArray_DTypeMeta *Void = PyArray_DTypeFromTypeNum(NPY_VOID);
-    NPY_DT_SLOTS(Void)->get_clear_loop = &npy_get_clear_void_and_legacy_user_dtype_loop;
-    Py_DECREF(Void);  /* use borrowed */
-    return 0;
-}
-
-
 NPY_NO_EXPORT int
 PyArray_InitializeCasts()
 {
@@ -3947,8 +3932,5 @@ PyArray_InitializeCasts()
     if (PyArray_InitializeDatetimeCasts() < 0) {
         return -1;
     }
-    if (PyArray_SetClearFunctions() < 0) {
-        return -1;
-    }
     return 0;
 }
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index a6335c783..79a1905a7 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -715,6 +715,11 @@ PyArray_NewFromDescr_int(
     fa->base = (PyObject *)NULL;
     fa->weakreflist = (PyObject *)NULL;
 
+    /* needed for zero-filling logic below, defined and initialized up here
+       so cleanup logic can go in the fail block */
+    NPY_traverse_info fill_zero_info;
+    NPY_traverse_info_init(&fill_zero_info);
+
     if (nd > 0) {
         fa->dimensions = npy_alloc_cache_dim(2 * nd);
         if (fa->dimensions == NULL) {
@@ -784,6 +789,31 @@ PyArray_NewFromDescr_int(
 
     if (data == NULL) {
+
+        /* float errors do not matter and we do not release GIL */
+        NPY_ARRAYMETHOD_FLAGS zero_flags;
+        get_traverse_loop_function *get_fill_zero_loop =
+            NPY_DT_SLOTS(NPY_DTYPE(descr))->get_fill_zero_loop;
+        if (get_fill_zero_loop != NULL) {
+            if (get_fill_zero_loop(
+                    NULL, descr, 1, descr->elsize, &(fill_zero_info.func),
+                    &(fill_zero_info.auxdata), &zero_flags) < 0) {
+                goto fail;
+            }
+        }
+
+        /*
+         * We always want a zero-filled array allocated with calloc if
+         * NPY_NEEDS_INIT is set on the dtype, for safety.  We also want a
+         * zero-filled array if zeroed is set and the zero-filling loop isn't
+         * defined, for better performance.
+         *
+         * If the zero-filling loop is defined and zeroed is set, allocate
+         * with malloc and let the zero-filling loop fill the array buffer
+         * with valid zero values for the dtype.
+         */
+        int use_calloc = (PyDataType_FLAGCHK(descr, NPY_NEEDS_INIT) ||
+                          (zeroed && (fill_zero_info.func == NULL)));
+
         /* Store the handler in case the default is modified */
         fa->mem_handler = PyDataMem_GetHandler();
         if (fa->mem_handler == NULL) {
@@ -801,11 +831,8 @@ PyArray_NewFromDescr_int(
                 fa->strides[i] = 0;
             }
         }
-        /*
-         * It is bad to have uninitialized OBJECT pointers
-         * which could also be sub-fields of a VOID array
-         */
-        if (zeroed || PyDataType_FLAGCHK(descr, NPY_NEEDS_INIT)) {
+
+        if (use_calloc) {
             data = PyDataMem_UserNEW_ZEROED(nbytes, 1, fa->mem_handler);
         }
         else {
@@ -816,6 +843,18 @@ PyArray_NewFromDescr_int(
             goto fail;
         }
 
+        /*
+         * If the array needs special dtype-specific zero-filling logic, do that
+         */
+        if (NPY_UNLIKELY(zeroed && (fill_zero_info.func != NULL))) {
+            npy_intp size = PyArray_MultiplyList(fa->dimensions, fa->nd);
+            if (fill_zero_info.func(
+                    NULL, descr, data, size, descr->elsize,
+                    fill_zero_info.auxdata) < 0) {
+                goto fail;
+            }
+        }
+
         fa->flags |= NPY_ARRAY_OWNDATA;
     }
     else {
@@ -910,9 +949,11 @@ PyArray_NewFromDescr_int(
             }
         }
     }
+    NPY_traverse_info_xfree(&fill_zero_info);
     return (PyObject *)fa;
 
 fail:
+    NPY_traverse_info_xfree(&fill_zero_info);
     Py_XDECREF(fa->mem_handler);
     Py_DECREF(fa);
     return NULL;
@@ -3017,17 +3058,7 @@ PyArray_Zeros(int nd, npy_intp const *dims, PyArray_Descr *type, int is_f_order)
         return NULL;
     }
 
-    /* handle objects */
-    if (PyDataType_REFCHK(PyArray_DESCR(ret))) {
-        if (_zerofill(ret) < 0) {
-            Py_DECREF(ret);
-            return NULL;
-        }
-    }
-
     return (PyObject *)ret;
-
 }
 
 /*NUMPY_API
diff --git a/numpy/core/src/multiarray/dtype_traversal.c b/numpy/core/src/multiarray/dtype_traversal.c
index cefa7d6e1..769c2e015 100644
--- a/numpy/core/src/multiarray/dtype_traversal.c
+++ b/numpy/core/src/multiarray/dtype_traversal.c
@@ -24,7 +24,7 @@
 #include "alloc.h"
 #include "array_method.h"
 #include "dtypemeta.h"
-
+#include "refcount.h"
 #include "dtype_traversal.h"
 
@@ -124,6 +124,39 @@ npy_get_clear_object_strided_loop(
 }
 
 
+/**************** Python Object zero fill *********************/
+
+static int
+fill_zero_object_strided_loop(
+        void *NPY_UNUSED(traverse_context), PyArray_Descr *NPY_UNUSED(descr),
+        char *data, npy_intp size, npy_intp stride,
+        NpyAuxData *NPY_UNUSED(auxdata))
+{
+    PyObject *zero = PyLong_FromLong(0);
+    while (size--) {
+        Py_INCREF(zero);
+        // assumes `data` doesn't have a pre-existing object inside it
+        memcpy(data, &zero, sizeof(zero));
+        data += stride;
+    }
+    Py_DECREF(zero);
+    return 0;
+}
+
+NPY_NO_EXPORT int
+npy_object_get_fill_zero_loop(void *NPY_UNUSED(traverse_context),
+                              PyArray_Descr *NPY_UNUSED(descr),
+                              int NPY_UNUSED(aligned),
+                              npy_intp NPY_UNUSED(fixed_stride),
+                              traverse_loop_function **out_loop,
+                              NpyAuxData **NPY_UNUSED(out_auxdata),
+                              NPY_ARRAYMETHOD_FLAGS *flags)
+{
+    *flags = NPY_METH_REQUIRES_PYAPI | NPY_METH_NO_FLOATINGPOINT_ERRORS;
+    *out_loop = &fill_zero_object_strided_loop;
+    return 0;
+}
+
 /**************** Structured DType clear functionality ***************/
 
 /*
@@ -408,7 +441,6 @@ clear_no_op(
     return 0;
 }
 
-
 NPY_NO_EXPORT int
 npy_get_clear_void_and_legacy_user_dtype_loop(
         void *traverse_context, PyArray_Descr *dtype, int aligned,
@@ -472,3 +504,41 @@ npy_get_clear_void_and_legacy_user_dtype_loop(
             dtype);
     return -1;
 }
+
+/**************** Structured DType zero fill ***************/
+
+
+static int
+fill_zero_void_with_objects_strided_loop(
+        void *NPY_UNUSED(traverse_context), PyArray_Descr *descr,
+        char *data, npy_intp size, npy_intp stride,
+        NpyAuxData *NPY_UNUSED(auxdata))
+{
+    PyObject *zero = PyLong_FromLong(0);
+    while (size--) {
+        _fillobject(data, zero, descr);
+        data += stride;
+    }
+    Py_DECREF(zero);
+    return 0;
+}
+
+
+NPY_NO_EXPORT int
+npy_void_get_fill_zero_loop(void *NPY_UNUSED(traverse_context),
+                            PyArray_Descr *descr,
+                            int NPY_UNUSED(aligned),
+                            npy_intp NPY_UNUSED(fixed_stride),
+                            traverse_loop_function **out_loop,
+                            NpyAuxData **NPY_UNUSED(out_auxdata),
+                            NPY_ARRAYMETHOD_FLAGS *flags)
+{
+    *flags = NPY_METH_NO_FLOATINGPOINT_ERRORS;
+    if (PyDataType_REFCHK(descr)) {
+        *flags |= NPY_METH_REQUIRES_PYAPI;
+        *out_loop = &fill_zero_void_with_objects_strided_loop;
+    }
+    else {
+        *out_loop = NULL;
+    }
+    return 0;
+}
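npy_void_get_fill_zero_loop only installs a loop when the void dtype actually holds references; otherwise it leaves out_loop NULL so the buffer can simply be calloc'd. The user-visible effect for a structured dtype with an object field (illustration only):

    import numpy as np

    dt = np.dtype([("tag", object), ("x", np.int32)])
    a = np.zeros(2, dtype=dt)
    print(a[0]["tag"], type(a[0]["tag"]))  # 0 <class 'int'> -- a real object
    print(a[0]["x"])                       # 0 -- plain zeroed bytes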
diff --git a/numpy/core/src/multiarray/dtype_traversal.h b/numpy/core/src/multiarray/dtype_traversal.h
index fd060a0f0..a9c185382 100644
--- a/numpy/core/src/multiarray/dtype_traversal.h
+++ b/numpy/core/src/multiarray/dtype_traversal.h
@@ -19,6 +19,22 @@ npy_get_clear_void_and_legacy_user_dtype_loop(
         traverse_loop_function **out_loop, NpyAuxData **out_traversedata,
         NPY_ARRAYMETHOD_FLAGS *flags);
 
+/* NumPy DType zero-filling implementations */
+
+NPY_NO_EXPORT int
+npy_object_get_fill_zero_loop(
+        void *NPY_UNUSED(traverse_context), PyArray_Descr *NPY_UNUSED(descr),
+        int NPY_UNUSED(aligned), npy_intp NPY_UNUSED(fixed_stride),
+        traverse_loop_function **out_loop, NpyAuxData **NPY_UNUSED(out_auxdata),
+        NPY_ARRAYMETHOD_FLAGS *flags);
+
+NPY_NO_EXPORT int
+npy_void_get_fill_zero_loop(
+        void *NPY_UNUSED(traverse_context), PyArray_Descr *descr,
+        int NPY_UNUSED(aligned), npy_intp NPY_UNUSED(fixed_stride),
+        traverse_loop_function **out_loop, NpyAuxData **NPY_UNUSED(out_auxdata),
+        NPY_ARRAYMETHOD_FLAGS *flags);
+
 
 /* Helper to deal with calling or nesting simple strided loops */
 
@@ -34,6 +50,7 @@ NPY_traverse_info_init(NPY_traverse_info *cast_info)
 {
     cast_info->func = NULL;  /* mark as uninitialized. */
     cast_info->auxdata = NULL;  /* allow leaving auxdata untouched */
+    cast_info->descr = NULL;  /* mark as uninitialized. */
 }
 
 
@@ -45,7 +62,7 @@ NPY_traverse_info_xfree(NPY_traverse_info *traverse_info)
     }
     traverse_info->func = NULL;
     NPY_AUXDATA_FREE(traverse_info->auxdata);
-    Py_DECREF(traverse_info->descr);
+    Py_XDECREF(traverse_info->descr);
 }
 
 
@@ -79,4 +96,4 @@ PyArray_GetClearFunction(
         NPY_traverse_info *clear_info, NPY_ARRAYMETHOD_FLAGS *flags);
 
-#endif  /* NUMPY_CORE_SRC_MULTIARRAY_DTYPE_TRAVERSAL_H_ */
\ No newline at end of file
+#endif  /* NUMPY_CORE_SRC_MULTIARRAY_DTYPE_TRAVERSAL_H_ */
diff --git a/numpy/core/src/multiarray/dtypemeta.c b/numpy/core/src/multiarray/dtypemeta.c
index f268ba2cb..f8c1b6617 100644
--- a/numpy/core/src/multiarray/dtypemeta.c
+++ b/numpy/core/src/multiarray/dtypemeta.c
@@ -20,6 +20,8 @@
 #include "usertypes.h"
 #include "conversion_utils.h"
 #include "templ_common.h"
+#include "refcount.h"
+#include "dtype_traversal.h"
 
 #include <assert.h>
 
@@ -524,6 +526,7 @@ void_common_instance(PyArray_Descr *descr1, PyArray_Descr *descr2)
     return NULL;
 }
 
+
 NPY_NO_EXPORT int
 python_builtins_are_known_scalar_types(
         PyArray_DTypeMeta *NPY_UNUSED(cls), PyTypeObject *pytype)
@@ -855,6 +858,7 @@ dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr)
     dt_slots->common_dtype = default_builtin_common_dtype;
     dt_slots->common_instance = NULL;
     dt_slots->ensure_canonical = ensure_native_byteorder;
+    dt_slots->get_fill_zero_loop = NULL;
 
     if (PyTypeNum_ISSIGNED(dtype_class->type_num)) {
         /* Convert our scalars (raise on too large unsigned and NaN, etc.) */
@@ -866,6 +870,8 @@ dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr)
     }
     else if (descr->type_num == NPY_OBJECT) {
         dt_slots->common_dtype = object_common_dtype;
+        dt_slots->get_fill_zero_loop = npy_object_get_fill_zero_loop;
+        dt_slots->get_clear_loop = npy_get_clear_object_strided_loop;
     }
     else if (PyTypeNum_ISDATETIME(descr->type_num)) {
         /* Datetimes are flexible, but were not considered previously */
@@ -887,6 +893,9 @@ dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr)
                 void_discover_descr_from_pyobject);
         dt_slots->common_instance = void_common_instance;
         dt_slots->ensure_canonical = void_ensure_canonical;
+        dt_slots->get_fill_zero_loop = npy_void_get_fill_zero_loop;
+        dt_slots->get_clear_loop =
+                npy_get_clear_void_and_legacy_user_dtype_loop;
     }
     else {
         dt_slots->default_descr = string_and_unicode_default_descr;
diff --git a/numpy/core/src/multiarray/dtypemeta.h b/numpy/core/src/multiarray/dtypemeta.h
index 3b4dbad24..6dbfd1549 100644
--- a/numpy/core/src/multiarray/dtypemeta.h
+++ b/numpy/core/src/multiarray/dtypemeta.h
@@ -42,6 +42,23 @@ typedef struct {
      */
     get_traverse_loop_function *get_clear_loop;
     /*
+     Either NULL or a function that sets a function pointer to a traversal
+     loop that fills an array with zero values appropriate for the dtype. If
+     get_fill_zero_loop is undefined or the function pointer set by it is
+     NULL, the array buffer is allocated with calloc. If this function is
+     defined and it sets a non-NULL function pointer, the array buffer is
+     allocated with malloc and the zero-filling loop function pointer is
+     called to fill the buffer. For the best performance, avoid using this
+     function if a zero-filled array buffer allocated with calloc makes sense
+     for the dtype.
+
+     Note that this is currently used only for zero-filling a newly allocated
+     array. While it can be used to zero-fill an already-filled buffer, that
+     will not work correctly for arrays holding references. If you need to do
+     that, clear the array first.
+     */
+    get_traverse_loop_function *get_fill_zero_loop;
+    /*
      * The casting implementation (ArrayMethod) to convert between two
      * instances of this DType, stored explicitly for fast access:
      */
@@ -63,7 +80,7 @@ typedef struct {
 
 // This must be updated if new slots before within_dtype_castingimpl
 // are added
-#define NPY_NUM_DTYPE_SLOTS 9
+#define NPY_NUM_DTYPE_SLOTS 10
 
 #define NPY_NUM_DTYPE_PYARRAY_ARRFUNCS_SLOTS 22
 #define NPY_DT_MAX_ARRFUNCS_SLOT \
   NPY_NUM_DTYPE_PYARRAY_ARRFUNCS_SLOTS + _NPY_DT_ARRFUNCS_OFFSET
diff --git a/numpy/core/src/multiarray/getset.c b/numpy/core/src/multiarray/getset.c
index ab35548ed..d019acbb5 100644
--- a/numpy/core/src/multiarray/getset.c
+++ b/numpy/core/src/multiarray/getset.c
@@ -797,20 +797,17 @@ array_imag_get(PyArrayObject *self, void *NPY_UNUSED(ignored))
     }
     else {
         Py_INCREF(PyArray_DESCR(self));
-        ret = (PyArrayObject *)PyArray_NewFromDescr(Py_TYPE(self),
-                                                    PyArray_DESCR(self),
-                                                    PyArray_NDIM(self),
-                                                    PyArray_DIMS(self),
-                                                    NULL, NULL,
-                                                    PyArray_ISFORTRAN(self),
-                                                    (PyObject *)self);
+        ret = (PyArrayObject *)PyArray_NewFromDescr_int(
+                Py_TYPE(self),
+                PyArray_DESCR(self),
+                PyArray_NDIM(self),
+                PyArray_DIMS(self),
+                NULL, NULL,
+                PyArray_ISFORTRAN(self),
+                (PyObject *)self, NULL, 1, 0);
         if (ret == NULL) {
             return NULL;
         }
-        if (_zerofill(ret) < 0) {
-            Py_DECREF(ret);
-            return NULL;
-        }
         PyArray_CLEARFLAGS(ret, NPY_ARRAY_WRITEABLE);
     }
     return (PyObject *) ret;
diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c
index 93b290020..3b2e72820 100644
--- a/numpy/core/src/multiarray/methods.c
+++ b/numpy/core/src/multiarray/methods.c
@@ -2804,9 +2804,7 @@ array_complex(PyArrayObject *self, PyObject *NPY_UNUSED(args))
     PyArray_Descr *dtype;
     PyObject *c;
 
-    if (PyArray_SIZE(self) != 1) {
-        PyErr_SetString(PyExc_TypeError,
-                "only length-1 arrays can be converted to Python scalars");
+    if (check_is_convertible_to_scalar(self) < 0) {
         return NULL;
     }
 
diff --git a/numpy/core/src/multiarray/number.c b/numpy/core/src/multiarray/number.c
index c208fb203..35e4af79c 100644
--- a/numpy/core/src/multiarray/number.c
+++ b/numpy/core/src/multiarray/number.c
@@ -869,13 +869,11 @@ array_scalar_forward(PyArrayObject *v,
                      PyObject *(*builtin_func)(PyObject *),
                      const char *where)
 {
-    PyObject *scalar;
-    if (PyArray_SIZE(v) != 1) {
-        PyErr_SetString(PyExc_TypeError, "only size-1 arrays can be"\
-                " converted to Python scalars");
+    if (check_is_convertible_to_scalar(v) < 0) {
         return NULL;
     }
 
+    PyObject *scalar;
     scalar = PyArray_GETITEM(v, PyArray_DATA(v));
     if (scalar == NULL) {
         return NULL;
     }
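With the getset.c change, array_imag_get requests a zeroed array straight from PyArray_NewFromDescr_int instead of calling _zerofill afterwards; the Python-level result is unchanged (illustration only):

    import numpy as np

    im = np.arange(3).imag       # imaginary part of a real-valued array
    print(im)                    # [0 0 0]
    print(im.flags.writeable)    # False -- returned read-only, as before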
diff --git a/numpy/core/src/multiarray/refcount.c b/numpy/core/src/multiarray/refcount.c
index 20527f7af..d200957c3 100644
--- a/numpy/core/src/multiarray/refcount.c
+++ b/numpy/core/src/multiarray/refcount.c
@@ -17,15 +17,12 @@
 #include "numpy/arrayscalars.h"
 #include "iterators.h"
 #include "dtypemeta.h"
+#include "refcount.h"
 
 #include "npy_config.h"
 
 #include "npy_pycompat.h"
 
-static void
-_fillobject(char *optr, PyObject *obj, PyArray_Descr *dtype);
-
-
 /*
  * Helper function to clear a strided memory (normally or always contiguous)
  * from all Python (or other) references.  The function does nothing if the
@@ -395,7 +392,7 @@ PyArray_FillObjectArray(PyArrayObject *arr, PyObject *obj)
     }
 }
 
-static void
+NPY_NO_EXPORT void
 _fillobject(char *optr, PyObject *obj, PyArray_Descr *dtype)
 {
     if (!PyDataType_FLAGCHK(dtype, NPY_ITEM_REFCOUNT)) {
diff --git a/numpy/core/src/multiarray/refcount.h b/numpy/core/src/multiarray/refcount.h
index 16d34e292..7f39b9ca4 100644
--- a/numpy/core/src/multiarray/refcount.h
+++ b/numpy/core/src/multiarray/refcount.h
@@ -24,4 +24,7 @@ PyArray_XDECREF(PyArrayObject *mp);
 NPY_NO_EXPORT void
 PyArray_FillObjectArray(PyArrayObject *arr, PyObject *obj);
 
+NPY_NO_EXPORT void
+_fillobject(char *optr, PyObject *obj, PyArray_Descr *dtype);
+
 #endif  /* NUMPY_CORE_SRC_MULTIARRAY_REFCOUNT_H_ */
diff --git a/numpy/core/src/npymath/halffloat.c b/numpy/core/src/npymath/halffloat.c
deleted file mode 100644
index 51948c736..000000000
--- a/numpy/core/src/npymath/halffloat.c
+++ /dev/null
@@ -1,555 +0,0 @@
-#define NPY_NO_DEPRECATED_API NPY_API_VERSION
-
-#include "numpy/halffloat.h"
-
-/*
- * This chooses between 'ties to even' and 'ties away from zero'.
- */
-#define NPY_HALF_ROUND_TIES_TO_EVEN 1
-/*
- * If these are 1, the conversions try to trigger underflow,
- * overflow, and invalid exceptions in the FP system when needed.
- */
-#define NPY_HALF_GENERATE_OVERFLOW 1
-#define NPY_HALF_GENERATE_UNDERFLOW 1
-#define NPY_HALF_GENERATE_INVALID 1
-
-/*
- ********************************************************************
- *                   HALF-PRECISION ROUTINES                        *
- ********************************************************************
- */
-
-float npy_half_to_float(npy_half h)
-{
-    union { float ret; npy_uint32 retbits; } conv;
-    conv.retbits = npy_halfbits_to_floatbits(h);
-    return conv.ret;
-}
-
-double npy_half_to_double(npy_half h)
-{
-    union { double ret; npy_uint64 retbits; } conv;
-    conv.retbits = npy_halfbits_to_doublebits(h);
-    return conv.ret;
-}
-
-npy_half npy_float_to_half(float f)
-{
-    union { float f; npy_uint32 fbits; } conv;
-    conv.f = f;
-    return npy_floatbits_to_halfbits(conv.fbits);
-}
-
-npy_half npy_double_to_half(double d)
-{
-    union { double d; npy_uint64 dbits; } conv;
-    conv.d = d;
-    return npy_doublebits_to_halfbits(conv.dbits);
-}
-
-int npy_half_iszero(npy_half h)
-{
-    return (h&0x7fff) == 0;
-}
-
-int npy_half_isnan(npy_half h)
-{
-    return ((h&0x7c00u) == 0x7c00u) && ((h&0x03ffu) != 0x0000u);
-}
-
-int npy_half_isinf(npy_half h)
-{
-    return ((h&0x7fffu) == 0x7c00u);
-}
-
-int npy_half_isfinite(npy_half h)
-{
-    return ((h&0x7c00u) != 0x7c00u);
-}
-
-int npy_half_signbit(npy_half h)
-{
-    return (h&0x8000u) != 0;
-}
-
-npy_half npy_half_spacing(npy_half h)
-{
-    npy_half ret;
-    npy_uint16 h_exp = h&0x7c00u;
-    npy_uint16 h_sig = h&0x03ffu;
-    if (h_exp == 0x7c00u) {
-#if NPY_HALF_GENERATE_INVALID
-        npy_set_floatstatus_invalid();
-#endif
-        ret = NPY_HALF_NAN;
-    } else if (h == 0x7bffu) {
-#if NPY_HALF_GENERATE_OVERFLOW
-        npy_set_floatstatus_overflow();
-#endif
-        ret = NPY_HALF_PINF;
-    } else if ((h&0x8000u) && h_sig == 0) { /* Negative boundary case */
-        if (h_exp > 0x2c00u) { /* If result is normalized */
-            ret = h_exp - 0x2c00u;
-        } else if(h_exp > 0x0400u) { /* The result is a subnormal, but not the smallest */
-            ret = 1 << ((h_exp >> 10) - 2);
-        } else {
-            ret = 0x0001u; /* Smallest subnormal half */
-        }
-    } else if (h_exp > 0x2800u) { /* If result is still normalized */
-        ret = h_exp - 0x2800u;
-    } else if (h_exp > 0x0400u) { /* The result is a subnormal, but not the smallest */
-        ret = 1 << ((h_exp >> 10) - 1);
-    } else {
-        ret = 0x0001u;
-    }
-
-    return ret;
-}
-
-npy_half npy_half_copysign(npy_half x, npy_half y)
-{
-    return (x&0x7fffu) | (y&0x8000u);
-}
-
-npy_half npy_half_nextafter(npy_half x, npy_half y)
-{
-    npy_half ret;
-
-    if (npy_half_isnan(x) || npy_half_isnan(y)) {
-        ret = NPY_HALF_NAN;
-    } else if (npy_half_eq_nonan(x, y)) {
-        ret = x;
-    } else if (npy_half_iszero(x)) {
-        ret = (y&0x8000u) + 1; /* Smallest subnormal half */
-    } else if (!(x&0x8000u)) { /* x > 0 */
-        if ((npy_int16)x > (npy_int16)y) { /* x > y */
-            ret = x-1;
-        } else {
-            ret = x+1;
-        }
-    } else {
-        if (!(y&0x8000u) || (x&0x7fffu) > (y&0x7fffu)) { /* x < y */
-            ret = x-1;
-        } else {
-            ret = x+1;
-        }
-    }
-#if NPY_HALF_GENERATE_OVERFLOW
-    if (npy_half_isinf(ret) && npy_half_isfinite(x)) {
-        npy_set_floatstatus_overflow();
-    }
-#endif
-
-    return ret;
-}
-
-int npy_half_eq_nonan(npy_half h1, npy_half h2)
-{
-    return (h1 == h2 || ((h1 | h2) & 0x7fff) == 0);
-}
-
-int npy_half_eq(npy_half h1, npy_half h2)
-{
-    /*
-     * The equality cases are as follows:
-     *   - If either value is NaN, never equal.
-     *   - If the values are equal, equal.
-     *   - If the values are both signed zeros, equal.
-     */
-    return (!npy_half_isnan(h1) && !npy_half_isnan(h2)) &&
-           (h1 == h2 || ((h1 | h2) & 0x7fff) == 0);
-}
-
-int npy_half_ne(npy_half h1, npy_half h2)
-{
-    return !npy_half_eq(h1, h2);
-}
-
-int npy_half_lt_nonan(npy_half h1, npy_half h2)
-{
-    if (h1&0x8000u) {
-        if (h2&0x8000u) {
-            return (h1&0x7fffu) > (h2&0x7fffu);
-        } else {
-            /* Signed zeros are equal, have to check for it */
-            return (h1 != 0x8000u) || (h2 != 0x0000u);
-        }
-    } else {
-        if (h2&0x8000u) {
-            return 0;
-        } else {
-            return (h1&0x7fffu) < (h2&0x7fffu);
-        }
-    }
-}
-
-int npy_half_lt(npy_half h1, npy_half h2)
-{
-    return (!npy_half_isnan(h1) && !npy_half_isnan(h2)) && npy_half_lt_nonan(h1, h2);
-}
-
-int npy_half_gt(npy_half h1, npy_half h2)
-{
-    return npy_half_lt(h2, h1);
-}
-
-int npy_half_le_nonan(npy_half h1, npy_half h2)
-{
-    if (h1&0x8000u) {
-        if (h2&0x8000u) {
-            return (h1&0x7fffu) >= (h2&0x7fffu);
-        } else {
-            return 1;
-        }
-    } else {
-        if (h2&0x8000u) {
-            /* Signed zeros are equal, have to check for it */
-            return (h1 == 0x0000u) && (h2 == 0x8000u);
-        } else {
-            return (h1&0x7fffu) <= (h2&0x7fffu);
-        }
-    }
-}
-
-int npy_half_le(npy_half h1, npy_half h2)
-{
-    return (!npy_half_isnan(h1) && !npy_half_isnan(h2)) && npy_half_le_nonan(h1, h2);
-}
-
-int npy_half_ge(npy_half h1, npy_half h2)
-{
-    return npy_half_le(h2, h1);
-}
-
-npy_half npy_half_divmod(npy_half h1, npy_half h2, npy_half *modulus)
-{
-    float fh1 = npy_half_to_float(h1);
-    float fh2 = npy_half_to_float(h2);
-    float div, mod;
-
-    div = npy_divmodf(fh1, fh2, &mod);
-    *modulus = npy_float_to_half(mod);
-    return npy_float_to_half(div);
-}
-
-
-
-/*
- ********************************************************************
- *                     BIT-LEVEL CONVERSIONS                        *
- ********************************************************************
- */
-
-npy_uint16 npy_floatbits_to_halfbits(npy_uint32 f)
-{
-    npy_uint32 f_exp, f_sig;
-    npy_uint16 h_sgn, h_exp, h_sig;
-
-    h_sgn = (npy_uint16) ((f&0x80000000u) >> 16);
-    f_exp = (f&0x7f800000u);
-
-    /* Exponent overflow/NaN converts to signed inf/NaN */
-    if (f_exp >= 0x47800000u) {
-        if (f_exp == 0x7f800000u) {
-            /* Inf or NaN */
-            f_sig = (f&0x007fffffu);
-            if (f_sig != 0) {
-                /* NaN - propagate the flag in the significand... */
-                npy_uint16 ret = (npy_uint16) (0x7c00u + (f_sig >> 13));
-                /* ...but make sure it stays a NaN */
-                if (ret == 0x7c00u) {
-                    ret++;
-                }
-                return h_sgn + ret;
-            } else {
-                /* signed inf */
-                return (npy_uint16) (h_sgn + 0x7c00u);
-            }
-        } else {
-            /* overflow to signed inf */
-#if NPY_HALF_GENERATE_OVERFLOW
-            npy_set_floatstatus_overflow();
-#endif
-            return (npy_uint16) (h_sgn + 0x7c00u);
-        }
-    }
-
-    /* Exponent underflow converts to a subnormal half or signed zero */
-    if (f_exp <= 0x38000000u) {
-        /*
-         * Signed zeros, subnormal floats, and floats with small
-         * exponents all convert to signed zero half-floats.
-         */
-        if (f_exp < 0x33000000u) {
-#if NPY_HALF_GENERATE_UNDERFLOW
-            /* If f != 0, it underflowed to 0 */
-            if ((f&0x7fffffff) != 0) {
-                npy_set_floatstatus_underflow();
-            }
-#endif
-            return h_sgn;
-        }
-        /* Make the subnormal significand */
-        f_exp >>= 23;
-        f_sig = (0x00800000u + (f&0x007fffffu));
-#if NPY_HALF_GENERATE_UNDERFLOW
-        /* If it's not exactly represented, it underflowed */
-        if ((f_sig&(((npy_uint32)1 << (126 - f_exp)) - 1)) != 0) {
-            npy_set_floatstatus_underflow();
-        }
-#endif
-        /*
-         * Usually the significand is shifted by 13. For subnormals an
-         * additional shift needs to occur. This shift is one for the largest
-         * exponent giving a subnormal `f_exp = 0x38000000 >> 23 = 112`, which
-         * offsets the new first bit. At most the shift can be 1+10 bits.
-         */
-        f_sig >>= (113 - f_exp);
-        /* Handle rounding by adding 1 to the bit beyond half precision */
-#if NPY_HALF_ROUND_TIES_TO_EVEN
-        /*
-         * If the last bit in the half significand is 0 (already even), and
-         * the remaining bit pattern is 1000...0, then we do not add one
-         * to the bit after the half significand. However, the (113 - f_exp)
-         * shift can lose up to 11 bits, so the || checks them in the original.
-         * In all other cases, we can just add one.
-         */
-        if (((f_sig&0x00003fffu) != 0x00001000u) || (f&0x000007ffu)) {
-            f_sig += 0x00001000u;
-        }
-#else
-        f_sig += 0x00001000u;
-#endif
-        h_sig = (npy_uint16) (f_sig >> 13);
-        /*
-         * If the rounding causes a bit to spill into h_exp, it will
-         * increment h_exp from zero to one and h_sig will be zero.
-         * This is the correct result.
-         */
-        return (npy_uint16) (h_sgn + h_sig);
-    }
-
-    /* Regular case with no overflow or underflow */
-    h_exp = (npy_uint16) ((f_exp - 0x38000000u) >> 13);
-    /* Handle rounding by adding 1 to the bit beyond half precision */
-    f_sig = (f&0x007fffffu);
-#if NPY_HALF_ROUND_TIES_TO_EVEN
-    /*
-     * If the last bit in the half significand is 0 (already even), and
-     * the remaining bit pattern is 1000...0, then we do not add one
-     * to the bit after the half significand. In all other cases, we do.
-     */
-    if ((f_sig&0x00003fffu) != 0x00001000u) {
-        f_sig += 0x00001000u;
-    }
-#else
-    f_sig += 0x00001000u;
-#endif
-    h_sig = (npy_uint16) (f_sig >> 13);
-    /*
-     * If the rounding causes a bit to spill into h_exp, it will
-     * increment h_exp by one and h_sig will be zero. This is the
-     * correct result. h_exp may increment to 15, at greatest, in
-     * which case the result overflows to a signed inf.
- */ -#if NPY_HALF_GENERATE_OVERFLOW - h_sig += h_exp; - if (h_sig == 0x7c00u) { - npy_set_floatstatus_overflow(); - } - return h_sgn + h_sig; -#else - return h_sgn + h_exp + h_sig; -#endif -} - -npy_uint16 npy_doublebits_to_halfbits(npy_uint64 d) -{ - npy_uint64 d_exp, d_sig; - npy_uint16 h_sgn, h_exp, h_sig; - - h_sgn = (d&0x8000000000000000ULL) >> 48; - d_exp = (d&0x7ff0000000000000ULL); - - /* Exponent overflow/NaN converts to signed inf/NaN */ - if (d_exp >= 0x40f0000000000000ULL) { - if (d_exp == 0x7ff0000000000000ULL) { - /* Inf or NaN */ - d_sig = (d&0x000fffffffffffffULL); - if (d_sig != 0) { - /* NaN - propagate the flag in the significand... */ - npy_uint16 ret = (npy_uint16) (0x7c00u + (d_sig >> 42)); - /* ...but make sure it stays a NaN */ - if (ret == 0x7c00u) { - ret++; - } - return h_sgn + ret; - } else { - /* signed inf */ - return h_sgn + 0x7c00u; - } - } else { - /* overflow to signed inf */ -#if NPY_HALF_GENERATE_OVERFLOW - npy_set_floatstatus_overflow(); -#endif - return h_sgn + 0x7c00u; - } - } - - /* Exponent underflow converts to subnormal half or signed zero */ - if (d_exp <= 0x3f00000000000000ULL) { - /* - * Signed zeros, subnormal floats, and floats with small - * exponents all convert to signed zero half-floats. - */ - if (d_exp < 0x3e60000000000000ULL) { -#if NPY_HALF_GENERATE_UNDERFLOW - /* If d != 0, it underflowed to 0 */ - if ((d&0x7fffffffffffffffULL) != 0) { - npy_set_floatstatus_underflow(); - } -#endif - return h_sgn; - } - /* Make the subnormal significand */ - d_exp >>= 52; - d_sig = (0x0010000000000000ULL + (d&0x000fffffffffffffULL)); -#if NPY_HALF_GENERATE_UNDERFLOW - /* If it's not exactly represented, it underflowed */ - if ((d_sig&(((npy_uint64)1 << (1051 - d_exp)) - 1)) != 0) { - npy_set_floatstatus_underflow(); - } -#endif - /* - * Unlike floats, doubles have enough room to shift left to align - * the subnormal significand leading to no loss of the last bits. - * The smallest possible exponent giving a subnormal is: - * `d_exp = 0x3e60000000000000 >> 52 = 998`. All larger subnormals are - * shifted with respect to it. This adds a shift of 10+1 bits the final - * right shift when comparing it to the one in the normal branch. - */ - assert(d_exp - 998 >= 0); - d_sig <<= (d_exp - 998); - /* Handle rounding by adding 1 to the bit beyond half precision */ -#if NPY_HALF_ROUND_TIES_TO_EVEN - /* - * If the last bit in the half significand is 0 (already even), and - * the remaining bit pattern is 1000...0, then we do not add one - * to the bit after the half significand. In all other cases, we do. - */ - if ((d_sig&0x003fffffffffffffULL) != 0x0010000000000000ULL) { - d_sig += 0x0010000000000000ULL; - } -#else - d_sig += 0x0010000000000000ULL; -#endif - h_sig = (npy_uint16) (d_sig >> 53); - /* - * If the rounding causes a bit to spill into h_exp, it will - * increment h_exp from zero to one and h_sig will be zero. - * This is the correct result. - */ - return h_sgn + h_sig; - } - - /* Regular case with no overflow or underflow */ - h_exp = (npy_uint16) ((d_exp - 0x3f00000000000000ULL) >> 42); - /* Handle rounding by adding 1 to the bit beyond half precision */ - d_sig = (d&0x000fffffffffffffULL); -#if NPY_HALF_ROUND_TIES_TO_EVEN - /* - * If the last bit in the half significand is 0 (already even), and - * the remaining bit pattern is 1000...0, then we do not add one - * to the bit after the half significand. In all other cases, we do. 
- */ - if ((d_sig&0x000007ffffffffffULL) != 0x0000020000000000ULL) { - d_sig += 0x0000020000000000ULL; - } -#else - d_sig += 0x0000020000000000ULL; -#endif - h_sig = (npy_uint16) (d_sig >> 42); - - /* - * If the rounding causes a bit to spill into h_exp, it will - * increment h_exp by one and h_sig will be zero. This is the - * correct result. h_exp may increment to 15, at greatest, in - * which case the result overflows to a signed inf. - */ -#if NPY_HALF_GENERATE_OVERFLOW - h_sig += h_exp; - if (h_sig == 0x7c00u) { - npy_set_floatstatus_overflow(); - } - return h_sgn + h_sig; -#else - return h_sgn + h_exp + h_sig; -#endif -} - -npy_uint32 npy_halfbits_to_floatbits(npy_uint16 h) -{ - npy_uint16 h_exp, h_sig; - npy_uint32 f_sgn, f_exp, f_sig; - - h_exp = (h&0x7c00u); - f_sgn = ((npy_uint32)h&0x8000u) << 16; - switch (h_exp) { - case 0x0000u: /* 0 or subnormal */ - h_sig = (h&0x03ffu); - /* Signed zero */ - if (h_sig == 0) { - return f_sgn; - } - /* Subnormal */ - h_sig <<= 1; - while ((h_sig&0x0400u) == 0) { - h_sig <<= 1; - h_exp++; - } - f_exp = ((npy_uint32)(127 - 15 - h_exp)) << 23; - f_sig = ((npy_uint32)(h_sig&0x03ffu)) << 13; - return f_sgn + f_exp + f_sig; - case 0x7c00u: /* inf or NaN */ - /* All-ones exponent and a copy of the significand */ - return f_sgn + 0x7f800000u + (((npy_uint32)(h&0x03ffu)) << 13); - default: /* normalized */ - /* Just need to adjust the exponent and shift */ - return f_sgn + (((npy_uint32)(h&0x7fffu) + 0x1c000u) << 13); - } -} - -npy_uint64 npy_halfbits_to_doublebits(npy_uint16 h) -{ - npy_uint16 h_exp, h_sig; - npy_uint64 d_sgn, d_exp, d_sig; - - h_exp = (h&0x7c00u); - d_sgn = ((npy_uint64)h&0x8000u) << 48; - switch (h_exp) { - case 0x0000u: /* 0 or subnormal */ - h_sig = (h&0x03ffu); - /* Signed zero */ - if (h_sig == 0) { - return d_sgn; - } - /* Subnormal */ - h_sig <<= 1; - while ((h_sig&0x0400u) == 0) { - h_sig <<= 1; - h_exp++; - } - d_exp = ((npy_uint64)(1023 - 15 - h_exp)) << 52; - d_sig = ((npy_uint64)(h_sig&0x03ffu)) << 42; - return d_sgn + d_exp + d_sig; - case 0x7c00u: /* inf or NaN */ - /* All-ones exponent and a copy of the significand */ - return d_sgn + 0x7ff0000000000000ULL + - (((npy_uint64)(h&0x03ffu)) << 42); - default: /* normalized */ - /* Just need to adjust the exponent and shift */ - return d_sgn + (((npy_uint64)(h&0x7fffu) + 0xfc000u) << 42); - } -} diff --git a/numpy/core/src/npymath/halffloat.cpp b/numpy/core/src/npymath/halffloat.cpp new file mode 100644 index 000000000..aa582c1b9 --- /dev/null +++ b/numpy/core/src/npymath/halffloat.cpp @@ -0,0 +1,238 @@ +#define NPY_NO_DEPRECATED_API NPY_API_VERSION + +/* + * If these are 1, the conversions try to trigger underflow, + * overflow, and invalid exceptions in the FP system when needed. 
+ */ +#define NPY_HALF_GENERATE_OVERFLOW 1 +#define NPY_HALF_GENERATE_INVALID 1 + +#include "numpy/halffloat.h" + +#include "common.hpp" +/* + ******************************************************************** + * HALF-PRECISION ROUTINES * + ******************************************************************** + */ +using namespace np; + +float npy_half_to_float(npy_half h) +{ + return static_cast<float>(Half::FromBits(h)); +} + +double npy_half_to_double(npy_half h) +{ + return static_cast<double>(Half::FromBits(h)); +} + +npy_half npy_float_to_half(float f) +{ + return Half(f).Bits(); +} + +npy_half npy_double_to_half(double d) +{ + return Half(d).Bits(); +} + +int npy_half_iszero(npy_half h) +{ + return (h&0x7fff) == 0; +} + +int npy_half_isnan(npy_half h) +{ + return Half::FromBits(h).IsNaN(); +} + +int npy_half_isinf(npy_half h) +{ + return ((h&0x7fffu) == 0x7c00u); +} + +int npy_half_isfinite(npy_half h) +{ + return ((h&0x7c00u) != 0x7c00u); +} + +int npy_half_signbit(npy_half h) +{ + return (h&0x8000u) != 0; +} + +npy_half npy_half_spacing(npy_half h) +{ + npy_half ret; + npy_uint16 h_exp = h&0x7c00u; + npy_uint16 h_sig = h&0x03ffu; + if (h_exp == 0x7c00u) { +#if NPY_HALF_GENERATE_INVALID + npy_set_floatstatus_invalid(); +#endif + ret = NPY_HALF_NAN; + } else if (h == 0x7bffu) { +#if NPY_HALF_GENERATE_OVERFLOW + npy_set_floatstatus_overflow(); +#endif + ret = NPY_HALF_PINF; + } else if ((h&0x8000u) && h_sig == 0) { /* Negative boundary case */ + if (h_exp > 0x2c00u) { /* If result is normalized */ + ret = h_exp - 0x2c00u; + } else if(h_exp > 0x0400u) { /* The result is a subnormal, but not the smallest */ + ret = 1 << ((h_exp >> 10) - 2); + } else { + ret = 0x0001u; /* Smallest subnormal half */ + } + } else if (h_exp > 0x2800u) { /* If result is still normalized */ + ret = h_exp - 0x2800u; + } else if (h_exp > 0x0400u) { /* The result is a subnormal, but not the smallest */ + ret = 1 << ((h_exp >> 10) - 1); + } else { + ret = 0x0001u; + } + + return ret; +} + +npy_half npy_half_copysign(npy_half x, npy_half y) +{ + return (x&0x7fffu) | (y&0x8000u); +} + +npy_half npy_half_nextafter(npy_half x, npy_half y) +{ + npy_half ret; + + if (npy_half_isnan(x) || npy_half_isnan(y)) { + ret = NPY_HALF_NAN; + } else if (npy_half_eq_nonan(x, y)) { + ret = x; + } else if (npy_half_iszero(x)) { + ret = (y&0x8000u) + 1; /* Smallest subnormal half */ + } else if (!(x&0x8000u)) { /* x > 0 */ + if ((npy_int16)x > (npy_int16)y) { /* x > y */ + ret = x-1; + } else { + ret = x+1; + } + } else { + if (!(y&0x8000u) || (x&0x7fffu) > (y&0x7fffu)) { /* x < y */ + ret = x-1; + } else { + ret = x+1; + } + } +#if NPY_HALF_GENERATE_OVERFLOW + if (npy_half_isinf(ret) && npy_half_isfinite(x)) { + npy_set_floatstatus_overflow(); + } +#endif + + return ret; +} + +int npy_half_eq_nonan(npy_half h1, npy_half h2) +{ + return Half::FromBits(h1).Equal(Half::FromBits(h2)); +} + +int npy_half_eq(npy_half h1, npy_half h2) +{ + return Half::FromBits(h1) == Half::FromBits(h2); +} + +int npy_half_ne(npy_half h1, npy_half h2) +{ + return Half::FromBits(h1) != Half::FromBits(h2); +} + +int npy_half_lt_nonan(npy_half h1, npy_half h2) +{ + return Half::FromBits(h1).Less(Half::FromBits(h2)); +} + +int npy_half_lt(npy_half h1, npy_half h2) +{ + return Half::FromBits(h1) < Half::FromBits(h2); +} + +int npy_half_gt(npy_half h1, npy_half h2) +{ + return npy_half_lt(h2, h1); +} + +int npy_half_le_nonan(npy_half h1, npy_half h2) +{ + return Half::FromBits(h1).LessEqual(Half::FromBits(h2)); +} + +int npy_half_le(npy_half h1, npy_half h2) 
+{ + return Half::FromBits(h1) <= Half::FromBits(h2); +} + +int npy_half_ge(npy_half h1, npy_half h2) +{ + return npy_half_le(h2, h1); +} + +npy_half npy_half_divmod(npy_half h1, npy_half h2, npy_half *modulus) +{ + float fh1 = npy_half_to_float(h1); + float fh2 = npy_half_to_float(h2); + float div, mod; + + div = npy_divmodf(fh1, fh2, &mod); + *modulus = npy_float_to_half(mod); + return npy_float_to_half(div); +} + + +/* + ******************************************************************** + * BIT-LEVEL CONVERSIONS * + ******************************************************************** + */ + +npy_uint16 npy_floatbits_to_halfbits(npy_uint32 f) +{ + if constexpr (Half::kNativeConversion<float>) { + return BitCast<uint16_t>(Half(BitCast<float>(f))); + } + else { + return half_private::FromFloatBits(f); + } +} + +npy_uint16 npy_doublebits_to_halfbits(npy_uint64 d) +{ + if constexpr (Half::kNativeConversion<double>) { + return BitCast<uint16_t>(Half(BitCast<double>(d))); + } + else { + return half_private::FromDoubleBits(d); + } +} + +npy_uint32 npy_halfbits_to_floatbits(npy_uint16 h) +{ + if constexpr (Half::kNativeConversion<float>) { + return BitCast<uint32_t>(static_cast<float>(Half::FromBits(h))); + } + else { + return half_private::ToFloatBits(h); + } +} + +npy_uint64 npy_halfbits_to_doublebits(npy_uint16 h) +{ + if constexpr (Half::kNativeConversion<double>) { + return BitCast<uint64_t>(static_cast<double>(Half::FromBits(h))); + } + else { + return half_private::ToDoubleBits(h); + } +} + diff --git a/numpy/core/src/umath/loops.h.src b/numpy/core/src/umath/loops.h.src index e36067822..ab54c1966 100644 --- a/numpy/core/src/umath/loops.h.src +++ b/numpy/core/src/umath/loops.h.src @@ -336,15 +336,6 @@ NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void HALF_@func@, /**end repeat**/ /**begin repeat - * #func = sin, cos# - */ - -NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void DOUBLE_@func@, - (char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))) - -/**end repeat**/ - -/**begin repeat * #TYPE = FLOAT, DOUBLE# */ /**begin repeat1 @@ -360,12 +351,17 @@ NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_@func@, #ifndef NPY_DISABLE_OPTIMIZATION #include "loops_trigonometric.dispatch.h" #endif + /**begin repeat + * #TYPE = FLOAT, DOUBLE# + */ +/**begin repeat1 * #func = sin, cos# */ -NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void FLOAT_@func@, ( +NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_@func@, ( char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func) )) +/**end repeat1**/ /**end repeat**/ #ifndef NPY_DISABLE_OPTIMIZATION diff --git a/numpy/core/src/umath/loops_trigonometric.dispatch.c.src b/numpy/core/src/umath/loops_trigonometric.dispatch.c.src index 9f9978e66..28af04f6b 100644 --- a/numpy/core/src/umath/loops_trigonometric.dispatch.c.src +++ b/numpy/core/src/umath/loops_trigonometric.dispatch.c.src @@ -9,12 +9,18 @@ #include "simd/simd.h" #include "loops_utils.h" #include "loops.h" +#include "fast_loop_macros.h" /* * TODO: * - use vectorized version of Payne-Hanek style reduction for large elements or * when there's no native FUSED support instead of fallback to libc */ -#if NPY_SIMD_F32 && NPY_SIMD_FMA3 // native support +#if NPY_SIMD_FMA3 // native support +/**begin repeat + * #check = F64, F32# + * #sfx = f64, f32# + */ +#if NPY_SIMD_@check@ /* * Vectorized Cody-Waite range reduction technique * Performs the reduction step x* = x - y*C in three steps: @@ -23,14 +29,189 @@ * 3) x* = x - y*c3 * c1, c2 are exact floating points, c3 
= C - c1 - c2 simulates higher precision */ -NPY_FINLINE npyv_f32 -simd_range_reduction_f32(npyv_f32 x, npyv_f32 y, npyv_f32 c1, npyv_f32 c2, npyv_f32 c3) +NPY_FINLINE npyv_@sfx@ +simd_range_reduction_@sfx@(npyv_@sfx@ x, npyv_@sfx@ y, npyv_@sfx@ c1, npyv_@sfx@ c2, npyv_@sfx@ c3) { - npyv_f32 reduced_x = npyv_muladd_f32(y, c1, x); - reduced_x = npyv_muladd_f32(y, c2, reduced_x); - reduced_x = npyv_muladd_f32(y, c3, reduced_x); + npyv_@sfx@ reduced_x = npyv_muladd_@sfx@(y, c1, x); + reduced_x = npyv_muladd_@sfx@(y, c2, reduced_x); + reduced_x = npyv_muladd_@sfx@(y, c3, reduced_x); return reduced_x; } +#endif +/**end repeat**/ + +#if NPY_SIMD_F64 +/**begin repeat + * #op = cos, sin# + */ +#if defined(NPY_OS_WIN32) || defined(NPY_OS_CYGWIN) +NPY_FINLINE npyv_f64 +#else +NPY_NOINLINE npyv_f64 +#endif +simd_@op@_scalar_f64(npyv_f64 out, npy_uint64 cmp_bits) +{ + // MSVC doesn't compile with direct vector access, so we copy it here + // as we have no npyv_get_lane/npyv_set_lane intrinsics + npy_double NPY_DECL_ALIGNED(NPY_SIMD_WIDTH) out_copy[npyv_nlanes_f64]; + npyv_storea_f64(out_copy, out); + + for (unsigned i = 0; i < npyv_nlanes_f64; ++i) { + if (cmp_bits & (1 << i)) { + out_copy[i] = npy_@op@(out_copy[i]); + } + } + + return npyv_loada_f64(out_copy); +} +/**end repeat**/ + +/* + * Approximate sine algorithm for x \in [-pi/2, pi/2] + * worst-case error is 3.5 ulp. + * abs error: 0x1.be222a58p-53 in [-pi/2, pi/2]. + */ +NPY_FINLINE npyv_f64 +simd_approx_sine_poly_f64(npyv_f64 r) +{ + const npyv_f64 poly1 = npyv_setall_f64(-0x1.9f4a9c8b21dc9p-41); + const npyv_f64 poly2 = npyv_setall_f64(0x1.60e88a10163f2p-33); + const npyv_f64 poly3 = npyv_setall_f64(-0x1.ae6361b7254e7p-26); + const npyv_f64 poly4 = npyv_setall_f64(0x1.71de382e8d62bp-19); + const npyv_f64 poly5 = npyv_setall_f64(-0x1.a01a019aeb4ffp-13); + const npyv_f64 poly6 = npyv_setall_f64(0x1.111111110b25ep-7); + const npyv_f64 poly7 = npyv_setall_f64(-0x1.55555555554c3p-3); + + npyv_f64 r2 = npyv_mul_f64(r, r); + npyv_f64 y = npyv_muladd_f64(poly1, r2, poly2); + y = npyv_muladd_f64(y, r2, poly3); + y = npyv_muladd_f64(y, r2, poly4); + y = npyv_muladd_f64(y, r2, poly5); + y = npyv_muladd_f64(y, r2, poly6); + y = npyv_muladd_f64(y, r2, poly7); + y = npyv_muladd_f64(npyv_mul_f64(y, r2), r, r); + + return y; +} + +/* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */ +NPY_FINLINE npyv_f64 +simd_range_reduction_pi2(npyv_f64 r, npyv_f64 n) { + const npyv_f64 pi1 = npyv_setall_f64(-0x1.921fb54442d18p+1); + const npyv_f64 pi2 = npyv_setall_f64(-0x1.1a62633145c06p-53); + const npyv_f64 pi3 = npyv_setall_f64(-0x1.c1cd129024e09p-106); + + return simd_range_reduction_f64(r, n, pi1, pi2, pi3); +} + +NPY_FINLINE npyv_b64 simd_cos_range_check_f64(npyv_u64 ir) { + const npyv_u64 tiny_bound = npyv_setall_u64(0x202); /* top12 (asuint64 (0x1p-509)). */ + const npyv_u64 simd_thresh = npyv_setall_u64(0x214); /* top12 (asuint64 (RangeVal)) - SIMD_TINY_BOUND. */ + + return npyv_cmpge_u64(npyv_sub_u64(npyv_shri_u64(ir, 52), tiny_bound), simd_thresh); +} + +NPY_FINLINE npyv_b64 simd_sin_range_check_f64(npyv_u64 ir) { + const npyv_f64 range_val = npyv_setall_f64(0x1p23); + + return npyv_cmpge_u64(ir, npyv_reinterpret_u64_f64(range_val)); +} + +NPY_FINLINE npyv_f64 +simd_cos_poly_f64(npyv_f64 r, npyv_u64 ir, npyv_u64 sign) +{ + const npyv_f64 inv_pi = npyv_setall_f64(0x1.45f306dc9c883p-2); + const npyv_f64 half_pi = npyv_setall_f64(0x1.921fb54442d18p+0); + const npyv_f64 shift = npyv_setall_f64(0x1.8p52); + + /* n = rint((|x|+pi/2)/pi) - 0.5. 
*/ + npyv_f64 n = npyv_muladd_f64(inv_pi, npyv_add_f64(r, half_pi), shift); + npyv_u64 odd = npyv_shli_u64(npyv_reinterpret_u64_f64(n), 63); + n = npyv_sub_f64(n, shift); + n = npyv_sub_f64(n, npyv_setall_f64(0.5)); + + /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */ + r = simd_range_reduction_pi2(r, n); + + /* sin(r) poly approx. */ + npyv_f64 y = simd_approx_sine_poly_f64(r); + + /* sign. */ + return npyv_reinterpret_f64_u64(npyv_xor_u64(npyv_reinterpret_u64_f64(y), odd)); +} + +NPY_FINLINE npyv_f64 +simd_sin_poly_f64(npyv_f64 r, npyv_u64 ir, npyv_u64 sign) +{ + const npyv_f64 inv_pi = npyv_setall_f64(0x1.45f306dc9c883p-2); + const npyv_f64 shift = npyv_setall_f64(0x1.8p52); + + /* n = rint(|x|/pi). */ + npyv_f64 n = npyv_muladd_f64(inv_pi, r, shift); + npyv_u64 odd = npyv_shli_u64(npyv_reinterpret_u64_f64(n), 63); + n = npyv_sub_f64(n, shift); + + /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */ + r = simd_range_reduction_pi2(r, n); + + /* sin(r) poly approx. */ + npyv_f64 y = simd_approx_sine_poly_f64(r); + + /* sign. */ + return npyv_reinterpret_f64_u64(npyv_xor_u64(npyv_xor_u64(npyv_reinterpret_u64_f64(y), sign), odd)); +} + +/**begin repeat + * #op = cos, sin# + */ +NPY_FINLINE void +simd_@op@_f64(const double *src, npy_intp ssrc, double *dst, npy_intp sdst, npy_intp len) +{ + const npyv_u64 abs_mask = npyv_setall_u64(0x7fffffffffffffff); + const int vstep = npyv_nlanes_f64; + + npyv_f64 out = npyv_zero_f64(); + npyv_f64 x_in; + + for (; len > 0; len -= vstep, src += ssrc*vstep, dst += sdst*vstep) { + if (ssrc == 1) { + x_in = npyv_load_tillz_f64(src, len); + } else { + x_in = npyv_loadn_tillz_f64(src, ssrc, len); + } + + npyv_u64 ir = npyv_and_u64(npyv_reinterpret_u64_f64(x_in), abs_mask); + npyv_f64 r = npyv_reinterpret_f64_u64(ir); + npyv_u64 sign = npyv_and_u64(npyv_reinterpret_u64_f64(x_in), npyv_not_u64(abs_mask)); + + npyv_b64 cmp = simd_@op@_range_check_f64(ir); + /* If fenv exceptions are to be triggered correctly, set any special lanes + to 1 (which is neutral w.r.t. fenv). These lanes will be fixed by + scalar loop later. 
+    */
+        r = npyv_select_f64(cmp, npyv_setall_f64(1.0), r);
+
+        // Some in range, at least one calculation is useful
+        if (!npyv_all_b64(cmp)) {
+            out = simd_@op@_poly_f64(r, ir, sign);
+        }
+
+        if (npyv_any_b64(cmp)) {
+            out = npyv_select_f64(cmp, x_in, out);
+            out = simd_@op@_scalar_f64(out, npyv_tobits_b64(cmp));
+        }
+
+        if (sdst == 1) {
+            npyv_store_till_f64(dst, len, out);
+        } else {
+            npyv_storen_till_f64(dst, sdst, len, out);
+        }
+    }
+    npyv_cleanup();
+}
+/**end repeat**/
+#endif // NPY_SIMD_F64
+
+#if NPY_SIMD_F32
 /*
  * Approximate cosine algorithm for x \in [-PI/4, PI/4]
  * Maximum ULP across all 32-bit floats = 0.875
@@ -198,24 +379,58 @@ simd_sincos_f32(const float *src, npy_intp ssrc, float *dst, npy_intp sdst,
     }
     npyv_cleanup();
 }
-#endif // NPY_SIMD_FMA3
+#endif // NPY_SIMD_F32
+#endif // NPY_SIMD_FMA3
 
 /**begin repeat
  * #func = cos, sin#
- * #enum = SIMD_COMPUTE_COS, SIMD_COMPUTE_SIN#
+ */
+NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(DOUBLE_@func@)
+(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
+{
+#if NPY_SIMD_F64 && NPY_SIMD_FMA3
+    const double *src = (double*)args[0];
+    double *dst = (double*)args[1];
+    const int lsize = sizeof(src[0]);
+    const npy_intp ssrc = steps[0] / lsize;
+    const npy_intp sdst = steps[1] / lsize;
+    npy_intp len = dimensions[0];
+    assert(len <= 1 || (steps[0] % lsize == 0 && steps[1] % lsize == 0));
+
+    if (is_mem_overlap(src, steps[0], dst, steps[1], len) ||
+        !npyv_loadable_stride_f64(ssrc) || !npyv_storable_stride_f64(sdst)
+    ) {
+        for (; len > 0; --len, src += ssrc, dst += sdst) {
+            simd_@func@_f64(src, 1, dst, 1, 1);
+        }
+    } else {
+        simd_@func@_f64(src, ssrc, dst, sdst, len);
+    }
+#else
+    UNARY_LOOP {
+        const npy_double in1 = *(npy_double *)ip1;
+        *(npy_double *)op1 = npy_@func@(in1);
+    }
+#endif
+}
+/**end repeat**/
+
+/**begin repeat
+ * #func = sin, cos#
+ * #enum = SIMD_COMPUTE_SIN, SIMD_COMPUTE_COS#
  */
 NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(FLOAT_@func@)
 (char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
 {
-    const float *src = (float*)args[0];
-    float *dst = (float*)args[1];
+#if NPY_SIMD_F32 && NPY_SIMD_FMA3
+    const npy_float *src = (npy_float*)args[0];
+    npy_float *dst = (npy_float*)args[1];
     const int lsize = sizeof(src[0]);
     const npy_intp ssrc = steps[0] / lsize;
     const npy_intp sdst = steps[1] / lsize;
     npy_intp len = dimensions[0];
     assert(len <= 1 || (steps[0] % lsize == 0 && steps[1] % lsize == 0));
-#if NPY_SIMD_F32 && NPY_SIMD_FMA3
     if (is_mem_overlap(src, steps[0], dst, steps[1], len) ||
         !npyv_loadable_stride_f32(ssrc) || !npyv_storable_stride_f32(sdst)
     ) {
@@ -226,9 +441,9 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(FLOAT_@func@)
         simd_sincos_f32(src, ssrc, dst, sdst, len, @enum@);
     }
 #else
-    for (; len > 0; --len, src += ssrc, dst += sdst) {
-        const float src0 = *src;
-        *dst = npy_@func@f(src0);
+    UNARY_LOOP {
+        const npy_float in1 = *(npy_float *)ip1;
+        *(npy_float *)op1 = npy_@func@f(in1);
     }
 #endif
 }
diff --git a/numpy/core/src/umath/loops_umath_fp.dispatch.c.src b/numpy/core/src/umath/loops_umath_fp.dispatch.c.src
index d6703bfb9..89999e879 100644
--- a/numpy/core/src/umath/loops_umath_fp.dispatch.c.src
+++ b/numpy/core/src/umath/loops_umath_fp.dispatch.c.src
@@ -60,32 +60,6 @@ simd_@func@_@sfx@(const npyv_lanetype_@sfx@ *src, npy_intp ssrc,
 /**end repeat**/
 
 /**begin repeat
- * #func = sin, cos#
- */
-static void
-simd_@func@_f64(const double *src, npy_intp ssrc,
-                double *dst, npy_intp sdst, npy_intp len)
-{
-    const int vstep = npyv_nlanes_f64;
-    for (; len > 0; len -= vstep, src += ssrc*vstep, dst += sdst*vstep) {
-        npyv_f64 x;
-        if (ssrc == 1) {
-            x = npyv_load_tillz_f64(src, len);
-        } else {
-            x = npyv_loadn_tillz_f64(src, ssrc, len);
-        }
-        npyv_f64 out = __svml_@func@8(x);
-        if (sdst == 1) {
-            npyv_store_till_f64(dst, len, out);
-        } else {
-            npyv_storen_till_f64(dst, sdst, len, out);
-        }
-    }
-    npyv_cleanup();
-}
-/**end repeat**/
-
-/**begin repeat
  * #sfx = f32, f64#
  * #func_suffix = f16, 8#
  */
@@ -277,31 +251,3 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_@func@)
 }
 /**end repeat1**/
 /**end repeat**/
-
-/**begin repeat
- * #func = sin, cos#
- */
-NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(DOUBLE_@func@)
-(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
-{
-#if NPY_SIMD && defined(NPY_HAVE_AVX512_SKX) && defined(NPY_CAN_LINK_SVML)
-    const double *src = (double*)args[0];
-    double *dst = (double*)args[1];
-    const int lsize = sizeof(src[0]);
-    const npy_intp ssrc = steps[0] / lsize;
-    const npy_intp sdst = steps[1] / lsize;
-    const npy_intp len = dimensions[0];
-    assert(len <= 1 || (steps[0] % lsize == 0 && steps[1] % lsize == 0));
-    if (!is_mem_overlap(src, steps[0], dst, steps[1], len) &&
-        npyv_loadable_stride_f64(ssrc) &&
-        npyv_storable_stride_f64(sdst)) {
-        simd_@func@_f64(src, ssrc, dst, sdst, len);
-        return;
-    }
-#endif
-    UNARY_LOOP {
-        const npy_double in1 = *(npy_double *)ip1;
-        *(npy_double *)op1 = npy_@func@(in1);
-    }
-}
-/**end repeat**/
diff --git a/numpy/core/tests/test_array_coercion.py b/numpy/core/tests/test_array_coercion.py
index aeddac585..baca076ae 100644
--- a/numpy/core/tests/test_array_coercion.py
+++ b/numpy/core/tests/test_array_coercion.py
@@ -39,9 +39,9 @@ def arraylikes():
     yield subclass
 
     class _SequenceLike():
-        # We are giving a warning that array-like's were also expected to be
-        # sequence-like in `np.array([array_like])`. This can be removed
-        # when the deprecation expired (started NumPy 1.20).
+        # Older NumPy versions sometimes cared whether a protocol array was
+        # also _SequenceLike.  This shouldn't matter, but we keep it for now
+        # for `__array__` only, not for the other protocols.
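A minimal, runnable sketch of the kind of protocol-only array-like these fixtures build (the class name `ProtocolOnly` is hypothetical, used here for illustration only), showing the coercion behaviour the surrounding tests exercise:

    import numpy as np

    class ProtocolOnly:
        # Exposes only __array_interface__; no __len__/__getitem__.
        def __init__(self, a):
            self.a = a  # keep a reference so the interface stays valid
            self.__array_interface__ = a.__array_interface__

    obj = ProtocolOnly(np.arange(3))
    assert (np.array(obj) == np.arange(3)).all()  # unnested: coerced via the protocol
    res = np.array([obj], dtype=object)           # nested with dtype=object:
    assert res[0] is obj                          # the object itself is stored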
def __len__(self): raise TypeError @@ -62,7 +62,7 @@ def arraylikes(): yield param(memoryview, id="memoryview") # Array-interface - class ArrayInterface(_SequenceLike): + class ArrayInterface: def __init__(self, a): self.a = a # need to hold on to keep interface valid self.__array_interface__ = a.__array_interface__ @@ -70,7 +70,7 @@ def arraylikes(): yield param(ArrayInterface, id="__array_interface__") # Array-Struct - class ArrayStruct(_SequenceLike): + class ArrayStruct: def __init__(self, a): self.a = a # need to hold on to keep struct valid self.__array_struct__ = a.__array_struct__ @@ -654,6 +654,14 @@ class TestArrayLikes: res = np.array([obj], dtype=object) assert res[0] is obj + @pytest.mark.parametrize("arraylike", arraylikes()) + @pytest.mark.parametrize("arr", [np.array(0.), np.arange(4)]) + def test_object_assignment_special_case(self, arraylike, arr): + obj = arraylike(arr) + empty = np.arange(1, dtype=object) + empty[:] = [obj] + assert empty[0] is obj + def test_0d_generic_special_case(self): class ArraySubclass(np.ndarray): def __float__(self): diff --git a/numpy/core/tests/test_arrayprint.py b/numpy/core/tests/test_arrayprint.py index 372cb8d41..b92c8ae8c 100644 --- a/numpy/core/tests/test_arrayprint.py +++ b/numpy/core/tests/test_arrayprint.py @@ -353,6 +353,33 @@ class TestArray2String: ' [ 501, 502, 503, ..., 999, 1000, 1001]])' assert_equal(repr(A), reprA) + def test_summarize_structure(self): + A = (np.arange(2002, dtype="<i8").reshape(2, 1001) + .view([('i', "<i8", (1001,))])) + strA = ("[[([ 0, 1, 2, ..., 998, 999, 1000],)]\n" + " [([1001, 1002, 1003, ..., 1999, 2000, 2001],)]]") + assert_equal(str(A), strA) + + reprA = ("array([[([ 0, 1, 2, ..., 998, 999, 1000],)],\n" + " [([1001, 1002, 1003, ..., 1999, 2000, 2001],)]],\n" + " dtype=[('i', '<i8', (1001,))])") + assert_equal(repr(A), reprA) + + B = np.ones(2002, dtype=">i8").view([('i', ">i8", (2, 1001))]) + strB = "[([[1, 1, 1, ..., 1, 1, 1], [1, 1, 1, ..., 1, 1, 1]],)]" + assert_equal(str(B), strB) + + reprB = ( + "array([([[1, 1, 1, ..., 1, 1, 1], [1, 1, 1, ..., 1, 1, 1]],)],\n" + " dtype=[('i', '>i8', (2, 1001))])" + ) + assert_equal(repr(B), reprB) + + C = (np.arange(22, dtype="<i8").reshape(2, 11) + .view([('i1', "<i8"), ('i10', "<i8", (10,))])) + strC = "[[( 0, [ 1, ..., 10])]\n [(11, [12, ..., 21])]]" + assert_equal(np.array2string(C, threshold=1, edgeitems=1), strC) + def test_linewidth(self): a = np.full(6, 1) @@ -817,7 +844,7 @@ class TestPrintOptions: ) def test_dtype_endianness_repr(self, native): ''' - there was an issue where + there was an issue where repr(array([0], dtype='<u2')) and repr(array([0], dtype='>u2')) both returned the same thing: array([0], dtype=uint16) @@ -963,6 +990,16 @@ class TestPrintOptions: [[ 0.]]]])""") ) + def test_edgeitems_structured(self): + np.set_printoptions(edgeitems=1, threshold=1) + A = np.arange(5*2*3, dtype="<i8").view([('i', "<i8", (5, 2, 3))]) + reprA = ( + "array([([[[ 0, ..., 2], [ 3, ..., 5]], ..., " + "[[24, ..., 26], [27, ..., 29]]],)],\n" + " dtype=[('i', '<i8', (5, 2, 3))])" + ) + assert_equal(repr(A), reprA) + def test_bad_args(self): assert_raises(ValueError, np.set_printoptions, threshold=float('nan')) assert_raises(TypeError, np.set_printoptions, threshold='1') diff --git a/numpy/core/tests/test_deprecations.py b/numpy/core/tests/test_deprecations.py index b92a20a12..0449b3257 100644 --- a/numpy/core/tests/test_deprecations.py +++ b/numpy/core/tests/test_deprecations.py @@ -580,92 +580,6 @@ class 
TestDeprecateSubarrayDTypeDuringArrayCoercion(_DeprecationTestCase): self.assert_deprecated(check) -class TestFutureWarningArrayLikeNotIterable(_DeprecationTestCase): - # Deprecated 2020-12-09, NumPy 1.20 - warning_cls = FutureWarning - message = "The input object of type.*but not a sequence" - - @pytest.mark.parametrize("protocol", - ["__array__", "__array_interface__", "__array_struct__"]) - def test_deprecated(self, protocol): - """Test that these objects give a warning since they are not 0-D, - not coerced at the top level `np.array(obj)`, but nested, and do - *not* define the sequence protocol. - - NOTE: Tests for the versions including __len__ and __getitem__ exist - in `test_array_coercion.py` and they can be modified or amended - when this deprecation expired. - """ - blueprint = np.arange(10) - MyArr = type("MyArr", (), {protocol: getattr(blueprint, protocol)}) - self.assert_deprecated(lambda: np.array([MyArr()], dtype=object)) - - @pytest.mark.parametrize("protocol", - ["__array__", "__array_interface__", "__array_struct__"]) - def test_0d_not_deprecated(self, protocol): - # 0-D always worked (albeit it would use __float__ or similar for the - # conversion, which may not happen anymore) - blueprint = np.array(1.) - MyArr = type("MyArr", (), {protocol: getattr(blueprint, protocol)}) - myarr = MyArr() - - self.assert_not_deprecated(lambda: np.array([myarr], dtype=object)) - res = np.array([myarr], dtype=object) - expected = np.empty(1, dtype=object) - expected[0] = myarr - assert_array_equal(res, expected) - - @pytest.mark.parametrize("protocol", - ["__array__", "__array_interface__", "__array_struct__"]) - def test_unnested_not_deprecated(self, protocol): - blueprint = np.arange(10) - MyArr = type("MyArr", (), {protocol: getattr(blueprint, protocol)}) - myarr = MyArr() - - self.assert_not_deprecated(lambda: np.array(myarr)) - res = np.array(myarr) - assert_array_equal(res, blueprint) - - @pytest.mark.parametrize("protocol", - ["__array__", "__array_interface__", "__array_struct__"]) - def test_strange_dtype_handling(self, protocol): - """The old code would actually use the dtype from the array, but - then end up not using the array (for dimension discovery) - """ - blueprint = np.arange(10).astype("f4") - MyArr = type("MyArr", (), {protocol: getattr(blueprint, protocol), - "__float__": lambda _: 0.5}) - myarr = MyArr() - - # Make sure we warn (and capture the FutureWarning) - with pytest.warns(FutureWarning, match=self.message): - res = np.array([[myarr]]) - - assert res.shape == (1, 1) - assert res.dtype == "f4" - assert res[0, 0] == 0.5 - - @pytest.mark.parametrize("protocol", - ["__array__", "__array_interface__", "__array_struct__"]) - def test_assignment_not_deprecated(self, protocol): - # If the result is dtype=object we do not unpack a nested array or - # array-like, if it is nested at exactly the right depth. - # NOTE: We actually do still call __array__, etc. but ignore the result - # in the end. For `dtype=object` we could optimize that away. 
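The dtype=object assignment path described in the NOTE above is what the new `test_object_assignment_special_case` in test_array_coercion.py now covers; a condensed, runnable sketch of it, reusing the removed test's hypothetical `MyArr` construction:

    import numpy as np

    blueprint = np.arange(10).astype("f4")
    MyArr = type("MyArr", (), {
        "__array_interface__": blueprint.__array_interface__,
        "__float__": lambda self: 0.5,
    })
    myarr = MyArr()

    res = np.empty(3, dtype=object)
    res[:] = [myarr, myarr, myarr]  # nested at exactly the right depth
    assert res[0] is myarr          # stored as-is, the elements are not coerced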
- blueprint = np.arange(10).astype("f4") - MyArr = type("MyArr", (), {protocol: getattr(blueprint, protocol), - "__float__": lambda _: 0.5}) - myarr = MyArr() - - res = np.empty(3, dtype=object) - def set(): - res[:] = [myarr, myarr, myarr] - self.assert_not_deprecated(set) - assert res[0] is myarr - assert res[1] is myarr - assert res[2] is myarr - - class TestDeprecatedUnpickleObjectScalar(_DeprecationTestCase): # Deprecated 2020-11-24, NumPy 1.20 """ @@ -822,6 +736,18 @@ class TestLoadtxtParseIntsViaFloat(_DeprecationTestCase): assert isinstance(e.__cause__, DeprecationWarning) +class TestScalarConversion(_DeprecationTestCase): + # 2023-01-02, 1.25.0 + def test_float_conversion(self): + self.assert_deprecated(float, args=(np.array([3.14]),)) + + def test_behaviour(self): + b = np.array([[3.14]]) + c = np.zeros(5) + with pytest.warns(DeprecationWarning): + c[0] = b + + class TestPyIntConversion(_DeprecationTestCase): message = r".*stop allowing conversion of out-of-bound.*" diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index 4a064827d..984047c87 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -3644,9 +3644,13 @@ class TestMethods: msg = 'dtype: {0}'.format(dt) ap = complex(a) assert_equal(ap, a, msg) - bp = complex(b) + + with assert_warns(DeprecationWarning): + bp = complex(b) assert_equal(bp, b, msg) - cp = complex(c) + + with assert_warns(DeprecationWarning): + cp = complex(c) assert_equal(cp, c, msg) def test__complex__should_not_work(self): @@ -3669,7 +3673,8 @@ class TestMethods: assert_raises(TypeError, complex, d) e = np.array(['1+1j'], 'U') - assert_raises(TypeError, complex, e) + with assert_warns(DeprecationWarning): + assert_raises(TypeError, complex, e) class TestCequenceMethods: def test_array_contains(self): @@ -8756,8 +8761,10 @@ class TestConversion: int_funcs = (int, lambda x: x.__int__()) for int_func in int_funcs: assert_equal(int_func(np.array(0)), 0) - assert_equal(int_func(np.array([1])), 1) - assert_equal(int_func(np.array([[42]])), 42) + with assert_warns(DeprecationWarning): + assert_equal(int_func(np.array([1])), 1) + with assert_warns(DeprecationWarning): + assert_equal(int_func(np.array([[42]])), 42) assert_raises(TypeError, int_func, np.array([1, 2])) # gh-9972 @@ -8772,7 +8779,8 @@ class TestConversion: def __trunc__(self): return 3 assert_equal(3, int_func(np.array(HasTrunc()))) - assert_equal(3, int_func(np.array([HasTrunc()]))) + with assert_warns(DeprecationWarning): + assert_equal(3, int_func(np.array([HasTrunc()]))) else: pass @@ -8781,8 +8789,9 @@ class TestConversion: raise NotImplementedError assert_raises(NotImplementedError, int_func, np.array(NotConvertible())) - assert_raises(NotImplementedError, - int_func, np.array([NotConvertible()])) + with assert_warns(DeprecationWarning): + assert_raises(NotImplementedError, + int_func, np.array([NotConvertible()])) class TestWhere: diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py index 0ed64d72a..438b5600a 100644 --- a/numpy/core/tests/test_umath.py +++ b/numpy/core/tests/test_umath.py @@ -1426,23 +1426,36 @@ class TestSpecialFloats: np.log(a) @pytest.mark.skipif(IS_WASM, reason="fp errors don't work in wasm") - def test_sincos_values(self): + @pytest.mark.parametrize('dtype', ['e', 'f', 'd', 'g']) + def test_sincos_values(self, dtype): with np.errstate(all='ignore'): x = [np.nan, np.nan, np.nan, np.nan] y = [np.nan, -np.nan, np.inf, -np.inf] - for dt in ['e', 'f', 'd', 'g']: - xf = 
np.array(x, dtype=dt) - yf = np.array(y, dtype=dt) - assert_equal(np.sin(yf), xf) - assert_equal(np.cos(yf), xf) - + xf = np.array(x, dtype=dtype) + yf = np.array(y, dtype=dtype) + assert_equal(np.sin(yf), xf) + assert_equal(np.cos(yf), xf) + @pytest.mark.skipif(IS_WASM, reason="fp errors don't work in wasm") + @pytest.mark.parametrize('callable', [np.sin, np.cos]) + @pytest.mark.parametrize('dtype', ['e', 'f', 'd']) + @pytest.mark.parametrize('value', [np.inf, -np.inf]) + def test_sincos_errors(self, callable, dtype, value): with np.errstate(invalid='raise'): - for callable in [np.sin, np.cos]: - for value in [np.inf, -np.inf]: - for dt in ['e', 'f', 'd']: - assert_raises(FloatingPointError, callable, - np.array([value], dtype=dt)) + assert_raises(FloatingPointError, callable, + np.array([value], dtype=dtype)) + + @pytest.mark.parametrize('callable', [np.sin, np.cos]) + @pytest.mark.parametrize('dtype', ['f', 'd']) + @pytest.mark.parametrize('stride', [-1, 1, 2, 4, 5]) + def test_sincos_overlaps(self, callable, dtype, stride): + N = 100 + M = N // abs(stride) + rng = np.random.default_rng(42) + x = rng.standard_normal(N, dtype) + y = callable(x[::stride]) + callable(x[::stride], out=x[:M]) + assert_equal(x[:M], y) @pytest.mark.parametrize('dt', ['e', 'f', 'd', 'g']) def test_sqrt_values(self, dt): diff --git a/numpy/distutils/command/build_ext.py b/numpy/distutils/command/build_ext.py index d24162a42..871aa1099 100644 --- a/numpy/distutils/command/build_ext.py +++ b/numpy/distutils/command/build_ext.py @@ -259,7 +259,7 @@ class build_ext (old_build_ext): log.warn('resetting extension %r language from %r to %r.' % (ext.name, l, ext_language)) - ext.language = ext_language + ext.language = ext_language # global language all_languages.update(ext_languages) diff --git a/numpy/f2py/crackfortran.py b/numpy/f2py/crackfortran.py index c085baf12..c0a79bcae 100755 --- a/numpy/f2py/crackfortran.py +++ b/numpy/f2py/crackfortran.py @@ -1740,6 +1740,28 @@ def updatevars(typespec, selector, attrspec, entitydecl): d1[k] = unmarkouterparen(d1[k]) else: del d1[k] + + if 'len' in d1: + if typespec in ['complex', 'integer', 'logical', 'real']: + if ('kindselector' not in edecl) or (not edecl['kindselector']): + edecl['kindselector'] = {} + edecl['kindselector']['*'] = d1['len'] + del d1['len'] + elif typespec == 'character': + if ('charselector' not in edecl) or (not edecl['charselector']): + edecl['charselector'] = {} + if 'len' in edecl['charselector']: + del edecl['charselector']['len'] + edecl['charselector']['*'] = d1['len'] + del d1['len'] + + if 'init' in d1: + if '=' in edecl and (not edecl['='] == d1['init']): + outmess('updatevars: attempt to change the init expression of "%s" ("%s") to "%s". 
Ignoring.\n' % ( + ename, edecl['='], d1['init'])) + else: + edecl['='] = d1['init'] + if 'len' in d1 and 'array' in d1: if d1['len'] == '': d1['len'] = d1['array'] @@ -1749,6 +1771,7 @@ def updatevars(typespec, selector, attrspec, entitydecl): del d1['len'] errmess('updatevars: "%s %s" is mapped to "%s %s(%s)"\n' % ( typespec, e, typespec, ename, d1['array'])) + if 'array' in d1: dm = 'dimension(%s)' % d1['array'] if 'attrspec' not in edecl or (not edecl['attrspec']): @@ -1762,23 +1785,6 @@ def updatevars(typespec, selector, attrspec, entitydecl): % (ename, dm1, dm)) break - if 'len' in d1: - if typespec in ['complex', 'integer', 'logical', 'real']: - if ('kindselector' not in edecl) or (not edecl['kindselector']): - edecl['kindselector'] = {} - edecl['kindselector']['*'] = d1['len'] - elif typespec == 'character': - if ('charselector' not in edecl) or (not edecl['charselector']): - edecl['charselector'] = {} - if 'len' in edecl['charselector']: - del edecl['charselector']['len'] - edecl['charselector']['*'] = d1['len'] - if 'init' in d1: - if '=' in edecl and (not edecl['='] == d1['init']): - outmess('updatevars: attempt to change the init expression of "%s" ("%s") to "%s". Ignoring.\n' % ( - ename, edecl['='], d1['init'])) - else: - edecl['='] = d1['init'] else: outmess('updatevars: could not crack entity declaration "%s". Ignoring.\n' % ( ename + m.group('after'))) diff --git a/numpy/f2py/tests/src/string/scalar_string.f90 b/numpy/f2py/tests/src/string/scalar_string.f90 index d847668bb..f8f076172 100644 --- a/numpy/f2py/tests/src/string/scalar_string.f90 +++ b/numpy/f2py/tests/src/string/scalar_string.f90 @@ -1,7 +1,9 @@ MODULE string_test character(len=8) :: string + character string77 * 8 character(len=12), dimension(5,7) :: strarr + character strarr77(5,7) * 12 END MODULE string_test diff --git a/numpy/f2py/tests/test_character.py b/numpy/f2py/tests/test_character.py index 5f9805158..0bb0f4290 100644 --- a/numpy/f2py/tests/test_character.py +++ b/numpy/f2py/tests/test_character.py @@ -576,16 +576,18 @@ class TestStringScalarArr(util.F2PyTest): @pytest.mark.slow def test_char(self): - out = self.module.string_test.string - expected = () - assert out.shape == expected - expected = '|S8' - assert out.dtype == expected + for out in (self.module.string_test.string, + self.module.string_test.string77): + expected = () + assert out.shape == expected + expected = '|S8' + assert out.dtype == expected @pytest.mark.slow def test_char_arr(self): - out = self.module.string_test.strarr - expected = (5,7) - assert out.shape == expected - expected = '|S12' - assert out.dtype == expected + for out in (self.module.string_test.strarr, + self.module.string_test.strarr77): + expected = (5,7) + assert out.shape == expected + expected = '|S12' + assert out.dtype == expected diff --git a/numpy/f2py/tests/test_crackfortran.py b/numpy/f2py/tests/test_crackfortran.py index 886fc596e..dc0f7e27a 100644 --- a/numpy/f2py/tests/test_crackfortran.py +++ b/numpy/f2py/tests/test_crackfortran.py @@ -290,39 +290,29 @@ class TestNameArgsPatternBacktracking: def test_nameargspattern_backtracking(self, adversary): '''address ReDOS vulnerability: https://github.com/numpy/numpy/issues/23338''' - last_median = 0. 
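The rewritten backtracking test below replaces the old median-doubling heuristic with fixed timing batches; a self-contained sketch of that scheme (the pattern and adversary here are stand-ins, not f2py's real `nameargspattern`):

    import re
    import time
    import numpy as np

    pattern = re.compile(r"@\)@")  # stand-in for nameargspattern
    adversary = "@)@" * 20

    for _ in range(4):             # batches_per_regex
        times = []
        for _ in range(12):        # trials_per_batch
            t0 = time.perf_counter()
            pattern.search(adversary)
            times.append(time.perf_counter() - t0)
        # Medians resist scheduler-induced outliers; a catastrophically
        # backtracking regex blows far past this bound.
        assert np.median(times) < 0.2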
-        trials_per_count = 128
+        trials_per_batch = 12
+        batches_per_regex = 4
         start_reps, end_reps = 15, 25
-        times_median_doubled = 0
         for ii in range(start_reps, end_reps):
             repeated_adversary = adversary * ii
-            times = []
-            for _ in range(trials_per_count):
-                t0 = time.perf_counter()
-                mtch = nameargspattern.search(repeated_adversary)
-                times.append(time.perf_counter() - t0)
-            # We should use a measure of time that's resilient to outliers.
-            # Times jump around a lot due to the CPU's scheduler.
-            median = np.median(times)
+            # Test times in small batches.  This gives us more chances to
+            # catch a bad regex, while still catching it before too long
+            # if it is bad.
+            for _ in range(batches_per_regex):
+                times = []
+                for _ in range(trials_per_batch):
+                    t0 = time.perf_counter()
+                    mtch = nameargspattern.search(repeated_adversary)
+                    times.append(time.perf_counter() - t0)
+                # Our pattern should be much faster than 0.2s per search;
+                # it's unlikely that a bad regex will pass even on fast CPUs.
+                assert np.median(times) < 0.2
             assert not mtch
             # if the adversary is capped with @)@, it becomes acceptable
             # according to the old version of the regex.
             # that should still be true.
             good_version_of_adversary = repeated_adversary + '@)@'
             assert nameargspattern.search(good_version_of_adversary)
-            if ii > start_reps:
-                # the hallmark of exponentially catastrophic backtracking
-                # is that runtime doubles for every added instance of
-                # the problematic pattern.
-                times_median_doubled += median > 2 * last_median
-            # also try to rule out non-exponential but still bad cases
-            # arbitrarily, we should set a hard limit of 10ms as too slow
-            assert median < trials_per_count * 0.01
-            last_median = median
-        # we accept that maybe the median might double once, due to
-        # the CPU scheduler acting weird or whatever. More than that
-        # seems suspicious.
-        assert times_median_doubled < 2
 
 class TestFunctionReturn(util.F2PyTest):
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index f8f2ab7a2..22fb0eb7d 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -262,6 +262,9 @@ class NpzFile(Mapping):
         else:
             raise KeyError("%s is not a file in the archive" % key)
 
+    def __contains__(self, key):
+        return (key in self._files or key in self.files)
+
     def __repr__(self):
         # Get filename or default to `object`
         if isinstance(self.fid, str):
diff --git a/numpy/lib/npyio.pyi b/numpy/lib/npyio.pyi
index 9dd3d6809..cc81e82b7 100644
--- a/numpy/lib/npyio.pyi
+++ b/numpy/lib/npyio.pyi
@@ -98,6 +98,7 @@ class NpzFile(Mapping[str, NDArray[Any]]):
     def __iter__(self) -> Iterator[str]: ...
     def __len__(self) -> int: ...
     def __getitem__(self, key: str) -> NDArray[Any]: ...
+    def __contains__(self, key: str) -> bool: ...
     def __repr__(self) -> str: ...
 
 # NOTE: Returns a `NpzFile` if file is a zip file;
diff --git a/numpy/ma/core.py b/numpy/ma/core.py
index d2bd3d47e..0cf748dfd 100644
--- a/numpy/ma/core.py
+++ b/numpy/ma/core.py
@@ -3340,6 +3340,10 @@ class MaskedArray(ndarray):
         #    Note: Don't try to check for m.any(), that'll take too long
         return dout
 
+    # setitem may put NaNs into integer arrays or occasionally overflow a
+    # float.  But this may happen for masked values, so avoid otherwise
+    # correct warnings (as is typical also in masked calculations).
+    @np.errstate(over='ignore', invalid='ignore')
     def __setitem__(self, indx, value):
         """
         x.__setitem__(i, y) <==> x[i]=y
@@ -4631,6 +4635,7 @@ class MaskedArray(ndarray):
             otherwise.  'K' means to read the elements in the order they occur
             in memory, except for reversing the data when strides are negative.
             By default, 'C' index order is used.
+            (Masked arrays currently use 'A' on the data when 'K' is passed.)
 
         Returns
         -------
@@ -4657,6 +4662,13 @@
                 fill_value=999999)
 
         """
+        # The order of _data and _mask could be different (it shouldn't be
+        # normally).  Passing order `K` or `A` would be incorrect.
+        # So we ignore the mask memory order.
+        # TODO: We don't actually support K, so use A instead.  We could
+        #       try to guess this correctly by sorting strides, or deprecate it.
+        if order in "kKaA":
+            order = "C" if self._data.flags.fnc else "F"
         r = ndarray.ravel(self._data, order=order).view(type(self))
         r._update_from(self)
         if self._mask is not nomask:
diff --git a/numpy/ma/tests/test_core.py b/numpy/ma/tests/test_core.py
index 5db01b74a..9a4b74997 100644
--- a/numpy/ma/tests/test_core.py
+++ b/numpy/ma/tests/test_core.py
@@ -377,6 +377,24 @@ class TestMaskedArray:
         assert_equal(s1, s2)
         assert_(x1[1:1].shape == (0,))
 
+    def test_setitem_no_warning(self):
+        # Setitem shouldn't warn, because the assignment might be masked
+        # and warning for a masked assignment is weird (see gh-23000)
+        # (For unmasked values a warning would be acceptable,
+        # but none is given currently.)
+        x = np.ma.arange(60).reshape((6, 10))
+        index = (slice(1, 5, 2), [7, 5])
+        value = np.ma.masked_all((2, 2))
+        value._data[...] = np.inf  # not a valid integer...
+        x[index] = value
+        # The masked scalar is special cased, but test anyway (it's NaN):
+        x[...] = np.ma.masked
+        # Finally, a large value that cannot be cast to the float32 `x`
+        x = np.ma.arange(3., dtype=np.float32)
+        value = np.ma.array([2e234, 1, 1], mask=[True, False, False])
+        x[...] = value
+        x[[0, 1, 2]] = value
+
     @suppress_copy_mask_on_assignment
     def test_copy(self):
         # Tests of some subtle points of copying and sizing.
@@ -3411,6 +3429,22 @@ class TestMaskedArrayMethods:
         assert_equal(a.ravel(order='C'), [1, 2, 3, 4])
         assert_equal(a.ravel(order='F'), [1, 3, 2, 4])
 
+    @pytest.mark.parametrize("order", "AKCF")
+    @pytest.mark.parametrize("data_order", "CF")
+    def test_ravel_order(self, order, data_order):
+        # Ravelling must always ravel the mask and data in the same order
+        # to avoid misaligning the two in the ravel result.
+        arr = np.ones((5, 10), order=data_order)
+        arr[0, :] = 0
+        mask = np.ones((10, 5), dtype=bool, order=data_order).T
+        mask[0, :] = False
+        x = array(arr, mask=mask)
+        assert x._data.flags.fnc != x._mask.flags.fnc
+        assert (x.filled(0) == 0).all()
+        raveled = x.ravel(order)
+        assert (raveled.filled(0) == 0).all()
+
+
     def test_reshape(self):
         # Tests reshape
         x = arange(4)
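To make the ravel fix concrete, a condensed, runnable version of the scenario `test_ravel_order` covers (assuming a NumPy build that carries this change), where data and mask have different memory orders:

    import numpy as np

    arr = np.ones((5, 10), order="C")
    arr[0, :] = 0
    mask = np.ones((10, 5), dtype=bool, order="C").T  # Fortran-ordered view
    mask[0, :] = False                                # only row 0 unmasked
    x = np.ma.array(arr, mask=mask)
    assert x._data.flags.fnc != x._mask.flags.fnc     # memory orders disagree
    raveled = x.ravel("K")  # internally falls back to a concrete order
    assert (raveled.filled(0) == 0).all()             # mask stays aligned with data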