diff options
Diffstat (limited to 'numpy/core')
4 files changed, 29 insertions, 28 deletions
diff --git a/numpy/core/src/npysort/x86-simd-sort/src/avx512-16bit-qsort.hpp b/numpy/core/src/npysort/x86-simd-sort/src/avx512-16bit-qsort.hpp index 26a54e36b..51cb4dbb0 100644 --- a/numpy/core/src/npysort/x86-simd-sort/src/avx512-16bit-qsort.hpp +++ b/numpy/core/src/npysort/x86-simd-sort/src/avx512-16bit-qsort.hpp @@ -57,7 +57,7 @@ struct vector<int16_t> { static opmask_t knot_opmask(opmask_t x) { - return _knot_mask32(x); + return npyv_not_b16(x); } static opmask_t ge(zmm_t x, zmm_t y) { @@ -106,16 +106,16 @@ struct vector<int16_t> { { zmm_t lo = _mm512_cvtepi16_epi32(_mm512_extracti64x4_epi64(v, 0)); zmm_t hi = _mm512_cvtepi16_epi32(_mm512_extracti64x4_epi64(v, 1)); - type_t lo_max = (type_t)_mm512_reduce_max_epi32(lo); - type_t hi_max = (type_t)_mm512_reduce_max_epi32(hi); + type_t lo_max = (type_t)npyv_reduce_max_s32(lo); + type_t hi_max = (type_t)npyv_reduce_max_s32(hi); return std::max(lo_max, hi_max); } static type_t reducemin(zmm_t v) { zmm_t lo = _mm512_cvtepi16_epi32(_mm512_extracti64x4_epi64(v, 0)); zmm_t hi = _mm512_cvtepi16_epi32(_mm512_extracti64x4_epi64(v, 1)); - type_t lo_min = (type_t)_mm512_reduce_min_epi32(lo); - type_t hi_min = (type_t)_mm512_reduce_min_epi32(hi); + type_t lo_min = (type_t)npyv_reduce_min_s32(lo); + type_t hi_min = (type_t)npyv_reduce_min_s32(hi); return std::min(lo_min, hi_min); } static zmm_t set1(type_t v) @@ -161,7 +161,7 @@ struct vector<uint16_t> { //} static opmask_t knot_opmask(opmask_t x) { - return _knot_mask32(x); + return npyv_not_b16(x); } static opmask_t ge(zmm_t x, zmm_t y) { @@ -203,16 +203,16 @@ struct vector<uint16_t> { { zmm_t lo = _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(v, 0)); zmm_t hi = _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(v, 1)); - type_t lo_max = (type_t)_mm512_reduce_max_epi32(lo); - type_t hi_max = (type_t)_mm512_reduce_max_epi32(hi); + type_t lo_max = (type_t)npyv_reduce_max_s32(lo); + type_t hi_max = (type_t)npyv_reduce_max_s32(hi); return std::max(lo_max, hi_max); } static type_t reducemin(zmm_t v) { zmm_t lo = _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(v, 0)); zmm_t hi = _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(v, 1)); - type_t lo_min = (type_t)_mm512_reduce_min_epi32(lo); - type_t hi_min = (type_t)_mm512_reduce_min_epi32(hi); + type_t lo_min = (type_t)npyv_reduce_min_s32(lo); + type_t hi_min = (type_t)npyv_reduce_min_s32(hi); return std::min(lo_min, hi_min); } static zmm_t set1(type_t v) diff --git a/numpy/core/src/npysort/x86-simd-sort/src/avx512-32bit-qsort.hpp b/numpy/core/src/npysort/x86-simd-sort/src/avx512-32bit-qsort.hpp index 7899d8522..ac5bece7a 100644 --- a/numpy/core/src/npysort/x86-simd-sort/src/avx512-32bit-qsort.hpp +++ b/numpy/core/src/npysort/x86-simd-sort/src/avx512-32bit-qsort.hpp @@ -46,7 +46,7 @@ struct vector<int32_t> { static opmask_t knot_opmask(opmask_t x) { - return _knot_mask16(x); + return _mm512_knot(x); } static opmask_t ge(zmm_t x, zmm_t y) { @@ -96,11 +96,11 @@ struct vector<int32_t> { } static type_t reducemax(zmm_t v) { - return _mm512_reduce_max_epi32(v); + return npyv_reduce_max_s32(v); } static type_t reducemin(zmm_t v) { - return _mm512_reduce_min_epi32(v); + return npyv_reduce_min_s32(v); } static zmm_t set1(type_t v) { @@ -158,7 +158,7 @@ struct vector<uint32_t> { } static opmask_t knot_opmask(opmask_t x) { - return _knot_mask16(x); + return _mm512_knot(x); } static opmask_t ge(zmm_t x, zmm_t y) { @@ -198,11 +198,11 @@ struct vector<uint32_t> { } static type_t reducemax(zmm_t v) { - return _mm512_reduce_max_epu32(v); + return npyv_reduce_max_u32(v); } static type_t reducemin(zmm_t v) { - return _mm512_reduce_min_epu32(v); + return npyv_reduce_min_u32(v); } static zmm_t set1(type_t v) { @@ -250,7 +250,7 @@ struct vector<float> { static opmask_t knot_opmask(opmask_t x) { - return _knot_mask16(x); + return _mm512_knot(x); } static opmask_t ge(zmm_t x, zmm_t y) { @@ -301,11 +301,11 @@ struct vector<float> { } static type_t reducemax(zmm_t v) { - return _mm512_reduce_max_ps(v); + return npyv_reduce_max_f32(v); } static type_t reducemin(zmm_t v) { - return _mm512_reduce_min_ps(v); + return npyv_reduce_min_f32(v); } static zmm_t set1(type_t v) { diff --git a/numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-qsort.hpp b/numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-qsort.hpp index 62a7fa54e..e6b7f8943 100644 --- a/numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-qsort.hpp +++ b/numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-qsort.hpp @@ -56,7 +56,7 @@ struct vector<int64_t> { static opmask_t knot_opmask(opmask_t x) { - return _knot_mask8(x); + return npyv_not_b64(x); } static opmask_t ge(zmm_t x, zmm_t y) { @@ -101,11 +101,11 @@ struct vector<int64_t> { } static type_t reducemax(zmm_t v) { - return _mm512_reduce_max_epi64(v); + return npyv_reduce_max_s64(v); } static type_t reducemin(zmm_t v) { - return _mm512_reduce_min_epi64(v); + return npyv_reduce_min_s64(v); } static zmm_t set1(type_t v) { @@ -163,7 +163,7 @@ struct vector<uint64_t> { } static opmask_t knot_opmask(opmask_t x) { - return _knot_mask8(x); + return npyv_not_b64(x); } static opmask_t ge(zmm_t x, zmm_t y) { @@ -203,11 +203,11 @@ struct vector<uint64_t> { } static type_t reducemax(zmm_t v) { - return _mm512_reduce_max_epu64(v); + return npyv_reduce_max_u64(v); } static type_t reducemin(zmm_t v) { - return _mm512_reduce_min_epu64(v); + return npyv_reduce_min_u64(v); } static zmm_t set1(type_t v) { @@ -260,7 +260,7 @@ struct vector<double> { static opmask_t knot_opmask(opmask_t x) { - return _knot_mask8(x); + return npyv_not_b64(x); } static opmask_t ge(zmm_t x, zmm_t y) { @@ -305,11 +305,11 @@ struct vector<double> { } static type_t reducemax(zmm_t v) { - return _mm512_reduce_max_pd(v); + return npyv_reduce_max_f64(v); } static type_t reducemin(zmm_t v) { - return _mm512_reduce_min_pd(v); + return npyv_reduce_min_f64(v); } static zmm_t set1(type_t v) { diff --git a/numpy/core/src/npysort/x86-simd-sort/src/avx512-common-qsort.h b/numpy/core/src/npysort/x86-simd-sort/src/avx512-common-qsort.h index e713e1f20..56560185c 100644 --- a/numpy/core/src/npysort/x86-simd-sort/src/avx512-common-qsort.h +++ b/numpy/core/src/npysort/x86-simd-sort/src/avx512-common-qsort.h @@ -38,6 +38,7 @@ #include <cstdint> #include <immintrin.h> #include <limits> +#include "simd/simd.h" #define X86_SIMD_SORT_INFINITY std::numeric_limits<double>::infinity() #define X86_SIMD_SORT_INFINITYF std::numeric_limits<float>::infinity() |