summary | refs | log | tree | commit | diff
path: root/numpy/core
diff options
context:
space:
mode:
Diffstat (limited to 'numpy/core')
-rw-r--r-- numpy/core/src/npysort/x86-simd-sort/src/avx512-16bit-qsort.hpp | 20
-rw-r--r-- numpy/core/src/npysort/x86-simd-sort/src/avx512-32bit-qsort.hpp | 18
-rw-r--r-- numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-qsort.hpp | 18
-rw-r--r-- numpy/core/src/npysort/x86-simd-sort/src/avx512-common-qsort.h | 1
4 files changed, 29 insertions, 28 deletions
diff --git a/numpy/core/src/npysort/x86-simd-sort/src/avx512-16bit-qsort.hpp b/numpy/core/src/npysort/x86-simd-sort/src/avx512-16bit-qsort.hpp
index 26a54e36b..51cb4dbb0 100644
--- a/numpy/core/src/npysort/x86-simd-sort/src/avx512-16bit-qsort.hpp
+++ b/numpy/core/src/npysort/x86-simd-sort/src/avx512-16bit-qsort.hpp
@@ -57,7 +57,7 @@ struct vector<int16_t> {
static opmask_t knot_opmask(opmask_t x)
{
- return _knot_mask32(x);
+ return npyv_not_b16(x);
}
static opmask_t ge(zmm_t x, zmm_t y)
{
@@ -106,16 +106,16 @@ struct vector<int16_t> {
{
zmm_t lo = _mm512_cvtepi16_epi32(_mm512_extracti64x4_epi64(v, 0));
zmm_t hi = _mm512_cvtepi16_epi32(_mm512_extracti64x4_epi64(v, 1));
- type_t lo_max = (type_t)_mm512_reduce_max_epi32(lo);
- type_t hi_max = (type_t)_mm512_reduce_max_epi32(hi);
+ type_t lo_max = (type_t)npyv_reduce_max_s32(lo);
+ type_t hi_max = (type_t)npyv_reduce_max_s32(hi);
return std::max(lo_max, hi_max);
}
static type_t reducemin(zmm_t v)
{
zmm_t lo = _mm512_cvtepi16_epi32(_mm512_extracti64x4_epi64(v, 0));
zmm_t hi = _mm512_cvtepi16_epi32(_mm512_extracti64x4_epi64(v, 1));
- type_t lo_min = (type_t)_mm512_reduce_min_epi32(lo);
- type_t hi_min = (type_t)_mm512_reduce_min_epi32(hi);
+ type_t lo_min = (type_t)npyv_reduce_min_s32(lo);
+ type_t hi_min = (type_t)npyv_reduce_min_s32(hi);
return std::min(lo_min, hi_min);
}
static zmm_t set1(type_t v)
@@ -161,7 +161,7 @@ struct vector<uint16_t> {
//}
static opmask_t knot_opmask(opmask_t x)
{
- return _knot_mask32(x);
+ return npyv_not_b16(x);
}
static opmask_t ge(zmm_t x, zmm_t y)
{
@@ -203,16 +203,16 @@ struct vector<uint16_t> {
{
zmm_t lo = _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(v, 0));
zmm_t hi = _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(v, 1));
- type_t lo_max = (type_t)_mm512_reduce_max_epi32(lo);
- type_t hi_max = (type_t)_mm512_reduce_max_epi32(hi);
+ type_t lo_max = (type_t)npyv_reduce_max_s32(lo);
+ type_t hi_max = (type_t)npyv_reduce_max_s32(hi);
return std::max(lo_max, hi_max);
}
static type_t reducemin(zmm_t v)
{
zmm_t lo = _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(v, 0));
zmm_t hi = _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(v, 1));
- type_t lo_min = (type_t)_mm512_reduce_min_epi32(lo);
- type_t hi_min = (type_t)_mm512_reduce_min_epi32(hi);
+ type_t lo_min = (type_t)npyv_reduce_min_s32(lo);
+ type_t hi_min = (type_t)npyv_reduce_min_s32(hi);
return std::min(lo_min, hi_min);
}
static zmm_t set1(type_t v)
diff --git a/numpy/core/src/npysort/x86-simd-sort/src/avx512-32bit-qsort.hpp b/numpy/core/src/npysort/x86-simd-sort/src/avx512-32bit-qsort.hpp
index 7899d8522..ac5bece7a 100644
--- a/numpy/core/src/npysort/x86-simd-sort/src/avx512-32bit-qsort.hpp
+++ b/numpy/core/src/npysort/x86-simd-sort/src/avx512-32bit-qsort.hpp
@@ -46,7 +46,7 @@ struct vector<int32_t> {
static opmask_t knot_opmask(opmask_t x)
{
- return _knot_mask16(x);
+ return _mm512_knot(x);
}
static opmask_t ge(zmm_t x, zmm_t y)
{
@@ -96,11 +96,11 @@ struct vector<int32_t> {
}
static type_t reducemax(zmm_t v)
{
- return _mm512_reduce_max_epi32(v);
+ return npyv_reduce_max_s32(v);
}
static type_t reducemin(zmm_t v)
{
- return _mm512_reduce_min_epi32(v);
+ return npyv_reduce_min_s32(v);
}
static zmm_t set1(type_t v)
{
@@ -158,7 +158,7 @@ struct vector<uint32_t> {
}
static opmask_t knot_opmask(opmask_t x)
{
- return _knot_mask16(x);
+ return _mm512_knot(x);
}
static opmask_t ge(zmm_t x, zmm_t y)
{
@@ -198,11 +198,11 @@ struct vector<uint32_t> {
}
static type_t reducemax(zmm_t v)
{
- return _mm512_reduce_max_epu32(v);
+ return npyv_reduce_max_u32(v);
}
static type_t reducemin(zmm_t v)
{
- return _mm512_reduce_min_epu32(v);
+ return npyv_reduce_min_u32(v);
}
static zmm_t set1(type_t v)
{
@@ -250,7 +250,7 @@ struct vector<float> {
static opmask_t knot_opmask(opmask_t x)
{
- return _knot_mask16(x);
+ return _mm512_knot(x);
}
static opmask_t ge(zmm_t x, zmm_t y)
{
@@ -301,11 +301,11 @@ struct vector<float> {
}
static type_t reducemax(zmm_t v)
{
- return _mm512_reduce_max_ps(v);
+ return npyv_reduce_max_f32(v);
}
static type_t reducemin(zmm_t v)
{
- return _mm512_reduce_min_ps(v);
+ return npyv_reduce_min_f32(v);
}
static zmm_t set1(type_t v)
{
diff --git a/numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-qsort.hpp b/numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-qsort.hpp
index 62a7fa54e..e6b7f8943 100644
--- a/numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-qsort.hpp
+++ b/numpy/core/src/npysort/x86-simd-sort/src/avx512-64bit-qsort.hpp
@@ -56,7 +56,7 @@ struct vector<int64_t> {
static opmask_t knot_opmask(opmask_t x)
{
- return _knot_mask8(x);
+ return npyv_not_b64(x);
}
static opmask_t ge(zmm_t x, zmm_t y)
{
@@ -101,11 +101,11 @@ struct vector<int64_t> {
}
static type_t reducemax(zmm_t v)
{
- return _mm512_reduce_max_epi64(v);
+ return npyv_reduce_max_s64(v);
}
static type_t reducemin(zmm_t v)
{
- return _mm512_reduce_min_epi64(v);
+ return npyv_reduce_min_s64(v);
}
static zmm_t set1(type_t v)
{
@@ -163,7 +163,7 @@ struct vector<uint64_t> {
}
static opmask_t knot_opmask(opmask_t x)
{
- return _knot_mask8(x);
+ return npyv_not_b64(x);
}
static opmask_t ge(zmm_t x, zmm_t y)
{
@@ -203,11 +203,11 @@ struct vector<uint64_t> {
}
static type_t reducemax(zmm_t v)
{
- return _mm512_reduce_max_epu64(v);
+ return npyv_reduce_max_u64(v);
}
static type_t reducemin(zmm_t v)
{
- return _mm512_reduce_min_epu64(v);
+ return npyv_reduce_min_u64(v);
}
static zmm_t set1(type_t v)
{
@@ -260,7 +260,7 @@ struct vector<double> {
static opmask_t knot_opmask(opmask_t x)
{
- return _knot_mask8(x);
+ return npyv_not_b64(x);
}
static opmask_t ge(zmm_t x, zmm_t y)
{
@@ -305,11 +305,11 @@ struct vector<double> {
}
static type_t reducemax(zmm_t v)
{
- return _mm512_reduce_max_pd(v);
+ return npyv_reduce_max_f64(v);
}
static type_t reducemin(zmm_t v)
{
- return _mm512_reduce_min_pd(v);
+ return npyv_reduce_min_f64(v);
}
static zmm_t set1(type_t v)
{
diff --git a/numpy/core/src/npysort/x86-simd-sort/src/avx512-common-qsort.h b/numpy/core/src/npysort/x86-simd-sort/src/avx512-common-qsort.h
index e713e1f20..56560185c 100644
--- a/numpy/core/src/npysort/x86-simd-sort/src/avx512-common-qsort.h
+++ b/numpy/core/src/npysort/x86-simd-sort/src/avx512-common-qsort.h
@@ -38,6 +38,7 @@
#include <cstdint>
#include <immintrin.h>
#include <limits>
+#include "simd/simd.h"
#define X86_SIMD_SORT_INFINITY std::numeric_limits<double>::infinity()
#define X86_SIMD_SORT_INFINITYF std::numeric_limits<float>::infinity()