summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatti Picus <matti.picus@gmail.com>2023-02-19 09:01:18 +0200
committerGitHub <noreply@github.com>2023-02-19 09:01:18 +0200
commitcfef4c030bfc18126fff156cbd3670b2ee0b18c0 (patch)
treed8333496f0a2874ffe3c20eb8c69f22c3389673a
parentb8feb4247af42cb0a27ddc7414d4a112e84d8d64 (diff)
parent6b470186a16044aea06a33a4d9295f87dcd651f5 (diff)
downloadnumpy-cfef4c030bfc18126fff156cbd3670b2ee0b18c0.tar.gz
Merge pull request #22051 from r-devulap/spr-support
BLD: Add compile and runtime checks for AVX512_SPR
-rw-r--r--doc/source/reference/simd/generated_tables/compilers-diff.inc42
-rw-r--r--doc/source/reference/simd/generated_tables/cpu_features.inc53
-rw-r--r--numpy/core/src/common/npy_cpu_features.c7
-rw-r--r--numpy/core/src/common/npy_cpu_features.h3
-rw-r--r--numpy/core/tests/test_cpu_features.py6
-rw-r--r--numpy/distutils/ccompiler_opt.py16
-rw-r--r--numpy/distutils/checks/cpu_avx512_spr.c22
7 files changed, 99 insertions, 50 deletions
diff --git a/doc/source/reference/simd/generated_tables/compilers-diff.inc b/doc/source/reference/simd/generated_tables/compilers-diff.inc
index 4b9009a68..d5a87da3c 100644
--- a/doc/source/reference/simd/generated_tables/compilers-diff.inc
+++ b/doc/source/reference/simd/generated_tables/compilers-diff.inc
@@ -1,33 +1,35 @@
-.. generated via /home/seiko/work/repos/numpy/doc/source/reference/simd/./gen_features.py
+.. generated via /numpy/numpy/./doc/source/reference/simd/gen_features.py
On x86::Intel Compiler
~~~~~~~~~~~~~~~~~~~~~~
.. table::
:align: left
- ================ ==========================================================================================================================================
- Name Implies
- ================ ==========================================================================================================================================
- FMA3 SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C :enabled:`AVX2`
- AVX2 SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C :enabled:`FMA3`
- AVX512F SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C FMA3 AVX2 :enabled:`AVX512CD`
- :disabled:`XOP` :disabled:`SSE` :disabled:`SSE2` :disabled:`SSE3` :disabled:`SSSE3` :disabled:`SSE41` :disabled:`POPCNT` :disabled:`SSE42` :disabled:`AVX`
- :disabled:`FMA4` :disabled:`SSE` :disabled:`SSE2` :disabled:`SSE3` :disabled:`SSSE3` :disabled:`SSE41` :disabled:`POPCNT` :disabled:`SSE42` :disabled:`AVX`
- ================ ==========================================================================================================================================
+ ====================== ================================================================================================================================================================================================================================================================================================================================== ======================
+ Name Implies Gathers
+ ====================== ================================================================================================================================================================================================================================================================================================================================== ======================
+ FMA3 SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C :enabled:`AVX2`
+ AVX2 SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C :enabled:`FMA3`
+ AVX512F SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C FMA3 AVX2 :enabled:`AVX512CD`
+ :disabled:`XOP` :disabled:`SSE` :disabled:`SSE2` :disabled:`SSE3` :disabled:`SSSE3` :disabled:`SSE41` :disabled:`POPCNT` :disabled:`SSE42` :disabled:`AVX`
+ :disabled:`FMA4` :disabled:`SSE` :disabled:`SSE2` :disabled:`SSE3` :disabled:`SSSE3` :disabled:`SSE41` :disabled:`POPCNT` :disabled:`SSE42` :disabled:`AVX`
+ :disabled:`AVX512_SPR` :disabled:`SSE` :disabled:`SSE2` :disabled:`SSE3` :disabled:`SSSE3` :disabled:`SSE41` :disabled:`POPCNT` :disabled:`SSE42` :disabled:`AVX` :disabled:`F16C` :disabled:`FMA3` :disabled:`AVX2` :disabled:`AVX512F` :disabled:`AVX512CD` :disabled:`AVX512_SKX` :disabled:`AVX512_CLX` :disabled:`AVX512_CNL` :disabled:`AVX512_ICL` :disabled:`AVX512FP16`
+ ====================== ================================================================================================================================================================================================================================================================================================================================== ======================
On x86::Microsoft Visual C/C++
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. table::
:align: left
- ====================== ============================================================================================================================================================================================================================================================= =============================================================================
- Name Implies Gathers
- ====================== ============================================================================================================================================================================================================================================================= =============================================================================
- FMA3 SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C :enabled:`AVX2`
- AVX2 SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C :enabled:`FMA3`
- AVX512F SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C FMA3 AVX2 :enabled:`AVX512CD` :enabled:`AVX512_SKX`
- AVX512CD SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C FMA3 AVX2 AVX512F :enabled:`AVX512_SKX`
- :disabled:`AVX512_KNL` :disabled:`SSE` :disabled:`SSE2` :disabled:`SSE3` :disabled:`SSSE3` :disabled:`SSE41` :disabled:`POPCNT` :disabled:`SSE42` :disabled:`AVX` :disabled:`F16C` :disabled:`FMA3` :disabled:`AVX2` :disabled:`AVX512F` :disabled:`AVX512CD` :disabled:`AVX512ER` :disabled:`AVX512PF`
- :disabled:`AVX512_KNM` :disabled:`SSE` :disabled:`SSE2` :disabled:`SSE3` :disabled:`SSSE3` :disabled:`SSE41` :disabled:`POPCNT` :disabled:`SSE42` :disabled:`AVX` :disabled:`F16C` :disabled:`FMA3` :disabled:`AVX2` :disabled:`AVX512F` :disabled:`AVX512CD` :disabled:`AVX512_KNL` :disabled:`AVX5124FMAPS` :disabled:`AVX5124VNNIW` :disabled:`AVX512VPOPCNTDQ`
- ====================== ============================================================================================================================================================================================================================================================= =============================================================================
+ ====================== ================================================================================================================================================================================================================================================================================================================================== =============================================================================
+ Name Implies Gathers
+ ====================== ================================================================================================================================================================================================================================================================================================================================== =============================================================================
+ FMA3 SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C :enabled:`AVX2`
+ AVX2 SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C :enabled:`FMA3`
+ AVX512F SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C FMA3 AVX2 :enabled:`AVX512CD` :enabled:`AVX512_SKX`
+ AVX512CD SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C FMA3 AVX2 AVX512F :enabled:`AVX512_SKX`
+ :disabled:`AVX512_KNL` :disabled:`SSE` :disabled:`SSE2` :disabled:`SSE3` :disabled:`SSSE3` :disabled:`SSE41` :disabled:`POPCNT` :disabled:`SSE42` :disabled:`AVX` :disabled:`F16C` :disabled:`FMA3` :disabled:`AVX2` :disabled:`AVX512F` :disabled:`AVX512CD` :disabled:`AVX512ER` :disabled:`AVX512PF`
+ :disabled:`AVX512_KNM` :disabled:`SSE` :disabled:`SSE2` :disabled:`SSE3` :disabled:`SSSE3` :disabled:`SSE41` :disabled:`POPCNT` :disabled:`SSE42` :disabled:`AVX` :disabled:`F16C` :disabled:`FMA3` :disabled:`AVX2` :disabled:`AVX512F` :disabled:`AVX512CD` :disabled:`AVX512_KNL` :disabled:`AVX5124FMAPS` :disabled:`AVX5124VNNIW` :disabled:`AVX512VPOPCNTDQ`
+ :disabled:`AVX512_SPR` :disabled:`SSE` :disabled:`SSE2` :disabled:`SSE3` :disabled:`SSSE3` :disabled:`SSE41` :disabled:`POPCNT` :disabled:`SSE42` :disabled:`AVX` :disabled:`F16C` :disabled:`FMA3` :disabled:`AVX2` :disabled:`AVX512F` :disabled:`AVX512CD` :disabled:`AVX512_SKX` :disabled:`AVX512_CLX` :disabled:`AVX512_CNL` :disabled:`AVX512_ICL` :disabled:`AVX512FP16`
+ ====================== ================================================================================================================================================================================================================================================================================================================================== =============================================================================
diff --git a/doc/source/reference/simd/generated_tables/cpu_features.inc b/doc/source/reference/simd/generated_tables/cpu_features.inc
index 7782172d2..603370e21 100644
--- a/doc/source/reference/simd/generated_tables/cpu_features.inc
+++ b/doc/source/reference/simd/generated_tables/cpu_features.inc
@@ -1,35 +1,36 @@
-.. generated via /home/seiko/work/repos/review/numpy/doc/source/reference/simd/gen_features.py
+.. generated via /numpy/numpy/./doc/source/reference/simd/gen_features.py
On x86
~~~~~~
.. table::
:align: left
- ============== =========================================================================================================================================================================== =====================================================
- Name Implies Gathers
- ============== =========================================================================================================================================================================== =====================================================
- ``SSE`` ``SSE2``
- ``SSE2`` ``SSE``
- ``SSE3`` ``SSE`` ``SSE2``
- ``SSSE3`` ``SSE`` ``SSE2`` ``SSE3``
- ``SSE41`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3``
- ``POPCNT`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41``
- ``SSE42`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT``
- ``AVX`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42``
- ``XOP`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX``
- ``FMA4`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX``
- ``F16C`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX``
- ``FMA3`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C``
- ``AVX2`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C``
- ``AVX512F`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2``
- ``AVX512CD`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F``
- ``AVX512_KNL`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512ER`` ``AVX512PF``
- ``AVX512_KNM`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512_KNL`` ``AVX5124FMAPS`` ``AVX5124VNNIW`` ``AVX512VPOPCNTDQ``
- ``AVX512_SKX`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512VL`` ``AVX512BW`` ``AVX512DQ``
- ``AVX512_CLX`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512_SKX`` ``AVX512VNNI``
- ``AVX512_CNL`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512_SKX`` ``AVX512IFMA`` ``AVX512VBMI``
- ``AVX512_ICL`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512_SKX`` ``AVX512_CLX`` ``AVX512_CNL`` ``AVX512VBMI2`` ``AVX512BITALG`` ``AVX512VPOPCNTDQ``
- ============== =========================================================================================================================================================================== =====================================================
+ ============== ========================================================================================================================================================================================== =====================================================
+ Name Implies Gathers
+ ============== ========================================================================================================================================================================================== =====================================================
+ ``SSE`` ``SSE2``
+ ``SSE2`` ``SSE``
+ ``SSE3`` ``SSE`` ``SSE2``
+ ``SSSE3`` ``SSE`` ``SSE2`` ``SSE3``
+ ``SSE41`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3``
+ ``POPCNT`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41``
+ ``SSE42`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT``
+ ``AVX`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42``
+ ``XOP`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX``
+ ``FMA4`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX``
+ ``F16C`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX``
+ ``FMA3`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C``
+ ``AVX2`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C``
+ ``AVX512F`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2``
+ ``AVX512CD`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F``
+ ``AVX512_KNL`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512ER`` ``AVX512PF``
+ ``AVX512_KNM`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512_KNL`` ``AVX5124FMAPS`` ``AVX5124VNNIW`` ``AVX512VPOPCNTDQ``
+ ``AVX512_SKX`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512VL`` ``AVX512BW`` ``AVX512DQ``
+ ``AVX512_CLX`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512_SKX`` ``AVX512VNNI``
+ ``AVX512_CNL`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512_SKX`` ``AVX512IFMA`` ``AVX512VBMI``
+ ``AVX512_ICL`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512_SKX`` ``AVX512_CLX`` ``AVX512_CNL`` ``AVX512VBMI2`` ``AVX512BITALG`` ``AVX512VPOPCNTDQ``
+ ``AVX512_SPR`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512_SKX`` ``AVX512_CLX`` ``AVX512_CNL`` ``AVX512_ICL`` ``AVX512FP16``
+ ============== ========================================================================================================================================================================================== =====================================================
On IBM/POWER big-endian
~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/numpy/core/src/common/npy_cpu_features.c b/numpy/core/src/common/npy_cpu_features.c
index 949c75ad5..92a4e432b 100644
--- a/numpy/core/src/common/npy_cpu_features.c
+++ b/numpy/core/src/common/npy_cpu_features.c
@@ -81,12 +81,14 @@ static struct {
{NPY_CPU_FEATURE_AVX512VBMI, "AVX512VBMI"},
{NPY_CPU_FEATURE_AVX512VBMI2, "AVX512VBMI2"},
{NPY_CPU_FEATURE_AVX512BITALG, "AVX512BITALG"},
+ {NPY_CPU_FEATURE_AVX512FP16 , "AVX512FP16"},
{NPY_CPU_FEATURE_AVX512_KNL, "AVX512_KNL"},
{NPY_CPU_FEATURE_AVX512_KNM, "AVX512_KNM"},
{NPY_CPU_FEATURE_AVX512_SKX, "AVX512_SKX"},
{NPY_CPU_FEATURE_AVX512_CLX, "AVX512_CLX"},
{NPY_CPU_FEATURE_AVX512_CNL, "AVX512_CNL"},
{NPY_CPU_FEATURE_AVX512_ICL, "AVX512_ICL"},
+ {NPY_CPU_FEATURE_AVX512_SPR, "AVX512_SPR"},
{NPY_CPU_FEATURE_VSX, "VSX"},
{NPY_CPU_FEATURE_VSX2, "VSX2"},
{NPY_CPU_FEATURE_VSX3, "VSX3"},
@@ -506,6 +508,11 @@ npy__cpu_init_features(void)
npy__cpu_have[NPY_CPU_FEATURE_AVX512VBMI2] &&
npy__cpu_have[NPY_CPU_FEATURE_AVX512BITALG] &&
npy__cpu_have[NPY_CPU_FEATURE_AVX512VPOPCNTDQ];
+ // Sapphire Rapids
+ npy__cpu_have[NPY_CPU_FEATURE_AVX512FP16] = (reg[3] & (1 << 23)) != 0;
+ npy__cpu_have[NPY_CPU_FEATURE_AVX512_SPR] = npy__cpu_have[NPY_CPU_FEATURE_AVX512_ICL] &&
+ npy__cpu_have[NPY_CPU_FEATURE_AVX512FP16];
+
}
}
diff --git a/numpy/core/src/common/npy_cpu_features.h b/numpy/core/src/common/npy_cpu_features.h
index 96c543e70..b49aea247 100644
--- a/numpy/core/src/common/npy_cpu_features.h
+++ b/numpy/core/src/common/npy_cpu_features.h
@@ -43,6 +43,7 @@ enum npy_cpu_features
NPY_CPU_FEATURE_AVX512VNNI = 42,
NPY_CPU_FEATURE_AVX512VBMI2 = 43,
NPY_CPU_FEATURE_AVX512BITALG = 44,
+ NPY_CPU_FEATURE_AVX512FP16 = 45,
// X86 CPU Groups
// Knights Landing (F,CD,ER,PF)
@@ -57,6 +58,8 @@ enum npy_cpu_features
NPY_CPU_FEATURE_AVX512_CNL = 105,
// Ice Lake (F,CD,BW,DQ,VL,IFMA,VBMI,VNNI,VBMI2,BITALG,VPOPCNTDQ)
NPY_CPU_FEATURE_AVX512_ICL = 106,
+ // Sapphire Rapids (Ice Lake, AVX512FP16)
+ NPY_CPU_FEATURE_AVX512_SPR = 107,
// IBM/POWER VSX
// POWER7
diff --git a/numpy/core/tests/test_cpu_features.py b/numpy/core/tests/test_cpu_features.py
index 1a76897e2..8c1c25ed4 100644
--- a/numpy/core/tests/test_cpu_features.py
+++ b/numpy/core/tests/test_cpu_features.py
@@ -116,7 +116,7 @@ class Test_X86_Features(AbstractTest):
"AVX", "F16C", "XOP", "FMA4", "FMA3", "AVX2", "AVX512F", "AVX512CD",
"AVX512ER", "AVX512PF", "AVX5124FMAPS", "AVX5124VNNIW", "AVX512VPOPCNTDQ",
"AVX512VL", "AVX512BW", "AVX512DQ", "AVX512VNNI", "AVX512IFMA",
- "AVX512VBMI", "AVX512VBMI2", "AVX512BITALG",
+ "AVX512VBMI", "AVX512VBMI2", "AVX512BITALG", "AVX512FP16",
]
features_groups = dict(
AVX512_KNL = ["AVX512F", "AVX512CD", "AVX512ER", "AVX512PF"],
@@ -128,6 +128,10 @@ class Test_X86_Features(AbstractTest):
"AVX512VBMI"],
AVX512_ICL = ["AVX512F", "AVX512CD", "AVX512BW", "AVX512DQ", "AVX512VL", "AVX512IFMA",
"AVX512VBMI", "AVX512VNNI", "AVX512VBMI2", "AVX512BITALG", "AVX512VPOPCNTDQ"],
+ AVX512_SPR = ["AVX512F", "AVX512CD", "AVX512BW", "AVX512DQ",
+ "AVX512VL", "AVX512IFMA", "AVX512VBMI", "AVX512VNNI",
+ "AVX512VBMI2", "AVX512BITALG", "AVX512VPOPCNTDQ",
+ "AVX512FP16"],
)
features_map = dict(
SSE3="PNI", SSE41="SSE4_1", SSE42="SSE4_2", FMA3="FMA",
diff --git a/numpy/distutils/ccompiler_opt.py b/numpy/distutils/ccompiler_opt.py
index 4bb0dd008..781404446 100644
--- a/numpy/distutils/ccompiler_opt.py
+++ b/numpy/distutils/ccompiler_opt.py
@@ -295,6 +295,10 @@ class _Config:
group="AVX512VBMI2 AVX512BITALG AVX512VPOPCNTDQ",
detect="AVX512_ICL", implies_detect=False
),
+ AVX512_SPR = dict(
+ interest=46, implies="AVX512_ICL", group="AVX512FP16",
+ detect="AVX512_SPR", implies_detect=False
+ ),
# IBM/Power
## Power7/ISA 2.06
VSX = dict(interest=1, headers="altivec.h", extra_checks="VSX_ASM"),
@@ -365,7 +369,8 @@ class _Config:
AVX512_CNL = dict(flags="-mavx512ifma -mavx512vbmi"),
AVX512_ICL = dict(
flags="-mavx512vbmi2 -mavx512bitalg -mavx512vpopcntdq"
- )
+ ),
+ AVX512_SPR = dict(flags="-mavx512fp16"),
)
if on_x86 and self.cc_is_icc: return dict(
SSE = dict(flags="-msse"),
@@ -397,6 +402,7 @@ class _Config:
AVX512_CLX = dict(flags="-xCASCADELAKE"),
AVX512_CNL = dict(flags="-xCANNONLAKE"),
AVX512_ICL = dict(flags="-xICELAKE-CLIENT"),
+ AVX512_SPR = dict(disable="Not supported yet")
)
if on_x86 and self.cc_is_iccw: return dict(
SSE = dict(flags="/arch:SSE"),
@@ -429,7 +435,8 @@ class _Config:
AVX512_SKX = dict(flags="/Qx:SKYLAKE-AVX512"),
AVX512_CLX = dict(flags="/Qx:CASCADELAKE"),
AVX512_CNL = dict(flags="/Qx:CANNONLAKE"),
- AVX512_ICL = dict(flags="/Qx:ICELAKE-CLIENT")
+ AVX512_ICL = dict(flags="/Qx:ICELAKE-CLIENT"),
+ AVX512_SPR = dict(disable="Not supported yet")
)
if on_x86 and self.cc_is_msvc: return dict(
SSE = dict(flags="/arch:SSE") if self.cc_on_x86 else {},
@@ -467,7 +474,10 @@ class _Config:
AVX512_SKX = dict(flags="/arch:AVX512"),
AVX512_CLX = {},
AVX512_CNL = {},
- AVX512_ICL = {}
+ AVX512_ICL = {},
+ AVX512_SPR= dict(
+ disable="MSVC compiler doesn't support it"
+ )
)
on_power = self.cc_on_ppc64le or self.cc_on_ppc64
diff --git a/numpy/distutils/checks/cpu_avx512_spr.c b/numpy/distutils/checks/cpu_avx512_spr.c
new file mode 100644
index 000000000..3c9575a57
--- /dev/null
+++ b/numpy/distutils/checks/cpu_avx512_spr.c
@@ -0,0 +1,22 @@
+#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
+ /*
+ * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
+ * whether or not the build options for those features are specified.
+ * Therefore, we must test #definitions of CPU features when option native/host
+ * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
+ * the test will be broken and leads to enable all possible features.
+ */
+ #if !defined(__AVX512FP16__)
+ #error "HOST/ARCH doesn't support Sapphire Rapids AVX512FP16 features"
+ #endif
+#endif
+
+#include <immintrin.h>
+
+int main(int argc, char **argv)
+{
+ __m512h a = _mm512_loadu_ph((void*)argv[argc-1]);
+ __m512h temp = _mm512_fmadd_ph(a, a, a);
+ _mm512_storeu_ph((void*)(argv[argc-1]), temp);
+ return 0;
+}