diff options
author | Matti Picus <matti.picus@gmail.com> | 2023-02-19 09:01:18 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-02-19 09:01:18 +0200 |
commit | cfef4c030bfc18126fff156cbd3670b2ee0b18c0 (patch) | |
tree | d8333496f0a2874ffe3c20eb8c69f22c3389673a | |
parent | b8feb4247af42cb0a27ddc7414d4a112e84d8d64 (diff) | |
parent | 6b470186a16044aea06a33a4d9295f87dcd651f5 (diff) | |
download | numpy-cfef4c030bfc18126fff156cbd3670b2ee0b18c0.tar.gz |
Merge pull request #22051 from r-devulap/spr-support
BLD: Add compile and runtime checks for AVX512_SPR
-rw-r--r-- | doc/source/reference/simd/generated_tables/compilers-diff.inc | 42 | ||||
-rw-r--r-- | doc/source/reference/simd/generated_tables/cpu_features.inc | 53 | ||||
-rw-r--r-- | numpy/core/src/common/npy_cpu_features.c | 7 | ||||
-rw-r--r-- | numpy/core/src/common/npy_cpu_features.h | 3 | ||||
-rw-r--r-- | numpy/core/tests/test_cpu_features.py | 6 | ||||
-rw-r--r-- | numpy/distutils/ccompiler_opt.py | 16 | ||||
-rw-r--r-- | numpy/distutils/checks/cpu_avx512_spr.c | 22 |
7 files changed, 99 insertions, 50 deletions
diff --git a/doc/source/reference/simd/generated_tables/compilers-diff.inc b/doc/source/reference/simd/generated_tables/compilers-diff.inc index 4b9009a68..d5a87da3c 100644 --- a/doc/source/reference/simd/generated_tables/compilers-diff.inc +++ b/doc/source/reference/simd/generated_tables/compilers-diff.inc @@ -1,33 +1,35 @@ -.. generated via /home/seiko/work/repos/numpy/doc/source/reference/simd/./gen_features.py +.. generated via /numpy/numpy/./doc/source/reference/simd/gen_features.py On x86::Intel Compiler ~~~~~~~~~~~~~~~~~~~~~~ .. table:: :align: left - ================ ========================================================================================================================================== - Name Implies - ================ ========================================================================================================================================== - FMA3 SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C :enabled:`AVX2` - AVX2 SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C :enabled:`FMA3` - AVX512F SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C FMA3 AVX2 :enabled:`AVX512CD` - :disabled:`XOP` :disabled:`SSE` :disabled:`SSE2` :disabled:`SSE3` :disabled:`SSSE3` :disabled:`SSE41` :disabled:`POPCNT` :disabled:`SSE42` :disabled:`AVX` - :disabled:`FMA4` :disabled:`SSE` :disabled:`SSE2` :disabled:`SSE3` :disabled:`SSSE3` :disabled:`SSE41` :disabled:`POPCNT` :disabled:`SSE42` :disabled:`AVX` - ================ ========================================================================================================================================== + ====================== ================================================================================================================================================================================================================================================================================================================================== ====================== + Name Implies Gathers + ====================== ================================================================================================================================================================================================================================================================================================================================== ====================== + FMA3 SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C :enabled:`AVX2` + AVX2 SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C :enabled:`FMA3` + AVX512F SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C FMA3 AVX2 :enabled:`AVX512CD` + :disabled:`XOP` :disabled:`SSE` :disabled:`SSE2` :disabled:`SSE3` :disabled:`SSSE3` :disabled:`SSE41` :disabled:`POPCNT` :disabled:`SSE42` :disabled:`AVX` + :disabled:`FMA4` :disabled:`SSE` :disabled:`SSE2` :disabled:`SSE3` :disabled:`SSSE3` :disabled:`SSE41` :disabled:`POPCNT` :disabled:`SSE42` :disabled:`AVX` + :disabled:`AVX512_SPR` :disabled:`SSE` :disabled:`SSE2` :disabled:`SSE3` :disabled:`SSSE3` :disabled:`SSE41` :disabled:`POPCNT` :disabled:`SSE42` :disabled:`AVX` :disabled:`F16C` :disabled:`FMA3` :disabled:`AVX2` :disabled:`AVX512F` :disabled:`AVX512CD` :disabled:`AVX512_SKX` :disabled:`AVX512_CLX` :disabled:`AVX512_CNL` :disabled:`AVX512_ICL` :disabled:`AVX512FP16` + ====================== ================================================================================================================================================================================================================================================================================================================================== ====================== On x86::Microsoft Visual C/C++ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. table:: :align: left - ====================== ============================================================================================================================================================================================================================================================= ============================================================================= - Name Implies Gathers - ====================== ============================================================================================================================================================================================================================================================= ============================================================================= - FMA3 SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C :enabled:`AVX2` - AVX2 SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C :enabled:`FMA3` - AVX512F SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C FMA3 AVX2 :enabled:`AVX512CD` :enabled:`AVX512_SKX` - AVX512CD SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C FMA3 AVX2 AVX512F :enabled:`AVX512_SKX` - :disabled:`AVX512_KNL` :disabled:`SSE` :disabled:`SSE2` :disabled:`SSE3` :disabled:`SSSE3` :disabled:`SSE41` :disabled:`POPCNT` :disabled:`SSE42` :disabled:`AVX` :disabled:`F16C` :disabled:`FMA3` :disabled:`AVX2` :disabled:`AVX512F` :disabled:`AVX512CD` :disabled:`AVX512ER` :disabled:`AVX512PF` - :disabled:`AVX512_KNM` :disabled:`SSE` :disabled:`SSE2` :disabled:`SSE3` :disabled:`SSSE3` :disabled:`SSE41` :disabled:`POPCNT` :disabled:`SSE42` :disabled:`AVX` :disabled:`F16C` :disabled:`FMA3` :disabled:`AVX2` :disabled:`AVX512F` :disabled:`AVX512CD` :disabled:`AVX512_KNL` :disabled:`AVX5124FMAPS` :disabled:`AVX5124VNNIW` :disabled:`AVX512VPOPCNTDQ` - ====================== ============================================================================================================================================================================================================================================================= ============================================================================= + ====================== ================================================================================================================================================================================================================================================================================================================================== ============================================================================= + Name Implies Gathers + ====================== ================================================================================================================================================================================================================================================================================================================================== ============================================================================= + FMA3 SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C :enabled:`AVX2` + AVX2 SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C :enabled:`FMA3` + AVX512F SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C FMA3 AVX2 :enabled:`AVX512CD` :enabled:`AVX512_SKX` + AVX512CD SSE SSE2 SSE3 SSSE3 SSE41 POPCNT SSE42 AVX F16C FMA3 AVX2 AVX512F :enabled:`AVX512_SKX` + :disabled:`AVX512_KNL` :disabled:`SSE` :disabled:`SSE2` :disabled:`SSE3` :disabled:`SSSE3` :disabled:`SSE41` :disabled:`POPCNT` :disabled:`SSE42` :disabled:`AVX` :disabled:`F16C` :disabled:`FMA3` :disabled:`AVX2` :disabled:`AVX512F` :disabled:`AVX512CD` :disabled:`AVX512ER` :disabled:`AVX512PF` + :disabled:`AVX512_KNM` :disabled:`SSE` :disabled:`SSE2` :disabled:`SSE3` :disabled:`SSSE3` :disabled:`SSE41` :disabled:`POPCNT` :disabled:`SSE42` :disabled:`AVX` :disabled:`F16C` :disabled:`FMA3` :disabled:`AVX2` :disabled:`AVX512F` :disabled:`AVX512CD` :disabled:`AVX512_KNL` :disabled:`AVX5124FMAPS` :disabled:`AVX5124VNNIW` :disabled:`AVX512VPOPCNTDQ` + :disabled:`AVX512_SPR` :disabled:`SSE` :disabled:`SSE2` :disabled:`SSE3` :disabled:`SSSE3` :disabled:`SSE41` :disabled:`POPCNT` :disabled:`SSE42` :disabled:`AVX` :disabled:`F16C` :disabled:`FMA3` :disabled:`AVX2` :disabled:`AVX512F` :disabled:`AVX512CD` :disabled:`AVX512_SKX` :disabled:`AVX512_CLX` :disabled:`AVX512_CNL` :disabled:`AVX512_ICL` :disabled:`AVX512FP16` + ====================== ================================================================================================================================================================================================================================================================================================================================== ============================================================================= diff --git a/doc/source/reference/simd/generated_tables/cpu_features.inc b/doc/source/reference/simd/generated_tables/cpu_features.inc index 7782172d2..603370e21 100644 --- a/doc/source/reference/simd/generated_tables/cpu_features.inc +++ b/doc/source/reference/simd/generated_tables/cpu_features.inc @@ -1,35 +1,36 @@ -.. generated via /home/seiko/work/repos/review/numpy/doc/source/reference/simd/gen_features.py +.. generated via /numpy/numpy/./doc/source/reference/simd/gen_features.py On x86 ~~~~~~ .. table:: :align: left - ============== =========================================================================================================================================================================== ===================================================== - Name Implies Gathers - ============== =========================================================================================================================================================================== ===================================================== - ``SSE`` ``SSE2`` - ``SSE2`` ``SSE`` - ``SSE3`` ``SSE`` ``SSE2`` - ``SSSE3`` ``SSE`` ``SSE2`` ``SSE3`` - ``SSE41`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` - ``POPCNT`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` - ``SSE42`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` - ``AVX`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` - ``XOP`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` - ``FMA4`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` - ``F16C`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` - ``FMA3`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` - ``AVX2`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` - ``AVX512F`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` - ``AVX512CD`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` - ``AVX512_KNL`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512ER`` ``AVX512PF`` - ``AVX512_KNM`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512_KNL`` ``AVX5124FMAPS`` ``AVX5124VNNIW`` ``AVX512VPOPCNTDQ`` - ``AVX512_SKX`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512VL`` ``AVX512BW`` ``AVX512DQ`` - ``AVX512_CLX`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512_SKX`` ``AVX512VNNI`` - ``AVX512_CNL`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512_SKX`` ``AVX512IFMA`` ``AVX512VBMI`` - ``AVX512_ICL`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512_SKX`` ``AVX512_CLX`` ``AVX512_CNL`` ``AVX512VBMI2`` ``AVX512BITALG`` ``AVX512VPOPCNTDQ`` - ============== =========================================================================================================================================================================== ===================================================== + ============== ========================================================================================================================================================================================== ===================================================== + Name Implies Gathers + ============== ========================================================================================================================================================================================== ===================================================== + ``SSE`` ``SSE2`` + ``SSE2`` ``SSE`` + ``SSE3`` ``SSE`` ``SSE2`` + ``SSSE3`` ``SSE`` ``SSE2`` ``SSE3`` + ``SSE41`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` + ``POPCNT`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` + ``SSE42`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` + ``AVX`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` + ``XOP`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` + ``FMA4`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` + ``F16C`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` + ``FMA3`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` + ``AVX2`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` + ``AVX512F`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` + ``AVX512CD`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` + ``AVX512_KNL`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512ER`` ``AVX512PF`` + ``AVX512_KNM`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512_KNL`` ``AVX5124FMAPS`` ``AVX5124VNNIW`` ``AVX512VPOPCNTDQ`` + ``AVX512_SKX`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512VL`` ``AVX512BW`` ``AVX512DQ`` + ``AVX512_CLX`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512_SKX`` ``AVX512VNNI`` + ``AVX512_CNL`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512_SKX`` ``AVX512IFMA`` ``AVX512VBMI`` + ``AVX512_ICL`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512_SKX`` ``AVX512_CLX`` ``AVX512_CNL`` ``AVX512VBMI2`` ``AVX512BITALG`` ``AVX512VPOPCNTDQ`` + ``AVX512_SPR`` ``SSE`` ``SSE2`` ``SSE3`` ``SSSE3`` ``SSE41`` ``POPCNT`` ``SSE42`` ``AVX`` ``F16C`` ``FMA3`` ``AVX2`` ``AVX512F`` ``AVX512CD`` ``AVX512_SKX`` ``AVX512_CLX`` ``AVX512_CNL`` ``AVX512_ICL`` ``AVX512FP16`` + ============== ========================================================================================================================================================================================== ===================================================== On IBM/POWER big-endian ~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/numpy/core/src/common/npy_cpu_features.c b/numpy/core/src/common/npy_cpu_features.c index 949c75ad5..92a4e432b 100644 --- a/numpy/core/src/common/npy_cpu_features.c +++ b/numpy/core/src/common/npy_cpu_features.c @@ -81,12 +81,14 @@ static struct { {NPY_CPU_FEATURE_AVX512VBMI, "AVX512VBMI"}, {NPY_CPU_FEATURE_AVX512VBMI2, "AVX512VBMI2"}, {NPY_CPU_FEATURE_AVX512BITALG, "AVX512BITALG"}, + {NPY_CPU_FEATURE_AVX512FP16 , "AVX512FP16"}, {NPY_CPU_FEATURE_AVX512_KNL, "AVX512_KNL"}, {NPY_CPU_FEATURE_AVX512_KNM, "AVX512_KNM"}, {NPY_CPU_FEATURE_AVX512_SKX, "AVX512_SKX"}, {NPY_CPU_FEATURE_AVX512_CLX, "AVX512_CLX"}, {NPY_CPU_FEATURE_AVX512_CNL, "AVX512_CNL"}, {NPY_CPU_FEATURE_AVX512_ICL, "AVX512_ICL"}, + {NPY_CPU_FEATURE_AVX512_SPR, "AVX512_SPR"}, {NPY_CPU_FEATURE_VSX, "VSX"}, {NPY_CPU_FEATURE_VSX2, "VSX2"}, {NPY_CPU_FEATURE_VSX3, "VSX3"}, @@ -506,6 +508,11 @@ npy__cpu_init_features(void) npy__cpu_have[NPY_CPU_FEATURE_AVX512VBMI2] && npy__cpu_have[NPY_CPU_FEATURE_AVX512BITALG] && npy__cpu_have[NPY_CPU_FEATURE_AVX512VPOPCNTDQ]; + // Sapphire Rapids + npy__cpu_have[NPY_CPU_FEATURE_AVX512FP16] = (reg[3] & (1 << 23)) != 0; + npy__cpu_have[NPY_CPU_FEATURE_AVX512_SPR] = npy__cpu_have[NPY_CPU_FEATURE_AVX512_ICL] && + npy__cpu_have[NPY_CPU_FEATURE_AVX512FP16]; + } } diff --git a/numpy/core/src/common/npy_cpu_features.h b/numpy/core/src/common/npy_cpu_features.h index 96c543e70..b49aea247 100644 --- a/numpy/core/src/common/npy_cpu_features.h +++ b/numpy/core/src/common/npy_cpu_features.h @@ -43,6 +43,7 @@ enum npy_cpu_features NPY_CPU_FEATURE_AVX512VNNI = 42, NPY_CPU_FEATURE_AVX512VBMI2 = 43, NPY_CPU_FEATURE_AVX512BITALG = 44, + NPY_CPU_FEATURE_AVX512FP16 = 45, // X86 CPU Groups // Knights Landing (F,CD,ER,PF) @@ -57,6 +58,8 @@ enum npy_cpu_features NPY_CPU_FEATURE_AVX512_CNL = 105, // Ice Lake (F,CD,BW,DQ,VL,IFMA,VBMI,VNNI,VBMI2,BITALG,VPOPCNTDQ) NPY_CPU_FEATURE_AVX512_ICL = 106, + // Sapphire Rapids (Ice Lake, AVX512FP16) + NPY_CPU_FEATURE_AVX512_SPR = 107, // IBM/POWER VSX // POWER7 diff --git a/numpy/core/tests/test_cpu_features.py b/numpy/core/tests/test_cpu_features.py index 1a76897e2..8c1c25ed4 100644 --- a/numpy/core/tests/test_cpu_features.py +++ b/numpy/core/tests/test_cpu_features.py @@ -116,7 +116,7 @@ class Test_X86_Features(AbstractTest): "AVX", "F16C", "XOP", "FMA4", "FMA3", "AVX2", "AVX512F", "AVX512CD", "AVX512ER", "AVX512PF", "AVX5124FMAPS", "AVX5124VNNIW", "AVX512VPOPCNTDQ", "AVX512VL", "AVX512BW", "AVX512DQ", "AVX512VNNI", "AVX512IFMA", - "AVX512VBMI", "AVX512VBMI2", "AVX512BITALG", + "AVX512VBMI", "AVX512VBMI2", "AVX512BITALG", "AVX512FP16", ] features_groups = dict( AVX512_KNL = ["AVX512F", "AVX512CD", "AVX512ER", "AVX512PF"], @@ -128,6 +128,10 @@ class Test_X86_Features(AbstractTest): "AVX512VBMI"], AVX512_ICL = ["AVX512F", "AVX512CD", "AVX512BW", "AVX512DQ", "AVX512VL", "AVX512IFMA", "AVX512VBMI", "AVX512VNNI", "AVX512VBMI2", "AVX512BITALG", "AVX512VPOPCNTDQ"], + AVX512_SPR = ["AVX512F", "AVX512CD", "AVX512BW", "AVX512DQ", + "AVX512VL", "AVX512IFMA", "AVX512VBMI", "AVX512VNNI", + "AVX512VBMI2", "AVX512BITALG", "AVX512VPOPCNTDQ", + "AVX512FP16"], ) features_map = dict( SSE3="PNI", SSE41="SSE4_1", SSE42="SSE4_2", FMA3="FMA", diff --git a/numpy/distutils/ccompiler_opt.py b/numpy/distutils/ccompiler_opt.py index 4bb0dd008..781404446 100644 --- a/numpy/distutils/ccompiler_opt.py +++ b/numpy/distutils/ccompiler_opt.py @@ -295,6 +295,10 @@ class _Config: group="AVX512VBMI2 AVX512BITALG AVX512VPOPCNTDQ", detect="AVX512_ICL", implies_detect=False ), + AVX512_SPR = dict( + interest=46, implies="AVX512_ICL", group="AVX512FP16", + detect="AVX512_SPR", implies_detect=False + ), # IBM/Power ## Power7/ISA 2.06 VSX = dict(interest=1, headers="altivec.h", extra_checks="VSX_ASM"), @@ -365,7 +369,8 @@ class _Config: AVX512_CNL = dict(flags="-mavx512ifma -mavx512vbmi"), AVX512_ICL = dict( flags="-mavx512vbmi2 -mavx512bitalg -mavx512vpopcntdq" - ) + ), + AVX512_SPR = dict(flags="-mavx512fp16"), ) if on_x86 and self.cc_is_icc: return dict( SSE = dict(flags="-msse"), @@ -397,6 +402,7 @@ class _Config: AVX512_CLX = dict(flags="-xCASCADELAKE"), AVX512_CNL = dict(flags="-xCANNONLAKE"), AVX512_ICL = dict(flags="-xICELAKE-CLIENT"), + AVX512_SPR = dict(disable="Not supported yet") ) if on_x86 and self.cc_is_iccw: return dict( SSE = dict(flags="/arch:SSE"), @@ -429,7 +435,8 @@ class _Config: AVX512_SKX = dict(flags="/Qx:SKYLAKE-AVX512"), AVX512_CLX = dict(flags="/Qx:CASCADELAKE"), AVX512_CNL = dict(flags="/Qx:CANNONLAKE"), - AVX512_ICL = dict(flags="/Qx:ICELAKE-CLIENT") + AVX512_ICL = dict(flags="/Qx:ICELAKE-CLIENT"), + AVX512_SPR = dict(disable="Not supported yet") ) if on_x86 and self.cc_is_msvc: return dict( SSE = dict(flags="/arch:SSE") if self.cc_on_x86 else {}, @@ -467,7 +474,10 @@ class _Config: AVX512_SKX = dict(flags="/arch:AVX512"), AVX512_CLX = {}, AVX512_CNL = {}, - AVX512_ICL = {} + AVX512_ICL = {}, + AVX512_SPR= dict( + disable="MSVC compiler doesn't support it" + ) ) on_power = self.cc_on_ppc64le or self.cc_on_ppc64 diff --git a/numpy/distutils/checks/cpu_avx512_spr.c b/numpy/distutils/checks/cpu_avx512_spr.c new file mode 100644 index 000000000..3c9575a57 --- /dev/null +++ b/numpy/distutils/checks/cpu_avx512_spr.c @@ -0,0 +1,22 @@ +#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER) + /* + * Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics, + * whether or not the build options for those features are specified. + * Therefore, we must test #definitions of CPU features when option native/host + * is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise + * the test will be broken and leads to enable all possible features. + */ + #if !defined(__AVX512FP16__) + #error "HOST/ARCH doesn't support Sapphire Rapids AVX512FP16 features" + #endif +#endif + +#include <immintrin.h> + +int main(int argc, char **argv) +{ + __m512h a = _mm512_loadu_ph((void*)argv[argc-1]); + __m512h temp = _mm512_fmadd_ph(a, a, a); + _mm512_storeu_ph((void*)(argv[argc-1]), temp); + return 0; +} |