Merge pull request #22168 from Developer-Ecosystem-Engineering/remove_Avx_when_not_used

ENH: Remove AVX-related functions from non-x86-based builds
author: Matti Picus <matti.picus@gmail.com> 2022-10-20 08:59:42 +0300
committer: GitHub <noreply@github.com> 2022-10-20 08:59:42 +0300
commit: 7dd4684ac9fe0917690f12397a74106f72e66eee (patch)
tree: cb5e20a858b745013066b5ddfcf703a5f4365e85
parent: 7776bc6166915322bc61c4be988fc99727bc2f9a (diff)
parent: bdc12d124e5cfa02d7f0bd544d39117da051d87c (diff)
download: numpy-7dd4684ac9fe0917690f12397a74106f72e66eee.tar.gz
3 files changed, 60 insertions, 41 deletions
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index 17dc8438e..10b8c093e 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -177,6 +177,16 @@ def check_math_capabilities(config, ext, moredefs, mathlibs):
         else:
             return 1
 
+    # GH-14787: Work around GCC<8.4 bug when compiling with AVX512
+    # support on Windows-based platforms
+    def check_gh14787(fn):
+        if fn == 'attribute_target_avx512f':
+            if (sys.platform in ('win32', 'cygwin') and
+                    config.check_compiler_gcc() and
+                    not config.check_gcc_version_at_least(8, 4)):
+                ext.extra_compile_args.extend(
+                        ['-ffixed-xmm%s' % n for n in range(16, 32)])
+
     #use_msvc = config.check_decl("_MSC_VER")
     if not check_funcs_once(MANDATORY_FUNCS, add_to_moredefs=False):
         raise SystemError("One of the required function to build numpy is not"
@@ -227,19 +237,19 @@ def check_math_capabilities(config, ext, moredefs, mathlibs):
     for dec, fn in OPTIONAL_FUNCTION_ATTRIBUTES:
         if config.check_gcc_function_attribute(dec, fn):
             moredefs.append((fname2def(fn), 1))
-            if fn == 'attribute_target_avx512f':
-                # GH-14787: Work around GCC<8.4 bug when compiling with AVX512
-                # support on Windows-based platforms
-                if (sys.platform in ('win32', 'cygwin') and
-                        config.check_compiler_gcc() and
-                        not config.check_gcc_version_at_least(8, 4)):
-                    ext.extra_compile_args.extend(
-                            ['-ffixed-xmm%s' % n for n in range(16, 32)])
-
-    for dec, fn, code, header in OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS:
-        if config.check_gcc_function_attribute_with_intrinsics(dec, fn, code,
-                                                               header):
-            moredefs.append((fname2def(fn), 1))
+            check_gh14787(fn)
+
+    platform = sysconfig.get_platform()
+    if ("x86_64" in platform):
+        for dec, fn in OPTIONAL_FUNCTION_ATTRIBUTES_AVX:
+            if config.check_gcc_function_attribute(dec, fn):
+                moredefs.append((fname2def(fn), 1))
+                check_gh14787(fn)
+        for dec, fn, code, header in (
+        OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS_AVX):
+            if config.check_gcc_function_attribute_with_intrinsics(
+                    dec, fn, code, header):
+                moredefs.append((fname2def(fn), 1))
 
     for fn in OPTIONAL_VARIABLE_ATTRIBUTES:
         if config.check_gcc_variable_attribute(fn):
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py
index a8497fe75..55daa8648 100644
--- a/numpy/core/setup_common.py
+++ b/numpy/core/setup_common.py
@@ -209,16 +209,18 @@ OPTIONAL_FUNCTION_ATTRIBUTES = [('__attribute__((optimize("unroll-loops")))',
                                  'attribute_optimize_opt_2'),
                                 ('__attribute__((nonnull (1)))',
                                  'attribute_nonnull'),
-                                ('__attribute__((target ("avx")))',
-                                 'attribute_target_avx'),
-                                ('__attribute__((target ("avx2")))',
-                                 'attribute_target_avx2'),
-                                ('__attribute__((target ("avx512f")))',
-                                 'attribute_target_avx512f'),
-                                ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))',
-                                 'attribute_target_avx512_skx'),
                                 ]
 
+OPTIONAL_FUNCTION_ATTRIBUTES_AVX = [('__attribute__((target ("avx")))',
+    'attribute_target_avx'),
+    ('__attribute__((target ("avx2")))',
+    'attribute_target_avx2'),
+    ('__attribute__((target ("avx512f")))',
+    'attribute_target_avx512f'),
+    ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))',
+    'attribute_target_avx512_skx'),
+    ]
+
 # function attributes with intrinsics
 # To ensure your compiler can compile avx intrinsics with just the attributes
 # gcc 4.8.4 support attributes but not with intrisics
@@ -227,23 +229,24 @@ OPTIONAL_FUNCTION_ATTRIBUTES = [('__attribute__((optimize("unroll-loops")))',
 # The _mm512_castps_si512 instruction is specific check for AVX-512F support
 # in gcc-4.9 which is missing a subset of intrinsics. See
 # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61878
-OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS = [('__attribute__((target("avx2,fma")))',
-                                'attribute_target_avx2_with_intrinsics',
-                                '__m256 temp = _mm256_set1_ps(1.0); temp = \
-                                _mm256_fmadd_ps(temp, temp, temp)',
-                                'immintrin.h'),
-                                ('__attribute__((target("avx512f")))',
-                                'attribute_target_avx512f_with_intrinsics',
-                                '__m512i temp = _mm512_castps_si512(_mm512_set1_ps(1.0))',
-                                'immintrin.h'),
-                                ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))',
-                                'attribute_target_avx512_skx_with_intrinsics',
-                                '__mmask8 temp = _mm512_fpclass_pd_mask(_mm512_set1_pd(1.0), 0x01);\
-                                __m512i unused_temp = \
-                                    _mm512_castps_si512(_mm512_set1_ps(1.0));\
-                                _mm_mask_storeu_epi8(NULL, 0xFF, _mm_broadcastmb_epi64(temp))',
-                                'immintrin.h'),
-                                ]
+OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS_AVX = [
+    ('__attribute__((target("avx2,fma")))',
+    'attribute_target_avx2_with_intrinsics',
+    '__m256 temp = _mm256_set1_ps(1.0); temp = \
+    _mm256_fmadd_ps(temp, temp, temp)',
+    'immintrin.h'),
+    ('__attribute__((target("avx512f")))',
+    'attribute_target_avx512f_with_intrinsics',
+    '__m512i temp = _mm512_castps_si512(_mm512_set1_ps(1.0))',
+    'immintrin.h'),
+    ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))',
+    'attribute_target_avx512_skx_with_intrinsics',
+    '__mmask8 temp = _mm512_fpclass_pd_mask(_mm512_set1_pd(1.0), 0x01);\
+    __m512i unused_temp = \
+        _mm512_castps_si512(_mm512_set1_ps(1.0));\
+    _mm_mask_storeu_epi8(NULL, 0xFF, _mm_broadcastmb_epi64(temp))',
+    'immintrin.h'),
+    ]
 
 def fname2def(name):
     return "HAVE_%s" % name.upper()
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index e5104db81..fe5aa9374 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -571,7 +571,6 @@ NPY_NO_EXPORT void
 
 /**begin repeat1
  * #isa = , _avx2#
- * #ISA = , AVX2#
  * #CHK = 1, defined(HAVE_ATTRIBUTE_TARGET_AVX2)#
  * #ATTR = , NPY_GCC_TARGET_AVX2#
  */
@@ -658,6 +657,7 @@ NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void
 #define INT_left_shift_needs_clear_floatstatus
 #define UINT_left_shift_needs_clear_floatstatus
 
+#if @CHK@
 NPY_NO_EXPORT NPY_GCC_OPT_3 void
 @TYPE@_left_shift@isa@(char **args, npy_intp const *dimensions, npy_intp const *steps,
                   void *NPY_UNUSED(func))
@@ -670,10 +670,12 @@ NPY_NO_EXPORT NPY_GCC_OPT_3 void
     npy_clear_floatstatus_barrier((char*)dimensions);
 #endif
 }
+#endif
 
 #undef INT_left_shift_needs_clear_floatstatus
 #undef UINT_left_shift_needs_clear_floatstatus
 
+#if @CHK@
 NPY_NO_EXPORT
 #ifndef NPY_DO_NOT_OPTIMIZE_@TYPE@_right_shift
 NPY_GCC_OPT_3
@@ -684,7 +686,7 @@ void
 {
     BINARY_LOOP_FAST(@type@, @type@, *out = npy_rshift@c@(in1, in2));
 }
-
+#endif
 
 /**begin repeat2
  * #kind = logical_and, logical_or#
@@ -1448,7 +1450,10 @@ NPY_NO_EXPORT void
 /**begin repeat2
  * #ISA  = , _avx512_skx#
  * #isa  = simd, avx512_skx#
+ * #CHK  = 1, defined(HAVE_ATTRIBUTE_TARGET_AVX512_SKX)#
  **/
+
+#if @CHK@
 NPY_NO_EXPORT void
 @TYPE@_@kind@@ISA@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
 {
@@ -1460,6 +1465,7 @@ NPY_NO_EXPORT void
     }
     npy_clear_floatstatus_barrier((char*)dimensions);
 }
+#endif
 /**end repeat2**/
 /**end repeat1**/
 
@@ -2289,7 +2295,7 @@ NPY_NO_EXPORT void
     }
 }
 
-#if @SIMD@
+#if @SIMD@ && defined(HAVE_ATTRIBUTE_TARGET_AVX512F)
 /**begin repeat1
  * arithmetic
  * #kind = conjugate, square, absolute#
author	Matti Picus <matti.picus@gmail.com>	2022-10-20 08:59:42 +0300
committer	GitHub <noreply@github.com>	2022-10-20 08:59:42 +0300
commit	7dd4684ac9fe0917690f12397a74106f72e66eee (patch)
tree	cb5e20a858b745013066b5ddfcf703a5f4365e85
parent	7776bc6166915322bc61c4be988fc99727bc2f9a (diff)
parent	bdc12d124e5cfa02d7f0bd544d39117da051d87c (diff)
download	numpy-7dd4684ac9fe0917690f12397a74106f72e66eee.tar.gz