summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatti Picus <matti.picus@gmail.com>2022-10-20 08:59:42 +0300
committerGitHub <noreply@github.com>2022-10-20 08:59:42 +0300
commit7dd4684ac9fe0917690f12397a74106f72e66eee (patch)
treecb5e20a858b745013066b5ddfcf703a5f4365e85
parent7776bc6166915322bc61c4be988fc99727bc2f9a (diff)
parentbdc12d124e5cfa02d7f0bd544d39117da051d87c (diff)
downloadnumpy-7dd4684ac9fe0917690f12397a74106f72e66eee.tar.gz
Merge pull request #22168 from Developer-Ecosystem-Engineering/remove_Avx_when_not_used
ENH: Remove AVX related functions from non x86 based builds
-rw-r--r--numpy/core/setup.py36
-rw-r--r--numpy/core/setup_common.py53
-rw-r--r--numpy/core/src/umath/loops.c.src12
3 files changed, 60 insertions, 41 deletions
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index 17dc8438e..10b8c093e 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -177,6 +177,16 @@ def check_math_capabilities(config, ext, moredefs, mathlibs):
else:
return 1
+ # GH-14787: Work around GCC<8.4 bug when compiling with AVX512
+ # support on Windows-based platforms
+ def check_gh14787(fn):
+ if fn == 'attribute_target_avx512f':
+ if (sys.platform in ('win32', 'cygwin') and
+ config.check_compiler_gcc() and
+ not config.check_gcc_version_at_least(8, 4)):
+ ext.extra_compile_args.extend(
+ ['-ffixed-xmm%s' % n for n in range(16, 32)])
+
#use_msvc = config.check_decl("_MSC_VER")
if not check_funcs_once(MANDATORY_FUNCS, add_to_moredefs=False):
raise SystemError("One of the required function to build numpy is not"
@@ -227,19 +237,19 @@ def check_math_capabilities(config, ext, moredefs, mathlibs):
for dec, fn in OPTIONAL_FUNCTION_ATTRIBUTES:
if config.check_gcc_function_attribute(dec, fn):
moredefs.append((fname2def(fn), 1))
- if fn == 'attribute_target_avx512f':
- # GH-14787: Work around GCC<8.4 bug when compiling with AVX512
- # support on Windows-based platforms
- if (sys.platform in ('win32', 'cygwin') and
- config.check_compiler_gcc() and
- not config.check_gcc_version_at_least(8, 4)):
- ext.extra_compile_args.extend(
- ['-ffixed-xmm%s' % n for n in range(16, 32)])
-
- for dec, fn, code, header in OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS:
- if config.check_gcc_function_attribute_with_intrinsics(dec, fn, code,
- header):
- moredefs.append((fname2def(fn), 1))
+ check_gh14787(fn)
+
+ platform = sysconfig.get_platform()
+ if ("x86_64" in platform):
+ for dec, fn in OPTIONAL_FUNCTION_ATTRIBUTES_AVX:
+ if config.check_gcc_function_attribute(dec, fn):
+ moredefs.append((fname2def(fn), 1))
+ check_gh14787(fn)
+ for dec, fn, code, header in (
+ OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS_AVX):
+ if config.check_gcc_function_attribute_with_intrinsics(
+ dec, fn, code, header):
+ moredefs.append((fname2def(fn), 1))
for fn in OPTIONAL_VARIABLE_ATTRIBUTES:
if config.check_gcc_variable_attribute(fn):
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py
index a8497fe75..55daa8648 100644
--- a/numpy/core/setup_common.py
+++ b/numpy/core/setup_common.py
@@ -209,16 +209,18 @@ OPTIONAL_FUNCTION_ATTRIBUTES = [('__attribute__((optimize("unroll-loops")))',
'attribute_optimize_opt_2'),
('__attribute__((nonnull (1)))',
'attribute_nonnull'),
- ('__attribute__((target ("avx")))',
- 'attribute_target_avx'),
- ('__attribute__((target ("avx2")))',
- 'attribute_target_avx2'),
- ('__attribute__((target ("avx512f")))',
- 'attribute_target_avx512f'),
- ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))',
- 'attribute_target_avx512_skx'),
]
+OPTIONAL_FUNCTION_ATTRIBUTES_AVX = [('__attribute__((target ("avx")))',
+ 'attribute_target_avx'),
+ ('__attribute__((target ("avx2")))',
+ 'attribute_target_avx2'),
+ ('__attribute__((target ("avx512f")))',
+ 'attribute_target_avx512f'),
+ ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))',
+ 'attribute_target_avx512_skx'),
+ ]
+
# function attributes with intrinsics
# To ensure your compiler can compile avx intrinsics with just the attributes
# gcc 4.8.4 support attributes but not with intrisics
@@ -227,23 +229,24 @@ OPTIONAL_FUNCTION_ATTRIBUTES = [('__attribute__((optimize("unroll-loops")))',
# The _mm512_castps_si512 instruction is specific check for AVX-512F support
# in gcc-4.9 which is missing a subset of intrinsics. See
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61878
-OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS = [('__attribute__((target("avx2,fma")))',
- 'attribute_target_avx2_with_intrinsics',
- '__m256 temp = _mm256_set1_ps(1.0); temp = \
- _mm256_fmadd_ps(temp, temp, temp)',
- 'immintrin.h'),
- ('__attribute__((target("avx512f")))',
- 'attribute_target_avx512f_with_intrinsics',
- '__m512i temp = _mm512_castps_si512(_mm512_set1_ps(1.0))',
- 'immintrin.h'),
- ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))',
- 'attribute_target_avx512_skx_with_intrinsics',
- '__mmask8 temp = _mm512_fpclass_pd_mask(_mm512_set1_pd(1.0), 0x01);\
- __m512i unused_temp = \
- _mm512_castps_si512(_mm512_set1_ps(1.0));\
- _mm_mask_storeu_epi8(NULL, 0xFF, _mm_broadcastmb_epi64(temp))',
- 'immintrin.h'),
- ]
+OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS_AVX = [
+ ('__attribute__((target("avx2,fma")))',
+ 'attribute_target_avx2_with_intrinsics',
+ '__m256 temp = _mm256_set1_ps(1.0); temp = \
+ _mm256_fmadd_ps(temp, temp, temp)',
+ 'immintrin.h'),
+ ('__attribute__((target("avx512f")))',
+ 'attribute_target_avx512f_with_intrinsics',
+ '__m512i temp = _mm512_castps_si512(_mm512_set1_ps(1.0))',
+ 'immintrin.h'),
+ ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))',
+ 'attribute_target_avx512_skx_with_intrinsics',
+ '__mmask8 temp = _mm512_fpclass_pd_mask(_mm512_set1_pd(1.0), 0x01);\
+ __m512i unused_temp = \
+ _mm512_castps_si512(_mm512_set1_ps(1.0));\
+ _mm_mask_storeu_epi8(NULL, 0xFF, _mm_broadcastmb_epi64(temp))',
+ 'immintrin.h'),
+ ]
def fname2def(name):
return "HAVE_%s" % name.upper()
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index e5104db81..fe5aa9374 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -571,7 +571,6 @@ NPY_NO_EXPORT void
/**begin repeat1
* #isa = , _avx2#
- * #ISA = , AVX2#
* #CHK = 1, defined(HAVE_ATTRIBUTE_TARGET_AVX2)#
* #ATTR = , NPY_GCC_TARGET_AVX2#
*/
@@ -658,6 +657,7 @@ NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void
#define INT_left_shift_needs_clear_floatstatus
#define UINT_left_shift_needs_clear_floatstatus
+#if @CHK@
NPY_NO_EXPORT NPY_GCC_OPT_3 void
@TYPE@_left_shift@isa@(char **args, npy_intp const *dimensions, npy_intp const *steps,
void *NPY_UNUSED(func))
@@ -670,10 +670,12 @@ NPY_NO_EXPORT NPY_GCC_OPT_3 void
npy_clear_floatstatus_barrier((char*)dimensions);
#endif
}
+#endif
#undef INT_left_shift_needs_clear_floatstatus
#undef UINT_left_shift_needs_clear_floatstatus
+#if @CHK@
NPY_NO_EXPORT
#ifndef NPY_DO_NOT_OPTIMIZE_@TYPE@_right_shift
NPY_GCC_OPT_3
@@ -684,7 +686,7 @@ void
{
BINARY_LOOP_FAST(@type@, @type@, *out = npy_rshift@c@(in1, in2));
}
-
+#endif
/**begin repeat2
* #kind = logical_and, logical_or#
@@ -1448,7 +1450,10 @@ NPY_NO_EXPORT void
/**begin repeat2
* #ISA = , _avx512_skx#
* #isa = simd, avx512_skx#
+ * #CHK = 1, defined(HAVE_ATTRIBUTE_TARGET_AVX512_SKX)#
**/
+
+#if @CHK@
NPY_NO_EXPORT void
@TYPE@_@kind@@ISA@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
{
@@ -1460,6 +1465,7 @@ NPY_NO_EXPORT void
}
npy_clear_floatstatus_barrier((char*)dimensions);
}
+#endif
/**end repeat2**/
/**end repeat1**/
@@ -2289,7 +2295,7 @@ NPY_NO_EXPORT void
}
}
-#if @SIMD@
+#if @SIMD@ && defined(HAVE_ATTRIBUTE_TARGET_AVX512F)
/**begin repeat1
* arithmetic
* #kind = conjugate, square, absolute#