diff options
author | Matti Picus <matti.picus@gmail.com> | 2022-10-20 08:59:42 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-10-20 08:59:42 +0300 |
commit | 7dd4684ac9fe0917690f12397a74106f72e66eee (patch) | |
tree | cb5e20a858b745013066b5ddfcf703a5f4365e85 | |
parent | 7776bc6166915322bc61c4be988fc99727bc2f9a (diff) | |
parent | bdc12d124e5cfa02d7f0bd544d39117da051d87c (diff) | |
download | numpy-7dd4684ac9fe0917690f12397a74106f72e66eee.tar.gz |
Merge pull request #22168 from Developer-Ecosystem-Engineering/remove_Avx_when_not_used
ENH: Remove AVX related functions from non x86 based builds
-rw-r--r-- | numpy/core/setup.py | 36 | ||||
-rw-r--r-- | numpy/core/setup_common.py | 53 | ||||
-rw-r--r-- | numpy/core/src/umath/loops.c.src | 12 |
3 files changed, 60 insertions, 41 deletions
diff --git a/numpy/core/setup.py b/numpy/core/setup.py index 17dc8438e..10b8c093e 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -177,6 +177,16 @@ def check_math_capabilities(config, ext, moredefs, mathlibs): else: return 1 + # GH-14787: Work around GCC<8.4 bug when compiling with AVX512 + # support on Windows-based platforms + def check_gh14787(fn): + if fn == 'attribute_target_avx512f': + if (sys.platform in ('win32', 'cygwin') and + config.check_compiler_gcc() and + not config.check_gcc_version_at_least(8, 4)): + ext.extra_compile_args.extend( + ['-ffixed-xmm%s' % n for n in range(16, 32)]) + #use_msvc = config.check_decl("_MSC_VER") if not check_funcs_once(MANDATORY_FUNCS, add_to_moredefs=False): raise SystemError("One of the required function to build numpy is not" @@ -227,19 +237,19 @@ def check_math_capabilities(config, ext, moredefs, mathlibs): for dec, fn in OPTIONAL_FUNCTION_ATTRIBUTES: if config.check_gcc_function_attribute(dec, fn): moredefs.append((fname2def(fn), 1)) - if fn == 'attribute_target_avx512f': - # GH-14787: Work around GCC<8.4 bug when compiling with AVX512 - # support on Windows-based platforms - if (sys.platform in ('win32', 'cygwin') and - config.check_compiler_gcc() and - not config.check_gcc_version_at_least(8, 4)): - ext.extra_compile_args.extend( - ['-ffixed-xmm%s' % n for n in range(16, 32)]) - - for dec, fn, code, header in OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS: - if config.check_gcc_function_attribute_with_intrinsics(dec, fn, code, - header): - moredefs.append((fname2def(fn), 1)) + check_gh14787(fn) + + platform = sysconfig.get_platform() + if ("x86_64" in platform): + for dec, fn in OPTIONAL_FUNCTION_ATTRIBUTES_AVX: + if config.check_gcc_function_attribute(dec, fn): + moredefs.append((fname2def(fn), 1)) + check_gh14787(fn) + for dec, fn, code, header in ( + OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS_AVX): + if config.check_gcc_function_attribute_with_intrinsics( + dec, fn, code, header): + moredefs.append((fname2def(fn), 1)) for fn in OPTIONAL_VARIABLE_ATTRIBUTES: if config.check_gcc_variable_attribute(fn): diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py index a8497fe75..55daa8648 100644 --- a/numpy/core/setup_common.py +++ b/numpy/core/setup_common.py @@ -209,16 +209,18 @@ OPTIONAL_FUNCTION_ATTRIBUTES = [('__attribute__((optimize("unroll-loops")))', 'attribute_optimize_opt_2'), ('__attribute__((nonnull (1)))', 'attribute_nonnull'), - ('__attribute__((target ("avx")))', - 'attribute_target_avx'), - ('__attribute__((target ("avx2")))', - 'attribute_target_avx2'), - ('__attribute__((target ("avx512f")))', - 'attribute_target_avx512f'), - ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))', - 'attribute_target_avx512_skx'), ] +OPTIONAL_FUNCTION_ATTRIBUTES_AVX = [('__attribute__((target ("avx")))', + 'attribute_target_avx'), + ('__attribute__((target ("avx2")))', + 'attribute_target_avx2'), + ('__attribute__((target ("avx512f")))', + 'attribute_target_avx512f'), + ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))', + 'attribute_target_avx512_skx'), + ] + # function attributes with intrinsics # To ensure your compiler can compile avx intrinsics with just the attributes # gcc 4.8.4 support attributes but not with intrisics @@ -227,23 +229,24 @@ OPTIONAL_FUNCTION_ATTRIBUTES = [('__attribute__((optimize("unroll-loops")))', # The _mm512_castps_si512 instruction is specific check for AVX-512F support # in gcc-4.9 which is missing a subset of intrinsics. See # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61878 -OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS = [('__attribute__((target("avx2,fma")))', - 'attribute_target_avx2_with_intrinsics', - '__m256 temp = _mm256_set1_ps(1.0); temp = \ - _mm256_fmadd_ps(temp, temp, temp)', - 'immintrin.h'), - ('__attribute__((target("avx512f")))', - 'attribute_target_avx512f_with_intrinsics', - '__m512i temp = _mm512_castps_si512(_mm512_set1_ps(1.0))', - 'immintrin.h'), - ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))', - 'attribute_target_avx512_skx_with_intrinsics', - '__mmask8 temp = _mm512_fpclass_pd_mask(_mm512_set1_pd(1.0), 0x01);\ - __m512i unused_temp = \ - _mm512_castps_si512(_mm512_set1_ps(1.0));\ - _mm_mask_storeu_epi8(NULL, 0xFF, _mm_broadcastmb_epi64(temp))', - 'immintrin.h'), - ] +OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS_AVX = [ + ('__attribute__((target("avx2,fma")))', + 'attribute_target_avx2_with_intrinsics', + '__m256 temp = _mm256_set1_ps(1.0); temp = \ + _mm256_fmadd_ps(temp, temp, temp)', + 'immintrin.h'), + ('__attribute__((target("avx512f")))', + 'attribute_target_avx512f_with_intrinsics', + '__m512i temp = _mm512_castps_si512(_mm512_set1_ps(1.0))', + 'immintrin.h'), + ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))', + 'attribute_target_avx512_skx_with_intrinsics', + '__mmask8 temp = _mm512_fpclass_pd_mask(_mm512_set1_pd(1.0), 0x01);\ + __m512i unused_temp = \ + _mm512_castps_si512(_mm512_set1_ps(1.0));\ + _mm_mask_storeu_epi8(NULL, 0xFF, _mm_broadcastmb_epi64(temp))', + 'immintrin.h'), + ] def fname2def(name): return "HAVE_%s" % name.upper() diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src index e5104db81..fe5aa9374 100644 --- a/numpy/core/src/umath/loops.c.src +++ b/numpy/core/src/umath/loops.c.src @@ -571,7 +571,6 @@ NPY_NO_EXPORT void /**begin repeat1 * #isa = , _avx2# - * #ISA = , AVX2# * #CHK = 1, defined(HAVE_ATTRIBUTE_TARGET_AVX2)# * #ATTR = , NPY_GCC_TARGET_AVX2# */ @@ -658,6 +657,7 @@ NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void #define INT_left_shift_needs_clear_floatstatus #define UINT_left_shift_needs_clear_floatstatus +#if @CHK@ NPY_NO_EXPORT NPY_GCC_OPT_3 void @TYPE@_left_shift@isa@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) @@ -670,10 +670,12 @@ NPY_NO_EXPORT NPY_GCC_OPT_3 void npy_clear_floatstatus_barrier((char*)dimensions); #endif } +#endif #undef INT_left_shift_needs_clear_floatstatus #undef UINT_left_shift_needs_clear_floatstatus +#if @CHK@ NPY_NO_EXPORT #ifndef NPY_DO_NOT_OPTIMIZE_@TYPE@_right_shift NPY_GCC_OPT_3 @@ -684,7 +686,7 @@ void { BINARY_LOOP_FAST(@type@, @type@, *out = npy_rshift@c@(in1, in2)); } - +#endif /**begin repeat2 * #kind = logical_and, logical_or# @@ -1448,7 +1450,10 @@ NPY_NO_EXPORT void /**begin repeat2 * #ISA = , _avx512_skx# * #isa = simd, avx512_skx# + * #CHK = 1, defined(HAVE_ATTRIBUTE_TARGET_AVX512_SKX)# **/ + +#if @CHK@ NPY_NO_EXPORT void @TYPE@_@kind@@ISA@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) { @@ -1460,6 +1465,7 @@ NPY_NO_EXPORT void } npy_clear_floatstatus_barrier((char*)dimensions); } +#endif /**end repeat2**/ /**end repeat1**/ @@ -2289,7 +2295,7 @@ NPY_NO_EXPORT void } } -#if @SIMD@ +#if @SIMD@ && defined(HAVE_ATTRIBUTE_TARGET_AVX512F) /**begin repeat1 * arithmetic * #kind = conjugate, square, absolute# |