summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Martijn van Beurden <mvanb1@gmail.com> 2022-10-20 07:39:18 +0200
committer GitHub <noreply@github.com> 2022-10-20 07:39:18 +0200
commit e7b584eaf979be9d07dcdcc30c54f5fcac95ebac (patch)
tree b7759c37ef0329c63f9a60be2fdb1bc1ba32f99f
parent 396313c9d0bac849f378f2d7072e2ea55ddc8e9f (diff)
download flac-e7b584eaf979be9d07dcdcc30c54f5fcac95ebac.tar.gz
Remove default msse2 on x86, tidy up asm optimizations
Remove the default addition of -msse2 on x86. Profiling showed that it helps little on modern systems; see https://github.com/xiph/flac/issues/486. The mention of Asm optimizations at the end of configuration was also wrong in several ways: it was 'yes' on platforms for which there are no optimizations, and it wasn't set to 'no' when the intrinsics headers aren't available.
-rw-r--r-- configure.ac 51
-rw-r--r-- src/libFLAC/CMakeLists.txt 12
2 files changed, 16 insertions, 47 deletions
diff --git a/configure.ac b/configure.ac
index 6fa2c752..913e5f16 100644
--- a/configure.ac
+++ b/configure.ac
@@ -74,8 +74,7 @@ AC_DEFINE_UNQUOTED(CPU_IS_LITTLE_ENDIAN, ${ac_cv_c_little_endian},
AC_DEFINE_UNQUOTED(WORDS_BIGENDIAN, ${ac_cv_c_big_endian},
[Target processor is big endian.])
-AC_ARG_ENABLE(asm-optimizations, AS_HELP_STRING([--disable-asm-optimizations],[Don't use any assembly optimization routines]), asm_opt=no, asm_opt=yes)
-dnl ' Terminate the damn single quote
+AC_ARG_ENABLE(asm-optimizations, AS_HELP_STRING([--disable-asm-optimizations],[Do not use any CPU specific optimization routines]), asm_opt=no, asm_opt=yes)
AM_CONDITIONAL(FLaC__NO_ASM, test "x$asm_opt" = xno)
if test "x$asm_opt" = xno ; then
AC_DEFINE(FLAC__NO_ASM)
@@ -100,7 +99,6 @@ case "$host_cpu" in
cpu_x86_64=true
AC_DEFINE(FLAC__CPU_X86_64)
AH_TEMPLATE(FLAC__CPU_X86_64, [define if building for x86_64])
- asm_optimisation=$asm_opt
;;
*)
if test $ac_cv_sizeof_voidp = 4 ; then
@@ -115,7 +113,6 @@ case "$host_cpu" in
AC_DEFINE(FLAC__CPU_X86_64)
AH_TEMPLATE(FLAC__CPU_X86_64, [define if building for x86_64])
fi
- asm_optimisation=$asm_opt
;;
esac
;;
@@ -123,7 +120,6 @@ case "$host_cpu" in
cpu_ia32=true
AC_DEFINE(FLAC__CPU_IA32)
AH_TEMPLATE(FLAC__CPU_IA32, [define if building for ia32/i386])
- asm_optimisation=$asm_opt
;;
powerpc64|powerpc64le)
cpu_ppc64=true
@@ -132,25 +128,21 @@ case "$host_cpu" in
AH_TEMPLATE(FLAC__CPU_PPC, [define if building for PowerPC])
AC_DEFINE(FLAC__CPU_PPC64)
AH_TEMPLATE(FLAC__CPU_PPC64, [define if building for PowerPC64])
- asm_optimisation=$asm_opt
;;
powerpc|powerpcle)
cpu_ppc=true
AC_DEFINE(FLAC__CPU_PPC)
AH_TEMPLATE(FLAC__CPU_PPC, [define if building for PowerPC])
- asm_optimisation=$asm_opt
;;
arm64|aarch64)
cpu_arm64=true
AC_DEFINE(FLAC__CPU_ARM64)
AH_TEMPLATE(FLAC__CPU_ARM64, [define if building for ARM])
- asm_optimisation=$asm_opt
;;
sparc)
cpu_sparc=true
AC_DEFINE(FLAC__CPU_SPARC)
AH_TEMPLATE(FLAC__CPU_SPARC, [define if building for SPARC])
- asm_optimisation=$asm_opt
;;
esac
AM_CONDITIONAL(FLAC__CPU_X86_64, test "x$cpu_x86_64" = xtrue)
@@ -160,16 +152,15 @@ AM_CONDITIONAL(FLaC__CPU_PPC64, test "x$cpu_ppc64" = xtrue)
AM_CONDITIONAL(FLAC__CPU_ARM64, test "x$cpu_arm64" = xtrue)
AM_CONDITIONAL(FLaC__CPU_SPARC, test "x$cpu_sparc" = xtrue)
-if test "x$ac_cv_header_x86intrin_h" = xyes; then
-AC_DEFINE([FLAC__HAS_X86INTRIN], 1, [Set to 1 if <x86intrin.h> is available.])
+if test "x$ac_cv_header_x86intrin_h" = xyes -a "x$asm_opt" = xyes; then
+ AC_DEFINE([FLAC__HAS_X86INTRIN], 1, [Set to 1 if <x86intrin.h> is available.])
+ asm_optimisation=yes
else
-AC_DEFINE([FLAC__HAS_X86INTRIN], 0)
+ AC_DEFINE([FLAC__HAS_X86INTRIN], 0)
fi
-neon=no
-if test "x$ac_cv_header_arm_neon_h" = xyes; then
-AC_DEFINE([FLAC__HAS_NEONINTRIN], 1, [Set to 1 if <arm_neon.h> is available.])
-neon=yes
+if test "x$ac_cv_header_arm_neon_h" = xyes -a "x$asm_opt" = xyes; then
+ AC_DEFINE([FLAC__HAS_NEONINTRIN], 1, [Set to 1 if <arm_neon.h> is available.])
AC_MSG_CHECKING([whether arm_neon.h has A64 functions])
AC_COMPILE_IFELSE(
[AC_LANG_PROGRAM([[#include <arm_neon.h>]],
@@ -178,15 +169,16 @@ neon=yes
has_a64neon=yes],
[AC_MSG_RESULT([no])])
if test "x$has_a64neon" = xyes; then
- AC_DEFINE([FLAC__HAS_A64NEONINTRIN], 1, [Set to 1 if <arm_neon.h> has A64 instructions.])
+ AC_DEFINE([FLAC__HAS_A64NEONINTRIN], 1, [Set to 1 if <arm_neon.h> has A64 instructions.])
+ asm_optimisation=yes
else
- AC_DEFINE([FLAC__HAS_A64NEONINTRIN], 0)
+ AC_DEFINE([FLAC__HAS_A64NEONINTRIN], 0)
fi
else
-AC_DEFINE([FLAC__HAS_NEONINTRIN], 0)
+ AC_DEFINE([FLAC__HAS_NEONINTRIN], 0)
fi
-if test x"$cpu_ppc64" = xtrue ; then
+if test x"$cpu_ppc64" = xtrue -a "x$asm_opt" = xyes ; then
AC_C_ATTRIBUTE([target("cpu=power8")],
[have_cpu_power8=yes],
@@ -257,16 +249,8 @@ fi
AM_CONDITIONAL([DEBUG], [test "x${ax_enable_debug}" = "xyes" || test "x${ax_enable_debug}" = "xinfo"])
-AC_ARG_ENABLE(sse,
-AS_HELP_STRING([--disable-sse],[Disable passing of -msse2 to the compiler]),
-[case "${enableval}" in
- yes) sse_os=yes ;;
- no) sse_os=no ;;
- *) AC_MSG_ERROR(bad value ${enableval} for --enable-sse) ;;
-esac],[sse_os=yes])
-
AC_ARG_ENABLE(altivec,
-AS_HELP_STRING([--disable-altivec],[Disable Altivec optimizations]),
+AS_HELP_STRING([--disable-altivec],[Disable use of Altivec instructions]),
[case "${enableval}" in
yes) use_altivec=true ;;
no) use_altivec=false ;;
@@ -289,10 +273,11 @@ AM_CONDITIONAL(FLaC__USE_VSX, test "x$use_vsx" = xtrue)
if test "x$use_vsx$has_vec_doubleh" = xtruetrue ; then
AC_DEFINE(FLAC__USE_VSX)
AH_TEMPLATE(FLAC__USE_VSX, [define to enable use of VSX instructions])
+asm_optimisation=yes
fi
AC_ARG_ENABLE(avx,
-AS_HELP_STRING([--disable-avx],[Disable AVX, AVX2 optimizations]),
+AS_HELP_STRING([--disable-avx],[Disable AVX, AVX2 optimizations. There is runtime detection of CPU features, so disabling is only necessary when a compiler does not know about them]),
[case "${enableval}" in
yes) use_avx=true ;;
no) use_avx=false ;;
@@ -536,10 +521,6 @@ if test x$ac_cv_c_compiler_gnu = xyes -o x$xiph_cv_c_compiler_clang = xyes ; the
XIPH_ADD_CFLAGS([-fno-inline-small-functions])
fi
- if test "x$asm_optimisation$sse_os" = "xyesyes" ; then
- XIPH_ADD_CFLAGS([-msse2])
- fi
-
fi
case "$host_os" in
@@ -684,8 +665,6 @@ if test x$ac_cv_c_compiler_gnu = xyes ; then
echo " GCC version : ............................. ${GCC_VERSION}"
fi
echo " Compiler is Clang : ....................... ${xiph_cv_c_compiler_clang}"
- echo " SSE optimizations : ....................... ${sse_os}"
- echo " Neon optimizations : ...................... ${neon}"
echo " Asm optimizations : ....................... ${asm_optimisation}"
echo " Ogg/FLAC support : ........................ ${have_ogg}"
echo " Stack protector : ........................ ${enable_stack_smash_protection}"
diff --git a/src/libFLAC/CMakeLists.txt b/src/libFLAC/CMakeLists.txt
index cd99c8f8..caf0864d 100644
--- a/src/libFLAC/CMakeLists.txt
+++ b/src/libFLAC/CMakeLists.txt
@@ -19,7 +19,7 @@ endif()
if(FLAC__CPU_X86_64 OR FLAC__CPU_IA32)
set(FLAC__ALIGN_MALLOC_DATA 1)
- option(WITH_AVX "Enable AVX, AVX2 optimizations (with runtime detection, resulting binary does not require AVX2)" ON)
+ option(WITH_AVX "Enable AVX, AVX2 optimizations (with runtime detection, resulting binary does not require AVX2, so only necessary when a compiler doesn't know about AVX)" ON)
if(WITH_AVX AND MSVC)
set_source_files_properties(lpc_intrin_avx2.c stream_encoder_intrin_avx2.c lpc_intrin_fma.c PROPERTIES COMPILE_FLAGS /arch:AVX2)
endif()
@@ -42,16 +42,6 @@ if(NOT WITH_ASM)
add_definitions(-DFLAC__NO_ASM)
endif()
-if(FLAC__CPU_IA32)
- option(WITH_SSE "Enable SSE2 optimizations (WITHOUT runtime detection, resulting binary requires SSE2)" ON)
- check_c_compiler_flag(-msse2 HAVE_MSSE2_FLAG)
- if(WITH_SSE)
- add_compile_options(
- $<$<BOOL:${HAVE_MSSE2_FLAG}>:-msse2>
- $<$<BOOL:${MSVC}>:/arch:SSE2>)
- endif()
-endif()
-
include_directories("include")
add_library(FLAC