summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMartijn van Beurden <mvanb1@gmail.com>2022-07-25 12:07:24 +0200
committerGitHub <noreply@github.com>2022-07-25 12:07:24 +0200
commitc6a4d5c07b8e84a634a39fe2d86153796e207351 (patch)
tree052f56df6363d9f5c0e0e26d269a9406543a4bc4
parent67131c04b8817c83b1744bcb7e4ffdde2bf92720 (diff)
downloadflac-c6a4d5c07b8e84a634a39fe2d86153796e207351.tar.gz
Add FMA intrinsics for autocorrelation calculation
See https://github.com/xiph/flac/pull/387 for details
-rw-r--r--src/libFLAC/CMakeLists.txt2
-rw-r--r--src/libFLAC/Makefile.am2
-rw-r--r--src/libFLAC/deduplication/lpc_compute_autocorrelation_intrin.c13
-rw-r--r--src/libFLAC/include/private/cpu.h5
-rw-r--r--src/libFLAC/include/private/lpc.h7
-rw-r--r--src/libFLAC/lpc_intrin_fma.c73
-rw-r--r--src/libFLAC/stream_encoder.c37
7 files changed, 130 insertions, 9 deletions
diff --git a/src/libFLAC/CMakeLists.txt b/src/libFLAC/CMakeLists.txt
index 3e3804e0..50cb6dbf 100644
--- a/src/libFLAC/CMakeLists.txt
+++ b/src/libFLAC/CMakeLists.txt
@@ -22,6 +22,7 @@ if(FLAC__CPU_X86_64 OR FLAC__CPU_IA32)
option(WITH_AVX "Enable AVX, AVX2 optimizations (with runtime detection, resulting binary does not require AVX2)" ON)
if(WITH_AVX AND MSVC)
set_source_files_properties(lpc_intrin_avx2.c stream_encoder_intrin_avx2.c PROPERTIES COMPILE_FLAGS /arch:AVX2)
+ set_source_files_properties(lpc_intrin_fma.c PROPERTIES COMPILE_FLAGS "/arch:AVX2 /fp:fast")
endif()
else()
check_cpu_arch_ppc64(FLAC__CPU_PPC64)
@@ -82,6 +83,7 @@ add_library(FLAC
lpc_intrin_sse2.c
lpc_intrin_sse41.c
lpc_intrin_avx2.c
+ lpc_intrin_fma.c
lpc_intrin_vsx.c
md5.c
memory.c
diff --git a/src/libFLAC/Makefile.am b/src/libFLAC/Makefile.am
index ddd37bc7..12c7abdf 100644
--- a/src/libFLAC/Makefile.am
+++ b/src/libFLAC/Makefile.am
@@ -75,6 +75,7 @@ EXTRA_DIST = \
CMakeLists.txt \
flac.pc.in \
libFLAC.m4 \
+ deduplication/lpc_compute_autocorrelation_intrin.c \
deduplication/lpc_compute_autocorrelation_intrin_sse2.c \
deduplication/lpc_compute_autocorrelation_intrin_vsx.c \
deduplication/lpc_compute_autocorrelation_intrin_neon.c
@@ -109,6 +110,7 @@ libFLAC_sources = \
lpc_intrin_sse2.c \
lpc_intrin_sse41.c \
lpc_intrin_avx2.c \
+ lpc_intrin_fma.c \
lpc_intrin_vsx.c \
lpc_intrin_neon.c \
md5.c \
diff --git a/src/libFLAC/deduplication/lpc_compute_autocorrelation_intrin.c b/src/libFLAC/deduplication/lpc_compute_autocorrelation_intrin.c
new file mode 100644
index 00000000..5843b000
--- /dev/null
+++ b/src/libFLAC/deduplication/lpc_compute_autocorrelation_intrin.c
@@ -0,0 +1,13 @@
+ (void) lag;
+ FLAC__ASSERT(lag <= MAX_LAG);
+
+ for(int i = 0; i < MAX_LAG; i++)
+ autoc[i] = 0.0;
+
+ for(int i = 0; i < MAX_LAG; i++)
+ for(int j = 0; j <= i; j++)
+ autoc[j] += (double)data[i] * (double)data[i-j];
+
+ for(int i = MAX_LAG; i < (int)data_len; i++)
+ for(int j = 0; j < MAX_LAG; j++)
+ autoc[j] += (double)data[i] * (double)data[i-j];
diff --git a/src/libFLAC/include/private/cpu.h b/src/libFLAC/include/private/cpu.h
index 638c1b20..0a115135 100644
--- a/src/libFLAC/include/private/cpu.h
+++ b/src/libFLAC/include/private/cpu.h
@@ -63,6 +63,7 @@
/* SSE intrinsics support by ICC/MSVC/GCC */
#if defined __INTEL_COMPILER
#define FLAC__SSE_TARGET(x)
+ #define FLAC__FAST_MATH_TARGET(x)
#define FLAC__SSE_SUPPORTED 1
#define FLAC__SSE2_SUPPORTED 1
#if (__INTEL_COMPILER >= 1000) /* Intel C++ Compiler 10.0 */
@@ -80,6 +81,7 @@
#endif
#elif defined __clang__ && __has_attribute(__target__) /* clang */
#define FLAC__SSE_TARGET(x) __attribute__ ((__target__ (x)))
+ #define FLAC__FAST_MATH_TARGET(x) __attribute__ ((__target__ (x), optimize("-ffast-math")))
#if __has_builtin(__builtin_ia32_maxps)
#define FLAC__SSE_SUPPORTED 1
#endif
@@ -105,6 +107,7 @@
#endif
#elif defined __GNUC__ && !defined __clang__ && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9)) /* GCC 4.9+ */
#define FLAC__SSE_TARGET(x) __attribute__ ((__target__ (x)))
+ #define FLAC__FAST_MATH_TARGET(x) __attribute__ ((__target__ (x), optimize("-ffast-math")))
#define FLAC__SSE_SUPPORTED 1
#define FLAC__SSE2_SUPPORTED 1
#define FLAC__SSSE3_SUPPORTED 1
@@ -116,6 +119,7 @@
#endif
#elif defined _MSC_VER
#define FLAC__SSE_TARGET(x)
+ #define FLAC__FAST_MATH_TARGET(x)
#define FLAC__SSE_SUPPORTED 1
#define FLAC__SSE2_SUPPORTED 1
#if (_MSC_VER >= 1500) /* MS Visual Studio 2008 */
@@ -133,6 +137,7 @@
#endif
#else
#define FLAC__SSE_TARGET(x)
+ #define FLAC__FAST_MATH_TARGET(x)
#ifdef __SSE__
#define FLAC__SSE_SUPPORTED 1
#endif
diff --git a/src/libFLAC/include/private/lpc.h b/src/libFLAC/include/private/lpc.h
index 99f62de8..40971356 100644
--- a/src/libFLAC/include/private/lpc.h
+++ b/src/libFLAC/include/private/lpc.h
@@ -78,6 +78,13 @@ void FLAC__lpc_compute_autocorrelation_intrin_sse2_lag_10(const FLAC__real data[
void FLAC__lpc_compute_autocorrelation_intrin_sse2_lag_14(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[]);
# endif
# endif
+# if defined FLAC__CPU_X86_64 && FLAC__HAS_X86INTRIN
+# ifdef FLAC__FMA_SUPPORTED
+void FLAC__lpc_compute_autocorrelation_intrin_fma_lag_8(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[]);
+void FLAC__lpc_compute_autocorrelation_intrin_fma_lag_12(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[]);
+void FLAC__lpc_compute_autocorrelation_intrin_fma_lag_16(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[]);
+# endif
+# endif
#if defined(FLAC__CPU_PPC64) && defined(FLAC__USE_VSX)
#ifdef FLAC__HAS_TARGET_POWER9
void FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_8(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[]);
diff --git a/src/libFLAC/lpc_intrin_fma.c b/src/libFLAC/lpc_intrin_fma.c
new file mode 100644
index 00000000..ad125c8a
--- /dev/null
+++ b/src/libFLAC/lpc_intrin_fma.c
@@ -0,0 +1,73 @@
+/* libFLAC - Free Lossless Audio Codec library
+ * Copyright (C) 2022 Xiph.Org Foundation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * - Neither the name of the Xiph.org Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include "private/cpu.h"
+
+#ifndef FLAC__INTEGER_ONLY_LIBRARY
+#ifndef FLAC__NO_ASM
+#if defined FLAC__CPU_X86_64 && FLAC__HAS_X86INTRIN
+#include "private/lpc.h"
+#ifdef FLAC__FMA_SUPPORTED
+
+#include "FLAC/assert.h"
+
+FLAC__FAST_MATH_TARGET("fma")
+void FLAC__lpc_compute_autocorrelation_intrin_fma_lag_8(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[])
+{
+#undef MAX_LAG
+#define MAX_LAG 8
+#include "deduplication/lpc_compute_autocorrelation_intrin.c"
+}
+
+FLAC__FAST_MATH_TARGET("fma")
+void FLAC__lpc_compute_autocorrelation_intrin_fma_lag_12(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[])
+{
+#undef MAX_LAG
+#define MAX_LAG 12
+#include "deduplication/lpc_compute_autocorrelation_intrin.c"
+}
+FLAC__FAST_MATH_TARGET("fma")
+void FLAC__lpc_compute_autocorrelation_intrin_fma_lag_16(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[])
+{
+#undef MAX_LAG
+#define MAX_LAG 16
+#include "deduplication/lpc_compute_autocorrelation_intrin.c"
+
+}
+
+#endif /* FLAC__FMA_SUPPORTED */
+#endif /* FLAC__CPU_X86_64 && FLAC__HAS_X86INTRIN */
+#endif /* FLAC__NO_ASM */
+#endif /* FLAC__INTEGER_ONLY_LIBRARY */
diff --git a/src/libFLAC/stream_encoder.c b/src/libFLAC/stream_encoder.c
index 34222174..1b4f9668 100644
--- a/src/libFLAC/stream_encoder.c
+++ b/src/libFLAC/stream_encoder.c
@@ -368,6 +368,7 @@ typedef struct FLAC__StreamEncoderPrivate {
FLAC__bool disable_ssse3;
FLAC__bool disable_sse41;
FLAC__bool disable_avx2;
+ FLAC__bool disable_fma;
FLAC__bool disable_constant_subframes;
FLAC__bool disable_fixed_subframes;
FLAC__bool disable_verbatim_subframes;
@@ -885,6 +886,8 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
encoder->private_->cpuinfo.x86.sse41 = false;
if(encoder->private_->disable_avx2)
encoder->private_->cpuinfo.x86.avx2 = false;
+ if(encoder->private_->disable_fma)
+ encoder->private_->cpuinfo.x86.fma = false;
/* first default to the non-asm routines */
#ifndef FLAC__INTEGER_ONLY_LIBRARY
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation;
@@ -1005,14 +1008,16 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
FLAC__ASSERT(encoder->private_->cpuinfo.type == FLAC__CPUINFO_TYPE_X86_64);
# if FLAC__HAS_X86INTRIN
# ifdef FLAC__SSE2_SUPPORTED
- if(encoder->protected_->max_lpc_order < 8)
- encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse2_lag_8;
- else if(encoder->protected_->max_lpc_order < 10)
- encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse2_lag_10;
- else if(encoder->protected_->max_lpc_order < 14)
- encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse2_lag_14;
+ if(encoder->private_->cpuinfo.x86.sse2) { /* For fuzzing */
+ if(encoder->protected_->max_lpc_order < 8)
+ encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse2_lag_8;
+ else if(encoder->protected_->max_lpc_order < 10)
+ encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse2_lag_10;
+ else if(encoder->protected_->max_lpc_order < 14)
+ encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse2_lag_14;
- encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2;
+ encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2;
+ }
# endif
# ifdef FLAC__SSE4_1_SUPPORTED
if(encoder->private_->cpuinfo.x86.sse41) {
@@ -1026,10 +1031,23 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_64bit = FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2;
}
# endif
+# ifdef FLAC__FMA_SUPPORTED
+ if(encoder->private_->cpuinfo.x86.fma) {
+ if(encoder->protected_->max_lpc_order < 8)
+ encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_fma_lag_8;
+ else if(encoder->protected_->max_lpc_order < 12)
+ encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_fma_lag_12;
+ else if(encoder->protected_->max_lpc_order < 16)
+ encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_fma_lag_16;
+ }
+# endif
+
# ifdef FLAC__SSE2_SUPPORTED
- encoder->private_->local_fixed_compute_best_predictor = FLAC__fixed_compute_best_predictor_intrin_sse2;
- encoder->private_->local_fixed_compute_best_predictor_wide = FLAC__fixed_compute_best_predictor_wide_intrin_sse2;
+ if(encoder->private_->cpuinfo.x86.sse2) { /* For fuzzing */
+ encoder->private_->local_fixed_compute_best_predictor = FLAC__fixed_compute_best_predictor_intrin_sse2;
+ encoder->private_->local_fixed_compute_best_predictor_wide = FLAC__fixed_compute_best_predictor_wide_intrin_sse2;
+ }
# endif
# ifdef FLAC__SSSE3_SUPPORTED
if (encoder->private_->cpuinfo.x86.ssse3) {
@@ -1957,6 +1975,7 @@ FLAC_API FLAC__bool FLAC__stream_encoder_disable_instruction_set(FLAC__StreamEnc
encoder->private_->disable_ssse3 = value & 4;
encoder->private_->disable_sse41 = value & 8;
encoder->private_->disable_avx2 = value & 16;
+ encoder->private_->disable_fma = value & 32;
return true;
}