summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Miroslav Lichvar <mlichvar@redhat.com> 2014-06-19 13:04:33 +0200
committer: Erik de Castro Lopo <erikd@mega-nerd.com> 2014-07-04 21:22:44 +1000
commit: f081524c19eeafd08f4db6ee5d52a9634c60f475 (patch)
tree: f5c8bd631a69072019e127d576ca7a8b2d38b105 /src
parent: 71246dcc8146ea0b8152e18d2627e6b6b0f56273 (diff)
download: flac-f081524c19eeafd08f4db6ee5d52a9634c60f475.tar.gz
stream_encoder : Improve selection of residual accumulator width
In the precompute_partition_info_sums_ function, instead of selecting the 64-bit accumulator whenever the signal bps is larger than 16, revert to the original approach based on partition size, but leave room for a few extra bits so that the 32-bit accumulator does not overflow with unusual signals where the average residual magnitude may be larger than bps. This slightly improves performance with standard encoding levels and 16-bit files, because the 17-bit side channel can still be processed with the 32-bit accumulator, and it correctly selects the 64-bit accumulator for very large 16-bit partitions. This is related to commits 6f7ec60c and 187e596e. Signed-off-by: Erik de Castro Lopo <erikd@mega-nerd.com>
Diffstat (limited to 'src')
-rw-r--r-- src/libFLAC/include/private/stream_encoder.h 6
-rw-r--r-- src/libFLAC/stream_encoder.c 14
-rw-r--r-- src/libFLAC/stream_encoder_intrin_sse2.c 3
-rw-r--r-- src/libFLAC/stream_encoder_intrin_ssse3.c 3
4 files changed, 16 insertions, 10 deletions
diff --git a/src/libFLAC/include/private/stream_encoder.h b/src/libFLAC/include/private/stream_encoder.h
index d26039ac..8147f9ed 100644
--- a/src/libFLAC/include/private/stream_encoder.h
+++ b/src/libFLAC/include/private/stream_encoder.h
@@ -37,6 +37,12 @@
#include <config.h>
#endif
+/*
+ * This is used to avoid overflow with unusual signals in 32-bit
+ * accumulator in the *precompute_partition_info_sums_* functions.
+ */
+#define FLAC__MAX_EXTRA_RESIDUAL_BPS 4
+
#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
#include "private/cpu.h"
#include "FLAC/format.h"
diff --git a/src/libFLAC/stream_encoder.c b/src/libFLAC/stream_encoder.c
index e64ece27..8928a397 100644
--- a/src/libFLAC/stream_encoder.c
+++ b/src/libFLAC/stream_encoder.c
@@ -3872,10 +3872,9 @@ void precompute_partition_info_sums_(
FLAC__ASSERT(default_partition_samples > predictor_order);
#if defined(FLAC__CPU_IA32) && !defined FLAC__NO_ASM && defined FLAC__HAS_NASM && 0
- /* WATCHOUT: "+ bps" is an assumption that the average residual magnitude will not be more than "bps" bits */
- /* previously the condition was: if(FLAC__bitmath_ilog2(default_partition_samples) + bps < 32) */
- /* see http://git.xiph.org/?p=flac.git;a=commit;h=6f7ec60c7e7f05f5ab0b1cf6b7b0945e44afcd4b */
- if(bps <= 16) {
+ /* WATCHOUT: "+ bps + FLAC__MAX_EXTRA_RESIDUAL_BPS" is the maximum
+ * assumed size of the average residual magnitude */
+ if(FLAC__bitmath_ilog2(default_partition_samples) + bps + FLAC__MAX_EXTRA_RESIDUAL_BPS < 32) {
FLAC__precompute_partition_info_sums_32bit_asm_ia32_(residual, abs_residual_partition_sums, residual_samples + predictor_order, predictor_order, min_partition_order, max_partition_order);
return;
}
@@ -3884,10 +3883,9 @@ void precompute_partition_info_sums_(
/* first do max_partition_order */
{
unsigned partition, residual_sample, end = (unsigned)(-(int)predictor_order);
- /* WATCHOUT: "+ bps" is an assumption that the average residual magnitude will not be more than "bps" bits */
- /* previously the condition was: if(FLAC__bitmath_ilog2(default_partition_samples) + bps < 32) */
- /* see http://git.xiph.org/?p=flac.git;a=commit;h=6f7ec60c7e7f05f5ab0b1cf6b7b0945e44afcd4b */
- if(bps <= 16) {
+ /* WATCHOUT: "+ bps + FLAC__MAX_EXTRA_RESIDUAL_BPS" is the maximum
+ * assumed size of the average residual magnitude */
+ if(FLAC__bitmath_ilog2(default_partition_samples) + bps + FLAC__MAX_EXTRA_RESIDUAL_BPS < 32) {
FLAC__uint32 abs_residual_partition_sum;
for(partition = residual_sample = 0; partition < partitions; partition++) {
diff --git a/src/libFLAC/stream_encoder_intrin_sse2.c b/src/libFLAC/stream_encoder_intrin_sse2.c
index bef55453..4e9d5dbf 100644
--- a/src/libFLAC/stream_encoder_intrin_sse2.c
+++ b/src/libFLAC/stream_encoder_intrin_sse2.c
@@ -37,6 +37,7 @@
#ifndef FLAC__NO_ASM
#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
#include "private/stream_encoder.h"
+#include "private/bitmath.h"
#ifdef FLAC__SSE2_SUPPORTED
#include <stdlib.h> /* for abs() */
@@ -58,7 +59,7 @@ void FLAC__precompute_partition_info_sums_intrin_sse2(const FLAC__int32 residual
unsigned e1, e3;
__m128i mm_res, mm_sum, mm_mask;
- if(bps <= 16) {
+ if(FLAC__bitmath_ilog2(default_partition_samples) + bps + FLAC__MAX_EXTRA_RESIDUAL_BPS < 32) {
for(partition = residual_sample = 0; partition < partitions; partition++) {
end += default_partition_samples;
mm_sum = _mm_setzero_si128();
diff --git a/src/libFLAC/stream_encoder_intrin_ssse3.c b/src/libFLAC/stream_encoder_intrin_ssse3.c
index 95b5f623..669536ac 100644
--- a/src/libFLAC/stream_encoder_intrin_ssse3.c
+++ b/src/libFLAC/stream_encoder_intrin_ssse3.c
@@ -37,6 +37,7 @@
#ifndef FLAC__NO_ASM
#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
#include "private/stream_encoder.h"
+#include "private/bitmath.h"
#ifdef FLAC__SSSE3_SUPPORTED
#include <stdlib.h> /* for abs() */
@@ -58,7 +59,7 @@ void FLAC__precompute_partition_info_sums_intrin_ssse3(const FLAC__int32 residua
unsigned e1, e3;
__m128i mm_res, mm_sum;
- if(bps <= 16) {
+ if(FLAC__bitmath_ilog2(default_partition_samples) + bps + FLAC__MAX_EXTRA_RESIDUAL_BPS < 32) {
for(partition = residual_sample = 0; partition < partitions; partition++) {
end += default_partition_samples;
mm_sum = _mm_setzero_si128();