sbc: New SBC analysis filter function to replace current broken code

This code is heavily based on the patch submitted by Jaska Uimonen. Additional changes include preserving extra bits in the output of filter function for better precision, support for both 16-bit and 32-bit fixed point implementation. Sign of some table values was changed in order to preserve a regular code structure and have multiply-accumulate oparations only. No additional optimizations were applied as this code is intended to be some kind of "reference" implementation. Platform specific optimizations may require different tricks and can be branched off from this implementation. Some extra information about this code can be found in linux-bluetooth mailing list archive for December 2008.
author: Siarhei Siamashka <siarhei.siamashka@nokia.com> 2008-12-28 03:22:59 +0200
committer: Marcel Holtmann <marcel@holtmann.org> 2012-07-29 19:48:27 -0700
commit: b6f0c4c40d02e09e37027c053052163bb7c8c90e (patch)
tree: 7eaa6264867c2f5ab3ae088efdb877db903a7e38 /sbc/sbc.c
parent: 396c35da7701b85e78fd907da32f8d17c691a05c (diff)
download: sbc-b6f0c4c40d02e09e37027c053052163bb7c8c90e.tar.gz
1 files changed, 108 insertions, 198 deletions
diff --git a/sbc/sbc.c b/sbc/sbc.c
index d3dcd9a..ce52e1e 100644
--- a/sbc/sbc.c
+++ b/sbc/sbc.c
@@ -40,6 +40,7 @@
 #include <string.h>
 #include <stdlib.h>
 #include <sys/types.h>
+#include <limits.h>
 
 #include "sbc_math.h"
 #include "sbc_tables.h"
@@ -93,7 +94,7 @@ struct sbc_decoder_state {
 struct sbc_encoder_state {
 	int subbands;
 	int position[2];
-	int32_t X[2][160];
+	int16_t X[2][160];
 };
 
 /*
@@ -656,75 +657,47 @@ static void sbc_encoder_init(struct sbc_encoder_state *state,
 	state->position[0] = state->position[1] = 9 * frame->subbands;
 }
 
-static inline void _sbc_analyze_four(const int32_t *in, int32_t *out)
+static inline void _sbc_analyze_four(const int16_t *in, int32_t *out)
 {
-	sbc_fixed_t t[8], s[5];
-
-	t[0] = SCALE4_STAGE1( /* Q8 */
-		MULA(_sbc_proto_4[0], in[8] - in[32], /* Q18 */
-		MUL( _sbc_proto_4[1], in[16] - in[24])));
-
-	t[1] = SCALE4_STAGE1(
-		MULA(_sbc_proto_4[2], in[1],
-		MULA(_sbc_proto_4[3], in[9],
-		MULA(_sbc_proto_4[4], in[17],
-		MULA(_sbc_proto_4[5], in[25],
-		MUL( _sbc_proto_4[6], in[33]))))));
-
-	t[2] = SCALE4_STAGE1(
-		MULA(_sbc_proto_4[7], in[2],
-		MULA(_sbc_proto_4[8], in[10],
-		MULA(_sbc_proto_4[9], in[18],
-		MULA(_sbc_proto_4[10], in[26],
-		MUL( _sbc_proto_4[11], in[34]))))));
-
-	t[3] = SCALE4_STAGE1(
-		MULA(_sbc_proto_4[12], in[3],
-		MULA(_sbc_proto_4[13], in[11],
-		MULA(_sbc_proto_4[14], in[19],
-		MULA(_sbc_proto_4[15], in[27],
-		MUL( _sbc_proto_4[16], in[35]))))));
-
-	t[4] = SCALE4_STAGE1(
-		MULA(_sbc_proto_4[17], in[4] + in[36],
-		MULA(_sbc_proto_4[18], in[12] + in[28],
-		MUL( _sbc_proto_4[19], in[20]))));
-
-	t[5] = SCALE4_STAGE1(
-		MULA(_sbc_proto_4[16], in[5],
-		MULA(_sbc_proto_4[15], in[13],
-		MULA(_sbc_proto_4[14], in[21],
-		MULA(_sbc_proto_4[13], in[29],
-		MUL( _sbc_proto_4[12], in[37]))))));
-
-	/* don't compute t[6]... this term always multiplies
-	 * with cos(pi/2) = 0 */
-
-	t[7] = SCALE4_STAGE1(
-		MULA(_sbc_proto_4[6], in[7],
-		MULA(_sbc_proto_4[5], in[15],
-		MULA(_sbc_proto_4[4], in[23],
-		MULA(_sbc_proto_4[3], in[31],
-		MUL( _sbc_proto_4[2], in[39]))))));
-
-	s[0] = MUL( _anamatrix4[0], t[0] + t[4]);
-	s[1] = MUL( _anamatrix4[2], t[2]);
-	s[2] = MULA(_anamatrix4[1], t[1] + t[3],
-		MUL(_anamatrix4[3], t[5]));
-	s[3] = MULA(_anamatrix4[3], t[1] + t[3],
-		MUL(_anamatrix4[1], -t[5] + t[7]));
-	s[4] = MUL( _anamatrix4[3], t[7]);
-
-	out[0] = SCALE4_STAGE2( s[0] + s[1] + s[2] + s[4]); /* Q0 */
-	out[1] = SCALE4_STAGE2(-s[0] + s[1] + s[3]);
-	out[2] = SCALE4_STAGE2(-s[0] + s[1] - s[3]);
-	out[3] = SCALE4_STAGE2( s[0] + s[1] - s[2] - s[4]);
+	FIXED_A t1[4];
+	FIXED_T t2[4];
+	int i = 0, hop = 0;
+
+	/* rounding coefficient */
+	t1[0] = t1[1] = t1[2] = t1[3] =
+		(FIXED_A) 1 << (SBC_PROTO_FIXED4_SCALE - 1);
+
+	/* low pass polyphase filter */
+	for (hop = 0; hop < 40; hop += 8) {
+		t1[0] += (FIXED_A) in[hop] * _sbc_proto_fixed4[hop];
+		t1[1] += (FIXED_A) in[hop + 1] * _sbc_proto_fixed4[hop + 1];
+		t1[2] += (FIXED_A) in[hop + 2] * _sbc_proto_fixed4[hop + 2];
+		t1[1] += (FIXED_A) in[hop + 3] * _sbc_proto_fixed4[hop + 3];
+		t1[0] += (FIXED_A) in[hop + 4] * _sbc_proto_fixed4[hop + 4];
+		t1[3] += (FIXED_A) in[hop + 5] * _sbc_proto_fixed4[hop + 5];
+		t1[3] += (FIXED_A) in[hop + 7] * _sbc_proto_fixed4[hop + 7];
+	}
+
+	/* scaling */
+	t2[0] = t1[0] >> SBC_PROTO_FIXED4_SCALE;
+	t2[1] = t1[1] >> SBC_PROTO_FIXED4_SCALE;
+	t2[2] = t1[2] >> SBC_PROTO_FIXED4_SCALE;
+	t2[3] = t1[3] >> SBC_PROTO_FIXED4_SCALE;
+
+	/* do the cos transform */
+	for (i = 0, hop = 0; i < 4; hop += 8, i++) {
+		out[i] = ((FIXED_A) t2[0] * cos_table_fixed_4[0 + hop] +
+			  (FIXED_A) t2[1] * cos_table_fixed_4[1 + hop] +
+			  (FIXED_A) t2[2] * cos_table_fixed_4[2 + hop] +
+			  (FIXED_A) t2[3] * cos_table_fixed_4[5 + hop]) >>
+			(SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS);
+	}
 }
 
 static inline void sbc_analyze_four(struct sbc_encoder_state *state,
 				struct sbc_frame *frame, int ch, int blk)
 {
-	int32_t *x = &state->X[ch][state->position[ch]];
+	int16_t *x = &state->X[ch][state->position[ch]];
 	int16_t *pcm = &frame->pcm_sample[ch][blk * 4];
 
 	/* Input 4 Audio Samples */
@@ -740,133 +713,64 @@ static inline void sbc_analyze_four(struct sbc_encoder_state *state,
 		state->position[ch] = 36;
 }
 
-static inline void _sbc_analyze_eight(const int32_t *in, int32_t *out)
+static inline void _sbc_analyze_eight(const int16_t *in, int32_t *out)
 {
-	sbc_fixed_t t[8], s[8];
-
-	t[0] = SCALE8_STAGE1( /* Q10 */
-		MULA(_sbc_proto_8[0], (in[16] - in[64]), /* Q18 = Q18 * Q0 */
-		MULA(_sbc_proto_8[1], (in[32] - in[48]),
-		MULA(_sbc_proto_8[2], in[4],
-		MULA(_sbc_proto_8[3], in[20],
-		MULA(_sbc_proto_8[4], in[36],
-		MUL( _sbc_proto_8[5], in[52])))))));
-
-	t[1] = SCALE8_STAGE1(
-		MULA(_sbc_proto_8[6], in[2],
-		MULA(_sbc_proto_8[7], in[18],
-		MULA(_sbc_proto_8[8], in[34],
-		MULA(_sbc_proto_8[9], in[50],
-		MUL(_sbc_proto_8[10], in[66]))))));
-
-	t[2] = SCALE8_STAGE1(
-		MULA(_sbc_proto_8[11], in[1],
-		MULA(_sbc_proto_8[12], in[17],
-		MULA(_sbc_proto_8[13], in[33],
-		MULA(_sbc_proto_8[14], in[49],
-		MULA(_sbc_proto_8[15], in[65],
-		MULA(_sbc_proto_8[16], in[3],
-		MULA(_sbc_proto_8[17], in[19],
-		MULA(_sbc_proto_8[18], in[35],
-		MULA(_sbc_proto_8[19], in[51],
-		MUL( _sbc_proto_8[20], in[67])))))))))));
-
-	t[3] = SCALE8_STAGE1(
-		MULA( _sbc_proto_8[21], in[5],
-		MULA( _sbc_proto_8[22], in[21],
-		MULA( _sbc_proto_8[23], in[37],
-		MULA( _sbc_proto_8[24], in[53],
-		MULA( _sbc_proto_8[25], in[69],
-		MULA(-_sbc_proto_8[15], in[15],
-		MULA(-_sbc_proto_8[14], in[31],
-		MULA(-_sbc_proto_8[13], in[47],
-		MULA(-_sbc_proto_8[12], in[63],
-		MUL( -_sbc_proto_8[11], in[79])))))))))));
-
-	t[4] = SCALE8_STAGE1(
-		MULA( _sbc_proto_8[26], in[6],
-		MULA( _sbc_proto_8[27], in[22],
-		MULA( _sbc_proto_8[28], in[38],
-		MULA( _sbc_proto_8[29], in[54],
-		MULA( _sbc_proto_8[30], in[70],
-		MULA(-_sbc_proto_8[10], in[14],
-		MULA(-_sbc_proto_8[9], in[30],
-		MULA(-_sbc_proto_8[8], in[46],
-		MULA(-_sbc_proto_8[7], in[62],
-		MUL( -_sbc_proto_8[6], in[78])))))))))));
-
-	t[5] = SCALE8_STAGE1(
-		MULA( _sbc_proto_8[31], in[7],
-		MULA( _sbc_proto_8[32], in[23],
-		MULA( _sbc_proto_8[33], in[39],
-		MULA( _sbc_proto_8[34], in[55],
-		MULA( _sbc_proto_8[35], in[71],
-		MULA(-_sbc_proto_8[20], in[13],
-		MULA(-_sbc_proto_8[19], in[29],
-		MULA(-_sbc_proto_8[18], in[45],
-		MULA(-_sbc_proto_8[17], in[61],
-		MUL( -_sbc_proto_8[16], in[77])))))))))));
-
-	t[6] = SCALE8_STAGE1(
-		MULA( _sbc_proto_8[36], (in[8] + in[72]),
-		MULA( _sbc_proto_8[37], (in[24] + in[56]),
-		MULA( _sbc_proto_8[38], in[40],
-		MULA(-_sbc_proto_8[39], in[12],
-		MULA(-_sbc_proto_8[5], in[28],
-		MULA(-_sbc_proto_8[4], in[44],
-		MULA(-_sbc_proto_8[3], in[60],
-		MUL( -_sbc_proto_8[2], in[76])))))))));
-
-	t[7] = SCALE8_STAGE1(
-		MULA( _sbc_proto_8[35], in[9],
-		MULA( _sbc_proto_8[34], in[25],
-		MULA( _sbc_proto_8[33], in[41],
-		MULA( _sbc_proto_8[32], in[57],
-		MULA( _sbc_proto_8[31], in[73],
-		MULA(-_sbc_proto_8[25], in[11],
-		MULA(-_sbc_proto_8[24], in[27],
-		MULA(-_sbc_proto_8[23], in[43],
-		MULA(-_sbc_proto_8[22], in[59],
-		MUL( -_sbc_proto_8[21], in[75])))))))))));
-
-	s[0] = MULA(  _anamatrix8[0], t[0],
-		MUL(  _anamatrix8[1], t[6]));
-	s[1] = MUL(   _anamatrix8[7], t[1]);
-	s[2] = MULA(  _anamatrix8[2], t[2],
-		MULA( _anamatrix8[3], t[3],
-		MULA( _anamatrix8[4], t[5],
-		MUL(  _anamatrix8[5], t[7]))));
-	s[3] = MUL(   _anamatrix8[6], t[4]);
-	s[4] = MULA(  _anamatrix8[3], t[2],
-		MULA(-_anamatrix8[5], t[3],
-		MULA(-_anamatrix8[2], t[5],
-		MUL( -_anamatrix8[4], t[7]))));
-	s[5] = MULA(  _anamatrix8[4], t[2],
-		MULA(-_anamatrix8[2], t[3],
-		MULA( _anamatrix8[5], t[5],
-		MUL(  _anamatrix8[3], t[7]))));
-	s[6] = MULA(  _anamatrix8[1], t[0],
-		MUL( -_anamatrix8[0], t[6]));
-	s[7] = MULA(  _anamatrix8[5], t[2],
-		MULA(-_anamatrix8[4], t[3],
-		MULA( _anamatrix8[3], t[5],
-		MUL( -_anamatrix8[2], t[7]))));
-
-	out[0] = SCALE8_STAGE2( s[0] + s[1] + s[2] + s[3]);
-	out[1] = SCALE8_STAGE2( s[1] - s[3] + s[4] + s[6]);
-	out[2] = SCALE8_STAGE2( s[1] - s[3] + s[5] - s[6]);
-	out[3] = SCALE8_STAGE2(-s[0] + s[1] + s[3] + s[7]);
-	out[4] = SCALE8_STAGE2(-s[0] + s[1] + s[3] - s[7]);
-	out[5] = SCALE8_STAGE2( s[1] - s[3] - s[5] - s[6]);
-	out[6] = SCALE8_STAGE2( s[1] - s[3] - s[4] + s[6]);
-	out[7] = SCALE8_STAGE2( s[0] + s[1] - s[2] + s[3]);
+	FIXED_A t1[8];
+	FIXED_T t2[8];
+	int i, hop;
+
+	/* rounding coefficient */
+	t1[0] = t1[1] = t1[2] = t1[3] = t1[4] = t1[5] = t1[6] = t1[7] =
+		(FIXED_A) 1 << (SBC_PROTO_FIXED8_SCALE-1);
+
+	/* low pass polyphase filter */
+	for (hop = 0; hop < 80; hop += 16) {
+		t1[0] += (FIXED_A) in[hop] * _sbc_proto_fixed8[hop];
+		t1[1] += (FIXED_A) in[hop + 1] * _sbc_proto_fixed8[hop + 1];
+		t1[2] += (FIXED_A) in[hop + 2] * _sbc_proto_fixed8[hop + 2];
+		t1[3] += (FIXED_A) in[hop + 3] * _sbc_proto_fixed8[hop + 3];
+		t1[4] += (FIXED_A) in[hop + 4] * _sbc_proto_fixed8[hop + 4];
+		t1[3] += (FIXED_A) in[hop + 5] * _sbc_proto_fixed8[hop + 5];
+		t1[2] += (FIXED_A) in[hop + 6] * _sbc_proto_fixed8[hop + 6];
+		t1[1] += (FIXED_A) in[hop + 7] * _sbc_proto_fixed8[hop + 7];
+		t1[0] += (FIXED_A) in[hop + 8] * _sbc_proto_fixed8[hop + 8];
+		t1[5] += (FIXED_A) in[hop + 9] * _sbc_proto_fixed8[hop + 9];
+		t1[6] += (FIXED_A) in[hop + 10] * _sbc_proto_fixed8[hop + 10];
+		t1[7] += (FIXED_A) in[hop + 11] * _sbc_proto_fixed8[hop + 11];
+		t1[7] += (FIXED_A) in[hop + 13] * _sbc_proto_fixed8[hop + 13];
+		t1[6] += (FIXED_A) in[hop + 14] * _sbc_proto_fixed8[hop + 14];
+		t1[5] += (FIXED_A) in[hop + 15] * _sbc_proto_fixed8[hop + 15];
+	}
+
+	/* scaling */
+	t2[0] = t1[0] >> SBC_PROTO_FIXED8_SCALE;
+	t2[1] = t1[1] >> SBC_PROTO_FIXED8_SCALE;
+	t2[2] = t1[2] >> SBC_PROTO_FIXED8_SCALE;
+	t2[3] = t1[3] >> SBC_PROTO_FIXED8_SCALE;
+	t2[4] = t1[4] >> SBC_PROTO_FIXED8_SCALE;
+	t2[5] = t1[5] >> SBC_PROTO_FIXED8_SCALE;
+	t2[6] = t1[6] >> SBC_PROTO_FIXED8_SCALE;
+	t2[7] = t1[7] >> SBC_PROTO_FIXED8_SCALE;
+
+	/* do the cos transform */
+	for (i = 0, hop = 0; i < 8; hop += 16, i++) {
+		out[i] = ((FIXED_A) t2[0] * cos_table_fixed_8[0 + hop] +
+			  (FIXED_A) t2[1] * cos_table_fixed_8[1 + hop] +
+			  (FIXED_A) t2[2] * cos_table_fixed_8[2 + hop] +
+			  (FIXED_A) t2[3] * cos_table_fixed_8[3 + hop] +
+			  (FIXED_A) t2[4] * cos_table_fixed_8[4 + hop] +
+			  (FIXED_A) t2[5] * cos_table_fixed_8[9 + hop] +
+			  (FIXED_A) t2[6] * cos_table_fixed_8[10 + hop] +
+			  (FIXED_A) t2[7] * cos_table_fixed_8[11 + hop]) >>
+			(SBC_COS_TABLE_FIXED8_SCALE - SCALE_OUT_BITS);
+	}
 }
 
 static inline void sbc_analyze_eight(struct sbc_encoder_state *state,
 					struct sbc_frame *frame, int ch,
 					int blk)
 {
-	int32_t *x = &state->X[ch][state->position[ch]];
+	int16_t *x = &state->X[ch][state->position[ch]];
 	int16_t *pcm = &frame->pcm_sample[ch][blk * 8];
 
 	/* Input 8 Audio Samples */
@@ -1004,7 +908,7 @@ static int sbc_pack_frame(uint8_t *data, struct sbc_frame *frame, size_t len)
 	for (ch = 0; ch < frame->channels; ch++) {
 		for (sb = 0; sb < frame->subbands; sb++) {
 			frame->scale_factor[ch][sb] = 0;
-			scalefactor[ch][sb] = 2;
+			scalefactor[ch][sb] = 2 << SCALE_OUT_BITS;
 			for (blk = 0; blk < frame->blocks; blk++) {
 				while (scalefactor[ch][sb] < fabs(frame->sb_sample_f[blk][ch][sb])) {
 					frame->scale_factor[ch][sb]++;
@@ -1026,18 +930,18 @@ static int sbc_pack_frame(uint8_t *data, struct sbc_frame *frame, size_t len)
 
 		for (sb = 0; sb < frame->subbands - 1; sb++) {
 			scale_factor_j[0] = 0;
-			scalefactor_j[0] = 2;
+			scalefactor_j[0] = 2 << SCALE_OUT_BITS;
 			scale_factor_j[1] = 0;
-			scalefactor_j[1] = 2;
+			scalefactor_j[1] = 2 << SCALE_OUT_BITS;
 
 			for (blk = 0; blk < frame->blocks; blk++) {
 				/* Calculate joint stereo signal */
 				sb_sample_j[blk][0] =
-					(frame->sb_sample_f[blk][0][sb] +
-						frame->sb_sample_f[blk][1][sb]) >> 1;
+					ASR(frame->sb_sample_f[blk][0][sb], 1) +
+					ASR(frame->sb_sample_f[blk][1][sb], 1);
 				sb_sample_j[blk][1] =
-					(frame->sb_sample_f[blk][0][sb] -
-						frame->sb_sample_f[blk][1][sb]) >> 1;
+					ASR(frame->sb_sample_f[blk][0][sb], 1) -
+					ASR(frame->sb_sample_f[blk][1][sb], 1);
 
 				/* calculate scale_factor_j and scalefactor_j for joint case */
 				while (scalefactor_j[0] < fabs(sb_sample_j[blk][0])) {
@@ -1099,13 +1003,19 @@ static int sbc_pack_frame(uint8_t *data, struct sbc_frame *frame, size_t len)
 	for (blk = 0; blk < frame->blocks; blk++) {
 		for (ch = 0; ch < frame->channels; ch++) {
 			for (sb = 0; sb < frame->subbands; sb++) {
-				if (levels[ch][sb] > 0) {
-					audio_sample =
-						(uint16_t) (((((int64_t)frame->sb_sample_f[blk][ch][sb]*levels[ch][sb]) >>
-									(frame->scale_factor[ch][sb] + 1)) +
-								levels[ch][sb]) >> 1);
-					PUT_BITS(audio_sample & levels[ch][sb], bits[ch][sb]);
-				}
+
+				if (bits[ch][sb] == 0)
+					continue;
+
+				audio_sample = ((uint64_t) levels[ch][sb] *
+					(((uint32_t) 1 <<
+					(frame->scale_factor[ch][sb] +
+					SCALE_OUT_BITS + 1)) +
+					frame->sb_sample_f[blk][ch][sb])) >>
+						(frame->scale_factor[ch][sb] +
+						SCALE_OUT_BITS + 2);
+
+				PUT_BITS(audio_sample, bits[ch][sb]);
 			}
 		}
 	}
author	Siarhei Siamashka <siarhei.siamashka@nokia.com>	2008-12-28 03:22:59 +0200
committer	Marcel Holtmann <marcel@holtmann.org>	2012-07-29 19:48:27 -0700
commit	b6f0c4c40d02e09e37027c053052163bb7c8c90e (patch)
tree	7eaa6264867c2f5ab3ae088efdb877db903a7e38 /sbc/sbc.c
parent	396c35da7701b85e78fd907da32f8d17c691a05c (diff)
download	sbc-b6f0c4c40d02e09e37027c053052163bb7c8c90e.tar.gz