summary | refs | log | tree | commit | diff
path: root/mdct.c
diff options
context:
space:
mode:
author: Monty <xiphmont@xiph.org> 2002-09-20 00:10:31 +0000
committer: Monty <xiphmont@xiph.org> 2002-09-20 00:10:31 +0000
commit: 1dad9587e30c61a8458fdda12da6248bf130c691 (patch)
tree: 028314f1b8071251d480ea836eb1a14e12c9f1f5 /mdct.c
parent: 55b25a7aadfb989f50a32cc7d146910de8d6f6cb (diff)
download: tremor-1dad9587e30c61a8458fdda12da6248bf130c691.tar.gz
Latest improvements from Nicolas Pitre. Reviewed by Monty
From Nicolas's notes:
- Includes my previous patch with interpolation code for correct accuracy with all block sizes.
- Interlaces sin and cos values in the lookup table to reduce register pressure since only one pointer is required to walk the table instead of two. This also accounts for better cache locality.
- Split the lookup table into two tables since half of it (one value every two) is only used in a separate section of the code and only with large block sizes. Therefore the table size used for the common case is reduced by a factor of 2, accounting for yet better cache usage.
- Abstracted all cross products throughout the code so they can be easily optimized. First, this prevents redundant register reloads on ARM due to the implicit memory access ordering; next, this allowed for the opportunity to hook some inline assembly to perform the actual operation.
- Fix layout of current assembly in asm_arm.h to match GCC's output (more enjoyable to read when inspecting the final assembly) plus some constraint correctness issues.
- Added a memory barrier macro to force the compiler not to cache values into registers or on the stack in some cases.
- Reordered some code for better ARM assembly generation by the compiler.

git-svn-id: https://svn.xiph.org/trunk/Tremor@3923 0101bb08-14d6-0310-b084-bc0e0c8e3800
Diffstat (limited to 'mdct.c')
-rw-r--r-- mdct.c 598
1 files changed, 320 insertions, 278 deletions
diff --git a/mdct.c b/mdct.c
index 460b736..e3e7c75 100644
--- a/mdct.c
+++ b/mdct.c
@@ -13,7 +13,7 @@
function: normalized modified discrete cosine transform
power of two length transform only [64 <= n ]
- last mod: $Id: mdct.c,v 1.4 2002/09/13 16:37:56 xiphmont Exp $
+ last mod: $Id: mdct.c,v 1.5 2002/09/20 00:10:31 xiphmont Exp $
Original algorithm adapted long ago from _The use of multirate filter
banks for coding of high quality digital audio_, by T. Sporer,
@@ -42,60 +42,56 @@
#include "mdct_lookup.h"
#include "misc.h"
-/* 8 point butterfly (in place, 4 register) */
+
+/* 8 point butterfly (in place) */
STIN void mdct_butterfly_8(DATA_TYPE *x){
- REG_TYPE r0 = x[6] + x[2];
- REG_TYPE r1 = x[6] - x[2];
- REG_TYPE r2 = x[4] + x[0];
- REG_TYPE r3 = x[4] - x[0];
-
- x[6] = r0 + r2;
- x[4] = r0 - r2;
-
- r0 = x[5] - x[1];
- r2 = x[7] - x[3];
- x[0] = r1 + r0;
- x[2] = r1 - r0;
-
- r0 = x[5] + x[1];
- r1 = x[7] + x[3];
- x[3] = r2 + r3;
- x[1] = r2 - r3;
- x[7] = r1 + r0;
- x[5] = r1 - r0;
-
+
+ REG_TYPE r0 = x[4] + x[0];
+ REG_TYPE r1 = x[4] - x[0];
+ REG_TYPE r2 = x[5] + x[1];
+ REG_TYPE r3 = x[5] - x[1];
+ REG_TYPE r4 = x[6] + x[2];
+ REG_TYPE r5 = x[6] - x[2];
+ REG_TYPE r6 = x[7] + x[3];
+ REG_TYPE r7 = x[7] - x[3];
+
+ x[0] = r5 + r3;
+ x[1] = r7 - r1;
+ x[2] = r5 - r3;
+ x[3] = r7 + r1;
+ x[4] = r4 - r0;
+ x[5] = r6 - r2;
+ x[6] = r4 + r0;
+ x[7] = r6 + r2;
+ MB();
}
/* 16 point butterfly (in place, 4 register) */
STIN void mdct_butterfly_16(DATA_TYPE *x){
- REG_TYPE r0 = x[1] - x[9];
- REG_TYPE r1 = x[0] - x[8];
-
- x[8] += x[0];
- x[9] += x[1];
- x[0] = MULT31((r0 + r1) , cPI2_8);
- x[1] = MULT31((r0 - r1) , cPI2_8);
-
- r0 = x[3] - x[11];
- r1 = x[10] - x[2];
- x[10] += x[2];
- x[11] += x[3];
- x[2] = r0;
- x[3] = r1;
-
- r0 = x[12] - x[4];
- r1 = x[13] - x[5];
- x[12] += x[4];
- x[13] += x[5];
- x[4] = MULT31((r0 - r1) , cPI2_8);
- x[5] = MULT31((r0 + r1) , cPI2_8);
-
- r0 = x[14] - x[6];
- r1 = x[15] - x[7];
- x[14] += x[6];
- x[15] += x[7];
- x[6] = r0;
- x[7] = r1;
+
+ REG_TYPE r0, r1;
+
+ r0 = x[ 0] - x[ 8]; x[ 8] += x[ 0];
+ r1 = x[ 1] - x[ 9]; x[ 9] += x[ 1];
+ x[ 0] = MULT31((r0 + r1) , cPI2_8);
+ x[ 1] = MULT31((r1 - r0) , cPI2_8);
+ MB();
+
+ r0 = x[10] - x[ 2]; x[10] += x[ 2];
+ r1 = x[ 3] - x[11]; x[11] += x[ 3];
+ x[ 2] = r1; x[ 3] = r0;
+ MB();
+
+ r0 = x[12] - x[ 4]; x[12] += x[ 4];
+ r1 = x[13] - x[ 5]; x[13] += x[ 5];
+ x[ 4] = MULT31((r0 - r1) , cPI2_8);
+ x[ 5] = MULT31((r0 + r1) , cPI2_8);
+ MB();
+
+ r0 = x[14] - x[ 6]; x[14] += x[ 6];
+ r1 = x[15] - x[ 7]; x[15] += x[ 7];
+ x[ 6] = r0; x[ 7] = r1;
+ MB();
mdct_butterfly_8(x);
mdct_butterfly_8(x+8);
@@ -103,161 +99,140 @@ STIN void mdct_butterfly_16(DATA_TYPE *x){
/* 32 point butterfly (in place, 4 register) */
STIN void mdct_butterfly_32(DATA_TYPE *x){
- REG_TYPE r0 = x[30] - x[14];
- REG_TYPE r1 = x[31] - x[15];
-
- x[30] += x[14];
- x[31] += x[15];
- x[14] = r0;
- x[15] = r1;
-
- r0 = x[28] - x[12];
- r1 = x[29] - x[13];
- x[28] += x[12];
- x[29] += x[13];
- x[12] = MULT31( r0 , cPI1_8 ) - MULT31( r1 , cPI3_8 );
- x[13] = MULT31( r0 , cPI3_8 ) + MULT31( r1 , cPI1_8 );
-
- r0 = x[26] - x[10];
- r1 = x[27] - x[11];
- x[26] += x[10];
- x[27] += x[11];
- x[10] = MULT31(( r0 - r1 ) , cPI2_8);
- x[11] = MULT31(( r0 + r1 ) , cPI2_8);
-
- r0 = x[24] - x[8];
- r1 = x[25] - x[9];
- x[24] += x[8];
- x[25] += x[9];
- x[8] = MULT31( r0 , cPI3_8 ) - MULT31( r1 , cPI1_8 );
- x[9] = MULT31( r1 , cPI3_8 ) + MULT31( r0 , cPI1_8 );
-
- r0 = x[22] - x[6];
- r1 = x[7] - x[23];
- x[22] += x[6];
- x[23] += x[7];
- x[6] = r1;
- x[7] = r0;
-
- r0 = x[4] - x[20];
- r1 = x[5] - x[21];
- x[20] += x[4];
- x[21] += x[5];
- x[4] = MULT31( r1 , cPI1_8 ) + MULT31( r0 , cPI3_8 );
- x[5] = MULT31( r1 , cPI3_8 ) - MULT31( r0 , cPI1_8 );
-
- r0 = x[2] - x[18];
- r1 = x[3] - x[19];
- x[18] += x[2];
- x[19] += x[3];
- x[2] = MULT31(( r1 + r0 ) , cPI2_8);
- x[3] = MULT31(( r1 - r0 ) , cPI2_8);
-
- r0 = x[0] - x[16];
- r1 = x[1] - x[17];
- x[16] += x[0];
- x[17] += x[1];
- x[0] = MULT31( r1 , cPI3_8 ) + MULT31( r0 , cPI1_8 );
- x[1] = MULT31( r1 , cPI1_8 ) - MULT31( r0 , cPI3_8 );
+
+ REG_TYPE r0, r1;
+
+ r0 = x[30] - x[14]; x[30] += x[14];
+ r1 = x[31] - x[15]; x[31] += x[15];
+ x[14] = r0; x[15] = r1;
+ MB();
+
+ r0 = x[28] - x[12]; x[28] += x[12];
+ r1 = x[29] - x[13]; x[29] += x[13];
+ XNPROD31( r0, r1, cPI1_8, cPI3_8, &x[12], &x[13] );
+ MB();
+
+ r0 = x[26] - x[10]; x[26] += x[10];
+ r1 = x[27] - x[11]; x[27] += x[11];
+ x[10] = MULT31((r0 - r1) , cPI2_8);
+ x[11] = MULT31((r0 + r1) , cPI2_8);
+ MB();
+
+ r0 = x[24] - x[ 8]; x[24] += x[ 8];
+ r1 = x[25] - x[ 9]; x[25] += x[ 9];
+ XNPROD31( r0, r1, cPI3_8, cPI1_8, &x[ 8], &x[ 9] );
+ MB();
+
+ r0 = x[22] - x[ 6]; x[22] += x[ 6];
+ r1 = x[ 7] - x[23]; x[23] += x[ 7];
+ x[ 6] = r1; x[ 7] = r0;
+ MB();
+
+ r0 = x[ 4] - x[20]; x[20] += x[ 4];
+ r1 = x[ 5] - x[21]; x[21] += x[ 5];
+ XPROD31 ( r0, r1, cPI3_8, cPI1_8, &x[ 4], &x[ 5] );
+ MB();
+
+ r0 = x[ 2] - x[18]; x[18] += x[ 2];
+ r1 = x[ 3] - x[19]; x[19] += x[ 3];
+ x[ 2] = MULT31((r1 + r0) , cPI2_8);
+ x[ 3] = MULT31((r1 - r0) , cPI2_8);
+ MB();
+
+ r0 = x[ 0] - x[16]; x[16] += x[ 0];
+ r1 = x[ 1] - x[17]; x[17] += x[ 1];
+ XPROD31 ( r0, r1, cPI1_8, cPI3_8, &x[ 0], &x[ 1] );
+ MB();
mdct_butterfly_16(x);
mdct_butterfly_16(x+16);
-
}
/* N/stage point generic N stage butterfly (in place, 2 register) */
STIN void mdct_butterfly_generic(DATA_TYPE *x,int points,int step){
- DATA_TYPE *T=sin_lookup+2048;
- DATA_TYPE *V=sin_lookup;
- DATA_TYPE *x1 = x + points - 8;
- DATA_TYPE *x2 = x + (points>>1) - 8;
+ DATA_TYPE *T = sincos_lookup0;
+ DATA_TYPE *x1 = x + points - 8;
+ DATA_TYPE *x2 = x + (points>>1) - 8;
REG_TYPE r0;
REG_TYPE r1;
do{
+ r0 = x1[6] - x2[6]; x1[6] += x2[6];
+ r1 = x2[7] - x1[7]; x1[7] += x2[7];
+ XPROD31( r1, r0, T[0], T[1], &x2[6], &x2[7] ); T+=step;
+
+ r0 = x1[4] - x2[4]; x1[4] += x2[4];
+ r1 = x2[5] - x1[5]; x1[5] += x2[5];
+ XPROD31( r1, r0, T[0], T[1], &x2[4], &x2[5] ); T+=step;
+
+ r0 = x1[2] - x2[2]; x1[2] += x2[2];
+ r1 = x2[3] - x1[3]; x1[3] += x2[3];
+ XPROD31( r1, r0, T[0], T[1], &x2[2], &x2[3] ); T+=step;
+
+ r0 = x1[0] - x2[0]; x1[0] += x2[0];
+ r1 = x2[1] - x1[1]; x1[1] += x2[1];
+ XPROD31( r1, r0, T[0], T[1], &x2[0], &x2[1] ); T+=step;
+
+ x1-=8; x2-=8;
+ }while(T<sincos_lookup0+1024);
+ do{
+ r0 = x1[6] - x2[6]; x1[6] += x2[6];
+ r1 = x1[7] - x2[7]; x1[7] += x2[7];
+ XNPROD31( r0, r1, T[0], T[1], &x2[6], &x2[7] ); T-=step;
+
+ r0 = x1[4] - x2[4]; x1[4] += x2[4];
+ r1 = x1[5] - x2[5]; x1[5] += x2[5];
+ XNPROD31( r0, r1, T[0], T[1], &x2[4], &x2[5] ); T-=step;
+
+ r0 = x1[2] - x2[2]; x1[2] += x2[2];
+ r1 = x1[3] - x2[3]; x1[3] += x2[3];
+ XNPROD31( r0, r1, T[0], T[1], &x2[2], &x2[3] ); T-=step;
+
+ r0 = x1[0] - x2[0]; x1[0] += x2[0];
+ r1 = x1[1] - x2[1]; x1[1] += x2[1];
+ XNPROD31( r0, r1, T[0], T[1], &x2[0], &x2[1] ); T-=step;
+
+ x1-=8; x2-=8;
+ }while(T>sincos_lookup0);
+ do{
+ r0 = x2[6] - x1[6]; x1[6] += x2[6];
+ r1 = x2[7] - x1[7]; x1[7] += x2[7];
+ XPROD31( r0, r1, T[0], T[1], &x2[6], &x2[7] ); T+=step;
+
+ r0 = x2[4] - x1[4]; x1[4] += x2[4];
+ r1 = x2[5] - x1[5]; x1[5] += x2[5];
+ XPROD31( r0, r1, T[0], T[1], &x2[4], &x2[5] ); T+=step;
- r0 = x1[6] - x2[6];
- r1 = x1[7] - x2[7];
- x1[6] += x2[6];
- x1[7] += x2[7];
- x2[6] = MULT31(r0 , *T) - MULT31(r1 , *V);
- x2[7] = MULT31(r1 , *T) + MULT31(r0 , *V);
- T -= step;
- V += step;
-
- r0 = x1[4] - x2[4];
- r1 = x1[5] - x2[5];
- x1[4] += x2[4];
- x1[5] += x2[5];
- x2[4] = MULT31(r0 , *T) - MULT31(r1 , *V);
- x2[5] = MULT31(r1 , *T) + MULT31(r0 , *V);
- T -= step;
- V += step;
-
- r0 = x1[2] - x2[2];
- r1 = x1[3] - x2[3];
- x1[2] += x2[2];
- x1[3] += x2[3];
- x2[2] = MULT31(r0 , *T) - MULT31(r1 , *V);
- x2[3] = MULT31(r1 , *T) + MULT31(r0 , *V);
- T -= step;
- V += step;
-
- r0 = x1[0] - x2[0];
- r1 = x1[1] - x2[1];
- x1[0] += x2[0];
- x1[1] += x2[1];
- x2[0] = MULT31(r0 , *T) - MULT31(r1 , *V);
- x2[1] = MULT31(r1 , *T) + MULT31(r0 , *V);
- T -= step;
- V += step;
-
- x1-=8;
- x2-=8;
- }while(T>sin_lookup);
+ r0 = x2[2] - x1[2]; x1[2] += x2[2];
+ r1 = x2[3] - x1[3]; x1[3] += x2[3];
+ XPROD31( r0, r1, T[0], T[1], &x2[2], &x2[3] ); T+=step;
+ r0 = x2[0] - x1[0]; x1[0] += x2[0];
+ r1 = x2[1] - x1[1]; x1[1] += x2[1];
+ XPROD31( r0, r1, T[0], T[1], &x2[0], &x2[1] ); T+=step;
+
+ x1-=8; x2-=8;
+ }while(T<sincos_lookup0+1024);
do{
-
- r0 = x2[6] - x1[6];
- r1 = x2[7] - x1[7];
- x1[6] += x2[6];
- x1[7] += x2[7];
- x2[6] = MULT31(r0 , *T) + MULT31(r1 , *V);
- x2[7] = MULT31(r1 , *T) - MULT31(r0 , *V);
- T += step;
- V -= step;
-
- r0 = x2[4] - x1[4];
- r1 = x2[5] - x1[5];
- x1[4] += x2[4];
- x1[5] += x2[5];
- x2[4] = MULT31(r0 , *T) + MULT31(r1 , *V);
- x2[5] = MULT31(r1 , *T) - MULT31(r0 , *V);
- T += step;
- V -= step;
-
- r0 = x2[2] - x1[2];
- r1 = x2[3] - x1[3];
- x1[2] += x2[2];
- x1[3] += x2[3];
- x2[2] = MULT31(r0 , *T) + MULT31(r1 , *V);
- x2[3] = MULT31(r1 , *T) - MULT31(r0 , *V);
- T += step;
- V -= step;
-
- r0 = x2[0] - x1[0];
- r1 = x2[1] - x1[1];
- x1[0] += x2[0];
- x1[1] += x2[1];
- x2[0] = MULT31(r0 , *T) + MULT31(r1 , *V);
- x2[1] = MULT31(r1 , *T) - MULT31(r0 , *V);
- T += step;
- V -= step;
-
- x1-=8;
- x2-=8;
- }while(x2>=x);
+ r0 = x1[6] - x2[6]; x1[6] += x2[6];
+ r1 = x2[7] - x1[7]; x1[7] += x2[7];
+ XNPROD31( r1, r0, T[0], T[1], &x2[6], &x2[7] ); T-=step;
+
+ r0 = x1[4] - x2[4]; x1[4] += x2[4];
+ r1 = x2[5] - x1[5]; x1[5] += x2[5];
+ XNPROD31( r1, r0, T[0], T[1], &x2[4], &x2[5] ); T-=step;
+
+ r0 = x1[2] - x2[2]; x1[2] += x2[2];
+ r1 = x2[3] - x1[3]; x1[3] += x2[3];
+ XNPROD31( r1, r0, T[0], T[1], &x2[2], &x2[3] ); T-=step;
+
+ r0 = x1[0] - x2[0]; x1[0] += x2[0];
+ r1 = x2[1] - x1[1]; x1[1] += x2[1];
+ XNPROD31( r1, r0, T[0], T[1], &x2[0], &x2[1] ); T-=step;
+
+ x1-=8; x2-=8;
+ }while(T>sincos_lookup0);
}
STIN void mdct_butterflies(DATA_TYPE *x,int points,int shift){
@@ -286,53 +261,83 @@ STIN void mdct_bitreverse(DATA_TYPE *x,int n,int step,int shift){
int bit = 0;
DATA_TYPE *w0 = x;
DATA_TYPE *w1 = x = w0+(n>>1);
- DATA_TYPE *T = sin_lookup-(step>>1);
- DATA_TYPE *V = sin_lookup+2048+(step>>1);
- REG_TYPE r2;
+ DATA_TYPE *T = (step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1;
+ DATA_TYPE *Ttop = T+1024;
+ DATA_TYPE r2;
do{
- REG_TYPE r3 = bitrev12(bit++);
+ DATA_TYPE r3 = bitrev12(bit++);
DATA_TYPE *x0 = x + ((r3 ^ 0xfff)>>shift) -1;
DATA_TYPE *x1 = x + (r3>>shift);
- REG_TYPE r0 = x0[1] - x1[1];
+ REG_TYPE r0 = x1[1] - x0[1];
REG_TYPE r1 = x0[0] + x1[0];
- T += step;
- V -= step;
- r2 = MULT32(r0 , *T) - MULT32(r1 , *V);
- r3 = MULT32(r1 , *T) + MULT32(r0 , *V);
+
+ XPROD32( T[0], T[1], r0, r1, &r2, &r3 ); T+=step;
w1 -= 4;
- r0 = (x0[1] + x1[1])>>1;
+ r0 = (x0[1] + x1[1])>>1;
r1 = (x0[0] - x1[0])>>1;
-
- w0[0] = r0 - r2;
- w1[2] = r0 + r2;
- w0[1] = r1 - r3;
- w1[3] =-r1 - r3;
+ w0[0] = r0 + r2;
+ w0[1] = r1 + r3;
+ w1[2] = r0 - r2;
+ w1[3] = r3 - r1;
r3 = bitrev12(bit++);
x0 = x + ((r3 ^ 0xfff)>>shift) -1;
x1 = x + (r3>>shift);
- r0 = x0[1] - x1[1];
+ r0 = x1[1] - x0[1];
r1 = x0[0] + x1[0];
- T += step;
- V -= step;
- r2 = MULT32(r0 , *T) - MULT32(r1 , *V);
- r3 = MULT32(r1 , *T) + MULT32(r0 , *V);
+
+ XPROD32( T[0], T[1], r0, r1, &r2, &r3 ); T+=step;
r0 = (x0[1] + x1[1])>>1;
r1 = (x0[0] - x1[0])>>1;
-
- w0[2] = r0 - r2;
- w1[0] = r0 + r2;
- w0[3] = r1 - r3;
- w1[1] =-r1 - r3;
+ w0[2] = r0 + r2;
+ w0[3] = r1 + r3;
+ w1[0] = r0 - r2;
+ w1[1] = r3 - r1;
w0 += 4;
+ }while(T<Ttop);
+ do{
+ DATA_TYPE r3 = bitrev12(bit++);
+ DATA_TYPE *x0 = x + ((r3 ^ 0xfff)>>shift) -1;
+ DATA_TYPE *x1 = x + (r3>>shift);
+
+ REG_TYPE r0 = x0[0] + x1[0];
+ REG_TYPE r1 = x1[1] - x0[1];
+ T-=step; XPROD32( r0, r1, T[0], T[1], &r2, &r3 );
+
+ w1 -= 4;
+
+ r0 = (x0[1] + x1[1])>>1;
+ r1 = (x0[0] - x1[0])>>1;
+ w0[0] = r0 + r2;
+ w0[1] = r1 + r3;
+ w1[2] = r0 - r2;
+ w1[3] = r3 - r1;
+
+ r3 = bitrev12(bit++);
+ x0 = x + ((r3 ^ 0xfff)>>shift) -1;
+ x1 = x + (r3>>shift);
+
+ r0 = x0[0] + x1[0];
+ r1 = x1[1] - x0[1];
+
+ T-=step; XPROD32( r0, r1, T[0], T[1], &r2, &r3 );
+
+ r0 = (x0[1] + x1[1])>>1;
+ r1 = (x0[0] - x1[0])>>1;
+ w0[2] = r0 + r2;
+ w0[3] = r1 + r3;
+ w1[0] = r0 - r2;
+ w1[1] = r3 - r1;
+
+ w0 += 4;
}while(w0<w1);
}
@@ -354,45 +359,36 @@ void mdct_backward(int n, DATA_TYPE *in, DATA_TYPE *out){
iX = in+n2-7;
oX = out+n2+n4;
- T = sin_lookup-step;
- V = sin_lookup+2048+step;
+ T = sincos_lookup0;
do{
-
- oX -= 4;
-
- T += step;
- V -= step;
- oX[2] = MULT31(iX[4] , *T) + MULT31(iX[6] , *V);
- oX[3] = MULT31(iX[6] , *T) - MULT31(iX[4] , *V);
- T += step;
- V -= step;
- oX[0] = MULT31(iX[0] , *T) + MULT31(iX[2] , *V);
- oX[1] = MULT31(iX[2] , *T) - MULT31(iX[0] , *V);
-
- iX -= 8;
-
+ oX-=4;
+ XPROD31( iX[4], iX[6], T[0], T[1], &oX[2], &oX[3] ); T+=step;
+ XPROD31( iX[0], iX[2], T[0], T[1], &oX[0], &oX[1] ); T+=step;
+ iX-=8;
+ }while(iX>=in+n4);
+ do{
+ oX-=4;
+ XPROD31( iX[4], iX[6], T[1], T[0], &oX[2], &oX[3] ); T-=step;
+ XPROD31( iX[0], iX[2], T[1], T[0], &oX[0], &oX[1] ); T-=step;
+ iX-=8;
}while(iX>=in);
iX = in+n2-8;
oX = out+n2+n4;
- T = sin_lookup;
- V = sin_lookup+2048;
+ T = sincos_lookup0;
do{
-
- T += step;
- V -= step;
- oX[0] = MULT31(iX[6] , *T) - MULT31(iX[4] , *V);
- oX[1] = MULT31(iX[4] , *T) + MULT31(iX[6] , *V);
- T += step;
- V -= step;
- oX[2] = MULT31(iX[2] , *T) - MULT31(iX[0] , *V);
- oX[3] = MULT31(iX[0] , *T) + MULT31(iX[2] , *V);
-
- iX -= 8;
- oX += 4;
-
+ T+=step; XNPROD31( iX[6], iX[4], T[0], T[1], &oX[0], &oX[1] );
+ T+=step; XNPROD31( iX[2], iX[0], T[0], T[1], &oX[2], &oX[3] );
+ iX-=8;
+ oX+=4;
+ }while(iX>=in+n4);
+ do{
+ T-=step; XNPROD31( iX[6], iX[4], T[1], T[0], &oX[0], &oX[1] );
+ T-=step; XNPROD31( iX[2], iX[0], T[1], T[0], &oX[2], &oX[3] );
+ iX-=8;
+ oX+=4;
}while(iX>=in);
mdct_butterflies(out+n2,n2,shift);
@@ -405,39 +401,85 @@ void mdct_backward(int n, DATA_TYPE *in, DATA_TYPE *out){
DATA_TYPE *oX1=out+n2+n4;
DATA_TYPE *oX2=out+n2+n4;
DATA_TYPE *iX =out;
- T =sin_lookup-(step>>1);
- V =sin_lookup+2048+(step>>1);
-
- do{
- oX1-=4;
-
- T += step;
- V -= step;
- oX1[3] = MULT31 (iX[0] , *T) - MULT31(iX[1] , *V);
- oX2[0] =-(MULT31 (iX[0] , *V) + MULT31(iX[1] , *T));
-
- T += step;
- V -= step;
- oX1[2] = MULT31 (iX[2] , *T) - MULT31(iX[3] , *V);
- oX2[1] =-(MULT31 (iX[2] , *V) + MULT31(iX[3] , *T));
-
- if(!step) T++,V--;
- T += step;
- V -= step;
- oX1[1] = MULT31 (iX[4] , *T) - MULT31(iX[5] , *V);
- oX2[2] =-(MULT31 (iX[4] , *V) + MULT31(iX[5] , *T));
-
- T += step;
- V -= step;
- oX1[0] = MULT31 (iX[6] , *T) - MULT31(iX[7] , *V);
- oX2[3] =-(MULT31 (iX[6] , *V) + MULT31(iX[7] , *T));
-
- if(!step) T++,V--;
-
- oX2+=4;
- iX += 8;
- }while(iX<oX1);
+ switch(step) {
+ default: {
+ T=(step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1;
+ do{
+ oX1-=4;
+ XPROD31( iX[0], -iX[1], T[0], T[1], &oX1[3], &oX2[0] ); T+=step;
+ XPROD31( iX[2], -iX[3], T[0], T[1], &oX1[2], &oX2[1] ); T+=step;
+ XPROD31( iX[4], -iX[5], T[0], T[1], &oX1[1], &oX2[2] ); T+=step;
+ XPROD31( iX[6], -iX[7], T[0], T[1], &oX1[0], &oX2[3] ); T+=step;
+ oX2+=4;
+ iX+=8;
+ }while(iX<oX1);
+ break;
+ }
+
+ case 1: {
+ /* linear interpolation between table values: offset=0.5, step=1 */
+ REG_TYPE t0,t1,v0,v1;
+ T = sincos_lookup0;
+ V = sincos_lookup1;
+ t0 = (*T++)>>1;
+ t1 = (*T++)>>1;
+ do{
+ oX1-=4;
+
+ t0 += (v0 = (*V++)>>1);
+ t1 += (v1 = (*V++)>>1);
+ XPROD31( iX[0], -iX[1], t0, t1, &oX1[3], &oX2[0] ); T+=step;
+ v0 += (t0 = (*T++)>>1);
+ v1 += (t1 = (*T++)>>1);
+ XPROD31( iX[2], -iX[3], v0, v1, &oX1[2], &oX2[1] ); T+=step;
+ t0 += (v0 = (*V++)>>1);
+ t1 += (v1 = (*V++)>>1);
+ XPROD31( iX[4], -iX[5], t0, t1, &oX1[1], &oX2[2] ); T+=step;
+ v0 += (t0 = (*T++)>>1);
+ v1 += (t1 = (*T++)>>1);
+ XPROD31( iX[6], -iX[7], v0, v1, &oX1[0], &oX2[3] ); T+=step;
+
+ oX2+=4;
+ iX+=8;
+ }while(iX<oX1);
+ break;
+ }
+
+ case 0: {
+ /* linear interpolation between table values: offset=0.25, step=0.5 */
+ REG_TYPE t0,t1,v0,v1,q0,q1;
+ T = sincos_lookup0;
+ V = sincos_lookup1;
+ t0 = *T++;
+ t1 = *T++;
+ do{
+ oX1-=4;
+
+ v0 = *V++;
+ v1 = *V++;
+ t0 += (q0 = (v0-t0)>>2);
+ t1 += (q1 = (v1-t1)>>2);
+ XPROD31( iX[0], -iX[1], t0, t1, &oX1[3], &oX2[0] ); T+=step;
+ t0 = v0-q0;
+ t1 = v1-q1;
+ XPROD31( iX[2], -iX[3], t0, t1, &oX1[2], &oX2[1] ); T+=step;
+
+ t0 = *T++;
+ t1 = *T++;
+ v0 += (q0 = (t0-v0)>>2);
+ v1 += (q1 = (t1-v1)>>2);
+ XPROD31( iX[4], -iX[5], v0, v1, &oX1[1], &oX2[2] ); T+=step;
+ v0 = t0-q0;
+ v1 = t1-q1;
+ XPROD31( iX[6], -iX[7], v0, v1, &oX1[0], &oX2[3] ); T+=step;
+
+ oX2+=4;
+ iX+=8;
+ }while(iX<oX1);
+ break;
+ }
+ }
iX=out+n2+n4;
oX1=out+n4;