diff options
author | Monty <xiphmont@xiph.org> | 2002-09-20 00:10:31 +0000 |
---|---|---|
committer | Monty <xiphmont@xiph.org> | 2002-09-20 00:10:31 +0000 |
commit | 1dad9587e30c61a8458fdda12da6248bf130c691 (patch) | |
tree | 028314f1b8071251d480ea836eb1a14e12c9f1f5 /mdct.c | |
parent | 55b25a7aadfb989f50a32cc7d146910de8d6f6cb (diff) | |
download | tremor-1dad9587e30c61a8458fdda12da6248bf130c691.tar.gz |
Latest improvements from Nicolas Pitre. Reviewed by Monty.
From Nicolas's notes:
- Includes my previous patch with interpolation code for correct accuracy
with all block sizes.
- Interlaces sin and cos values in the lookup table to reduce register
pressure, since only one pointer is required to walk the table instead of
two. This also improves cache locality.
- Split the lookup table into two tables, since half of it (every other
value) is only used in a separate section of the code and only with large
block sizes. The table size used for the common case is therefore halved,
which gives even better cache usage.
- Abstracted all cross products throughout the code so they can be easily
optimized. First, this prevents redundant register reloads on ARM due to
the implicit memory access ordering; second, it provides the
opportunity to hook in some inline assembly to perform the actual operation.
- Fix layout of current assembly in asm_arm.h to match GCC's output (more
enjoyable to read when inspecting the final assembly), and fix some
constraint correctness issues.
- Added a memory barrier macro to force the compiler not to cache values
in registers or on the stack in some cases.
- Reordered some code for better ARM assembly generation by
the compiler.
git-svn-id: https://svn.xiph.org/trunk/Tremor@3923 0101bb08-14d6-0310-b084-bc0e0c8e3800
Diffstat (limited to 'mdct.c')
-rw-r--r-- | mdct.c | 598 |
1 files changed, 320 insertions, 278 deletions
@@ -13,7 +13,7 @@ function: normalized modified discrete cosine transform power of two length transform only [64 <= n ] - last mod: $Id: mdct.c,v 1.4 2002/09/13 16:37:56 xiphmont Exp $ + last mod: $Id: mdct.c,v 1.5 2002/09/20 00:10:31 xiphmont Exp $ Original algorithm adapted long ago from _The use of multirate filter banks for coding of high quality digital audio_, by T. Sporer, @@ -42,60 +42,56 @@ #include "mdct_lookup.h" #include "misc.h" -/* 8 point butterfly (in place, 4 register) */ + +/* 8 point butterfly (in place) */ STIN void mdct_butterfly_8(DATA_TYPE *x){ - REG_TYPE r0 = x[6] + x[2]; - REG_TYPE r1 = x[6] - x[2]; - REG_TYPE r2 = x[4] + x[0]; - REG_TYPE r3 = x[4] - x[0]; - - x[6] = r0 + r2; - x[4] = r0 - r2; - - r0 = x[5] - x[1]; - r2 = x[7] - x[3]; - x[0] = r1 + r0; - x[2] = r1 - r0; - - r0 = x[5] + x[1]; - r1 = x[7] + x[3]; - x[3] = r2 + r3; - x[1] = r2 - r3; - x[7] = r1 + r0; - x[5] = r1 - r0; - + + REG_TYPE r0 = x[4] + x[0]; + REG_TYPE r1 = x[4] - x[0]; + REG_TYPE r2 = x[5] + x[1]; + REG_TYPE r3 = x[5] - x[1]; + REG_TYPE r4 = x[6] + x[2]; + REG_TYPE r5 = x[6] - x[2]; + REG_TYPE r6 = x[7] + x[3]; + REG_TYPE r7 = x[7] - x[3]; + + x[0] = r5 + r3; + x[1] = r7 - r1; + x[2] = r5 - r3; + x[3] = r7 + r1; + x[4] = r4 - r0; + x[5] = r6 - r2; + x[6] = r4 + r0; + x[7] = r6 + r2; + MB(); } /* 16 point butterfly (in place, 4 register) */ STIN void mdct_butterfly_16(DATA_TYPE *x){ - REG_TYPE r0 = x[1] - x[9]; - REG_TYPE r1 = x[0] - x[8]; - - x[8] += x[0]; - x[9] += x[1]; - x[0] = MULT31((r0 + r1) , cPI2_8); - x[1] = MULT31((r0 - r1) , cPI2_8); - - r0 = x[3] - x[11]; - r1 = x[10] - x[2]; - x[10] += x[2]; - x[11] += x[3]; - x[2] = r0; - x[3] = r1; - - r0 = x[12] - x[4]; - r1 = x[13] - x[5]; - x[12] += x[4]; - x[13] += x[5]; - x[4] = MULT31((r0 - r1) , cPI2_8); - x[5] = MULT31((r0 + r1) , cPI2_8); - - r0 = x[14] - x[6]; - r1 = x[15] - x[7]; - x[14] += x[6]; - x[15] += x[7]; - x[6] = r0; - x[7] = r1; + + REG_TYPE r0, r1; + + r0 = x[ 0] - x[ 8]; x[ 8] += x[ 0]; + r1 = x[ 
1] - x[ 9]; x[ 9] += x[ 1]; + x[ 0] = MULT31((r0 + r1) , cPI2_8); + x[ 1] = MULT31((r1 - r0) , cPI2_8); + MB(); + + r0 = x[10] - x[ 2]; x[10] += x[ 2]; + r1 = x[ 3] - x[11]; x[11] += x[ 3]; + x[ 2] = r1; x[ 3] = r0; + MB(); + + r0 = x[12] - x[ 4]; x[12] += x[ 4]; + r1 = x[13] - x[ 5]; x[13] += x[ 5]; + x[ 4] = MULT31((r0 - r1) , cPI2_8); + x[ 5] = MULT31((r0 + r1) , cPI2_8); + MB(); + + r0 = x[14] - x[ 6]; x[14] += x[ 6]; + r1 = x[15] - x[ 7]; x[15] += x[ 7]; + x[ 6] = r0; x[ 7] = r1; + MB(); mdct_butterfly_8(x); mdct_butterfly_8(x+8); @@ -103,161 +99,140 @@ STIN void mdct_butterfly_16(DATA_TYPE *x){ /* 32 point butterfly (in place, 4 register) */ STIN void mdct_butterfly_32(DATA_TYPE *x){ - REG_TYPE r0 = x[30] - x[14]; - REG_TYPE r1 = x[31] - x[15]; - - x[30] += x[14]; - x[31] += x[15]; - x[14] = r0; - x[15] = r1; - - r0 = x[28] - x[12]; - r1 = x[29] - x[13]; - x[28] += x[12]; - x[29] += x[13]; - x[12] = MULT31( r0 , cPI1_8 ) - MULT31( r1 , cPI3_8 ); - x[13] = MULT31( r0 , cPI3_8 ) + MULT31( r1 , cPI1_8 ); - - r0 = x[26] - x[10]; - r1 = x[27] - x[11]; - x[26] += x[10]; - x[27] += x[11]; - x[10] = MULT31(( r0 - r1 ) , cPI2_8); - x[11] = MULT31(( r0 + r1 ) , cPI2_8); - - r0 = x[24] - x[8]; - r1 = x[25] - x[9]; - x[24] += x[8]; - x[25] += x[9]; - x[8] = MULT31( r0 , cPI3_8 ) - MULT31( r1 , cPI1_8 ); - x[9] = MULT31( r1 , cPI3_8 ) + MULT31( r0 , cPI1_8 ); - - r0 = x[22] - x[6]; - r1 = x[7] - x[23]; - x[22] += x[6]; - x[23] += x[7]; - x[6] = r1; - x[7] = r0; - - r0 = x[4] - x[20]; - r1 = x[5] - x[21]; - x[20] += x[4]; - x[21] += x[5]; - x[4] = MULT31( r1 , cPI1_8 ) + MULT31( r0 , cPI3_8 ); - x[5] = MULT31( r1 , cPI3_8 ) - MULT31( r0 , cPI1_8 ); - - r0 = x[2] - x[18]; - r1 = x[3] - x[19]; - x[18] += x[2]; - x[19] += x[3]; - x[2] = MULT31(( r1 + r0 ) , cPI2_8); - x[3] = MULT31(( r1 - r0 ) , cPI2_8); - - r0 = x[0] - x[16]; - r1 = x[1] - x[17]; - x[16] += x[0]; - x[17] += x[1]; - x[0] = MULT31( r1 , cPI3_8 ) + MULT31( r0 , cPI1_8 ); - x[1] = MULT31( r1 , cPI1_8 ) - MULT31( 
r0 , cPI3_8 ); + + REG_TYPE r0, r1; + + r0 = x[30] - x[14]; x[30] += x[14]; + r1 = x[31] - x[15]; x[31] += x[15]; + x[14] = r0; x[15] = r1; + MB(); + + r0 = x[28] - x[12]; x[28] += x[12]; + r1 = x[29] - x[13]; x[29] += x[13]; + XNPROD31( r0, r1, cPI1_8, cPI3_8, &x[12], &x[13] ); + MB(); + + r0 = x[26] - x[10]; x[26] += x[10]; + r1 = x[27] - x[11]; x[27] += x[11]; + x[10] = MULT31((r0 - r1) , cPI2_8); + x[11] = MULT31((r0 + r1) , cPI2_8); + MB(); + + r0 = x[24] - x[ 8]; x[24] += x[ 8]; + r1 = x[25] - x[ 9]; x[25] += x[ 9]; + XNPROD31( r0, r1, cPI3_8, cPI1_8, &x[ 8], &x[ 9] ); + MB(); + + r0 = x[22] - x[ 6]; x[22] += x[ 6]; + r1 = x[ 7] - x[23]; x[23] += x[ 7]; + x[ 6] = r1; x[ 7] = r0; + MB(); + + r0 = x[ 4] - x[20]; x[20] += x[ 4]; + r1 = x[ 5] - x[21]; x[21] += x[ 5]; + XPROD31 ( r0, r1, cPI3_8, cPI1_8, &x[ 4], &x[ 5] ); + MB(); + + r0 = x[ 2] - x[18]; x[18] += x[ 2]; + r1 = x[ 3] - x[19]; x[19] += x[ 3]; + x[ 2] = MULT31((r1 + r0) , cPI2_8); + x[ 3] = MULT31((r1 - r0) , cPI2_8); + MB(); + + r0 = x[ 0] - x[16]; x[16] += x[ 0]; + r1 = x[ 1] - x[17]; x[17] += x[ 1]; + XPROD31 ( r0, r1, cPI1_8, cPI3_8, &x[ 0], &x[ 1] ); + MB(); mdct_butterfly_16(x); mdct_butterfly_16(x+16); - } /* N/stage point generic N stage butterfly (in place, 2 register) */ STIN void mdct_butterfly_generic(DATA_TYPE *x,int points,int step){ - DATA_TYPE *T=sin_lookup+2048; - DATA_TYPE *V=sin_lookup; - DATA_TYPE *x1 = x + points - 8; - DATA_TYPE *x2 = x + (points>>1) - 8; + DATA_TYPE *T = sincos_lookup0; + DATA_TYPE *x1 = x + points - 8; + DATA_TYPE *x2 = x + (points>>1) - 8; REG_TYPE r0; REG_TYPE r1; do{ + r0 = x1[6] - x2[6]; x1[6] += x2[6]; + r1 = x2[7] - x1[7]; x1[7] += x2[7]; + XPROD31( r1, r0, T[0], T[1], &x2[6], &x2[7] ); T+=step; + + r0 = x1[4] - x2[4]; x1[4] += x2[4]; + r1 = x2[5] - x1[5]; x1[5] += x2[5]; + XPROD31( r1, r0, T[0], T[1], &x2[4], &x2[5] ); T+=step; + + r0 = x1[2] - x2[2]; x1[2] += x2[2]; + r1 = x2[3] - x1[3]; x1[3] += x2[3]; + XPROD31( r1, r0, T[0], T[1], &x2[2], &x2[3] ); 
T+=step; + + r0 = x1[0] - x2[0]; x1[0] += x2[0]; + r1 = x2[1] - x1[1]; x1[1] += x2[1]; + XPROD31( r1, r0, T[0], T[1], &x2[0], &x2[1] ); T+=step; + + x1-=8; x2-=8; + }while(T<sincos_lookup0+1024); + do{ + r0 = x1[6] - x2[6]; x1[6] += x2[6]; + r1 = x1[7] - x2[7]; x1[7] += x2[7]; + XNPROD31( r0, r1, T[0], T[1], &x2[6], &x2[7] ); T-=step; + + r0 = x1[4] - x2[4]; x1[4] += x2[4]; + r1 = x1[5] - x2[5]; x1[5] += x2[5]; + XNPROD31( r0, r1, T[0], T[1], &x2[4], &x2[5] ); T-=step; + + r0 = x1[2] - x2[2]; x1[2] += x2[2]; + r1 = x1[3] - x2[3]; x1[3] += x2[3]; + XNPROD31( r0, r1, T[0], T[1], &x2[2], &x2[3] ); T-=step; + + r0 = x1[0] - x2[0]; x1[0] += x2[0]; + r1 = x1[1] - x2[1]; x1[1] += x2[1]; + XNPROD31( r0, r1, T[0], T[1], &x2[0], &x2[1] ); T-=step; + + x1-=8; x2-=8; + }while(T>sincos_lookup0); + do{ + r0 = x2[6] - x1[6]; x1[6] += x2[6]; + r1 = x2[7] - x1[7]; x1[7] += x2[7]; + XPROD31( r0, r1, T[0], T[1], &x2[6], &x2[7] ); T+=step; + + r0 = x2[4] - x1[4]; x1[4] += x2[4]; + r1 = x2[5] - x1[5]; x1[5] += x2[5]; + XPROD31( r0, r1, T[0], T[1], &x2[4], &x2[5] ); T+=step; - r0 = x1[6] - x2[6]; - r1 = x1[7] - x2[7]; - x1[6] += x2[6]; - x1[7] += x2[7]; - x2[6] = MULT31(r0 , *T) - MULT31(r1 , *V); - x2[7] = MULT31(r1 , *T) + MULT31(r0 , *V); - T -= step; - V += step; - - r0 = x1[4] - x2[4]; - r1 = x1[5] - x2[5]; - x1[4] += x2[4]; - x1[5] += x2[5]; - x2[4] = MULT31(r0 , *T) - MULT31(r1 , *V); - x2[5] = MULT31(r1 , *T) + MULT31(r0 , *V); - T -= step; - V += step; - - r0 = x1[2] - x2[2]; - r1 = x1[3] - x2[3]; - x1[2] += x2[2]; - x1[3] += x2[3]; - x2[2] = MULT31(r0 , *T) - MULT31(r1 , *V); - x2[3] = MULT31(r1 , *T) + MULT31(r0 , *V); - T -= step; - V += step; - - r0 = x1[0] - x2[0]; - r1 = x1[1] - x2[1]; - x1[0] += x2[0]; - x1[1] += x2[1]; - x2[0] = MULT31(r0 , *T) - MULT31(r1 , *V); - x2[1] = MULT31(r1 , *T) + MULT31(r0 , *V); - T -= step; - V += step; - - x1-=8; - x2-=8; - }while(T>sin_lookup); + r0 = x2[2] - x1[2]; x1[2] += x2[2]; + r1 = x2[3] - x1[3]; x1[3] += x2[3]; + XPROD31( r0, r1, 
T[0], T[1], &x2[2], &x2[3] ); T+=step; + r0 = x2[0] - x1[0]; x1[0] += x2[0]; + r1 = x2[1] - x1[1]; x1[1] += x2[1]; + XPROD31( r0, r1, T[0], T[1], &x2[0], &x2[1] ); T+=step; + + x1-=8; x2-=8; + }while(T<sincos_lookup0+1024); do{ - - r0 = x2[6] - x1[6]; - r1 = x2[7] - x1[7]; - x1[6] += x2[6]; - x1[7] += x2[7]; - x2[6] = MULT31(r0 , *T) + MULT31(r1 , *V); - x2[7] = MULT31(r1 , *T) - MULT31(r0 , *V); - T += step; - V -= step; - - r0 = x2[4] - x1[4]; - r1 = x2[5] - x1[5]; - x1[4] += x2[4]; - x1[5] += x2[5]; - x2[4] = MULT31(r0 , *T) + MULT31(r1 , *V); - x2[5] = MULT31(r1 , *T) - MULT31(r0 , *V); - T += step; - V -= step; - - r0 = x2[2] - x1[2]; - r1 = x2[3] - x1[3]; - x1[2] += x2[2]; - x1[3] += x2[3]; - x2[2] = MULT31(r0 , *T) + MULT31(r1 , *V); - x2[3] = MULT31(r1 , *T) - MULT31(r0 , *V); - T += step; - V -= step; - - r0 = x2[0] - x1[0]; - r1 = x2[1] - x1[1]; - x1[0] += x2[0]; - x1[1] += x2[1]; - x2[0] = MULT31(r0 , *T) + MULT31(r1 , *V); - x2[1] = MULT31(r1 , *T) - MULT31(r0 , *V); - T += step; - V -= step; - - x1-=8; - x2-=8; - }while(x2>=x); + r0 = x1[6] - x2[6]; x1[6] += x2[6]; + r1 = x2[7] - x1[7]; x1[7] += x2[7]; + XNPROD31( r1, r0, T[0], T[1], &x2[6], &x2[7] ); T-=step; + + r0 = x1[4] - x2[4]; x1[4] += x2[4]; + r1 = x2[5] - x1[5]; x1[5] += x2[5]; + XNPROD31( r1, r0, T[0], T[1], &x2[4], &x2[5] ); T-=step; + + r0 = x1[2] - x2[2]; x1[2] += x2[2]; + r1 = x2[3] - x1[3]; x1[3] += x2[3]; + XNPROD31( r1, r0, T[0], T[1], &x2[2], &x2[3] ); T-=step; + + r0 = x1[0] - x2[0]; x1[0] += x2[0]; + r1 = x2[1] - x1[1]; x1[1] += x2[1]; + XNPROD31( r1, r0, T[0], T[1], &x2[0], &x2[1] ); T-=step; + + x1-=8; x2-=8; + }while(T>sincos_lookup0); } STIN void mdct_butterflies(DATA_TYPE *x,int points,int shift){ @@ -286,53 +261,83 @@ STIN void mdct_bitreverse(DATA_TYPE *x,int n,int step,int shift){ int bit = 0; DATA_TYPE *w0 = x; DATA_TYPE *w1 = x = w0+(n>>1); - DATA_TYPE *T = sin_lookup-(step>>1); - DATA_TYPE *V = sin_lookup+2048+(step>>1); - REG_TYPE r2; + DATA_TYPE *T = 
(step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1; + DATA_TYPE *Ttop = T+1024; + DATA_TYPE r2; do{ - REG_TYPE r3 = bitrev12(bit++); + DATA_TYPE r3 = bitrev12(bit++); DATA_TYPE *x0 = x + ((r3 ^ 0xfff)>>shift) -1; DATA_TYPE *x1 = x + (r3>>shift); - REG_TYPE r0 = x0[1] - x1[1]; + REG_TYPE r0 = x1[1] - x0[1]; REG_TYPE r1 = x0[0] + x1[0]; - T += step; - V -= step; - r2 = MULT32(r0 , *T) - MULT32(r1 , *V); - r3 = MULT32(r1 , *T) + MULT32(r0 , *V); + + XPROD32( T[0], T[1], r0, r1, &r2, &r3 ); T+=step; w1 -= 4; - r0 = (x0[1] + x1[1])>>1; + r0 = (x0[1] + x1[1])>>1; r1 = (x0[0] - x1[0])>>1; - - w0[0] = r0 - r2; - w1[2] = r0 + r2; - w0[1] = r1 - r3; - w1[3] =-r1 - r3; + w0[0] = r0 + r2; + w0[1] = r1 + r3; + w1[2] = r0 - r2; + w1[3] = r3 - r1; r3 = bitrev12(bit++); x0 = x + ((r3 ^ 0xfff)>>shift) -1; x1 = x + (r3>>shift); - r0 = x0[1] - x1[1]; + r0 = x1[1] - x0[1]; r1 = x0[0] + x1[0]; - T += step; - V -= step; - r2 = MULT32(r0 , *T) - MULT32(r1 , *V); - r3 = MULT32(r1 , *T) + MULT32(r0 , *V); + + XPROD32( T[0], T[1], r0, r1, &r2, &r3 ); T+=step; r0 = (x0[1] + x1[1])>>1; r1 = (x0[0] - x1[0])>>1; - - w0[2] = r0 - r2; - w1[0] = r0 + r2; - w0[3] = r1 - r3; - w1[1] =-r1 - r3; + w0[2] = r0 + r2; + w0[3] = r1 + r3; + w1[0] = r0 - r2; + w1[1] = r3 - r1; w0 += 4; + }while(T<Ttop); + do{ + DATA_TYPE r3 = bitrev12(bit++); + DATA_TYPE *x0 = x + ((r3 ^ 0xfff)>>shift) -1; + DATA_TYPE *x1 = x + (r3>>shift); + + REG_TYPE r0 = x0[0] + x1[0]; + REG_TYPE r1 = x1[1] - x0[1]; + T-=step; XPROD32( r0, r1, T[0], T[1], &r2, &r3 ); + + w1 -= 4; + + r0 = (x0[1] + x1[1])>>1; + r1 = (x0[0] - x1[0])>>1; + w0[0] = r0 + r2; + w0[1] = r1 + r3; + w1[2] = r0 - r2; + w1[3] = r3 - r1; + + r3 = bitrev12(bit++); + x0 = x + ((r3 ^ 0xfff)>>shift) -1; + x1 = x + (r3>>shift); + + r0 = x0[0] + x1[0]; + r1 = x1[1] - x0[1]; + + T-=step; XPROD32( r0, r1, T[0], T[1], &r2, &r3 ); + + r0 = (x0[1] + x1[1])>>1; + r1 = (x0[0] - x1[0])>>1; + w0[2] = r0 + r2; + w0[3] = r1 + r3; + w1[0] = r0 - r2; + w1[1] = r3 - r1; + + w0 += 4; 
}while(w0<w1); } @@ -354,45 +359,36 @@ void mdct_backward(int n, DATA_TYPE *in, DATA_TYPE *out){ iX = in+n2-7; oX = out+n2+n4; - T = sin_lookup-step; - V = sin_lookup+2048+step; + T = sincos_lookup0; do{ - - oX -= 4; - - T += step; - V -= step; - oX[2] = MULT31(iX[4] , *T) + MULT31(iX[6] , *V); - oX[3] = MULT31(iX[6] , *T) - MULT31(iX[4] , *V); - T += step; - V -= step; - oX[0] = MULT31(iX[0] , *T) + MULT31(iX[2] , *V); - oX[1] = MULT31(iX[2] , *T) - MULT31(iX[0] , *V); - - iX -= 8; - + oX-=4; + XPROD31( iX[4], iX[6], T[0], T[1], &oX[2], &oX[3] ); T+=step; + XPROD31( iX[0], iX[2], T[0], T[1], &oX[0], &oX[1] ); T+=step; + iX-=8; + }while(iX>=in+n4); + do{ + oX-=4; + XPROD31( iX[4], iX[6], T[1], T[0], &oX[2], &oX[3] ); T-=step; + XPROD31( iX[0], iX[2], T[1], T[0], &oX[0], &oX[1] ); T-=step; + iX-=8; }while(iX>=in); iX = in+n2-8; oX = out+n2+n4; - T = sin_lookup; - V = sin_lookup+2048; + T = sincos_lookup0; do{ - - T += step; - V -= step; - oX[0] = MULT31(iX[6] , *T) - MULT31(iX[4] , *V); - oX[1] = MULT31(iX[4] , *T) + MULT31(iX[6] , *V); - T += step; - V -= step; - oX[2] = MULT31(iX[2] , *T) - MULT31(iX[0] , *V); - oX[3] = MULT31(iX[0] , *T) + MULT31(iX[2] , *V); - - iX -= 8; - oX += 4; - + T+=step; XNPROD31( iX[6], iX[4], T[0], T[1], &oX[0], &oX[1] ); + T+=step; XNPROD31( iX[2], iX[0], T[0], T[1], &oX[2], &oX[3] ); + iX-=8; + oX+=4; + }while(iX>=in+n4); + do{ + T-=step; XNPROD31( iX[6], iX[4], T[1], T[0], &oX[0], &oX[1] ); + T-=step; XNPROD31( iX[2], iX[0], T[1], T[0], &oX[2], &oX[3] ); + iX-=8; + oX+=4; }while(iX>=in); mdct_butterflies(out+n2,n2,shift); @@ -405,39 +401,85 @@ void mdct_backward(int n, DATA_TYPE *in, DATA_TYPE *out){ DATA_TYPE *oX1=out+n2+n4; DATA_TYPE *oX2=out+n2+n4; DATA_TYPE *iX =out; - T =sin_lookup-(step>>1); - V =sin_lookup+2048+(step>>1); - - do{ - oX1-=4; - - T += step; - V -= step; - oX1[3] = MULT31 (iX[0] , *T) - MULT31(iX[1] , *V); - oX2[0] =-(MULT31 (iX[0] , *V) + MULT31(iX[1] , *T)); - - T += step; - V -= step; - oX1[2] = MULT31 (iX[2] , 
*T) - MULT31(iX[3] , *V); - oX2[1] =-(MULT31 (iX[2] , *V) + MULT31(iX[3] , *T)); - - if(!step) T++,V--; - T += step; - V -= step; - oX1[1] = MULT31 (iX[4] , *T) - MULT31(iX[5] , *V); - oX2[2] =-(MULT31 (iX[4] , *V) + MULT31(iX[5] , *T)); - - T += step; - V -= step; - oX1[0] = MULT31 (iX[6] , *T) - MULT31(iX[7] , *V); - oX2[3] =-(MULT31 (iX[6] , *V) + MULT31(iX[7] , *T)); - - if(!step) T++,V--; - - oX2+=4; - iX += 8; - }while(iX<oX1); + switch(step) { + default: { + T=(step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1; + do{ + oX1-=4; + XPROD31( iX[0], -iX[1], T[0], T[1], &oX1[3], &oX2[0] ); T+=step; + XPROD31( iX[2], -iX[3], T[0], T[1], &oX1[2], &oX2[1] ); T+=step; + XPROD31( iX[4], -iX[5], T[0], T[1], &oX1[1], &oX2[2] ); T+=step; + XPROD31( iX[6], -iX[7], T[0], T[1], &oX1[0], &oX2[3] ); T+=step; + oX2+=4; + iX+=8; + }while(iX<oX1); + break; + } + + case 1: { + /* linear interpolation between table values: offset=0.5, step=1 */ + REG_TYPE t0,t1,v0,v1; + T = sincos_lookup0; + V = sincos_lookup1; + t0 = (*T++)>>1; + t1 = (*T++)>>1; + do{ + oX1-=4; + + t0 += (v0 = (*V++)>>1); + t1 += (v1 = (*V++)>>1); + XPROD31( iX[0], -iX[1], t0, t1, &oX1[3], &oX2[0] ); T+=step; + v0 += (t0 = (*T++)>>1); + v1 += (t1 = (*T++)>>1); + XPROD31( iX[2], -iX[3], v0, v1, &oX1[2], &oX2[1] ); T+=step; + t0 += (v0 = (*V++)>>1); + t1 += (v1 = (*V++)>>1); + XPROD31( iX[4], -iX[5], t0, t1, &oX1[1], &oX2[2] ); T+=step; + v0 += (t0 = (*T++)>>1); + v1 += (t1 = (*T++)>>1); + XPROD31( iX[6], -iX[7], v0, v1, &oX1[0], &oX2[3] ); T+=step; + + oX2+=4; + iX+=8; + }while(iX<oX1); + break; + } + + case 0: { + /* linear interpolation between table values: offset=0.25, step=0.5 */ + REG_TYPE t0,t1,v0,v1,q0,q1; + T = sincos_lookup0; + V = sincos_lookup1; + t0 = *T++; + t1 = *T++; + do{ + oX1-=4; + + v0 = *V++; + v1 = *V++; + t0 += (q0 = (v0-t0)>>2); + t1 += (q1 = (v1-t1)>>2); + XPROD31( iX[0], -iX[1], t0, t1, &oX1[3], &oX2[0] ); T+=step; + t0 = v0-q0; + t1 = v1-q1; + XPROD31( iX[2], -iX[3], t0, t1, &oX1[2], 
&oX2[1] ); T+=step; + + t0 = *T++; + t1 = *T++; + v0 += (q0 = (t0-v0)>>2); + v1 += (q1 = (t1-v1)>>2); + XPROD31( iX[4], -iX[5], v0, v1, &oX1[1], &oX2[2] ); T+=step; + v0 = t0-q0; + v1 = t1-q1; + XPROD31( iX[6], -iX[7], v0, v1, &oX1[0], &oX2[3] ); T+=step; + + oX2+=4; + iX+=8; + }while(iX<oX1); + break; + } + } iX=out+n2+n4; oX1=out+n4; |