diff options
author | Jack Moffitt <jack@xiph.org> | 2000-10-19 21:56:41 +0000 |
---|---|---|
committer | Jack Moffitt <jack@xiph.org> | 2000-10-19 21:56:41 +0000 |
commit | 176cd40cc4004507c7a8f0264dc4244588275e7a (patch) | |
tree | 526f06adbf3670f0b3bea0211b77767c5cfed362 | |
parent | b074c6e416dcabef56ebbcce6ae4e5e1c12af088 (diff) | |
download | libvorbis-git-176cd40cc4004507c7a8f0264dc4244588275e7a.tar.gz |
merged yesterdays optimizations with yesterday's other optimizations :)
also commited Chris Hanson's fix :)
svn path=/branches/branch_beta3/vorbis/; revision=739
-rw-r--r-- | lib/lsp.c | 16 | ||||
-rw-r--r-- | lib/mdct.c | 365 | ||||
-rw-r--r-- | lib/os.h | 4 |
3 files changed, 378 insertions, 7 deletions
@@ -12,7 +12,7 @@ ******************************************************************** function: LSP (also called LSF) conversion routines - last mod: $Id: lsp.c,v 1.10.2.1 2000/10/19 10:21:02 xiphmont Exp $ + last mod: $Id: lsp.c,v 1.10.2.2 2000/10/19 21:56:40 jack Exp $ The LSP generation code is taken (with minimal modification) from "On the Computation of the LSP Frequencies" by Joseph Rothweiler @@ -77,9 +77,14 @@ void vorbis_lsp_to_curve(float *curve,int *map,int n,int ln,float *lsp,int m, float p=.7071067812; float q=.7071067812; float w=vorbis_coslook(wdel*k); + float *ftmp=lsp; + int c=m>>1; - for(j=0;j<m;j+=2) p *= lsp[j]-w; - for(j=1;j<m;j+=2) q *= lsp[j]-w; + do{ + p*=ftmp[0]-w; + q*=ftmp[1]-w; + ftmp+=2; + }while(--c); q=frexp(p*p*(1.+w)+q*q*(1.-w),&qexp); q=vorbis_fromdBlook(amp* @@ -87,8 +92,9 @@ void vorbis_lsp_to_curve(float *curve,int *map,int n,int ln,float *lsp,int m, vorbis_invsq2explook(qexp+m)- ampoffset); - curve[i++]=q; - while(map[i]==k)curve[i++]=q; + do{ + curve[i++]=q; + }while(map[i]==k); } vorbis_fpu_restore(fpu); } diff --git a/lib/mdct.c b/lib/mdct.c new file mode 100644 index 00000000..34eda1e7 --- /dev/null +++ b/lib/mdct.c @@ -0,0 +1,365 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE Ogg Vorbis SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY * + * THE GNU PUBLIC LICENSE 2, WHICH IS INCLUDED WITH THIS SOURCE. * + * PLEASE READ THESE TERMS DISTRIBUTING. * + * * + * THE OggSQUISH SOURCE CODE IS (C) COPYRIGHT 1994-2000 * + * by Monty <monty@xiph.org> and The XIPHOPHORUS Company * + * http://www.xiph.org/ * + * * + ******************************************************************** + + function: normalized modified discrete cosine transform + power of two length transform only [16 <= n ] + last mod: $Id: mdct.c,v 1.17.2.1 2000/10/19 21:56:40 jack Exp $ + + Algorithm adapted from _The use of multirate filter banks for coding + of high quality digital audio_, by T. Sporer, K. Brandenburg and + B. Edler, collection of the European Signal Processing Conference + (EUSIPCO), Amsterdam, June 1992, Vol.1, pp 211-214 + + Note that the below code won't make much sense without the paper; + The presented algorithm was already fairly polished, and the code + once followed it closely. The current code both corrects several + typos in the paper and goes beyond the presented optimizations + (steps 4 through 6 are, for example, entirely eliminated). + + This module DOES NOT INCLUDE code to generate the window function. + Everybody has their own weird favorite including me... I happen to + like the properties of y=sin(2PI*sin^2(x)), but others may vehemently + disagree. + + ********************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> +#include "mdct.h" +#include "os.h" +#include "misc.h" + +/* build lookups for trig functions; also pre-figure scaling and + some window function algebra. */ + +void mdct_init(mdct_lookup *lookup,int n){ + int *bitrev=malloc(sizeof(int)*(n/4)); + float *trig=malloc(sizeof(float)*(n+n/4)); + float *AE=trig; + float *AO=trig+1; + float *BE=AE+n/2; + float *BO=BE+1; + float *CE=BE+n/2; + float *CO=CE+1; + + int i; + int log2n=lookup->log2n=rint(log(n)/log(2)); + lookup->n=n; + lookup->trig=trig; + lookup->bitrev=bitrev; + + /* trig lookups... */ + + for(i=0;i<n/4;i++){ + AE[i*2]=cos((M_PI/n)*(4*i)); + AO[i*2]=-sin((M_PI/n)*(4*i)); + BE[i*2]=cos((M_PI/(2*n))*(2*i+1)); + BO[i*2]=sin((M_PI/(2*n))*(2*i+1)); + } + for(i=0;i<n/8;i++){ + CE[i*2]=cos((M_PI/n)*(4*i+2)); + CO[i*2]=-sin((M_PI/n)*(4*i+2)); + } + + /* bitreverse lookup... */ + + { + int mask=(1<<(log2n-1))-1,i,j; + int msb=1<<(log2n-2); + for(i=0;i<n/8;i++){ + int acc=0; + for(j=0;msb>>j;j++) + if((msb>>j)&i)acc|=1<<j; + bitrev[i*2]=((~acc)&mask); + bitrev[i*2+1]=acc; + } + } +} + +void mdct_clear(mdct_lookup *l){ + if(l){ + if(l->trig)free(l->trig); + if(l->bitrev)free(l->bitrev); + memset(l,0,sizeof(mdct_lookup)); + } +} + +static float *_mdct_kernel(float *x, float *w, + int n, int n2, int n4, int n8, + mdct_lookup *init){ + int i; + /* step 2 */ + + { + float *xA=x+n4; + float *xB=x; + float *w2=w+n4; + float *A=init->trig+n2; + + float x0,x1; + i=0; + do{ + x0=*xA - *xB; + w2[i]= *xA++ + *xB++; + x1= *xA - *xB; + A-=4; + w[i++]= x0 * A[0] + x1 * A[1]; + w[i]= x1 * A[0] - x0 * A[1]; + w2[i++]= *xA++ + *xB++; + }while(i<n4); + } + + /* step 3 */ + + { + int r,s; + for(i=0;i<init->log2n-3;i++){ + int k0=n>>(i+2); + int k1=1<<(i+3); + int wbase=n2-2; + float *A=init->trig; + float *temp; + + for(r=0;r<(k0>>2);r++){ + int w1=wbase; + int w2=w1-(k0>>1); + float AEv= A[0],wA; + float AOv= A[1],wB; + int blah=1; + wbase-=2; + + k0++; + blah--; + if(blah>0){ + s=2<<blah; + s>>=1; + do{ + wB =w[w1] -w[w2]; + x[w1] =w[w1] +w[w2]; + wA =w[++w1] -w[++w2]; + x[w1] =w[w1] +w[w2]; + x[w2] =wA*AEv - wB*AOv; + x[w2-1]=wB*AEv + wA*AOv; + w1-=k0; + w2-=k0; + wB =w[w1] -w[w2]; + x[w1] =w[w1] +w[w2]; + wA =w[++w1] -w[++w2]; + x[w1] =w[w1] +w[w2]; + x[w2] =wA*AEv - wB*AOv; + x[w2-1]=wB*AEv + wA*AOv; + w1-=k0; + w2-=k0; + wB =w[w1] -w[w2]; + x[w1] =w[w1] +w[w2]; + wA =w[++w1] -w[++w2]; + x[w1] =w[w1] +w[w2]; + x[w2] =wA*AEv - wB*AOv; + x[w2-1]=wB*AEv + wA*AOv; + w1-=k0; + w2-=k0; + wB =w[w1] -w[w2]; + x[w1] =w[w1] +w[w2]; + wA =w[++w1] -w[++w2]; + x[w1] =w[w1] +w[w2]; + x[w2] =wA*AEv - wB*AOv; + x[w2-1]=wB*AEv + wA*AOv; + w1-=k0; + w2-=k0; + }while(--s); + }else{ + s=2<<i; + do{ + wB =w[w1] -w[w2]; + x[w1] =w[w1] +w[w2]; + wA =w[++w1] -w[++w2]; + x[w1] =w[w1] +w[w2]; + x[w2] =wA*AEv - wB*AOv; + x[w2-1]=wB*AEv + wA*AOv; + w1-=k0; + w2-=k0; + }while(--s); + } + k0--; + + A+=k1; + } + + temp=w; + w=x; + x=temp; + } + } + + /* step 4, 5, 6, 7 */ + { + float *C=init->trig+n; + int *bit=init->bitrev; + float *x1=x; + float *x2=x+n2-1; + i=n8-1; + do{ + int t1=*bit++; + int t2=*bit++; + + float wA=w[t1]-w[t2+1]; + float wB=w[t1-1]+w[t2]; + float wC=w[t1]+w[t2+1]; + float wD=w[t1-1]-w[t2]; + + float wACE=wA* *C; + float wBCE=wB* *C++; + float wACO=wA* *C; + float wBCO=wB* *C++; + + *x1++=( wC+wACO+wBCE)*.5; + *x2--=(-wD+wBCO-wACE)*.5; + *x1++=( wD+wBCO-wACE)*.5; + *x2--=( wC-wACO-wBCE)*.5; + }while(i--); + } + return(x); +} + +void mdct_forward(mdct_lookup *init, float *in, float *out){ + int n=init->n; + float *x=alloca(sizeof(float)*(n/2)); + float *w=alloca(sizeof(float)*(n/2)); + float *xx; + int n2=n>>1; + int n4=n>>2; + int n8=n>>3; + int i; + + /* window + rotate + step 1 */ + { + float tempA,tempB; + int in1=n2+n4-4; + int in2=in1+5; + float *A=init->trig+n2; + + i=0; + + for(i=0;i<n8;i+=2){ + A-=2; + tempA= in[in1+2] + in[in2]; + tempB= in[in1] + in[in2+2]; + in1 -=4;in2 +=4; + x[i]= tempB*A[1] + tempA*A[0]; + x[i+1]= tempB*A[0] - tempA*A[1]; + } + + in2=1; + + for(;i<n2-n8;i+=2){ + A-=2; + tempA= in[in1+2] - in[in2]; + tempB= in[in1] - in[in2+2]; + in1 -=4;in2 +=4; + x[i]= tempB*A[1] + tempA*A[0]; + x[i+1]= tempB*A[0] - tempA*A[1]; + } + + in1=n-4; + + for(;i<n2;i+=2){ + A-=2; + tempA= -in[in1+2] - in[in2]; + tempB= -in[in1] - in[in2+2]; + in1 -=4;in2 +=4; + x[i]= tempB*A[1] + tempA*A[0]; + x[i+1]= tempB*A[0] - tempA*A[1]; + } + } + + xx=_mdct_kernel(x,w,n,n2,n4,n8,init); + + /* step 8 */ + + { + float *B=init->trig+n2; + float *out2=out+n2; + float scale=4./n; + for(i=0;i<n4;i++){ + out[i] =(xx[0]*B[0]+xx[1]*B[1])*scale; + *(--out2)=(xx[0]*B[1]-xx[1]*B[0])*scale; + + xx+=2; + B+=2; + } + } +} + +void mdct_backward(mdct_lookup *init, float *in, float *out){ + int n=init->n; + float *x=alloca(sizeof(float)*(n/2)); + float *w=alloca(sizeof(float)*(n/2)); + float *xx; + int n2=n>>1; + int n4=n>>2; + int n8=n>>3; + int i; + + /* rotate + step 1 */ + { + float *inO=in+1; + float *xO= x; + float *A=init->trig+n2; + + for(i=0;i<n8;i++){ + A-=2; + *xO++=-*(inO+2)*A[1] - *inO*A[0]; + *xO++= *inO*A[1] - *(inO+2)*A[0]; + inO+=4; + } + + inO=in+n2-4; + + for(i=0;i<n8;i++){ + A-=2; + *xO++=*inO*A[1] + *(inO+2)*A[0]; + *xO++=*inO*A[0] - *(inO+2)*A[1]; + inO-=4; + } + + } + + xx=_mdct_kernel(x,w,n,n2,n4,n8,init); + + /* step 8 */ + + { + float *B=init->trig+n2; + int o1=n4,o2=o1-1; + int o3=n4+n2,o4=o3-1; + + for(i=0;i<n4;i++){ + float temp1= (*xx * B[1] - *(xx+1) * B[0]); + float temp2=-(*xx * B[0] + *(xx+1) * B[1]); + + out[o1]=-temp1; + out[o2]= temp1; + out[o3]= temp2; + out[o4]= temp2; + + o1++; + o2--; + o3++; + o4--; + xx+=2; + B+=2; + } + } +} @@ -14,7 +14,7 @@ ******************************************************************** function: #ifdef jail to whip a few platforms into the UNIX ideal. - last mod: $Id: os.h,v 1.10.2.1 2000/10/19 10:21:02 xiphmont Exp $ + last mod: $Id: os.h,v 1.10.2.2 2000/10/19 21:56:41 jack Exp $ ********************************************************************/ @@ -102,7 +102,7 @@ static int vorbis_ftoi(double f){ } -typedef vorbis_fpu_control int; +typedef int vorbis_fpu_control; static inline void vorbis_fpu_setround(vorbis_fpu_control *fpu){ } |