merged yesterdays optimizations with yesterday's other optimizations :)

also commited Chris Hanson's fix :) svn path=/branches/branch_beta3/vorbis/; revision=739
author: Jack Moffitt <jack@xiph.org> 2000-10-19 21:56:41 +0000
committer: Jack Moffitt <jack@xiph.org> 2000-10-19 21:56:41 +0000
commit: 176cd40cc4004507c7a8f0264dc4244588275e7a (patch)
tree: 526f06adbf3670f0b3bea0211b77767c5cfed362
parent: b074c6e416dcabef56ebbcce6ae4e5e1c12af088 (diff)
download: libvorbis-git-176cd40cc4004507c7a8f0264dc4244588275e7a.tar.gz
3 files changed, 378 insertions, 7 deletions
diff --git a/lib/lsp.c b/lib/lsp.c
index d78be58e..6e474900 100644
--- a/lib/lsp.c
+++ b/lib/lsp.c
@@ -12,7 +12,7 @@
  ********************************************************************
 
   function: LSP (also called LSF) conversion routines
-  last mod: $Id: lsp.c,v 1.10.2.1 2000/10/19 10:21:02 xiphmont Exp $
+  last mod: $Id: lsp.c,v 1.10.2.2 2000/10/19 21:56:40 jack Exp $
 
   The LSP generation code is taken (with minimal modification) from
   "On the Computation of the LSP Frequencies" by Joseph Rothweiler
@@ -77,9 +77,14 @@ void vorbis_lsp_to_curve(float *curve,int *map,int n,int ln,float *lsp,int m,
     float p=.7071067812;
     float q=.7071067812;
     float w=vorbis_coslook(wdel*k);
+    float *ftmp=lsp;
+    int c=m>>1;
 
-    for(j=0;j<m;j+=2)    p *= lsp[j]-w;
-    for(j=1;j<m;j+=2)    q *= lsp[j]-w;
+    do{
+      p*=ftmp[0]-w;
+      q*=ftmp[1]-w;
+      ftmp+=2;
+    }while(--c);
 
     q=frexp(p*p*(1.+w)+q*q*(1.-w),&qexp);
     q=vorbis_fromdBlook(amp*             
@@ -87,8 +92,9 @@ void vorbis_lsp_to_curve(float *curve,int *map,int n,int ln,float *lsp,int m,
 			vorbis_invsq2explook(qexp+m)- 
 			ampoffset);
 
-    curve[i++]=q;
-    while(map[i]==k)curve[i++]=q;
+    do{
+      curve[i++]=q;
+    }while(map[i]==k);
   }
   vorbis_fpu_restore(fpu);
 }
diff --git a/lib/mdct.c b/lib/mdct.c
new file mode 100644
index 00000000..34eda1e7
--- /dev/null
+++ b/lib/mdct.c
@@ -0,0 +1,365 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE Ogg Vorbis SOFTWARE CODEC SOURCE CODE.  *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY *
+ * THE GNU PUBLIC LICENSE 2, WHICH IS INCLUDED WITH THIS SOURCE.    *
+ * PLEASE READ THESE TERMS DISTRIBUTING.                            *
+ *                                                                  *
+ * THE OggSQUISH SOURCE CODE IS (C) COPYRIGHT 1994-2000             *
+ * by Monty <monty@xiph.org> and The XIPHOPHORUS Company            *
+ * http://www.xiph.org/                                             *
+ *                                                                  *
+ ********************************************************************
+
+ function: normalized modified discrete cosine transform
+           power of two length transform only [16 <= n ]
+ last mod: $Id: mdct.c,v 1.17.2.1 2000/10/19 21:56:40 jack Exp $
+
+ Algorithm adapted from _The use of multirate filter banks for coding
+ of high quality digital audio_, by T. Sporer, K. Brandenburg and
+ B. Edler, collection of the European Signal Processing Conference
+ (EUSIPCO), Amsterdam, June 1992, Vol.1, pp 211-214 
+
+ Note that the below code won't make much sense without the paper;
+ The presented algorithm was already fairly polished, and the code
+ once followed it closely.  The current code both corrects several
+ typos in the paper and goes beyond the presented optimizations 
+ (steps 4 through 6 are, for example, entirely eliminated).
+
+ This module DOES NOT INCLUDE code to generate the window function.
+ Everybody has their own weird favorite including me... I happen to
+ like the properties of y=sin(2PI*sin^2(x)), but others may vehemently
+ disagree.
+
+ ********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "mdct.h"
+#include "os.h"
+#include "misc.h"
+
+/* build lookups for trig functions; also pre-figure scaling and
+   some window function algebra. */
+
+void mdct_init(mdct_lookup *lookup,int n){
+  int    *bitrev=malloc(sizeof(int)*(n/4));
+  float *trig=malloc(sizeof(float)*(n+n/4));
+  float *AE=trig;
+  float *AO=trig+1;
+  float *BE=AE+n/2;
+  float *BO=BE+1;
+  float *CE=BE+n/2;
+  float *CO=CE+1;
+  
+  int i;
+  int log2n=lookup->log2n=rint(log(n)/log(2));
+  lookup->n=n;
+  lookup->trig=trig;
+  lookup->bitrev=bitrev;
+
+  /* trig lookups... */
+
+  for(i=0;i<n/4;i++){
+    AE[i*2]=cos((M_PI/n)*(4*i));
+    AO[i*2]=-sin((M_PI/n)*(4*i));
+    BE[i*2]=cos((M_PI/(2*n))*(2*i+1));
+    BO[i*2]=sin((M_PI/(2*n))*(2*i+1));
+  }
+  for(i=0;i<n/8;i++){
+    CE[i*2]=cos((M_PI/n)*(4*i+2));
+    CO[i*2]=-sin((M_PI/n)*(4*i+2));
+  }
+
+  /* bitreverse lookup... */
+
+  {
+    int mask=(1<<(log2n-1))-1,i,j;
+    int msb=1<<(log2n-2);
+    for(i=0;i<n/8;i++){
+      int acc=0;
+      for(j=0;msb>>j;j++)
+	if((msb>>j)&i)acc|=1<<j;
+      bitrev[i*2]=((~acc)&mask);
+      bitrev[i*2+1]=acc;
+    }
+  }
+}
+
+void mdct_clear(mdct_lookup *l){
+  if(l){
+    if(l->trig)free(l->trig);
+    if(l->bitrev)free(l->bitrev);
+    memset(l,0,sizeof(mdct_lookup));
+  }
+}
+
+static float *_mdct_kernel(float *x, float *w,
+			    int n, int n2, int n4, int n8,
+			    mdct_lookup *init){
+  int i;
+  /* step 2 */
+
+  {
+    float *xA=x+n4;
+    float *xB=x;
+    float *w2=w+n4;
+    float *A=init->trig+n2;
+
+    float x0,x1;
+    i=0;
+    do{
+      x0=*xA - *xB;
+      w2[i]=    *xA++ + *xB++;
+      x1=       *xA - *xB;
+      A-=4;
+      w[i++]=   x0 * A[0] + x1 * A[1];
+      w[i]=     x1 * A[0] - x0 * A[1];
+      w2[i++]=  *xA++ + *xB++;
+    }while(i<n4);
+  }
+
+  /* step 3 */
+
+  {
+    int r,s;
+    for(i=0;i<init->log2n-3;i++){
+      int k0=n>>(i+2);
+      int k1=1<<(i+3);
+      int wbase=n2-2;
+      float *A=init->trig;
+      float *temp;
+
+      for(r=0;r<(k0>>2);r++){
+        int w1=wbase;
+	int w2=w1-(k0>>1);
+	float AEv= A[0],wA;
+	float AOv= A[1],wB;
+        int blah=1;
+	wbase-=2;
+
+	k0++;
+        blah--;
+        if(blah>0){
+          s=2<<blah;
+          s>>=1;
+          do{
+	  wB     =w[w1]   -w[w2];
+	  x[w1]  =w[w1]   +w[w2];
+	  wA     =w[++w1] -w[++w2];
+	  x[w1]  =w[w1]   +w[w2];
+	  x[w2]  =wA*AEv  - wB*AOv;
+	  x[w2-1]=wB*AEv  + wA*AOv;
+	  w1-=k0;
+	  w2-=k0;
+	  wB     =w[w1]   -w[w2];
+	  x[w1]  =w[w1]   +w[w2];
+	  wA     =w[++w1] -w[++w2];
+	  x[w1]  =w[w1]   +w[w2];
+	  x[w2]  =wA*AEv  - wB*AOv;
+	  x[w2-1]=wB*AEv  + wA*AOv;
+	  w1-=k0;
+	  w2-=k0;
+	  wB     =w[w1]   -w[w2];
+	  x[w1]  =w[w1]   +w[w2];
+	  wA     =w[++w1] -w[++w2];
+	  x[w1]  =w[w1]   +w[w2];
+	  x[w2]  =wA*AEv  - wB*AOv;
+	  x[w2-1]=wB*AEv  + wA*AOv;
+	  w1-=k0;
+	  w2-=k0;
+	  wB     =w[w1]   -w[w2];
+	  x[w1]  =w[w1]   +w[w2];
+	  wA     =w[++w1] -w[++w2];
+	  x[w1]  =w[w1]   +w[w2];
+	  x[w2]  =wA*AEv  - wB*AOv;
+	  x[w2-1]=wB*AEv  + wA*AOv;
+	  w1-=k0;
+	  w2-=k0;
+          }while(--s);
+        }else{
+          s=2<<i;
+          do{
+          wB     =w[w1]   -w[w2];
+          x[w1]  =w[w1]   +w[w2];
+          wA     =w[++w1] -w[++w2];
+          x[w1]  =w[w1]   +w[w2];
+          x[w2]  =wA*AEv  - wB*AOv;
+          x[w2-1]=wB*AEv  + wA*AOv;
+          w1-=k0;
+          w2-=k0;
+          }while(--s);
+	}
+	k0--;
+
+	A+=k1;
+      }
+
+      temp=w;
+      w=x;
+      x=temp;
+    }
+  }
+
+  /* step 4, 5, 6, 7 */
+  {
+    float *C=init->trig+n;
+    int *bit=init->bitrev;
+    float *x1=x;
+    float *x2=x+n2-1;
+    i=n8-1;
+    do{
+      int t1=*bit++;
+      int t2=*bit++;
+
+      float wA=w[t1]-w[t2+1];
+      float wB=w[t1-1]+w[t2];
+      float wC=w[t1]+w[t2+1];
+      float wD=w[t1-1]-w[t2];
+
+      float wACE=wA* *C;
+      float wBCE=wB* *C++;
+      float wACO=wA* *C;
+      float wBCO=wB* *C++;
+      
+      *x1++=( wC+wACO+wBCE)*.5;
+      *x2--=(-wD+wBCO-wACE)*.5;
+      *x1++=( wD+wBCO-wACE)*.5; 
+      *x2--=( wC-wACO-wBCE)*.5;
+    }while(i--);
+  }
+  return(x);
+}
+
+void mdct_forward(mdct_lookup *init, float *in, float *out){
+  int n=init->n;
+  float *x=alloca(sizeof(float)*(n/2));
+  float *w=alloca(sizeof(float)*(n/2));
+  float *xx;
+  int n2=n>>1;
+  int n4=n>>2;
+  int n8=n>>3;
+  int i;
+
+  /* window + rotate + step 1 */
+  {
+    float tempA,tempB;
+    int in1=n2+n4-4;
+    int in2=in1+5;
+    float *A=init->trig+n2;
+
+    i=0;
+    
+    for(i=0;i<n8;i+=2){
+      A-=2;
+      tempA= in[in1+2] + in[in2];
+      tempB= in[in1] + in[in2+2];       
+      in1 -=4;in2 +=4;
+      x[i]=   tempB*A[1] + tempA*A[0];
+      x[i+1]= tempB*A[0] - tempA*A[1];
+    }
+
+    in2=1;
+
+    for(;i<n2-n8;i+=2){
+      A-=2;
+      tempA= in[in1+2] - in[in2];
+      tempB= in[in1] - in[in2+2];       
+      in1 -=4;in2 +=4;
+      x[i]=   tempB*A[1] + tempA*A[0];
+      x[i+1]= tempB*A[0] - tempA*A[1];
+    }
+    
+    in1=n-4;
+
+    for(;i<n2;i+=2){
+      A-=2;
+      tempA= -in[in1+2] - in[in2];
+      tempB= -in[in1] - in[in2+2];       
+      in1 -=4;in2 +=4;
+      x[i]=   tempB*A[1] + tempA*A[0];
+      x[i+1]= tempB*A[0] - tempA*A[1];
+    }
+  }
+
+  xx=_mdct_kernel(x,w,n,n2,n4,n8,init);
+
+  /* step 8 */
+
+  {
+    float *B=init->trig+n2;
+    float *out2=out+n2;
+    float scale=4./n;
+    for(i=0;i<n4;i++){
+      out[i]   =(xx[0]*B[0]+xx[1]*B[1])*scale;
+      *(--out2)=(xx[0]*B[1]-xx[1]*B[0])*scale;
+
+      xx+=2;
+      B+=2;
+    }
+  }
+}
+
+void mdct_backward(mdct_lookup *init, float *in, float *out){
+  int n=init->n;
+  float *x=alloca(sizeof(float)*(n/2));
+  float *w=alloca(sizeof(float)*(n/2));
+  float *xx;
+  int n2=n>>1;
+  int n4=n>>2;
+  int n8=n>>3;
+  int i;
+
+  /* rotate + step 1 */
+  {
+    float *inO=in+1;
+    float  *xO= x;
+    float  *A=init->trig+n2;
+
+    for(i=0;i<n8;i++){
+      A-=2;
+      *xO++=-*(inO+2)*A[1] - *inO*A[0];
+      *xO++= *inO*A[1] - *(inO+2)*A[0];
+      inO+=4;
+    }
+
+    inO=in+n2-4;
+
+    for(i=0;i<n8;i++){
+      A-=2;
+      *xO++=*inO*A[1] + *(inO+2)*A[0];
+      *xO++=*inO*A[0] - *(inO+2)*A[1];
+      inO-=4;
+    }
+
+  }
+
+  xx=_mdct_kernel(x,w,n,n2,n4,n8,init);
+
+  /* step 8 */
+
+  {
+    float *B=init->trig+n2;
+    int o1=n4,o2=o1-1;
+    int o3=n4+n2,o4=o3-1;
+    
+    for(i=0;i<n4;i++){
+      float temp1= (*xx * B[1] - *(xx+1) * B[0]);
+      float temp2=-(*xx * B[0] + *(xx+1) * B[1]);
+    
+      out[o1]=-temp1;
+      out[o2]= temp1;
+      out[o3]= temp2;
+      out[o4]= temp2;
+
+      o1++;
+      o2--;
+      o3++;
+      o4--;
+      xx+=2;
+      B+=2;
+    }
+  }
+}
diff --git a/lib/os.h b/lib/os.h
index 1b3cf6a0..c304d889 100644
--- a/lib/os.h
+++ b/lib/os.h
@@ -14,7 +14,7 @@
  ********************************************************************
 
  function: #ifdef jail to whip a few platforms into the UNIX ideal.
- last mod: $Id: os.h,v 1.10.2.1 2000/10/19 10:21:02 xiphmont Exp $
+ last mod: $Id: os.h,v 1.10.2.2 2000/10/19 21:56:41 jack Exp $
 
  ********************************************************************/
 
@@ -102,7 +102,7 @@ static int vorbis_ftoi(double f){
 }
 
 
-typedef vorbis_fpu_control int;
+typedef int vorbis_fpu_control;
 
 static inline void vorbis_fpu_setround(vorbis_fpu_control *fpu){
 }
author	Jack Moffitt <jack@xiph.org>	2000-10-19 21:56:41 +0000
committer	Jack Moffitt <jack@xiph.org>	2000-10-19 21:56:41 +0000
commit	176cd40cc4004507c7a8f0264dc4244588275e7a (patch)
tree	526f06adbf3670f0b3bea0211b77767c5cfed362
parent	b074c6e416dcabef56ebbcce6ae4e5e1c12af088 (diff)
download	libvorbis-git-176cd40cc4004507c7a8f0264dc4244588275e7a.tar.gz