The psychoacoustics should be ready. Added noise/tone masking, which

still needs some work (peaks should not be counted so that we can use true RMS0 but it functions if not optimally. Still a big benefit. Monty svn path=/branches/new_acoustics_pending_merge_20000328/vorbis/; revision=357
author: Monty <xiphmont@xiph.org> 2000-05-04 06:13:28 +0000
committer: Monty <xiphmont@xiph.org> 2000-05-04 06:13:28 +0000
commit: f258221ec2e0b127c91673516485878a2a2e5d7a (patch)
tree: 560a06990e8c3e154c5fb7b677d532ec534398b3
parent: 75b30f91b772e5b0af627e9015b302c615b74da4 (diff)
download: libvorbis-git-f258221ec2e0b127c91673516485878a2a2e5d7a.tar.gz
5 files changed, 353 insertions, 288 deletions
diff --git a/lib/mapping0.c b/lib/mapping0.c
index 381ed62b..0dce7ed3 100644
--- a/lib/mapping0.c
+++ b/lib/mapping0.c
@@ -12,7 +12,7 @@
  ********************************************************************
 
  function: channel mapping 0 implementation
- last mod: $Id: mapping0.c,v 1.11.2.2.2.5 2000/04/21 16:35:39 xiphmont Exp $
+ last mod: $Id: mapping0.c,v 1.11.2.2.2.6 2000/05/04 06:13:28 xiphmont Exp $
 
  ********************************************************************/
 
@@ -232,7 +232,6 @@ static int forward(vorbis_block *vb,vorbis_look_mapping *l){
 
   {
     double *floor=_vorbis_block_alloc(vb,n*sizeof(double)/2);
-    double *mask=_vorbis_block_alloc(vb,n*sizeof(double)/2);
     
     for(i=0;i<vi->channels;i++){
       double *pcm=vb->pcm[i];
@@ -245,10 +244,9 @@ static int forward(vorbis_block *vb,vorbis_look_mapping *l){
 	memset(decay,0,n*sizeof(double)/2);
 
       /* perform psychoacoustics; do masking */
-      _vp_tone_tone_mask(look->psy_look+submap,pcm,floor,mask,decay);
+      _vp_compute_mask(look->psy_look+submap,pcm,floor,decay);
  
       _analysis_output("mdct",vb->sequence,pcm,n/2,0,1);
-      _analysis_output("mask",vb->sequence,mask,n/2,0,1);
 
       /* perform floor encoding */
       nonzero[i]=look->floor_func[submap]->
@@ -257,7 +255,7 @@ static int forward(vorbis_block *vb,vorbis_look_mapping *l){
       _analysis_output("floor",vb->sequence,floor,n/2,0,1);
 
       /* apply the floor, do optional noise levelling */
-      _vp_apply_floor(look->psy_look+submap,pcm,floor,mask);
+      _vp_apply_floor(look->psy_look+submap,pcm,floor);
 
       _analysis_output("res",vb->sequence,pcm,n/2,0,0);
 
diff --git a/lib/masking.h b/lib/masking.h
index 08271a5a..11f2e3e9 100644
--- a/lib/masking.h
+++ b/lib/masking.h
@@ -12,7 +12,7 @@
  ********************************************************************
 
  function: masking curve data for psychoacoustics
- last mod: $Id: masking.h,v 1.1.2.2.2.1 2000/05/02 00:28:58 xiphmont Exp $
+ last mod: $Id: masking.h,v 1.1.2.2.2.2 2000/05/04 06:13:28 xiphmont Exp $
 
  ********************************************************************/
 
@@ -42,18 +42,18 @@ double tone_250_40dB_SL[EHMER_MAX]={
  -10, -13, -16, -19, -21, -24, -28, -32,  -900,-900,-900,-900,-900,-900,-900,-900,
 -900,-900,-900,-900,-900,-900,-900,-900};
 double tone_250_60dB_SL[EHMER_MAX]={
--900,-900,-900,-900,-900,-900,-900,-900,   -5,  1,  7, 13, 19, 25, 30, 33,  
+-900,-900,-900,-900,-900,-900,-900, -10,   -5,  1,  7, 13, 19, 25, 30, 33,  
   36,  39,  38,  37,  38,  39,  39,  40,   38, 36, 35, 34, 33, 31, 29, 28,
   28,  28,  25,  20,  14,  10,   5,   0,   -5,-10,-15,-20,-25,-30,-35,-40,  
 -900,-900,-900,-900,-900,-900,-900,-900};
 double tone_250_80dB_SL[EHMER_MAX]={
--900,-900,-900,-900,-900,-900,-900,-900,   10, 17, 24, 30, 37, 41, 48, 49,  
+-900,-900,-900,-900,-900,-900,-900, -10,   10, 17, 24, 30, 37, 41, 48, 49,  
   50,  53,  54,  53,  53,  54,  55,  57,   57, 57, 58, 59, 60, 58, 57, 58,
   59,  58,  57,  54,  52,  50,  49,  47,   46, 47, 46, 44, 43, 42, 41, 40,  
   38,  32,  27,  22,  17,  11,   6,   0};
 
 double tone_500_40dB_SL[EHMER_MAX]={
--900,-900,-900,-900,-900,-900,-900,-900,   -26, -20, -14,  -8,  -2,   4,  10,  17,
+-900,-900,-900,-900,-900,-900,-900, -10,   -26, -20, -14,  -8,  -2,   4,  10,  17,
   23,  16,  12,   9,   6,   3,   0,  -3,    -7, -10, -13, -16, -20, -23, -26, -30,
 -900,-900,-900,-900,-900,-900,-900,-900,  -900,-900,-900,-900,-900,-900,-900,-900,
 -900,-900,-900,-900,-900,-900,-900,-900};
@@ -68,7 +68,7 @@ double tone_500_80dB_SL[EHMER_MAX]={
   38,  34,  32,  29,  29,  28,  25,  23,   20, 16, 10,  7,  4,  2, -1, -4,  
   -7, -10, -15, -20, -25, -30, -35, -40};
 double tone_500_100dB_SL[EHMER_MAX]={
--900,-900,-900,-900,-900,-900,-900,-900,   -7,  2, 10, 19, 27, 35, 55, 56,  
+-900,-900,-900,-900,-900,-900,-900, -10,   -7,  2, 10, 19, 27, 35, 55, 56,  
   62,  61,  60,  58,  57,  57,  59,  63,   65, 66, 62, 60, 57, 57, 58, 58,
   57,  56,  56,  56,  57,  57,  56,  57,   57, 54, 47, 41, 37, 28, 21, 16,  
   10,   3,  -3,  -8, -13, -18, -23, -28};
@@ -141,13 +141,13 @@ double tone_4000_100dB_SL[EHMER_MAX]={
 
 double tone_8000_60dB_SL[EHMER_MAX]={
 -900,-900,-900,-900,-900,-900,-900,-900,   -40, -30, -21, -12,  -5,   0,  15,  35,
-  43,  40,  37,  36,  37,  39,  41,  43,    45,  45,  35,  25,  15,   5,  -5, -15,
+  43,  40,  37,  36,  36,  36,  36,  36,    36,  36,  35,  25,  15,   5,  -5, -15,
  -25, -35,-900,-900,-900,-900,-900,-900,  -900,-900,-900,-900,-900,-900,-900,-900,
 -900,-900,-900,-900,-900,-900,-900,-900};
 
 double tone_8000_80dB_SL[EHMER_MAX]={
--900,-900,-900,-900,-900,-900,-900,-900,    -1,   2,   6,  10,  13,  19,  25,  35,
-  63,  60,  56,  56,  57,  60,  61,  63,    65,  65,  55,  45,  35,  25,  15,   5,
+-900,-900,-900,-900,-900,-900,-900, -10,    -1,   2,   6,  10,  13,  19,  25,  35,
+  63,  60,  56,  56,  57,  57,  57,  57,    57,  57,  55,  45,  35,  25,  15,   5,
   -5, -15, -25, -35,-900,-900,-900,-900,  -900,-900,-900,-900,-900,-900,-900,-900,
 -900,-900,-900,-900,-900,-900,-900,-900};
 
@@ -189,7 +189,7 @@ double noise_2000_60dB_SL[EHMER_MAX]={
 
 double noise_2000_80dB_SL[EHMER_MAX]={
 -900, -26, -17,  -8,   1,  10,  19,  28,    33,  38,  43,  48,  53,  62,  70,  77,
-  77,  75,  70,  67,  68,  66,  62,  61,    60,  69,  52,  47,  39,  35,  34,  35,
+  77,  75,  70,  67,  68,  66,  62,  61,    60,  59,  52,  47,  39,  35,  34,  35,
   35,  33,  30,  27,  20,  10,   0, -10,   -20, -30,-900,-900,-900,-900,-900,-900,
 -900,-900,-900,-900,-900,-900,-900,-900};
 
diff --git a/lib/psy.c b/lib/psy.c
index 40d16030..bbd75610 100644
--- a/lib/psy.c
+++ b/lib/psy.c
@@ -12,7 +12,7 @@
  ********************************************************************
 
  function: psychoacoustics not including preecho
- last mod: $Id: psy.c,v 1.16.2.2.2.10 2000/05/02 00:28:58 xiphmont Exp $
+ last mod: $Id: psy.c,v 1.16.2.2.2.11 2000/05/04 06:13:28 xiphmont Exp $
 
  ********************************************************************/
 
@@ -28,9 +28,11 @@
 #include "smallft.h"
 #include "scales.h"
 
+/* Why Bark scale for encoding but not masking? Because masking has a
+   strong harmonic dependancy */
+
 /* the beginnings of real psychoacoustic infrastructure.  This is
    still not tightly tuned */
-
 void _vi_psy_free(vorbis_info_psy *i){
   if(i){
     memset(i,0,sizeof(vorbis_info_psy));
@@ -39,6 +41,7 @@ void _vi_psy_free(vorbis_info_psy *i){
 }
 
 /* Set up decibel threshhold slopes on a Bark frequency scale */
+/* the only bit left on a Bark scale.  No reason to change it right now */
 static void set_curve(double *ref,double *c,int n, double crate){
   int i,j=0;
 
@@ -93,9 +96,7 @@ static void interp_curve(double *c,double *c1,double *c2,double del){
 
 static void setup_curve(double **c,
 			int oc,
-			double *curveatt_dB,
-			double peaklowrolloff,
-			double peakhighrolloff){
+			double *curveatt_dB){
   int i,j;
   double tempc[9][EHMER_MAX];
   double ath[EHMER_MAX];
@@ -114,23 +115,23 @@ static void setup_curve(double **c,
 
   /* the temp curves are a bit roundabout, but this is only in
      init. */
-
-  for(i=0;i<9;i++){
-    memcpy(tempc[i],c[i],sizeof(double)*EHMER_MAX);
-    max_curve(tempc[i],ath);
+  for(i=0;i<5;i++){
+    memcpy(tempc[i*2],c[i*2],sizeof(double)*EHMER_MAX);
+    attenuate_curve(tempc[i*2],curveatt_dB[i]+(i+1)*20);
+    max_curve(tempc[i*2],ath);
+    attenuate_curve(tempc[i*2],-(i+1)*20);
   }
 
   /* normalize them so the driving amplitude is 0dB */
   for(i=0;i<5;i++){
     attenuate_curve(c[i*2],curveatt_dB[i]);
-    attenuate_curve(tempc[i*2],curveatt_dB[i]);
   }
 
   /* The c array is comes in as dB curves at 20 40 60 80 100 dB.
      interpolate intermediate dB curves */
   for(i=0;i<7;i+=2){
-  interp_curve(c[i+1],c[i],c[i+2],.5);
-  interp_curve(tempc[i+1],tempc[i],tempc[i+2],.5);
+    interp_curve(c[i+1],c[i],c[i+2],.5);
+    interp_curve(tempc[i+1],tempc[i],tempc[i+2],.5);
   }
 
   /* take things out of dB domain into linear amplitude */
@@ -161,10 +162,7 @@ void _vp_psy_init(vorbis_look_psy *p,vorbis_info_psy *vi,int n,long rate){
   double rate2=rate/2.;
   memset(p,0,sizeof(vorbis_look_psy));
   p->ath=malloc(n*sizeof(double));
-  p->pre=malloc(n*sizeof(int));
-  p->octave=malloc(n*sizeof(double));
-  p->post=malloc(n*sizeof(int));
-  p->curves=malloc(11*sizeof(double));
+  p->octave=malloc(n*sizeof(int));
   p->vi=vi;
   p->n=n;
 
@@ -175,63 +173,109 @@ void _vp_psy_init(vorbis_look_psy *p,vorbis_info_psy *vi,int n,long rate){
     p->ath[i]=fromdB(p->ath[i]+vi->ath_att);
 
   for(i=0;i<n;i++){
-    double oc=toOC((i+.5)*rate2/n);
-    int pre=fromOC(oc-.0625)/rate2*n;
-    int post=fromOC(oc+.0625)/rate2*n;
-    p->pre[i]=(pre<0?0:pre);
+    int oc=rint(toOC((i+.5)*rate2/n)*2.);
+    if(oc<0)oc=0;
+    if(oc>10)oc=10;
     p->octave[i]=oc;
-    p->post[i]=(post<0?0:post);
   }  
 
-  p->curves=malloc(11*sizeof(double **));
-  for(i=0;i<11;i++)
-    p->curves[i]=malloc(9*sizeof(double *));
+  p->tonecurves=malloc(11*sizeof(double **));
+  p->noisecurves=malloc(11*sizeof(double **));
+  for(i=0;i<11;i++){
+    p->tonecurves[i]=malloc(9*sizeof(double *));
+    p->noisecurves[i]=malloc(9*sizeof(double *));
+  }
 
   for(i=0;i<11;i++)
-    for(j=0;j<9;j++)
-      p->curves[i][j]=malloc(EHMER_MAX*sizeof(double));
-
-  memcpy(p->curves[0][2],tone_250_40dB_SL,sizeof(double)*EHMER_MAX);
-  memcpy(p->curves[0][4],tone_250_60dB_SL,sizeof(double)*EHMER_MAX);
-  memcpy(p->curves[0][6],tone_250_80dB_SL,sizeof(double)*EHMER_MAX);
-  memcpy(p->curves[0][8],tone_250_80dB_SL,sizeof(double)*EHMER_MAX);
-
-  memcpy(p->curves[2][2],tone_500_40dB_SL,sizeof(double)*EHMER_MAX);
-  memcpy(p->curves[2][4],tone_500_60dB_SL,sizeof(double)*EHMER_MAX);
-  memcpy(p->curves[2][6],tone_500_80dB_SL,sizeof(double)*EHMER_MAX);
-  memcpy(p->curves[2][8],tone_500_100dB_SL,sizeof(double)*EHMER_MAX);
-
-  memcpy(p->curves[4][2],tone_1000_40dB_SL,sizeof(double)*EHMER_MAX);
-  memcpy(p->curves[4][4],tone_1000_60dB_SL,sizeof(double)*EHMER_MAX);
-  memcpy(p->curves[4][6],tone_1000_80dB_SL,sizeof(double)*EHMER_MAX);
-  memcpy(p->curves[4][8],tone_1000_100dB_SL,sizeof(double)*EHMER_MAX);
-
-  memcpy(p->curves[6][2],tone_2000_40dB_SL,sizeof(double)*EHMER_MAX);
-  memcpy(p->curves[6][4],tone_2000_60dB_SL,sizeof(double)*EHMER_MAX);
-  memcpy(p->curves[6][6],tone_2000_80dB_SL,sizeof(double)*EHMER_MAX);
-  memcpy(p->curves[6][8],tone_2000_100dB_SL,sizeof(double)*EHMER_MAX);
-
-  memcpy(p->curves[8][2],tone_4000_40dB_SL,sizeof(double)*EHMER_MAX);
-  memcpy(p->curves[8][4],tone_4000_60dB_SL,sizeof(double)*EHMER_MAX);
-  memcpy(p->curves[8][6],tone_4000_80dB_SL,sizeof(double)*EHMER_MAX);
-  memcpy(p->curves[8][8],tone_4000_100dB_SL,sizeof(double)*EHMER_MAX);
-
-  memcpy(p->curves[10][2],tone_8000_60dB_SL,sizeof(double)*EHMER_MAX);
-  memcpy(p->curves[10][4],tone_8000_60dB_SL,sizeof(double)*EHMER_MAX);
-  memcpy(p->curves[10][6],tone_8000_80dB_SL,sizeof(double)*EHMER_MAX);
-  memcpy(p->curves[10][8],tone_8000_100dB_SL,sizeof(double)*EHMER_MAX);
-
-  setup_curve(p->curves[0],0,vi->curveatt_250Hz,vi->peakpre,vi->peakpost);
-  setup_curve(p->curves[2],2,vi->curveatt_500Hz,vi->peakpre,vi->peakpost);
-  setup_curve(p->curves[4],4,vi->curveatt_1000Hz,vi->peakpre,vi->peakpost);
-  setup_curve(p->curves[6],6,vi->curveatt_2000Hz,vi->peakpre,vi->peakpost);
-  setup_curve(p->curves[8],8,vi->curveatt_4000Hz,vi->peakpre,vi->peakpost);
-  setup_curve(p->curves[10],10,vi->curveatt_8000Hz,vi->peakpre,vi->peakpost);
+    for(j=0;j<9;j++){
+      p->tonecurves[i][j]=malloc(EHMER_MAX*sizeof(double));
+      p->noisecurves[i][j]=malloc(EHMER_MAX*sizeof(double));
+    }
 
-  for(i=1;i<11;i+=2)
-    for(j=0;j<9;j++)
-      interp_curve_dB(p->curves[i][j],p->curves[i-1][j],p->curves[i+1][j],.5);
+  memcpy(p->tonecurves[0][2],tone_250_40dB_SL,sizeof(double)*EHMER_MAX);
+  memcpy(p->tonecurves[0][4],tone_250_60dB_SL,sizeof(double)*EHMER_MAX);
+  memcpy(p->tonecurves[0][6],tone_250_80dB_SL,sizeof(double)*EHMER_MAX);
+  memcpy(p->tonecurves[0][8],tone_250_80dB_SL,sizeof(double)*EHMER_MAX);
+
+  memcpy(p->tonecurves[2][2],tone_500_40dB_SL,sizeof(double)*EHMER_MAX);
+  memcpy(p->tonecurves[2][4],tone_500_60dB_SL,sizeof(double)*EHMER_MAX);
+  memcpy(p->tonecurves[2][6],tone_500_80dB_SL,sizeof(double)*EHMER_MAX);
+  memcpy(p->tonecurves[2][8],tone_500_100dB_SL,sizeof(double)*EHMER_MAX);
+
+  memcpy(p->tonecurves[4][2],tone_1000_40dB_SL,sizeof(double)*EHMER_MAX);
+  memcpy(p->tonecurves[4][4],tone_1000_60dB_SL,sizeof(double)*EHMER_MAX);
+  memcpy(p->tonecurves[4][6],tone_1000_80dB_SL,sizeof(double)*EHMER_MAX);
+  memcpy(p->tonecurves[4][8],tone_1000_100dB_SL,sizeof(double)*EHMER_MAX);
+
+  memcpy(p->tonecurves[6][2],tone_2000_40dB_SL,sizeof(double)*EHMER_MAX);
+  memcpy(p->tonecurves[6][4],tone_2000_60dB_SL,sizeof(double)*EHMER_MAX);
+  memcpy(p->tonecurves[6][6],tone_2000_80dB_SL,sizeof(double)*EHMER_MAX);
+  memcpy(p->tonecurves[6][8],tone_2000_100dB_SL,sizeof(double)*EHMER_MAX);
+
+  memcpy(p->tonecurves[8][2],tone_4000_40dB_SL,sizeof(double)*EHMER_MAX);
+  memcpy(p->tonecurves[8][4],tone_4000_60dB_SL,sizeof(double)*EHMER_MAX);
+  memcpy(p->tonecurves[8][6],tone_4000_80dB_SL,sizeof(double)*EHMER_MAX);
+  memcpy(p->tonecurves[8][8],tone_4000_100dB_SL,sizeof(double)*EHMER_MAX);
+
+  memcpy(p->tonecurves[10][2],tone_8000_60dB_SL,sizeof(double)*EHMER_MAX);
+  memcpy(p->tonecurves[10][4],tone_8000_60dB_SL,sizeof(double)*EHMER_MAX);
+  memcpy(p->tonecurves[10][6],tone_8000_80dB_SL,sizeof(double)*EHMER_MAX);
+  memcpy(p->tonecurves[10][8],tone_8000_100dB_SL,sizeof(double)*EHMER_MAX);
+
+
+  memcpy(p->noisecurves[0][2],noise_500_60dB_SL,sizeof(double)*EHMER_MAX);
+  memcpy(p->noisecurves[0][4],noise_500_60dB_SL,sizeof(double)*EHMER_MAX);
+  memcpy(p->noisecurves[0][6],noise_500_80dB_SL,sizeof(double)*EHMER_MAX);
+  memcpy(p->noisecurves[0][8],noise_500_80dB_SL,sizeof(double)*EHMER_MAX);
+
+  memcpy(p->noisecurves[2][2],noise_500_60dB_SL,sizeof(double)*EHMER_MAX);
+  memcpy(p->noisecurves[2][4],noise_500_60dB_SL,sizeof(double)*EHMER_MAX);
+  memcpy(p->noisecurves[2][6],noise_500_80dB_SL,sizeof(double)*EHMER_MAX);
+  memcpy(p->noisecurves[2][8],noise_500_80dB_SL,sizeof(double)*EHMER_MAX);
+
+  memcpy(p->noisecurves[4][2],noise_1000_60dB_SL,sizeof(double)*EHMER_MAX);
+  memcpy(p->noisecurves[4][4],noise_1000_60dB_SL,sizeof(double)*EHMER_MAX);
+  memcpy(p->noisecurves[4][6],noise_1000_80dB_SL,sizeof(double)*EHMER_MAX);
+  memcpy(p->noisecurves[4][8],noise_1000_80dB_SL,sizeof(double)*EHMER_MAX);
+
+  memcpy(p->noisecurves[6][2],noise_2000_60dB_SL,sizeof(double)*EHMER_MAX);
+  memcpy(p->noisecurves[6][4],noise_2000_60dB_SL,sizeof(double)*EHMER_MAX);
+  memcpy(p->noisecurves[6][6],noise_2000_80dB_SL,sizeof(double)*EHMER_MAX);
+  memcpy(p->noisecurves[6][8],noise_2000_80dB_SL,sizeof(double)*EHMER_MAX);
+
+  memcpy(p->noisecurves[8][2],noise_4000_60dB_SL,sizeof(double)*EHMER_MAX);
+  memcpy(p->noisecurves[8][4],noise_4000_60dB_SL,sizeof(double)*EHMER_MAX);
+  memcpy(p->noisecurves[8][6],noise_4000_80dB_SL,sizeof(double)*EHMER_MAX);
+  memcpy(p->noisecurves[8][8],noise_4000_80dB_SL,sizeof(double)*EHMER_MAX);
+
+  memcpy(p->noisecurves[10][2],noise_4000_60dB_SL,sizeof(double)*EHMER_MAX);
+  memcpy(p->noisecurves[10][4],noise_4000_60dB_SL,sizeof(double)*EHMER_MAX);
+  memcpy(p->noisecurves[10][6],noise_4000_80dB_SL,sizeof(double)*EHMER_MAX);
+  memcpy(p->noisecurves[10][8],noise_4000_80dB_SL,sizeof(double)*EHMER_MAX);
+
+  setup_curve(p->tonecurves[0],0,vi->toneatt_250Hz);
+  setup_curve(p->tonecurves[2],2,vi->toneatt_500Hz);
+  setup_curve(p->tonecurves[4],4,vi->toneatt_1000Hz);
+  setup_curve(p->tonecurves[6],6,vi->toneatt_2000Hz);
+  setup_curve(p->tonecurves[8],8,vi->toneatt_4000Hz);
+  setup_curve(p->tonecurves[10],10,vi->toneatt_8000Hz);
+
+  setup_curve(p->noisecurves[0],0,vi->noiseatt_250Hz);
+  setup_curve(p->noisecurves[2],2,vi->noiseatt_500Hz);
+  setup_curve(p->noisecurves[4],4,vi->noiseatt_1000Hz);
+  setup_curve(p->noisecurves[6],6,vi->noiseatt_2000Hz);
+  setup_curve(p->noisecurves[8],8,vi->noiseatt_4000Hz);
+  setup_curve(p->noisecurves[10],10,vi->noiseatt_8000Hz);
 
+  for(i=1;i<11;i+=2)
+    for(j=0;j<9;j++){
+      interp_curve_dB(p->tonecurves[i][j],
+		      p->tonecurves[i-1][j],
+		      p->tonecurves[i+1][j],.5);
+      interp_curve_dB(p->noisecurves[i][j],
+		      p->noisecurves[i-1][j],
+		      p->noisecurves[i+1][j],.5);
+    }
 }
 
 void _vp_psy_clear(vorbis_look_psy *p){
@@ -239,21 +283,42 @@ void _vp_psy_clear(vorbis_look_psy *p){
   if(p){
     if(p->ath)free(p->ath);
     if(p->octave)free(p->octave);
-    if(p->pre)free(p->pre);
-    if(p->post)free(p->post);
-    if(p->curves){
+    if(p->noisecurves){
       for(i=0;i<11;i++){
-	for(j=0;j<9;j++)
-	  free(p->curves[i][j]);
-	free(p->curves[i]);
+	for(j=0;j<9;j++){
+	  free(p->tonecurves[i][j]);
+	  free(p->noisecurves[i][j]);
+	}
+	free(p->noisecurves[i]);
+	free(p->tonecurves[i]);
       }
-      free(p->curves);
+      free(p->tonecurves);
+      free(p->noisecurves);
     }
     memset(p,0,sizeof(vorbis_look_psy));
   }
 }
 
-static double _eights[EHMER_MAX]={
+static void compute_decay(vorbis_look_psy *p,double *f, double *decay, int n){
+  int i;
+  /* handle decay */
+  if(p->vi->decayp && decay){
+    double decscale=1.-pow(p->vi->decay_coeff,n); 
+    double attscale=1.-pow(p->vi->attack_coeff,n); 
+    for(i=0;i<n;i++){
+      double del=f[i]-decay[i];
+      if(del>0)
+	/* add energy */
+	decay[i]+=del*attscale;
+      else
+	/* remove energy */
+	decay[i]+=del*decscale;
+      if(decay[i]>f[i])f[i]=decay[i];
+    }
+  }
+}
+
+static double _eights[EHMER_MAX+1]={
   .2500000000000000000,.2726269331663144148,
   .2973017787506802667,.3242098886627524165,
   .3535533905932737622,.3855527063519852059,
@@ -283,15 +348,14 @@ static double _eights[EHMER_MAX]={
   22.62741699796952076,24.67537320652705316,
   26.90868528811886536,29.34412938254947939};
 
-static double seed_peaks(double *floor,double **curve,
-		       double amp,double specmax,
-		       int *pre,int *post,
-		       int x,int n,double specatt){
+static void seed_peaks(double *floor,
+			 double **curves,
+			 double amp,double specmax,
+			 int x,int n,double specatt){
   int i;
-  int ix=x*_eights[0];
-  int prevx=pre[ix];
+  double x16=x*(1./16.);
+  int prevx=x*_eights[0]-x16;
   int nextx;
-  double ret=0.;
 
   /* make this attenuation adjustable */
   int choice=rint((todB(amp)-specmax+specatt)/10.)-2;
@@ -300,67 +364,108 @@ static double seed_peaks(double *floor,double **curve,
 
   for(i=0;i<EHMER_MAX;i++){
     if(prevx<n){
-      double lin=curve[choice][i];
-      ix=x*_eights[i];
-      nextx=(ix<n?post[ix]:n);
+      double lin=curves[choice][i];
+      nextx=x*_eights[i]+x16;
+      nextx=(nextx<n?nextx:n);
       if(lin){
-	/* Currently uses a n+n = +3dB additivity */
-	lin*=amp;
-	lin*=lin;
-	
-	floor[prevx]+=lin;
-	if(nextx==prevx){
-	  if(nextx+1<n)floor[nextx+1]-=lin;
-	}else{
-	  if(nextx<n)floor[nextx]-=lin;
-	}
-	if(i==EHMER_OFFSET || prevx==x)ret+=lin;
-	if(nextx==x)ret-=lin;
+	lin*=amp;	
+	if(floor[prevx]<lin)floor[prevx]=lin;
       }
       prevx=nextx;
     }
   }
-  return(ret);
 }
 
-static void add_seeds(double *floor,int n){
-  int i;
-  double acc=0.;
-  for(i=0;i<n;i++){
-    acc+=floor[i];
-    floor[i]=(acc<=0.?0.:sqrt(acc));
-  }
-}
-
-/* Why Bark scale for encoding but not masking? Because masking has a
-   strong harmonic dependancy */
-static void _vp_tone_tone_iter(vorbis_look_psy *p,
-			       double *f, 
-			       double *flr, double *mask,
-			       double specmax){
+static void seed_generic(vorbis_look_psy *p,
+			 double ***curves,
+			 double *f, 
+			 double *flr,
+			 double specmax){
   vorbis_info_psy *vi=p->vi;
   long n=p->n,i;
-  double acc=0.;
-
-  memset(flr,0,sizeof(double)*n);
-
+  
   /* prime the working vector with peak values */
   /* Use the 250 Hz curve up to 250 Hz and 8kHz curve after 8kHz. */
+  for(i=0;i<n;i++)
+    if(f[i]>flr[i])
+      seed_peaks(flr,curves[p->octave[i]],f[i],
+		 specmax,i,n,vi->max_curve_dB);
+}
+
+/* bleaugh, this is more complicated than it needs to be */
+static void max_seeds(vorbis_look_psy *p,double *flr){
+  long n=p->n,i,j;
+  long *posstack=alloca(n*sizeof(long));
+  double *ampstack=alloca(n*sizeof(double));
+  long stack=0;
+
   for(i=0;i<n;i++){
-    /*acc+=flr[i]; XXX acc is behaving incorrectly.  Check it */
-    if(f[i]>mask[i]){
-      int o=rint(p->octave[i]*2.);
-      if(o<0)o=0;
-      if(o>10)o=10;
-
-      /*acc+=*/seed_peaks(flr,p->curves[o],f[i],
-		      specmax,p->pre,p->post,i,n,vi->max_curve_dB);
+    if(stack<2){
+      posstack[stack]=i;
+      ampstack[stack++]=flr[i];
+    }else{
+      while(1){
+	if(flr[i]<ampstack[stack-1]){
+	  posstack[stack]=i;
+	  ampstack[stack++]=flr[i];
+	  break;
+	}else{
+	  if(i<posstack[stack-1]*17/15){
+	    if(stack>1 && ampstack[stack-1]<ampstack[stack-2] &&
+	       i<posstack[stack-2]*17/15){
+	      /* we completely overlap, making stack-1 irrelevant.  pop it */
+	      stack--;
+	      continue;
+	    }
+	  }
+	  posstack[stack]=i;
+	  ampstack[stack++]=flr[i];
+	  break;
+
+	}
+      }
     }
   }
 
-  /* chase curves down from the peak seeds */
-  add_seeds(flr,n);
-  
+  /* the stack now contains only the positions that are relevant. Scan
+     'em straight through */
+  {
+    long pos=0;
+    for(i=0;i<stack;i++){
+      long endpos;
+      if(i<stack-1 && ampstack[i+1]>ampstack[i]){
+	endpos=posstack[i+1];
+      }else{
+	endpos=posstack[i]*17/15;
+      }
+      for(j=pos;j<endpos;j++)flr[j]=ampstack[i];
+      pos=endpos;
+    }
+  }   
+
+  /* there.  Linear time.  I now remember this was on a problem set I
+     had in Grad Skool... I didn't solve it at the time ;-) */
+}
+
+#define noiseBIAS 5
+static void third_octave_noise(vorbis_look_psy *p,double *f,double *noise){
+  long i,n=p->n;
+  long lo=0,hi=0;
+  double acc=0.;
+
+  for(i=0;i<n;i++){
+    /* not exactly correct, (the center frequency should be centered
+       on a *log* scale), but not worth quibbling */
+    long newhi=i*7/5+noiseBIAS;
+    long newlo=i*5/7-noiseBIAS;
+    if(newhi>n)newhi=n;
+
+    for(;lo<newlo;lo++)
+      acc-=todB(f[lo]); /* yeah, this ain't RMS */
+    for(;hi<newhi;hi++)
+      acc+=todB(f[hi]);
+    noise[i]=fromdB(acc/(hi-lo));
+  }
 }
 
 /* stability doesn't matter */
@@ -371,11 +476,75 @@ static int comp(const void *a,const void *b){
     return(-1);
 }
 
+static int frameno=-1;
+void _vp_compute_mask(vorbis_look_psy *p,double *f, 
+		      double *flr, 
+		      double *decay){
+  double *noise=alloca(sizeof(double)*p->n);
+  double *work=alloca(sizeof(double)*p->n);
+  int i,n=p->n;
+  double specmax=0.;
+
+  frameno++;
+
+  /* don't use the smoothed data for noise */
+  third_octave_noise(p,f,noise);
+
+  /* compute, update and apply decay accumulator */
+  for(i=0;i<n;i++)work[i]=fabs(f[i]);
+  compute_decay(p,work,decay,n);
+  
+  if(p->vi->smoothp){
+    /* compute power^.5 of three neighboring bins to smooth for peaks
+       that get split twixt bins/peaks that nail the bin.  This evens
+       out treatment as we're not doing additive masking any longer. */
+    double acc=work[0]*work[0]+work[1]*work[1];
+    double prev=work[0];
+
+    work[0]=sqrt(acc);
+    for(i=1;i<n-1;i++){
+      double this=work[i];
+      acc+=work[i+1]*work[i+1];
+      work[i]=sqrt(acc);
+      acc-=prev*prev;
+      prev=this;
+    }
+    work[n-1]=sqrt(acc);
+  }
+  
+  /* find the highest peak so we know the limits */
+  for(i=0;i<n;i++){
+    if(work[i]>specmax)specmax=work[i];
+  }
+  specmax=todB(specmax);
+
+  /* mask off the ATH */
+  if(p->vi->athp)
+    for(i=0;i<n;i++)
+      flr[i]=p->ath[i];
+  else
+    for(i=0;i<n;i++)
+      flr[i]=0.;
+    
+  /* seed the tone masking */
+  if(p->vi->tonemaskp)
+    seed_generic(p,p->tonecurves,work,flr,specmax);
+  
+  /* seed the noise masking */
+  if(p->vi->noisemaskp)
+    seed_generic(p,p->noisecurves,noise,flr,specmax);
+  
+  /* chase the seeds */
+  max_seeds(p,flr);
+
+}
+
+
 /* this applies the floor and (optionally) tries to preserve noise
    energy in low resolution portions of the spectrum */
 /* f and flr are *linear* scale, not dB */
 void _vp_apply_floor(vorbis_look_psy *p,double *f, 
-		      double *flr,double *mask){
+		      double *flr){
   double *work=alloca(p->n*sizeof(double));
   double thresh=fromdB(p->vi->noisefit_threshdB);
   int i,j,addcount=0;
@@ -385,14 +554,8 @@ void _vp_apply_floor(vorbis_look_psy *p,double *f,
   for(j=0;j<p->n;j++){
     if(flr[j]<=0)
       work[j]=0.;
-    else{
-      double val=rint(f[j]/flr[j]);
-      if(mask[j]>flr[j] && fabs(f[j])<mask[j]){
-	work[j]=0;
-      }else{	  
-	work[j]=val;
-      }
-    }
+    else
+      work[j]=rint(f[j]/flr[j]);
   }
 
   /* look at spectral energy levels.  Noise is noise; sensation level
@@ -416,7 +579,8 @@ void _vp_apply_floor(vorbis_look_psy *p,double *f,
 	double y=(f[i]*f[i]);
 	original_SL+=y;
 	if(work[i]){
-	  current_SL+=y;
+	  double qy=(work[i]*flr[i]);
+	  current_SL+=qy*qy;
 	}else{
 	  index[z++]=f+i;
 	}	
@@ -431,17 +595,14 @@ void _vp_apply_floor(vorbis_look_psy *p,double *f,
 	for(j=0;j<z;j++){
 	  int p=index[j]-f;
 
-	  /* yes, I mean *mask* here, not floor.  This should only be
-             being applied in areas where noise dominates masking;
-             otherwise we don't want to be adding tones back. */
-	  double val=mask[p]*mask[p]+current_SL;
+	  double val=flr[p]*flr[p]+current_SL;
 	  
 	  if(val<original_SL){
 	    addcount++;
 	    if(f[p]>0)
-	      work[p]=mask[p]/flr[p];
+	      work[p]=flr[p];
 	    else
-	      work[p]=-mask[p]/flr[p];
+	      work[p]=-flr[p];
 	    current_SL=val;
 	  }else
 	    break;
@@ -452,98 +613,3 @@ void _vp_apply_floor(vorbis_look_psy *p,double *f,
   memcpy(f,work,p->n*sizeof(double));
 }
 
-void _vp_tone_tone_mask(vorbis_look_psy *p,double *f, 
-			double *flr, double *mask,
-			double *decay){
-  double *iter=alloca(sizeof(double)*p->n);
-  int i,j,n=p->n;
-  double specmax=0.;
-
-  for(i=0;i<n;i++)iter[i]=fabs(f[i]);  
-  f=iter;
-  iter=alloca(sizeof(double)*p->n);
-
-  /* handle decay */
-  if(p->vi->decayp && decay){
-    double decscale=1.-pow(p->vi->decay_coeff,n); 
-    double attscale=1.-pow(p->vi->attack_coeff,n); 
-    for(i=0;i<n;i++){
-      double del=f[i]-decay[i];
-      if(del>0)
-	/* add energy */
-	decay[i]+=del*attscale;
-      else
-	/* remove energy */
-	decay[i]+=del*decscale;
-      if(decay[i]>f[i])f[i]=decay[i];
-    }
-  }
-
-  for(i=0;i<n;i++){
-    if(f[i]>specmax)specmax=f[i];
-  }
-  specmax=todB(specmax);
-
-  /* do the peak att with rolloff hack to the mask */
-  memset(mask,0,sizeof(double)*n);
-  if(p->vi->peakattp){
-    double curmask;
-
-    /* chase down from peaks forward */
-    curmask=0.;
-    for(i=0;i<n-1;i++){
-      if(f[i]>curmask){
-	double val=todB(f[i]);
-	int o=p->octave[i];
-	int choice=rint((val-specmax+p->vi->max_curve_dB)/20.)-1;
-	if(o<0)o=0;
-	if(o>5)o=5;
-	if(choice<0)choice=0;
-	if(choice>4)choice=4;
-	val=fromdB(val+p->vi->peakatt[o][choice]);
-	if(val>curmask)curmask=val;
-      }
-      mask[i]=curmask;
-      /* roll off the curmask */
-      curmask*=fromdB(p->vi->peakpost*(p->octave[i+1]-p->octave[i]));
-    }
-    /* chase down from peaks backward */
-    curmask=0.;
-    for(i=n-1;i>0;i--){
-      if(mask[i]>curmask)
-	curmask=mask[i];
-      else
-	mask[i]=curmask;
-      /* roll off the curmask */
-      curmask*=fromdB(p->vi->peakpre*(p->octave[i]-p->octave[i-1]));
-    }
-  }
-  /* mask off the ATH */
-  if(p->vi->athp)
-    for(i=0;i<n;i++)
-      if(mask[i]<p->ath[i])
-	mask[i]=p->ath[i];
-
-  /* perform iterative additive tone-tone masking */
-  
-  for(i=0;i<p->vi->curve_fit_iterations;i++){
-    if(i==0)
-      _vp_tone_tone_iter(p,f,flr,mask,specmax);
-    else{
-      _vp_tone_tone_iter(p,iter,flr,mask,specmax);
-    }
-    if(i!=p->vi->curve_fit_iterations-1){
-      for(j=0;j<n;j++)
-	if(f[j]<mask[j] || f[j]<flr[j])
-	  iter[j]=0.;
-	else
-	  iter[j]=f[j];
-    }
-  }
-
-  for(i=0;i<n;i++)
-    if(mask[i]<flr[i])
-      mask[i]=flr[i];
-
-}
-
diff --git a/lib/psy.h b/lib/psy.h
index 1b7bd1b4..3fa9982e 100644
--- a/lib/psy.h
+++ b/lib/psy.h
@@ -12,7 +12,7 @@
  ********************************************************************
 
  function: random psychoacoustics (not including preecho)
- last mod: $Id: psy.h,v 1.11.2.2.2.3 2000/04/21 16:35:39 xiphmont Exp $
+ last mod: $Id: psy.h,v 1.11.2.2.2.4 2000/05/04 06:13:28 xiphmont Exp $
 
  ********************************************************************/
 
@@ -28,12 +28,11 @@ typedef struct {
   int n;
   struct vorbis_info_psy *vi;
 
-  double ***curves;
+  double ***tonecurves;
+  double ***noisecurves;
 
   double *ath;
-  int *pre;
-  double *octave;
-  int *post;
+  int    *octave;
 
 } vorbis_look_psy;
 
@@ -41,11 +40,11 @@ extern void   _vp_psy_init(vorbis_look_psy *p,vorbis_info_psy *vi,int n,long rat
 extern void   _vp_psy_clear(vorbis_look_psy *p);
 extern void  *_vi_psy_dup(void *source);
 extern void   _vi_psy_free(vorbis_info_psy *i);
-extern void   _vp_tone_tone_mask(vorbis_look_psy *p,double *f, 
-				 double *floor, double *mask,
+extern void   _vp_compute_mask(vorbis_look_psy *p,double *f, 
+				 double *floor,
 				 double *decay);
 extern void _vp_apply_floor(vorbis_look_psy *p,double *f,
-			    double *flr,double *mask);
+			    double *flr);
 
 #endif
 
diff --git a/lib/psytune.c b/lib/psytune.c
index 0221553e..3903aab6 100644
--- a/lib/psytune.c
+++ b/lib/psytune.c
@@ -13,7 +13,7 @@
 
  function: simple utility that runs audio through the psychoacoustics
            without encoding
- last mod: $Id: psytune.c,v 1.1.2.2.2.7 2000/05/02 00:28:58 xiphmont Exp $
+ last mod: $Id: psytune.c,v 1.1.2.2.2.8 2000/05/04 06:13:28 xiphmont Exp $
 
  ********************************************************************/
 
@@ -31,29 +31,31 @@
 #include "lpc.h"
 
 static vorbis_info_psy _psy_set0={
-  1,1,1,
-
-  0,8,4.,
+  1,/*athp*/
+  1,/*decayp*/
+  1,/*smoothp*/
+  1,8,0.,
 
   -130.,
 
-  {-40.,-40.,-60.,-80.,-85.},
-  {-40.,-40.,-60.,-80.,-100.},
-  {-40.,-40.,-60.,-80.,-100.},
-  {-40.,-40.,-60.,-80.,-100.},
-  {-40.,-40.,-60.,-80.,-100.},
-  {-70.,-70.,-70.,-80.,-100.},
-
-  1,
-  {{-6.,-8.,-12.,-16.,-18.},
-   {-6.,-8.,-12.,-16.,-18.},
-   {-6.,-8.,-12.,-16.,-18.},
-   {-6.,-8.,-12.,-16.,-20.},
-   {-6.,-8.,-12.,-16.,-20.},
-   {-6.,-8.,-12.,-16.,-18.},},
-  -80.,-40.,
-
-  100.,
+  1,/* tonemaskp*/
+  {-35.,-40.,-60.,-80.,-80.}, /* remember that el 4 is an 80 dB curve, not 100 */
+  {-35.,-40.,-60.,-80.,-95.},
+  {-35.,-40.,-60.,-80.,-95.},
+  {-35.,-40.,-60.,-80.,-95.},
+  {-35.,-40.,-60.,-80.,-95.},
+  {-65.,-60.,-60.,-80.,-95.},  /* remember that el 1 is a 60 dB curve, not 40 */
+
+  1,/*noisemaskp*/
+  {-100.,-100.,-100.,-200.,-200.}, /* this is the 500 Hz curve, which
+                                      is too wrong to work */
+  {-60.,-60.,-60.,-80.,-80.},
+  {-60.,-60.,-60.,-80.,-80.},
+  {-60.,-60.,-60.,-80.,-80.},
+  {-60.,-60.,-60.,-80.,-80.},
+  {-50.,-55.,-60.,-80.,-80.},
+
+  110.,
 
   .9998, .9997  /* attack/decay control */
 };
@@ -132,7 +134,7 @@ int main(int argc,char *argv[]){
   int framesize=2048;
   int order=32;
 
-  double *pcm[2],*out[2],*window,*mask,*decay[2],*lpc,*floor;
+  double *pcm[2],*out[2],*window,*decay[2],*lpc,*floor;
   signed char *buffer,*buffer2;
   mdct_lookup m_look;
   vorbis_look_psy p_look;
@@ -187,7 +189,6 @@ int main(int argc,char *argv[]){
   out[1]=calloc(framesize/2,sizeof(double));
   decay[0]=calloc(framesize/2,sizeof(double));
   decay[1]=calloc(framesize/2,sizeof(double));
-  mask=malloc(framesize*sizeof(double));
   floor=malloc(framesize*sizeof(double));
   lpc=malloc(order*sizeof(double));
   buffer=malloc(framesize*4);
@@ -199,7 +200,10 @@ int main(int argc,char *argv[]){
 
   for(i=0;i<11;i++)
     for(j=0;j<9;j++)
-      analysis("Pcurve",i*10+j,p_look.curves[i][j],EHMER_MAX,0,1);
+      analysis("Ptonecurve",i*10+j,p_look.tonecurves[i][j],EHMER_MAX,0,1);
+  for(i=0;i<11;i++)
+    for(j=0;j<9;j++)
+      analysis("Pnoisecurve",i*10+j,p_look.noisecurves[i][j],EHMER_MAX,0,1);
 
   /* we cheat on the WAV header; we just bypass 44 bytes and never
      verify that it matches 16bit/stereo/44.1kHz. */
@@ -233,7 +237,6 @@ int main(int argc,char *argv[]){
 	analysis("pre",frameno,pcm[i],framesize,0,0);
 	
 	/* do the psychacoustics */
-	memset(mask,0,sizeof(double)*framesize/2);
 	for(j=0;j<framesize;j++)
 	  pcm[i][j]*=window[j];
 
@@ -241,9 +244,8 @@ int main(int argc,char *argv[]){
 
 	analysis("mdct",frameno,pcm[i],framesize/2,1,1);
 
-	_vp_tone_tone_mask(&p_look,pcm[i],floor,mask,decay[i]);
+	_vp_compute_mask(&p_look,pcm[i],floor,decay[i]);
 	
-	analysis("mask",frameno,mask,framesize/2,1,1);
 	analysis("prefloor",frameno,floor,framesize/2,1,1);
 	analysis("decay",frameno,decay[i],framesize/2,1,1);
 	
@@ -251,7 +253,7 @@ int main(int argc,char *argv[]){
 	_lpc_to_curve(floor,lpc,sqrt(amp),&floorlook,"Ffloor",frameno);
 	analysis("floor",frameno,floor,framesize/2,1,1);
 
-	_vp_apply_floor(&p_look,pcm[i],floor,mask);
+	_vp_apply_floor(&p_look,pcm[i],floor);
 
 	/* re-add floor */
 	for(j=0;j<framesize/2;j++){
author	Monty <xiphmont@xiph.org>	2000-05-04 06:13:28 +0000
committer	Monty <xiphmont@xiph.org>	2000-05-04 06:13:28 +0000
commit	f258221ec2e0b127c91673516485878a2a2e5d7a (patch)
tree	560a06990e8c3e154c5fb7b677d532ec534398b3
parent	75b30f91b772e5b0af627e9015b302c615b74da4 (diff)
download	libvorbis-git-f258221ec2e0b127c91673516485878a2a2e5d7a.tar.gz