diff options
author | Monty <xiphmont@xiph.org> | 2000-05-04 06:13:28 +0000 |
---|---|---|
committer | Monty <xiphmont@xiph.org> | 2000-05-04 06:13:28 +0000 |
commit | f258221ec2e0b127c91673516485878a2a2e5d7a (patch) | |
tree | 560a06990e8c3e154c5fb7b677d532ec534398b3 | |
parent | 75b30f91b772e5b0af627e9015b302c615b74da4 (diff) | |
download | libvorbis-git-f258221ec2e0b127c91673516485878a2a2e5d7a.tar.gz |
The psychoacoustics should be ready. Added noise/tone masking, which
still needs some work (peaks should not be counted so that we can use
true RMS), but it functions, if not optimally. Still a big benefit.
Monty
svn path=/branches/new_acoustics_pending_merge_20000328/vorbis/; revision=357
-rw-r--r-- | lib/mapping0.c | 8 | ||||
-rw-r--r-- | lib/masking.h | 18 | ||||
-rw-r--r-- | lib/psy.c | 542 | ||||
-rw-r--r-- | lib/psy.h | 15 | ||||
-rw-r--r-- | lib/psytune.c | 58 |
5 files changed, 353 insertions, 288 deletions
diff --git a/lib/mapping0.c b/lib/mapping0.c index 381ed62b..0dce7ed3 100644 --- a/lib/mapping0.c +++ b/lib/mapping0.c @@ -12,7 +12,7 @@ ******************************************************************** function: channel mapping 0 implementation - last mod: $Id: mapping0.c,v 1.11.2.2.2.5 2000/04/21 16:35:39 xiphmont Exp $ + last mod: $Id: mapping0.c,v 1.11.2.2.2.6 2000/05/04 06:13:28 xiphmont Exp $ ********************************************************************/ @@ -232,7 +232,6 @@ static int forward(vorbis_block *vb,vorbis_look_mapping *l){ { double *floor=_vorbis_block_alloc(vb,n*sizeof(double)/2); - double *mask=_vorbis_block_alloc(vb,n*sizeof(double)/2); for(i=0;i<vi->channels;i++){ double *pcm=vb->pcm[i]; @@ -245,10 +244,9 @@ static int forward(vorbis_block *vb,vorbis_look_mapping *l){ memset(decay,0,n*sizeof(double)/2); /* perform psychoacoustics; do masking */ - _vp_tone_tone_mask(look->psy_look+submap,pcm,floor,mask,decay); + _vp_compute_mask(look->psy_look+submap,pcm,floor,decay); _analysis_output("mdct",vb->sequence,pcm,n/2,0,1); - _analysis_output("mask",vb->sequence,mask,n/2,0,1); /* perform floor encoding */ nonzero[i]=look->floor_func[submap]-> @@ -257,7 +255,7 @@ static int forward(vorbis_block *vb,vorbis_look_mapping *l){ _analysis_output("floor",vb->sequence,floor,n/2,0,1); /* apply the floor, do optional noise levelling */ - _vp_apply_floor(look->psy_look+submap,pcm,floor,mask); + _vp_apply_floor(look->psy_look+submap,pcm,floor); _analysis_output("res",vb->sequence,pcm,n/2,0,0); diff --git a/lib/masking.h b/lib/masking.h index 08271a5a..11f2e3e9 100644 --- a/lib/masking.h +++ b/lib/masking.h @@ -12,7 +12,7 @@ ******************************************************************** function: masking curve data for psychoacoustics - last mod: $Id: masking.h,v 1.1.2.2.2.1 2000/05/02 00:28:58 xiphmont Exp $ + last mod: $Id: masking.h,v 1.1.2.2.2.2 2000/05/04 06:13:28 xiphmont Exp $ 
********************************************************************/ @@ -42,18 +42,18 @@ double tone_250_40dB_SL[EHMER_MAX]={ -10, -13, -16, -19, -21, -24, -28, -32, -900,-900,-900,-900,-900,-900,-900,-900, -900,-900,-900,-900,-900,-900,-900,-900}; double tone_250_60dB_SL[EHMER_MAX]={ --900,-900,-900,-900,-900,-900,-900,-900, -5, 1, 7, 13, 19, 25, 30, 33, +-900,-900,-900,-900,-900,-900,-900, -10, -5, 1, 7, 13, 19, 25, 30, 33, 36, 39, 38, 37, 38, 39, 39, 40, 38, 36, 35, 34, 33, 31, 29, 28, 28, 28, 25, 20, 14, 10, 5, 0, -5,-10,-15,-20,-25,-30,-35,-40, -900,-900,-900,-900,-900,-900,-900,-900}; double tone_250_80dB_SL[EHMER_MAX]={ --900,-900,-900,-900,-900,-900,-900,-900, 10, 17, 24, 30, 37, 41, 48, 49, +-900,-900,-900,-900,-900,-900,-900, -10, 10, 17, 24, 30, 37, 41, 48, 49, 50, 53, 54, 53, 53, 54, 55, 57, 57, 57, 58, 59, 60, 58, 57, 58, 59, 58, 57, 54, 52, 50, 49, 47, 46, 47, 46, 44, 43, 42, 41, 40, 38, 32, 27, 22, 17, 11, 6, 0}; double tone_500_40dB_SL[EHMER_MAX]={ --900,-900,-900,-900,-900,-900,-900,-900, -26, -20, -14, -8, -2, 4, 10, 17, +-900,-900,-900,-900,-900,-900,-900, -10, -26, -20, -14, -8, -2, 4, 10, 17, 23, 16, 12, 9, 6, 3, 0, -3, -7, -10, -13, -16, -20, -23, -26, -30, -900,-900,-900,-900,-900,-900,-900,-900, -900,-900,-900,-900,-900,-900,-900,-900, -900,-900,-900,-900,-900,-900,-900,-900}; @@ -68,7 +68,7 @@ double tone_500_80dB_SL[EHMER_MAX]={ 38, 34, 32, 29, 29, 28, 25, 23, 20, 16, 10, 7, 4, 2, -1, -4, -7, -10, -15, -20, -25, -30, -35, -40}; double tone_500_100dB_SL[EHMER_MAX]={ --900,-900,-900,-900,-900,-900,-900,-900, -7, 2, 10, 19, 27, 35, 55, 56, +-900,-900,-900,-900,-900,-900,-900, -10, -7, 2, 10, 19, 27, 35, 55, 56, 62, 61, 60, 58, 57, 57, 59, 63, 65, 66, 62, 60, 57, 57, 58, 58, 57, 56, 56, 56, 57, 57, 56, 57, 57, 54, 47, 41, 37, 28, 21, 16, 10, 3, -3, -8, -13, -18, -23, -28}; @@ -141,13 +141,13 @@ double tone_4000_100dB_SL[EHMER_MAX]={ double tone_8000_60dB_SL[EHMER_MAX]={ -900,-900,-900,-900,-900,-900,-900,-900, -40, -30, -21, -12, -5, 0, 15, 
35, - 43, 40, 37, 36, 37, 39, 41, 43, 45, 45, 35, 25, 15, 5, -5, -15, + 43, 40, 37, 36, 36, 36, 36, 36, 36, 36, 35, 25, 15, 5, -5, -15, -25, -35,-900,-900,-900,-900,-900,-900, -900,-900,-900,-900,-900,-900,-900,-900, -900,-900,-900,-900,-900,-900,-900,-900}; double tone_8000_80dB_SL[EHMER_MAX]={ --900,-900,-900,-900,-900,-900,-900,-900, -1, 2, 6, 10, 13, 19, 25, 35, - 63, 60, 56, 56, 57, 60, 61, 63, 65, 65, 55, 45, 35, 25, 15, 5, +-900,-900,-900,-900,-900,-900,-900, -10, -1, 2, 6, 10, 13, 19, 25, 35, + 63, 60, 56, 56, 57, 57, 57, 57, 57, 57, 55, 45, 35, 25, 15, 5, -5, -15, -25, -35,-900,-900,-900,-900, -900,-900,-900,-900,-900,-900,-900,-900, -900,-900,-900,-900,-900,-900,-900,-900}; @@ -189,7 +189,7 @@ double noise_2000_60dB_SL[EHMER_MAX]={ double noise_2000_80dB_SL[EHMER_MAX]={ -900, -26, -17, -8, 1, 10, 19, 28, 33, 38, 43, 48, 53, 62, 70, 77, - 77, 75, 70, 67, 68, 66, 62, 61, 60, 69, 52, 47, 39, 35, 34, 35, + 77, 75, 70, 67, 68, 66, 62, 61, 60, 59, 52, 47, 39, 35, 34, 35, 35, 33, 30, 27, 20, 10, 0, -10, -20, -30,-900,-900,-900,-900,-900,-900, -900,-900,-900,-900,-900,-900,-900,-900}; @@ -12,7 +12,7 @@ ******************************************************************** function: psychoacoustics not including preecho - last mod: $Id: psy.c,v 1.16.2.2.2.10 2000/05/02 00:28:58 xiphmont Exp $ + last mod: $Id: psy.c,v 1.16.2.2.2.11 2000/05/04 06:13:28 xiphmont Exp $ ********************************************************************/ @@ -28,9 +28,11 @@ #include "smallft.h" #include "scales.h" +/* Why Bark scale for encoding but not masking? Because masking has a + strong harmonic dependancy */ + /* the beginnings of real psychoacoustic infrastructure. This is still not tightly tuned */ - void _vi_psy_free(vorbis_info_psy *i){ if(i){ memset(i,0,sizeof(vorbis_info_psy)); @@ -39,6 +41,7 @@ void _vi_psy_free(vorbis_info_psy *i){ } /* Set up decibel threshhold slopes on a Bark frequency scale */ +/* the only bit left on a Bark scale. 
No reason to change it right now */ static void set_curve(double *ref,double *c,int n, double crate){ int i,j=0; @@ -93,9 +96,7 @@ static void interp_curve(double *c,double *c1,double *c2,double del){ static void setup_curve(double **c, int oc, - double *curveatt_dB, - double peaklowrolloff, - double peakhighrolloff){ + double *curveatt_dB){ int i,j; double tempc[9][EHMER_MAX]; double ath[EHMER_MAX]; @@ -114,23 +115,23 @@ static void setup_curve(double **c, /* the temp curves are a bit roundabout, but this is only in init. */ - - for(i=0;i<9;i++){ - memcpy(tempc[i],c[i],sizeof(double)*EHMER_MAX); - max_curve(tempc[i],ath); + for(i=0;i<5;i++){ + memcpy(tempc[i*2],c[i*2],sizeof(double)*EHMER_MAX); + attenuate_curve(tempc[i*2],curveatt_dB[i]+(i+1)*20); + max_curve(tempc[i*2],ath); + attenuate_curve(tempc[i*2],-(i+1)*20); } /* normalize them so the driving amplitude is 0dB */ for(i=0;i<5;i++){ attenuate_curve(c[i*2],curveatt_dB[i]); - attenuate_curve(tempc[i*2],curveatt_dB[i]); } /* The c array is comes in as dB curves at 20 40 60 80 100 dB. 
interpolate intermediate dB curves */ for(i=0;i<7;i+=2){ - interp_curve(c[i+1],c[i],c[i+2],.5); - interp_curve(tempc[i+1],tempc[i],tempc[i+2],.5); + interp_curve(c[i+1],c[i],c[i+2],.5); + interp_curve(tempc[i+1],tempc[i],tempc[i+2],.5); } /* take things out of dB domain into linear amplitude */ @@ -161,10 +162,7 @@ void _vp_psy_init(vorbis_look_psy *p,vorbis_info_psy *vi,int n,long rate){ double rate2=rate/2.; memset(p,0,sizeof(vorbis_look_psy)); p->ath=malloc(n*sizeof(double)); - p->pre=malloc(n*sizeof(int)); - p->octave=malloc(n*sizeof(double)); - p->post=malloc(n*sizeof(int)); - p->curves=malloc(11*sizeof(double)); + p->octave=malloc(n*sizeof(int)); p->vi=vi; p->n=n; @@ -175,63 +173,109 @@ void _vp_psy_init(vorbis_look_psy *p,vorbis_info_psy *vi,int n,long rate){ p->ath[i]=fromdB(p->ath[i]+vi->ath_att); for(i=0;i<n;i++){ - double oc=toOC((i+.5)*rate2/n); - int pre=fromOC(oc-.0625)/rate2*n; - int post=fromOC(oc+.0625)/rate2*n; - p->pre[i]=(pre<0?0:pre); + int oc=rint(toOC((i+.5)*rate2/n)*2.); + if(oc<0)oc=0; + if(oc>10)oc=10; p->octave[i]=oc; - p->post[i]=(post<0?0:post); } - p->curves=malloc(11*sizeof(double **)); - for(i=0;i<11;i++) - p->curves[i]=malloc(9*sizeof(double *)); + p->tonecurves=malloc(11*sizeof(double **)); + p->noisecurves=malloc(11*sizeof(double **)); + for(i=0;i<11;i++){ + p->tonecurves[i]=malloc(9*sizeof(double *)); + p->noisecurves[i]=malloc(9*sizeof(double *)); + } for(i=0;i<11;i++) - for(j=0;j<9;j++) - p->curves[i][j]=malloc(EHMER_MAX*sizeof(double)); - - memcpy(p->curves[0][2],tone_250_40dB_SL,sizeof(double)*EHMER_MAX); - memcpy(p->curves[0][4],tone_250_60dB_SL,sizeof(double)*EHMER_MAX); - memcpy(p->curves[0][6],tone_250_80dB_SL,sizeof(double)*EHMER_MAX); - memcpy(p->curves[0][8],tone_250_80dB_SL,sizeof(double)*EHMER_MAX); - - memcpy(p->curves[2][2],tone_500_40dB_SL,sizeof(double)*EHMER_MAX); - memcpy(p->curves[2][4],tone_500_60dB_SL,sizeof(double)*EHMER_MAX); - memcpy(p->curves[2][6],tone_500_80dB_SL,sizeof(double)*EHMER_MAX); - 
memcpy(p->curves[2][8],tone_500_100dB_SL,sizeof(double)*EHMER_MAX); - - memcpy(p->curves[4][2],tone_1000_40dB_SL,sizeof(double)*EHMER_MAX); - memcpy(p->curves[4][4],tone_1000_60dB_SL,sizeof(double)*EHMER_MAX); - memcpy(p->curves[4][6],tone_1000_80dB_SL,sizeof(double)*EHMER_MAX); - memcpy(p->curves[4][8],tone_1000_100dB_SL,sizeof(double)*EHMER_MAX); - - memcpy(p->curves[6][2],tone_2000_40dB_SL,sizeof(double)*EHMER_MAX); - memcpy(p->curves[6][4],tone_2000_60dB_SL,sizeof(double)*EHMER_MAX); - memcpy(p->curves[6][6],tone_2000_80dB_SL,sizeof(double)*EHMER_MAX); - memcpy(p->curves[6][8],tone_2000_100dB_SL,sizeof(double)*EHMER_MAX); - - memcpy(p->curves[8][2],tone_4000_40dB_SL,sizeof(double)*EHMER_MAX); - memcpy(p->curves[8][4],tone_4000_60dB_SL,sizeof(double)*EHMER_MAX); - memcpy(p->curves[8][6],tone_4000_80dB_SL,sizeof(double)*EHMER_MAX); - memcpy(p->curves[8][8],tone_4000_100dB_SL,sizeof(double)*EHMER_MAX); - - memcpy(p->curves[10][2],tone_8000_60dB_SL,sizeof(double)*EHMER_MAX); - memcpy(p->curves[10][4],tone_8000_60dB_SL,sizeof(double)*EHMER_MAX); - memcpy(p->curves[10][6],tone_8000_80dB_SL,sizeof(double)*EHMER_MAX); - memcpy(p->curves[10][8],tone_8000_100dB_SL,sizeof(double)*EHMER_MAX); - - setup_curve(p->curves[0],0,vi->curveatt_250Hz,vi->peakpre,vi->peakpost); - setup_curve(p->curves[2],2,vi->curveatt_500Hz,vi->peakpre,vi->peakpost); - setup_curve(p->curves[4],4,vi->curveatt_1000Hz,vi->peakpre,vi->peakpost); - setup_curve(p->curves[6],6,vi->curveatt_2000Hz,vi->peakpre,vi->peakpost); - setup_curve(p->curves[8],8,vi->curveatt_4000Hz,vi->peakpre,vi->peakpost); - setup_curve(p->curves[10],10,vi->curveatt_8000Hz,vi->peakpre,vi->peakpost); + for(j=0;j<9;j++){ + p->tonecurves[i][j]=malloc(EHMER_MAX*sizeof(double)); + p->noisecurves[i][j]=malloc(EHMER_MAX*sizeof(double)); + } - for(i=1;i<11;i+=2) - for(j=0;j<9;j++) - interp_curve_dB(p->curves[i][j],p->curves[i-1][j],p->curves[i+1][j],.5); + memcpy(p->tonecurves[0][2],tone_250_40dB_SL,sizeof(double)*EHMER_MAX); + 
memcpy(p->tonecurves[0][4],tone_250_60dB_SL,sizeof(double)*EHMER_MAX); + memcpy(p->tonecurves[0][6],tone_250_80dB_SL,sizeof(double)*EHMER_MAX); + memcpy(p->tonecurves[0][8],tone_250_80dB_SL,sizeof(double)*EHMER_MAX); + + memcpy(p->tonecurves[2][2],tone_500_40dB_SL,sizeof(double)*EHMER_MAX); + memcpy(p->tonecurves[2][4],tone_500_60dB_SL,sizeof(double)*EHMER_MAX); + memcpy(p->tonecurves[2][6],tone_500_80dB_SL,sizeof(double)*EHMER_MAX); + memcpy(p->tonecurves[2][8],tone_500_100dB_SL,sizeof(double)*EHMER_MAX); + + memcpy(p->tonecurves[4][2],tone_1000_40dB_SL,sizeof(double)*EHMER_MAX); + memcpy(p->tonecurves[4][4],tone_1000_60dB_SL,sizeof(double)*EHMER_MAX); + memcpy(p->tonecurves[4][6],tone_1000_80dB_SL,sizeof(double)*EHMER_MAX); + memcpy(p->tonecurves[4][8],tone_1000_100dB_SL,sizeof(double)*EHMER_MAX); + + memcpy(p->tonecurves[6][2],tone_2000_40dB_SL,sizeof(double)*EHMER_MAX); + memcpy(p->tonecurves[6][4],tone_2000_60dB_SL,sizeof(double)*EHMER_MAX); + memcpy(p->tonecurves[6][6],tone_2000_80dB_SL,sizeof(double)*EHMER_MAX); + memcpy(p->tonecurves[6][8],tone_2000_100dB_SL,sizeof(double)*EHMER_MAX); + + memcpy(p->tonecurves[8][2],tone_4000_40dB_SL,sizeof(double)*EHMER_MAX); + memcpy(p->tonecurves[8][4],tone_4000_60dB_SL,sizeof(double)*EHMER_MAX); + memcpy(p->tonecurves[8][6],tone_4000_80dB_SL,sizeof(double)*EHMER_MAX); + memcpy(p->tonecurves[8][8],tone_4000_100dB_SL,sizeof(double)*EHMER_MAX); + + memcpy(p->tonecurves[10][2],tone_8000_60dB_SL,sizeof(double)*EHMER_MAX); + memcpy(p->tonecurves[10][4],tone_8000_60dB_SL,sizeof(double)*EHMER_MAX); + memcpy(p->tonecurves[10][6],tone_8000_80dB_SL,sizeof(double)*EHMER_MAX); + memcpy(p->tonecurves[10][8],tone_8000_100dB_SL,sizeof(double)*EHMER_MAX); + + + memcpy(p->noisecurves[0][2],noise_500_60dB_SL,sizeof(double)*EHMER_MAX); + memcpy(p->noisecurves[0][4],noise_500_60dB_SL,sizeof(double)*EHMER_MAX); + memcpy(p->noisecurves[0][6],noise_500_80dB_SL,sizeof(double)*EHMER_MAX); + 
memcpy(p->noisecurves[0][8],noise_500_80dB_SL,sizeof(double)*EHMER_MAX); + + memcpy(p->noisecurves[2][2],noise_500_60dB_SL,sizeof(double)*EHMER_MAX); + memcpy(p->noisecurves[2][4],noise_500_60dB_SL,sizeof(double)*EHMER_MAX); + memcpy(p->noisecurves[2][6],noise_500_80dB_SL,sizeof(double)*EHMER_MAX); + memcpy(p->noisecurves[2][8],noise_500_80dB_SL,sizeof(double)*EHMER_MAX); + + memcpy(p->noisecurves[4][2],noise_1000_60dB_SL,sizeof(double)*EHMER_MAX); + memcpy(p->noisecurves[4][4],noise_1000_60dB_SL,sizeof(double)*EHMER_MAX); + memcpy(p->noisecurves[4][6],noise_1000_80dB_SL,sizeof(double)*EHMER_MAX); + memcpy(p->noisecurves[4][8],noise_1000_80dB_SL,sizeof(double)*EHMER_MAX); + + memcpy(p->noisecurves[6][2],noise_2000_60dB_SL,sizeof(double)*EHMER_MAX); + memcpy(p->noisecurves[6][4],noise_2000_60dB_SL,sizeof(double)*EHMER_MAX); + memcpy(p->noisecurves[6][6],noise_2000_80dB_SL,sizeof(double)*EHMER_MAX); + memcpy(p->noisecurves[6][8],noise_2000_80dB_SL,sizeof(double)*EHMER_MAX); + + memcpy(p->noisecurves[8][2],noise_4000_60dB_SL,sizeof(double)*EHMER_MAX); + memcpy(p->noisecurves[8][4],noise_4000_60dB_SL,sizeof(double)*EHMER_MAX); + memcpy(p->noisecurves[8][6],noise_4000_80dB_SL,sizeof(double)*EHMER_MAX); + memcpy(p->noisecurves[8][8],noise_4000_80dB_SL,sizeof(double)*EHMER_MAX); + + memcpy(p->noisecurves[10][2],noise_4000_60dB_SL,sizeof(double)*EHMER_MAX); + memcpy(p->noisecurves[10][4],noise_4000_60dB_SL,sizeof(double)*EHMER_MAX); + memcpy(p->noisecurves[10][6],noise_4000_80dB_SL,sizeof(double)*EHMER_MAX); + memcpy(p->noisecurves[10][8],noise_4000_80dB_SL,sizeof(double)*EHMER_MAX); + + setup_curve(p->tonecurves[0],0,vi->toneatt_250Hz); + setup_curve(p->tonecurves[2],2,vi->toneatt_500Hz); + setup_curve(p->tonecurves[4],4,vi->toneatt_1000Hz); + setup_curve(p->tonecurves[6],6,vi->toneatt_2000Hz); + setup_curve(p->tonecurves[8],8,vi->toneatt_4000Hz); + setup_curve(p->tonecurves[10],10,vi->toneatt_8000Hz); + + setup_curve(p->noisecurves[0],0,vi->noiseatt_250Hz); + 
setup_curve(p->noisecurves[2],2,vi->noiseatt_500Hz); + setup_curve(p->noisecurves[4],4,vi->noiseatt_1000Hz); + setup_curve(p->noisecurves[6],6,vi->noiseatt_2000Hz); + setup_curve(p->noisecurves[8],8,vi->noiseatt_4000Hz); + setup_curve(p->noisecurves[10],10,vi->noiseatt_8000Hz); + for(i=1;i<11;i+=2) + for(j=0;j<9;j++){ + interp_curve_dB(p->tonecurves[i][j], + p->tonecurves[i-1][j], + p->tonecurves[i+1][j],.5); + interp_curve_dB(p->noisecurves[i][j], + p->noisecurves[i-1][j], + p->noisecurves[i+1][j],.5); + } } void _vp_psy_clear(vorbis_look_psy *p){ @@ -239,21 +283,42 @@ void _vp_psy_clear(vorbis_look_psy *p){ if(p){ if(p->ath)free(p->ath); if(p->octave)free(p->octave); - if(p->pre)free(p->pre); - if(p->post)free(p->post); - if(p->curves){ + if(p->noisecurves){ for(i=0;i<11;i++){ - for(j=0;j<9;j++) - free(p->curves[i][j]); - free(p->curves[i]); + for(j=0;j<9;j++){ + free(p->tonecurves[i][j]); + free(p->noisecurves[i][j]); + } + free(p->noisecurves[i]); + free(p->tonecurves[i]); } - free(p->curves); + free(p->tonecurves); + free(p->noisecurves); } memset(p,0,sizeof(vorbis_look_psy)); } } -static double _eights[EHMER_MAX]={ +static void compute_decay(vorbis_look_psy *p,double *f, double *decay, int n){ + int i; + /* handle decay */ + if(p->vi->decayp && decay){ + double decscale=1.-pow(p->vi->decay_coeff,n); + double attscale=1.-pow(p->vi->attack_coeff,n); + for(i=0;i<n;i++){ + double del=f[i]-decay[i]; + if(del>0) + /* add energy */ + decay[i]+=del*attscale; + else + /* remove energy */ + decay[i]+=del*decscale; + if(decay[i]>f[i])f[i]=decay[i]; + } + } +} + +static double _eights[EHMER_MAX+1]={ .2500000000000000000,.2726269331663144148, .2973017787506802667,.3242098886627524165, .3535533905932737622,.3855527063519852059, @@ -283,15 +348,14 @@ static double _eights[EHMER_MAX]={ 22.62741699796952076,24.67537320652705316, 26.90868528811886536,29.34412938254947939}; -static double seed_peaks(double *floor,double **curve, - double amp,double specmax, - int *pre,int 
*post, - int x,int n,double specatt){ +static void seed_peaks(double *floor, + double **curves, + double amp,double specmax, + int x,int n,double specatt){ int i; - int ix=x*_eights[0]; - int prevx=pre[ix]; + double x16=x*(1./16.); + int prevx=x*_eights[0]-x16; int nextx; - double ret=0.; /* make this attenuation adjustable */ int choice=rint((todB(amp)-specmax+specatt)/10.)-2; @@ -300,67 +364,108 @@ static double seed_peaks(double *floor,double **curve, for(i=0;i<EHMER_MAX;i++){ if(prevx<n){ - double lin=curve[choice][i]; - ix=x*_eights[i]; - nextx=(ix<n?post[ix]:n); + double lin=curves[choice][i]; + nextx=x*_eights[i]+x16; + nextx=(nextx<n?nextx:n); if(lin){ - /* Currently uses a n+n = +3dB additivity */ - lin*=amp; - lin*=lin; - - floor[prevx]+=lin; - if(nextx==prevx){ - if(nextx+1<n)floor[nextx+1]-=lin; - }else{ - if(nextx<n)floor[nextx]-=lin; - } - if(i==EHMER_OFFSET || prevx==x)ret+=lin; - if(nextx==x)ret-=lin; + lin*=amp; + if(floor[prevx]<lin)floor[prevx]=lin; } prevx=nextx; } } - return(ret); } -static void add_seeds(double *floor,int n){ - int i; - double acc=0.; - for(i=0;i<n;i++){ - acc+=floor[i]; - floor[i]=(acc<=0.?0.:sqrt(acc)); - } -} - -/* Why Bark scale for encoding but not masking? Because masking has a - strong harmonic dependancy */ -static void _vp_tone_tone_iter(vorbis_look_psy *p, - double *f, - double *flr, double *mask, - double specmax){ +static void seed_generic(vorbis_look_psy *p, + double ***curves, + double *f, + double *flr, + double specmax){ vorbis_info_psy *vi=p->vi; long n=p->n,i; - double acc=0.; - - memset(flr,0,sizeof(double)*n); - + /* prime the working vector with peak values */ /* Use the 250 Hz curve up to 250 Hz and 8kHz curve after 8kHz. 
*/ + for(i=0;i<n;i++) + if(f[i]>flr[i]) + seed_peaks(flr,curves[p->octave[i]],f[i], + specmax,i,n,vi->max_curve_dB); +} + +/* bleaugh, this is more complicated than it needs to be */ +static void max_seeds(vorbis_look_psy *p,double *flr){ + long n=p->n,i,j; + long *posstack=alloca(n*sizeof(long)); + double *ampstack=alloca(n*sizeof(double)); + long stack=0; + for(i=0;i<n;i++){ - /*acc+=flr[i]; XXX acc is behaving incorrectly. Check it */ - if(f[i]>mask[i]){ - int o=rint(p->octave[i]*2.); - if(o<0)o=0; - if(o>10)o=10; - - /*acc+=*/seed_peaks(flr,p->curves[o],f[i], - specmax,p->pre,p->post,i,n,vi->max_curve_dB); + if(stack<2){ + posstack[stack]=i; + ampstack[stack++]=flr[i]; + }else{ + while(1){ + if(flr[i]<ampstack[stack-1]){ + posstack[stack]=i; + ampstack[stack++]=flr[i]; + break; + }else{ + if(i<posstack[stack-1]*17/15){ + if(stack>1 && ampstack[stack-1]<ampstack[stack-2] && + i<posstack[stack-2]*17/15){ + /* we completely overlap, making stack-1 irrelevant. pop it */ + stack--; + continue; + } + } + posstack[stack]=i; + ampstack[stack++]=flr[i]; + break; + + } + } } } - /* chase curves down from the peak seeds */ - add_seeds(flr,n); - + /* the stack now contains only the positions that are relevant. Scan + 'em straight through */ + { + long pos=0; + for(i=0;i<stack;i++){ + long endpos; + if(i<stack-1 && ampstack[i+1]>ampstack[i]){ + endpos=posstack[i+1]; + }else{ + endpos=posstack[i]*17/15; + } + for(j=pos;j<endpos;j++)flr[j]=ampstack[i]; + pos=endpos; + } + } + + /* there. Linear time. I now remember this was on a problem set I + had in Grad Skool... 
I didn't solve it at the time ;-) */ +} + +#define noiseBIAS 5 +static void third_octave_noise(vorbis_look_psy *p,double *f,double *noise){ + long i,n=p->n; + long lo=0,hi=0; + double acc=0.; + + for(i=0;i<n;i++){ + /* not exactly correct, (the center frequency should be centered + on a *log* scale), but not worth quibbling */ + long newhi=i*7/5+noiseBIAS; + long newlo=i*5/7-noiseBIAS; + if(newhi>n)newhi=n; + + for(;lo<newlo;lo++) + acc-=todB(f[lo]); /* yeah, this ain't RMS */ + for(;hi<newhi;hi++) + acc+=todB(f[hi]); + noise[i]=fromdB(acc/(hi-lo)); + } } /* stability doesn't matter */ @@ -371,11 +476,75 @@ static int comp(const void *a,const void *b){ return(-1); } +static int frameno=-1; +void _vp_compute_mask(vorbis_look_psy *p,double *f, + double *flr, + double *decay){ + double *noise=alloca(sizeof(double)*p->n); + double *work=alloca(sizeof(double)*p->n); + int i,n=p->n; + double specmax=0.; + + frameno++; + + /* don't use the smoothed data for noise */ + third_octave_noise(p,f,noise); + + /* compute, update and apply decay accumulator */ + for(i=0;i<n;i++)work[i]=fabs(f[i]); + compute_decay(p,work,decay,n); + + if(p->vi->smoothp){ + /* compute power^.5 of three neighboring bins to smooth for peaks + that get split twixt bins/peaks that nail the bin. This evens + out treatment as we're not doing additive masking any longer. 
*/ + double acc=work[0]*work[0]+work[1]*work[1]; + double prev=work[0]; + + work[0]=sqrt(acc); + for(i=1;i<n-1;i++){ + double this=work[i]; + acc+=work[i+1]*work[i+1]; + work[i]=sqrt(acc); + acc-=prev*prev; + prev=this; + } + work[n-1]=sqrt(acc); + } + + /* find the highest peak so we know the limits */ + for(i=0;i<n;i++){ + if(work[i]>specmax)specmax=work[i]; + } + specmax=todB(specmax); + + /* mask off the ATH */ + if(p->vi->athp) + for(i=0;i<n;i++) + flr[i]=p->ath[i]; + else + for(i=0;i<n;i++) + flr[i]=0.; + + /* seed the tone masking */ + if(p->vi->tonemaskp) + seed_generic(p,p->tonecurves,work,flr,specmax); + + /* seed the noise masking */ + if(p->vi->noisemaskp) + seed_generic(p,p->noisecurves,noise,flr,specmax); + + /* chase the seeds */ + max_seeds(p,flr); + +} + + /* this applies the floor and (optionally) tries to preserve noise energy in low resolution portions of the spectrum */ /* f and flr are *linear* scale, not dB */ void _vp_apply_floor(vorbis_look_psy *p,double *f, - double *flr,double *mask){ + double *flr){ double *work=alloca(p->n*sizeof(double)); double thresh=fromdB(p->vi->noisefit_threshdB); int i,j,addcount=0; @@ -385,14 +554,8 @@ void _vp_apply_floor(vorbis_look_psy *p,double *f, for(j=0;j<p->n;j++){ if(flr[j]<=0) work[j]=0.; - else{ - double val=rint(f[j]/flr[j]); - if(mask[j]>flr[j] && fabs(f[j])<mask[j]){ - work[j]=0; - }else{ - work[j]=val; - } - } + else + work[j]=rint(f[j]/flr[j]); } /* look at spectral energy levels. Noise is noise; sensation level @@ -416,7 +579,8 @@ void _vp_apply_floor(vorbis_look_psy *p,double *f, double y=(f[i]*f[i]); original_SL+=y; if(work[i]){ - current_SL+=y; + double qy=(work[i]*flr[i]); + current_SL+=qy*qy; }else{ index[z++]=f+i; } @@ -431,17 +595,14 @@ void _vp_apply_floor(vorbis_look_psy *p,double *f, for(j=0;j<z;j++){ int p=index[j]-f; - /* yes, I mean *mask* here, not floor. This should only be - being applied in areas where noise dominates masking; - otherwise we don't want to be adding tones back. 
*/ - double val=mask[p]*mask[p]+current_SL; + double val=flr[p]*flr[p]+current_SL; if(val<original_SL){ addcount++; if(f[p]>0) - work[p]=mask[p]/flr[p]; + work[p]=flr[p]; else - work[p]=-mask[p]/flr[p]; + work[p]=-flr[p]; current_SL=val; }else break; @@ -452,98 +613,3 @@ void _vp_apply_floor(vorbis_look_psy *p,double *f, memcpy(f,work,p->n*sizeof(double)); } -void _vp_tone_tone_mask(vorbis_look_psy *p,double *f, - double *flr, double *mask, - double *decay){ - double *iter=alloca(sizeof(double)*p->n); - int i,j,n=p->n; - double specmax=0.; - - for(i=0;i<n;i++)iter[i]=fabs(f[i]); - f=iter; - iter=alloca(sizeof(double)*p->n); - - /* handle decay */ - if(p->vi->decayp && decay){ - double decscale=1.-pow(p->vi->decay_coeff,n); - double attscale=1.-pow(p->vi->attack_coeff,n); - for(i=0;i<n;i++){ - double del=f[i]-decay[i]; - if(del>0) - /* add energy */ - decay[i]+=del*attscale; - else - /* remove energy */ - decay[i]+=del*decscale; - if(decay[i]>f[i])f[i]=decay[i]; - } - } - - for(i=0;i<n;i++){ - if(f[i]>specmax)specmax=f[i]; - } - specmax=todB(specmax); - - /* do the peak att with rolloff hack to the mask */ - memset(mask,0,sizeof(double)*n); - if(p->vi->peakattp){ - double curmask; - - /* chase down from peaks forward */ - curmask=0.; - for(i=0;i<n-1;i++){ - if(f[i]>curmask){ - double val=todB(f[i]); - int o=p->octave[i]; - int choice=rint((val-specmax+p->vi->max_curve_dB)/20.)-1; - if(o<0)o=0; - if(o>5)o=5; - if(choice<0)choice=0; - if(choice>4)choice=4; - val=fromdB(val+p->vi->peakatt[o][choice]); - if(val>curmask)curmask=val; - } - mask[i]=curmask; - /* roll off the curmask */ - curmask*=fromdB(p->vi->peakpost*(p->octave[i+1]-p->octave[i])); - } - /* chase down from peaks backward */ - curmask=0.; - for(i=n-1;i>0;i--){ - if(mask[i]>curmask) - curmask=mask[i]; - else - mask[i]=curmask; - /* roll off the curmask */ - curmask*=fromdB(p->vi->peakpre*(p->octave[i]-p->octave[i-1])); - } - } - /* mask off the ATH */ - if(p->vi->athp) - for(i=0;i<n;i++) - 
if(mask[i]<p->ath[i]) - mask[i]=p->ath[i]; - - /* perform iterative additive tone-tone masking */ - - for(i=0;i<p->vi->curve_fit_iterations;i++){ - if(i==0) - _vp_tone_tone_iter(p,f,flr,mask,specmax); - else{ - _vp_tone_tone_iter(p,iter,flr,mask,specmax); - } - if(i!=p->vi->curve_fit_iterations-1){ - for(j=0;j<n;j++) - if(f[j]<mask[j] || f[j]<flr[j]) - iter[j]=0.; - else - iter[j]=f[j]; - } - } - - for(i=0;i<n;i++) - if(mask[i]<flr[i]) - mask[i]=flr[i]; - -} - @@ -12,7 +12,7 @@ ******************************************************************** function: random psychoacoustics (not including preecho) - last mod: $Id: psy.h,v 1.11.2.2.2.3 2000/04/21 16:35:39 xiphmont Exp $ + last mod: $Id: psy.h,v 1.11.2.2.2.4 2000/05/04 06:13:28 xiphmont Exp $ ********************************************************************/ @@ -28,12 +28,11 @@ typedef struct { int n; struct vorbis_info_psy *vi; - double ***curves; + double ***tonecurves; + double ***noisecurves; double *ath; - int *pre; - double *octave; - int *post; + int *octave; } vorbis_look_psy; @@ -41,11 +40,11 @@ extern void _vp_psy_init(vorbis_look_psy *p,vorbis_info_psy *vi,int n,long rat extern void _vp_psy_clear(vorbis_look_psy *p); extern void *_vi_psy_dup(void *source); extern void _vi_psy_free(vorbis_info_psy *i); -extern void _vp_tone_tone_mask(vorbis_look_psy *p,double *f, - double *floor, double *mask, +extern void _vp_compute_mask(vorbis_look_psy *p,double *f, + double *floor, double *decay); extern void _vp_apply_floor(vorbis_look_psy *p,double *f, - double *flr,double *mask); + double *flr); #endif diff --git a/lib/psytune.c b/lib/psytune.c index 0221553e..3903aab6 100644 --- a/lib/psytune.c +++ b/lib/psytune.c @@ -13,7 +13,7 @@ function: simple utility that runs audio through the psychoacoustics without encoding - last mod: $Id: psytune.c,v 1.1.2.2.2.7 2000/05/02 00:28:58 xiphmont Exp $ + last mod: $Id: psytune.c,v 1.1.2.2.2.8 2000/05/04 06:13:28 xiphmont Exp $ 
********************************************************************/ @@ -31,29 +31,31 @@ #include "lpc.h" static vorbis_info_psy _psy_set0={ - 1,1,1, - - 0,8,4., + 1,/*athp*/ + 1,/*decayp*/ + 1,/*smoothp*/ + 1,8,0., -130., - {-40.,-40.,-60.,-80.,-85.}, - {-40.,-40.,-60.,-80.,-100.}, - {-40.,-40.,-60.,-80.,-100.}, - {-40.,-40.,-60.,-80.,-100.}, - {-40.,-40.,-60.,-80.,-100.}, - {-70.,-70.,-70.,-80.,-100.}, - - 1, - {{-6.,-8.,-12.,-16.,-18.}, - {-6.,-8.,-12.,-16.,-18.}, - {-6.,-8.,-12.,-16.,-18.}, - {-6.,-8.,-12.,-16.,-20.}, - {-6.,-8.,-12.,-16.,-20.}, - {-6.,-8.,-12.,-16.,-18.},}, - -80.,-40., - - 100., + 1,/* tonemaskp*/ + {-35.,-40.,-60.,-80.,-80.}, /* remember that el 4 is an 80 dB curve, not 100 */ + {-35.,-40.,-60.,-80.,-95.}, + {-35.,-40.,-60.,-80.,-95.}, + {-35.,-40.,-60.,-80.,-95.}, + {-35.,-40.,-60.,-80.,-95.}, + {-65.,-60.,-60.,-80.,-95.}, /* remember that el 1 is a 60 dB curve, not 40 */ + + 1,/*noisemaskp*/ + {-100.,-100.,-100.,-200.,-200.}, /* this is the 500 Hz curve, which + is too wrong to work */ + {-60.,-60.,-60.,-80.,-80.}, + {-60.,-60.,-60.,-80.,-80.}, + {-60.,-60.,-60.,-80.,-80.}, + {-60.,-60.,-60.,-80.,-80.}, + {-50.,-55.,-60.,-80.,-80.}, + + 110., .9998, .9997 /* attack/decay control */ }; @@ -132,7 +134,7 @@ int main(int argc,char *argv[]){ int framesize=2048; int order=32; - double *pcm[2],*out[2],*window,*mask,*decay[2],*lpc,*floor; + double *pcm[2],*out[2],*window,*decay[2],*lpc,*floor; signed char *buffer,*buffer2; mdct_lookup m_look; vorbis_look_psy p_look; @@ -187,7 +189,6 @@ int main(int argc,char *argv[]){ out[1]=calloc(framesize/2,sizeof(double)); decay[0]=calloc(framesize/2,sizeof(double)); decay[1]=calloc(framesize/2,sizeof(double)); - mask=malloc(framesize*sizeof(double)); floor=malloc(framesize*sizeof(double)); lpc=malloc(order*sizeof(double)); buffer=malloc(framesize*4); @@ -199,7 +200,10 @@ int main(int argc,char *argv[]){ for(i=0;i<11;i++) for(j=0;j<9;j++) - analysis("Pcurve",i*10+j,p_look.curves[i][j],EHMER_MAX,0,1); + 
analysis("Ptonecurve",i*10+j,p_look.tonecurves[i][j],EHMER_MAX,0,1); + for(i=0;i<11;i++) + for(j=0;j<9;j++) + analysis("Pnoisecurve",i*10+j,p_look.noisecurves[i][j],EHMER_MAX,0,1); /* we cheat on the WAV header; we just bypass 44 bytes and never verify that it matches 16bit/stereo/44.1kHz. */ @@ -233,7 +237,6 @@ int main(int argc,char *argv[]){ analysis("pre",frameno,pcm[i],framesize,0,0); /* do the psychacoustics */ - memset(mask,0,sizeof(double)*framesize/2); for(j=0;j<framesize;j++) pcm[i][j]*=window[j]; @@ -241,9 +244,8 @@ int main(int argc,char *argv[]){ analysis("mdct",frameno,pcm[i],framesize/2,1,1); - _vp_tone_tone_mask(&p_look,pcm[i],floor,mask,decay[i]); + _vp_compute_mask(&p_look,pcm[i],floor,decay[i]); - analysis("mask",frameno,mask,framesize/2,1,1); analysis("prefloor",frameno,floor,framesize/2,1,1); analysis("decay",frameno,decay[i],framesize/2,1,1); @@ -251,7 +253,7 @@ int main(int argc,char *argv[]){ _lpc_to_curve(floor,lpc,sqrt(amp),&floorlook,"Ffloor",frameno); analysis("floor",frameno,floor,framesize/2,1,1); - _vp_apply_floor(&p_look,pcm[i],floor,mask); + _vp_apply_floor(&p_look,pcm[i],floor); /* re-add floor */ for(j=0;j<framesize/2;j++){ |