summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/analysis.c51
1 files changed, 31 insertions, 20 deletions
diff --git a/src/analysis.c b/src/analysis.c
index 1d6dd829..2e3913c9 100644
--- a/src/analysis.c
+++ b/src/analysis.c
@@ -432,6 +432,7 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
float alpha, alphaE, alphaE2;
float frame_loudness;
float bandwidth_mask;
+ int is_masked[NB_TBANDS+1];
int bandwidth=0;
float maxE = 0;
float noise_floor;
@@ -449,7 +450,9 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
alpha = 1.f/IMIN(10, 1+tonal->count);
alphaE = 1.f/IMIN(25, 1+tonal->count);
- alphaE2 = 1.f/IMIN(500, 1+tonal->count);
+ /* Noise floor related decay for bandwidth detection: -2.2 dB/second */
+ alphaE2 = 1.f/IMIN(100, 1+tonal->count);
+ if (tonal->count <= 1) alphaE2 = 1;
if (tonal->Fs == 48000)
{
@@ -722,6 +725,7 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
for (b=0;b<NB_TBANDS;b++)
{
float E=0;
+ float Em;
int band_start, band_end;
/* Keep a margin of 300 Hz for aliasing */
band_start = tbands[b];
@@ -735,40 +739,47 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
E = SCALE_ENER(E);
maxE = MAX32(maxE, E);
tonal->meanE[b] = MAX32((1-alphaE2)*tonal->meanE[b], E);
- E = MAX32(E, tonal->meanE[b]);
- /* Use a simple follower with 13 dB/Bark slope for spreading function */
- bandwidth_mask = MAX32(.05f*bandwidth_mask, E);
+ Em = MAX32(E, tonal->meanE[b]);
/* Consider the band "active" only if all these conditions are met:
- 1) less than 10 dB below the simple follower
- 2) less than 90 dB below the peak band (maximal masking possible considering
+ 1) less than 90 dB below the peak band (maximal masking possible considering
both the ATH and the loudness-dependent slope of the spreading function)
- 3) above the PCM quantization noise floor
+ 2) above the PCM quantization noise floor
We use b+1 because the first CELT band isn't included in tbands[]
*/
- if (E>.1*bandwidth_mask && E*1e9f > maxE && E > noise_floor*(band_end-band_start))
+ if (E*1e9f > maxE && (Em > 3*noise_floor*(band_end-band_start) || E > noise_floor*(band_end-band_start)))
bandwidth = b+1;
+ /* Check if the band is masked (see below). */
+ is_masked[b] = E < (tonal->prev_bandwidth >= b+1 ? .01f : .05f)*bandwidth_mask;
+ /* Use a simple follower with 13 dB/Bark slope for spreading function. */
+ bandwidth_mask = MAX32(.05f*bandwidth_mask, E);
}
/* Special case for the last two bands, for which we don't have spectrum but only
- the energy above 12 kHz. */
+ the energy above 12 kHz. The difficulty here is that the high-pass we use
+ leaks some LF energy, so we need to increase the threshold without accidentally cutting
+ off the band. */
if (tonal->Fs == 48000) {
- float ratio;
- float E = hp_ener*(1.f/(240*240));
- ratio = tonal->prev_bandwidth==20 ? 0.03f : 0.07f;
+ float noise_ratio;
+ float Em;
+ float E = hp_ener*(1.f/(60*60));
+ noise_ratio = tonal->prev_bandwidth==20 ? 10.f : 30.f;
+
#ifdef FIXED_POINT
/* silk_resampler_down2_hp() shifted right by an extra 8 bits. */
E *= 256.f*(1.f/Q15ONE)*(1.f/Q15ONE);
#endif
- maxE = MAX32(maxE, E);
tonal->meanE[b] = MAX32((1-alphaE2)*tonal->meanE[b], E);
- E = MAX32(E, tonal->meanE[b]);
- /* Use a simple follower with 13 dB/Bark slope for spreading function */
- bandwidth_mask = MAX32(.05f*bandwidth_mask, E);
- if (E>ratio*bandwidth_mask && E*1e9f > maxE && E > noise_floor*160)
- bandwidth = 20;
- /* This detector is unreliable, so if the bandwidth is close to SWB, assume it's FB. */
- if (bandwidth >= 17)
+ Em = MAX32(E, tonal->meanE[b]);
+ if (Em > 3*noise_ratio*noise_floor*160 || E > noise_ratio*noise_floor*160)
bandwidth = 20;
+ /* Check if the band is masked (see below). */
+ is_masked[b] = E < (tonal->prev_bandwidth == 20 ? .01f : .05f)*bandwidth_mask;
}
+ /* In some cases, resampling aliasing can create a small amount of energy in the first band
+ being cut. So if the last band is masked, we don't include it. */
+ if (bandwidth == 20 && is_masked[NB_TBANDS])
+ bandwidth-=2;
+ else if (bandwidth > 0 && bandwidth <= NB_TBANDS && is_masked[bandwidth-1])
+ bandwidth--;
if (tonal->count<=2)
bandwidth = 20;
frame_loudness = 20*(float)log10(frame_loudness);