diff options
Diffstat (limited to 'libavcodec/aacpsy.c')
-rw-r--r-- | libavcodec/aacpsy.c | 155 |
1 files changed, 110 insertions, 45 deletions
diff --git a/libavcodec/aacpsy.c b/libavcodec/aacpsy.c index 6cfe3e32ea..a5474b9383 100644 --- a/libavcodec/aacpsy.c +++ b/libavcodec/aacpsy.c @@ -2,20 +2,20 @@ * AAC encoder psychoacoustic model * Copyright (C) 2008 Konstantin Shishkov * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -25,6 +25,8 @@ */ #include "libavutil/attributes.h" +#include "libavutil/libm.h" + #include "avcodec.h" #include "aactab.h" #include "psymodel.h" @@ -216,6 +218,10 @@ static const float psy_fir_coeffs[] = { -5.52212e-17 * 2, -0.313819 * 2 }; +#if ARCH_MIPS +# include "mips/aacpsy_mips.h" +#endif /* ARCH_MIPS */ + /** * Calculate the ABR attack threshold from the above LAME psymodel table. */ @@ -294,7 +300,7 @@ static av_cold int psy_3gpp_init(FFPsyContext *ctx) { int i, j, g, start; float prev, minscale, minath, minsnr, pe_min; const int chan_bitrate = ctx->avctx->bit_rate / ctx->avctx->channels; - const int bandwidth = ctx->avctx->cutoff ? ctx->avctx->cutoff : ctx->avctx->sample_rate / 2; + const int bandwidth = ctx->avctx->cutoff ? ctx->avctx->cutoff : AAC_CUTOFF(ctx->avctx); const float num_bark = calc_bark((float)bandwidth); ctx->model_priv_data = av_mallocz(sizeof(AacPsyContext)); @@ -337,7 +343,7 @@ static av_cold int psy_3gpp_init(FFPsyContext *ctx) { coeff->spread_low[1] = pow(10.0, -bark_width * en_spread_low); coeff->spread_hi [1] = pow(10.0, -bark_width * en_spread_hi); pe_min = bark_pe * bark_width; - minsnr = pow(2.0f, pe_min / band_sizes[g]) - 1.5f; + minsnr = exp2(pe_min / band_sizes[g]) - 1.5f; coeff->min_snr = av_clipf(1.0f / minsnr, PSY_SNR_25DB, PSY_SNR_1DB); } start = 0; @@ -350,9 +356,9 @@ static av_cold int psy_3gpp_init(FFPsyContext *ctx) { } } - pctx->ch = av_mallocz(sizeof(AacPsyChannel) * ctx->avctx->channels); + pctx->ch = av_mallocz_array(ctx->avctx->channels, sizeof(AacPsyChannel)); if (!pctx->ch) { - av_freep(&pctx); + av_freep(&ctx->model_priv_data); return AVERROR(ENOMEM); } @@ -532,8 +538,11 @@ static float calc_reduction_3gpp(float a, float desired_pe, float pe, { float thr_avg, reduction; - thr_avg = powf(2.0f, (a - pe) / (4.0f * active_lines)); - reduction = powf(2.0f, (a - desired_pe) / (4.0f * active_lines)) - thr_avg; + if(active_lines == 0.0) + return 0; + + thr_avg = exp2f((a - pe) / (4.0f * active_lines)); + reduction = exp2f((a - desired_pe) / (4.0f * active_lines)) - thr_avg; return FFMAX(reduction, 0.0f); } @@ -544,8 +553,10 @@ static float calc_reduced_thr_3gpp(AacPsyBand *band, float min_snr, float thr = band->thr; if (band->energy > thr) { - thr = powf(thr, 0.25f) + reduction; - thr = powf(thr, 4.0f); + thr = sqrtf(thr); + thr = sqrtf(thr) + reduction; + thr *= thr; + thr *= thr; /* This deviates from the 3GPP spec to match the reference encoder. * It performs min(thr_reduced, max(thr, energy/min_snr)) only for bands @@ -561,6 +572,52 @@ static float calc_reduced_thr_3gpp(AacPsyBand *band, float min_snr, return thr; } +#ifndef calc_thr_3gpp +static void calc_thr_3gpp(const FFPsyWindowInfo *wi, const int num_bands, AacPsyChannel *pch, + const uint8_t *band_sizes, const float *coefs) +{ + int i, w, g; + int start = 0; + for (w = 0; w < wi->num_windows*16; w += 16) { + for (g = 0; g < num_bands; g++) { + AacPsyBand *band = &pch->band[w+g]; + + float form_factor = 0.0f; + float Temp; + band->energy = 0.0f; + for (i = 0; i < band_sizes[g]; i++) { + band->energy += coefs[start+i] * coefs[start+i]; + form_factor += sqrtf(fabs(coefs[start+i])); + } + Temp = band->energy > 0 ? sqrtf((float)band_sizes[g] / band->energy) : 0; + band->thr = band->energy * 0.001258925f; + band->nz_lines = form_factor * sqrtf(Temp); + + start += band_sizes[g]; + } + } +} +#endif /* calc_thr_3gpp */ + +#ifndef psy_hp_filter +static void psy_hp_filter(const float *firbuf, float *hpfsmpl, const float *psy_fir_coeffs) +{ + int i, j; + for (i = 0; i < AAC_BLOCK_SIZE_LONG; i++) { + float sum1, sum2; + sum1 = firbuf[i + (PSY_LAME_FIR_LEN - 1) / 2]; + sum2 = 0.0; + for (j = 0; j < ((PSY_LAME_FIR_LEN - 1) / 2) - 1; j += 2) { + sum1 += psy_fir_coeffs[j] * (firbuf[i + j] + firbuf[i + PSY_LAME_FIR_LEN - j]); + sum2 += psy_fir_coeffs[j + 1] * (firbuf[i + j + 1] + firbuf[i + PSY_LAME_FIR_LEN - j - 1]); + } + /* NOTE: The LAME psymodel expects it's input in the range -32768 to 32768. + * Tuning this for normalized floats would be difficult. */ + hpfsmpl[i] = (sum1 + sum2) * 32768.0f; + } +} +#endif /* psy_hp_filter */ + /** * Calculate band thresholds as suggested in 3GPP TS26.403 */ @@ -569,9 +626,8 @@ static void psy_3gpp_analyze_channel(FFPsyContext *ctx, int channel, { AacPsyContext *pctx = (AacPsyContext*) ctx->model_priv_data; AacPsyChannel *pch = &pctx->ch[channel]; - int start = 0; int i, w, g; - float desired_bits, desired_pe, delta_pe, reduction, spread_en[128] = {0}; + float desired_bits, desired_pe, delta_pe, reduction= NAN, spread_en[128] = {0}; float a = 0.0f, active_lines = 0.0f, norm_fac = 0.0f; float pe = pctx->chan_bitrate > 32000 ? 0.0f : FFMAX(50.0f, 100.0f - pctx->chan_bitrate * 100.0f / 32000.0f); const int num_bands = ctx->num_bands[wi->num_windows == 8]; @@ -580,22 +636,8 @@ static void psy_3gpp_analyze_channel(FFPsyContext *ctx, int channel, const float avoid_hole_thr = wi->num_windows == 8 ? PSY_3GPP_AH_THR_SHORT : PSY_3GPP_AH_THR_LONG; //calculate energies, initial thresholds and related values - 5.4.2 "Threshold Calculation" - for (w = 0; w < wi->num_windows*16; w += 16) { - for (g = 0; g < num_bands; g++) { - AacPsyBand *band = &pch->band[w+g]; - - float form_factor = 0.0f; - band->energy = 0.0f; - for (i = 0; i < band_sizes[g]; i++) { - band->energy += coefs[start+i] * coefs[start+i]; - form_factor += sqrtf(fabs(coefs[start+i])); - } - band->thr = band->energy * 0.001258925f; - band->nz_lines = form_factor / powf(band->energy / band_sizes[g], 0.25f); + calc_thr_3gpp(wi, num_bands, pch, band_sizes, coefs); - start += band_sizes[g]; - } - } //modify thresholds and energies - spread, threshold in quiet, pre-echo control for (w = 0; w < wi->num_windows*16; w += 16) { AacPsyBand *bands = &pch->band[w]; @@ -681,7 +723,7 @@ static void psy_3gpp_analyze_channel(FFPsyContext *ctx, int channel, } desired_pe_no_ah = FFMAX(desired_pe - (pe - pe_no_ah), 0.0f); if (active_lines > 0.0f) - reduction += calc_reduction_3gpp(a, desired_pe_no_ah, pe_no_ah, active_lines); + reduction = calc_reduction_3gpp(a, desired_pe_no_ah, pe_no_ah, active_lines); pe = 0.0f; for (w = 0; w < wi->num_windows*16; w += 16) { @@ -691,7 +733,10 @@ static void psy_3gpp_analyze_channel(FFPsyContext *ctx, int channel, if (active_lines > 0.0f) band->thr = calc_reduced_thr_3gpp(band, coeffs[g].min_snr, reduction); pe += calc_pe_3gpp(band); - band->norm_fac = band->active_lines / band->thr; + if (band->thr > 0.0f) + band->norm_fac = band->active_lines / band->thr; + else + band->norm_fac = 0.0f; norm_fac += band->norm_fac; } } @@ -711,7 +756,7 @@ static void psy_3gpp_analyze_channel(FFPsyContext *ctx, int channel, float delta_sfb_pe = band->norm_fac * norm_fac * delta_pe; float thr = band->thr; - thr *= powf(2.0f, delta_sfb_pe / band->active_lines); + thr *= exp2f(delta_sfb_pe / band->active_lines); if (thr > coeffs[g].min_snr * band->energy && band->avoid_holes == PSY_3GPP_AH_INACTIVE) thr = FFMAX(band->thr, coeffs[g].min_snr * band->energy); band->thr = thr; @@ -742,6 +787,7 @@ static void psy_3gpp_analyze_channel(FFPsyContext *ctx, int channel, psy_band->threshold = band->thr; psy_band->energy = band->energy; + psy_band->spread = band->active_lines * 2.0f / band_sizes[g]; } } @@ -791,6 +837,7 @@ static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio, int grouping = 0; int uselongblock = 1; int attacks[AAC_NUM_BLOCKS_SHORT + 1] = { 0 }; + float clippings[AAC_NUM_BLOCKS_SHORT]; int i; FFPsyWindowInfo wi = { { 0 } }; @@ -801,21 +848,10 @@ static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio, float energy_subshort[(AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOCKS]; float energy_short[AAC_NUM_BLOCKS_SHORT + 1] = { 0 }; const float *firbuf = la + (AAC_BLOCK_SIZE_SHORT/4 - PSY_LAME_FIR_LEN); - int j, att_sum = 0; + int att_sum = 0; /* LAME comment: apply high pass filter of fs/4 */ - for (i = 0; i < AAC_BLOCK_SIZE_LONG; i++) { - float sum1, sum2; - sum1 = firbuf[i + (PSY_LAME_FIR_LEN - 1) / 2]; - sum2 = 0.0; - for (j = 0; j < ((PSY_LAME_FIR_LEN - 1) / 2) - 1; j += 2) { - sum1 += psy_fir_coeffs[j] * (firbuf[i + j] + firbuf[i + PSY_LAME_FIR_LEN - j]); - sum2 += psy_fir_coeffs[j + 1] * (firbuf[i + j + 1] + firbuf[i + PSY_LAME_FIR_LEN - j - 1]); - } - /* NOTE: The LAME psymodel expects its input in the range -32768 to - * 32768. Tuning this for normalized floats would be difficult. */ - hpfsmpl[i] = (sum1 + sum2) * 32768.0f; - } + psy_hp_filter(firbuf, hpfsmpl, psy_fir_coeffs); /* Calculate the energies of each sub-shortblock */ for (i = 0; i < PSY_LAME_NUM_SUBBLOCKS; i++) { @@ -891,14 +927,35 @@ static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio, lame_apply_block_type(pch, &wi, uselongblock); + /* Calculate input sample maximums and evaluate clipping risk */ + if (audio) { + for (i = 0; i < AAC_NUM_BLOCKS_SHORT; i++) { + const float *wbuf = audio + i * AAC_BLOCK_SIZE_SHORT; + float max = 0; + int j; + for (j = 0; j < AAC_BLOCK_SIZE_SHORT; j++) + max = FFMAX(max, fabsf(wbuf[j])); + clippings[i] = max; + } + } else { + for (i = 0; i < 8; i++) + clippings[i] = 0; + } + wi.window_type[1] = prev_type; if (wi.window_type[0] != EIGHT_SHORT_SEQUENCE) { + float clipping = 0.0f; + wi.num_windows = 1; wi.grouping[0] = 1; if (wi.window_type[0] == LONG_START_SEQUENCE) wi.window_shape = 0; else wi.window_shape = 1; + + for (i = 0; i < 8; i++) + clipping = FFMAX(clipping, clippings[i]); + wi.clipping[0] = clipping; } else { int lastgrp = 0; @@ -909,6 +966,14 @@ static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio, lastgrp = i; wi.grouping[lastgrp]++; } + + for (i = 0; i < 8; i += wi.grouping[i]) { + int w; + float clipping = 0.0f; + for (w = 0; w < wi.grouping[i] && !clipping; w++) + clipping = FFMAX(clipping, clippings[i+w]); + wi.clipping[i] = clipping; + } } /* Determine grouping, based on the location of the first attack, and save for |