diff options
author | Koen Vos <koen.vos@skype.net> | 2012-07-12 14:36:38 -0400 |
---|---|---|
committer | Jean-Marc Valin <jmvalin@jmvalin.ca> | 2012-10-10 09:37:10 -0400 |
commit | 9cbbcb53ae4b9123ce9687c787f61a546b43a581 (patch) | |
tree | c386faa2d71b4699f87743709008ae505b35e324 /silk/float | |
parent | 1bcf028af8ff0346bf44ee530f41e3822fe5ee23 (diff) | |
download | opus-9cbbcb53ae4b9123ce9687c787f61a546b43a581.tar.gz |
Improvements to the pitch search
Normalizes the cost function by (x+y) instead of sqrt(x*y)
Diffstat (limited to 'silk/float')
-rw-r--r-- | silk/float/pitch_analysis_core_FLP.c | 101 | ||||
-rw-r--r-- | silk/float/wrappers_FLP.c | 2 |
2 files changed, 47 insertions, 56 deletions
diff --git a/silk/float/pitch_analysis_core_FLP.c b/silk/float/pitch_analysis_core_FLP.c index fbff90c3..94b0fa4e 100644 --- a/silk/float/pitch_analysis_core_FLP.c +++ b/silk/float/pitch_analysis_core_FLP.c @@ -37,7 +37,6 @@ POSSIBILITY OF SUCH DAMAGE. #include "pitch_est_defines.h" #define SCRATCH_SIZE 22 -#define eps 1.192092896e-07f /************************************************************/ /* Internally used functions */ @@ -129,8 +128,6 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, max_lag_4kHz = PE_MAX_LAG_MS * 4; max_lag_8kHz = PE_MAX_LAG_MS * 8 - 1; - silk_memset(C, 0, sizeof(silk_float) * nb_subfr * ((PE_MAX_LAG >> 1) + 5)); - /* Resample from input sampled at Fs_kHz to 8 kHz */ if( Fs_kHz == 16 ) { /* Resample to 16 -> 8 khz */ @@ -164,6 +161,7 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, /****************************************************************************** * FIRST STAGE, operating in 4 khz ******************************************************************************/ + silk_memset(C, 0, sizeof(silk_float) * nb_subfr * ((PE_MAX_LAG >> 1) + 5)); target_ptr = &frame_4kHz[ silk_LSHIFT( sf_length_4kHz, 2 ) ]; for( k = 0; k < nb_subfr >> 1; k++ ) { /* Check that we are within range of the array */ @@ -178,12 +176,14 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, /* Calculate first vector products before loop */ cross_corr = silk_inner_product_FLP( target_ptr, basis_ptr, sf_length_8kHz ); - normalizer = silk_energy_FLP( basis_ptr, sf_length_8kHz ) + sf_length_8kHz * 4000.0f; + normalizer = silk_energy_FLP( target_ptr, sf_length_8kHz ) + + silk_energy_FLP( basis_ptr, sf_length_8kHz ) + + sf_length_8kHz * 4000.0f; - C[ 0 ][ min_lag_4kHz ] += (silk_float)(cross_corr / sqrt(normalizer)); + C[ 0 ][ min_lag_4kHz ] += (silk_float)( 2 * cross_corr / normalizer ); /* From now on normalizer is computed recursively */ - for(d = min_lag_4kHz + 1; d <= max_lag_4kHz; d++) { + for( d = min_lag_4kHz + 1; d <= max_lag_4kHz; d++ ) { basis_ptr--; /* Check that we are within range of the array */ @@ -196,7 +196,7 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, normalizer += basis_ptr[ 0 ] * (double)basis_ptr[ 0 ] - basis_ptr[ sf_length_8kHz ] * (double)basis_ptr[ sf_length_8kHz ]; - C[ 0 ][ d ] += (silk_float)(cross_corr / sqrt( normalizer )); + C[ 0 ][ d ] += (silk_float)( 2 * cross_corr / normalizer ); } /* Update target pointer */ target_ptr += sf_length_8kHz; @@ -214,13 +214,7 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, /* Escape if correlation is very low already here */ Cmax = C[ 0 ][ min_lag_4kHz ]; - target_ptr = &frame_4kHz[ silk_SMULBB( sf_length_4kHz, nb_subfr ) ]; - energy = 1000.0f; - for( i = 0; i < silk_LSHIFT( sf_length_4kHz, 2 ); i++ ) { - energy += target_ptr[i] * (double)target_ptr[i]; - } - threshold = Cmax * Cmax; - if( energy / 16.0f > threshold ) { + if( Cmax < 0.2f ) { silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) ); *LTPCorr = 0.0f; *lagIndex = 0; @@ -287,14 +281,14 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, target_ptr = &frame_8kHz[ PE_LTP_MEM_LENGTH_MS * 8 ]; } for( k = 0; k < nb_subfr; k++ ) { - energy_tmp = silk_energy_FLP( target_ptr, sf_length_8kHz ); + energy_tmp = silk_energy_FLP( target_ptr, sf_length_8kHz ) + 1.0; for( j = 0; j < length_d_comp; j++ ) { d = d_comp[ j ]; basis_ptr = target_ptr - d; cross_corr = silk_inner_product_FLP( basis_ptr, target_ptr, sf_length_8kHz ); - energy = silk_energy_FLP( basis_ptr, sf_length_8kHz ); if( cross_corr > 0.0f ) { - C[ k ][ d ] = (silk_float)(cross_corr * cross_corr / (energy * energy_tmp + eps)); + energy = silk_energy_FLP( basis_ptr, sf_length_8kHz ); + C[ k ][ d ] = (silk_float)( 2 * cross_corr / ( energy + energy_tmp ) ); } else { C[ k ][ d ] = 0.0f; } @@ -317,7 +311,7 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, } else if( Fs_kHz == 16 ) { prevLag = silk_RSHIFT( prevLag, 1 ); } - prevLag_log2 = silk_log2((silk_float)prevLag); + prevLag_log2 = silk_log2( (silk_float)prevLag ); } else { prevLag_log2 = 0; } @@ -356,23 +350,20 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, CBimax_new = i; } } - CCmax_new = silk_max_float(CCmax_new, 0.0f); /* To avoid taking square root of negative number later */ - CCmax_new_b = CCmax_new; /* Bias towards shorter lags */ - lag_log2 = silk_log2((silk_float)d); - CCmax_new_b -= PE_SHORTLAG_BIAS * nb_subfr * lag_log2; + lag_log2 = silk_log2( (silk_float)d ); + CCmax_new_b = CCmax_new - PE_SHORTLAG_BIAS * nb_subfr * lag_log2; /* Bias towards previous lag */ if( prevLag > 0 ) { delta_lag_log2_sqr = lag_log2 - prevLag_log2; delta_lag_log2_sqr *= delta_lag_log2_sqr; - CCmax_new_b -= PE_PREVLAG_BIAS * nb_subfr * (*LTPCorr) * delta_lag_log2_sqr / (delta_lag_log2_sqr + 0.5f); + CCmax_new_b -= PE_PREVLAG_BIAS * nb_subfr * (*LTPCorr) * delta_lag_log2_sqr / ( delta_lag_log2_sqr + 0.5f ); } - if( CCmax_new_b > CCmax_b && /* Find maximum biased correlation */ - CCmax_new > nb_subfr * search_thres2 * search_thres2 && /* Correlation needs to be high enough to be voiced */ - silk_CB_lags_stage2[ 0 ][ CBimax_new ] <= min_lag_8kHz /* Lag must be in range */ + if( CCmax_new_b > CCmax_b && /* Find maximum biased correlation */ + CCmax_new > nb_subfr * search_thres2 /* Correlation needs to be high enough to be voiced */ ) { CCmax_b = CCmax_new_b; CCmax = CCmax_new; @@ -390,6 +381,10 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, return 1; } + /* Output normalized correlation */ + *LTPCorr = (silk_float)( CCmax / nb_subfr ); + silk_assert( *LTPCorr >= 0.0f ); + if( Fs_kHz > 8 ) { /* Search in original signal */ @@ -406,8 +401,6 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, end_lag = silk_min_int( lag + 2, max_lag ); lag_new = lag; /* to avoid undefined lag */ CBimax = 0; /* to avoid undefined lag */ - silk_assert( CCmax >= 0.0f ); - *LTPCorr = (silk_float)sqrt( CCmax / nb_subfr ); /* Output normalized correlation */ CCmax = -1000.0f; @@ -430,25 +423,25 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ]; } + target_ptr = &frame[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ]; + energy_tmp = silk_energy_FLP( target_ptr, nb_subfr * sf_length ) + 1.0; for( d = start_lag; d <= end_lag; d++ ) { for( j = 0; j < nb_cbk_search; j++ ) { cross_corr = 0.0; - energy = eps; + energy = energy_tmp; for( k = 0; k < nb_subfr; k++ ) { - energy += energies_st3[ k ][ j ][ lag_counter ]; cross_corr += cross_corr_st3[ k ][ j ][ lag_counter ]; + energy += energies_st3[ k ][ j ][ lag_counter ]; } if( cross_corr > 0.0 ) { - CCmax_new = (silk_float)(cross_corr * cross_corr / energy); + CCmax_new = (silk_float)( 2 * cross_corr / energy ); /* Reduce depending on flatness of contour */ CCmax_new *= 1.0f - contour_bias * j; } else { CCmax_new = 0.0f; } - if( CCmax_new > CCmax && - ( d + (opus_int)silk_CB_lags_stage3[ 0 ][ j ] ) <= max_lag - ) { + if( CCmax_new > CCmax && ( d + (opus_int)silk_CB_lags_stage3[ 0 ][ j ] ) <= max_lag ) { CCmax = CCmax_new; lag_new = d; CBimax = j; @@ -464,12 +457,10 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, *lagIndex = (opus_int16)( lag_new - min_lag ); *contourIndex = (opus_int8)CBimax; } else { /* Fs_kHz == 8 */ - /* Save Lags and correlation */ - silk_assert( CCmax >= 0.0f ); - *LTPCorr = (silk_float)sqrt( CCmax / nb_subfr ); /* Output normalized correlation */ + /* Save Lags */ for( k = 0; k < nb_subfr; k++ ) { pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size ); - pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag_8kHz, PE_MAX_LAG_MS * Fs_kHz ); + pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag_8kHz, PE_MAX_LAG_MS * 8 ); } *lagIndex = (opus_int16)( lag - min_lag_8kHz ); *contourIndex = (opus_int8)CBimax; @@ -479,6 +470,19 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, return 0; } +/*********************************************************************** +/* Calculates the correlations used in stage 3 search. In order to cover +/* the whole lag codebook for all the searched offset lags (lag +- 2), +/* the following correlations are needed in each sub frame: +/* +/* sf1: lag range [-8,...,7] total 16 correlations +/* sf2: lag range [-4,...,4] total 9 correlations +/* sf3: lag range [-3,....4] total 8 correltions +/* sf4: lag range [-6,....8] total 15 correlations +/* +/* In total 48 correlations. The direct implementation computed in worst +/* case 4*12*5 = 240 correlations, but more likely around 120. +/***********************************************************************/ static void silk_P_Ana_calc_corr_st3( silk_float cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */ const silk_float frame[], /* I vector to correlate */ @@ -487,19 +491,6 @@ static void silk_P_Ana_calc_corr_st3( opus_int nb_subfr, /* I number of subframes */ opus_int complexity /* I Complexity setting */ ) - /*********************************************************************** - Calculates the correlations used in stage 3 search. In order to cover - the whole lag codebook for all the searched offset lags (lag +- 2), - the following correlations are needed in each sub frame: - - sf1: lag range [-8,...,7] total 16 correlations - sf2: lag range [-4,...,4] total 9 correlations - sf3: lag range [-3,....4] total 8 correltions - sf4: lag range [-6,....8] total 15 correlations - - In total 48 correlations. The direct implementation computed in worst case - 4*12*5 = 240 correlations, but more likely around 120. - **********************************************************************/ { const silk_float *target_ptr, *basis_ptr; opus_int i, j, k, lag_counter, lag_low, lag_high; @@ -552,6 +543,10 @@ static void silk_P_Ana_calc_corr_st3( } } +/********************************************************************/ +/* Calculate the energies for first two subframes. The energies are */ +/* calculated recursively. */ +/********************************************************************/ static void silk_P_Ana_calc_energy_st3( silk_float energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */ const silk_float frame[], /* I vector to correlate */ @@ -560,10 +555,6 @@ static void silk_P_Ana_calc_energy_st3( opus_int nb_subfr, /* I number of subframes */ opus_int complexity /* I Complexity setting */ ) -/**************************************************************** -Calculate the energies for first two subframes. The energies are -calculated recursively. -****************************************************************/ { const silk_float *target_ptr, *basis_ptr; double energy; diff --git a/silk/float/wrappers_FLP.c b/silk/float/wrappers_FLP.c index 55004259..4259e90e 100644 --- a/silk/float/wrappers_FLP.c +++ b/silk/float/wrappers_FLP.c @@ -155,7 +155,7 @@ void silk_NSQ_wrapper_FLP( /* Convert input to fix */ for( i = 0; i < psEnc->sCmn.frame_length; i++ ) { - x_Q3[ i ] = silk_float2int( 8.0 * x[ i ] ); + x_Q3[ i ] = silk_float2int( 8.0f * x[ i ] ); } /* Call NSQ */ |