summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJean-Marc Valin <jmvalin@jmvalin.ca>2016-11-06 13:51:49 -0500
committerJean-Marc Valin <jmvalin@jmvalin.ca>2016-11-23 17:20:47 -0500
commitd51c3250623404e443830be504504d689dce5b6f (patch)
tree30bd6b0e7cc5fdb74333e9e2ce1c86968e91f92c
parent8fe210f14b040880829682a9f3f90b02993b4a64 (diff)
downloadopus-d51c3250623404e443830be504504d689dce5b6f.tar.gz
Makes analysis run at 24 kHz, with 20-ms frames
-rw-r--r--celt/arch.h2
-rw-r--r--celt/celt.h1
-rw-r--r--src/analysis.c176
-rw-r--r--src/analysis.h11
-rw-r--r--src/mlp_data.c186
-rw-r--r--src/mlp_train.c14
-rw-r--r--src/opus_encoder.c90
-rw-r--r--src/opus_private.h6
8 files changed, 337 insertions, 149 deletions
diff --git a/celt/arch.h b/celt/arch.h
index 9eb37d8f..9eedf74d 100644
--- a/celt/arch.h
+++ b/celt/arch.h
@@ -101,6 +101,7 @@ static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line)
typedef opus_int16 opus_val16;
typedef opus_int32 opus_val32;
+typedef opus_int64 opus_val64;
typedef opus_val32 celt_sig;
typedef opus_val16 celt_norm;
@@ -158,6 +159,7 @@ static OPUS_INLINE opus_int16 SAT16(opus_int32 x) {
typedef float opus_val16;
typedef float opus_val32;
+typedef float opus_val64;
typedef float celt_sig;
typedef float celt_norm;
diff --git a/celt/celt.h b/celt/celt.h
index 863a0644..d69cd44c 100644
--- a/celt/celt.h
+++ b/celt/celt.h
@@ -57,6 +57,7 @@ typedef struct {
float noisiness;
float activity;
float music_prob;
+ float vad_prob;
int bandwidth;
float activity_probability;
} AnalysisInfo;
diff --git a/src/analysis.c b/src/analysis.c
index b704fb4c..3ec53ff4 100644
--- a/src/analysis.c
+++ b/src/analysis.c
@@ -100,17 +100,13 @@ static const float analysis_window[240] = {
};
static const int tbands[NB_TBANDS+1] = {
- 2, 4, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120
+ 4, 8, 12, 16, 20, 24, 28, 32, 40, 48, 56, 64, 80, 96, 112, 136, 160, 192, 240
};
static const int extra_bands[NB_TOT_BANDS+1] = {
- 1, 2, 4, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120, 160, 200
+ 2, 4, 8, 12, 16, 20, 24, 28, 32, 40, 48, 56, 64, 80, 96, 112, 136, 160, 192, 240
};
-/*static const float tweight[NB_TBANDS+1] = {
- .3, .4, .5, .6, .7, .8, .9, 1., 1., 1., 1., 1., 1., 1., .8, .7, .6, .5
-};*/
-
#define NB_TONAL_SKIP_BANDS 9
@@ -141,7 +137,8 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int
if (curr_lookahead<0)
curr_lookahead += DETECT_SIZE;
- if (len > 480 && pos != tonal->write_pos)
+ /* On long frames, look at the second analysis window rather than the first. */
+ if (len > 960 && pos != tonal->write_pos)
{
pos++;
if (pos==DETECT_SIZE)
@@ -152,18 +149,27 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int
if (pos<0)
pos = DETECT_SIZE-1;
OPUS_COPY(info_out, &tonal->info[pos], 1);
+ /* If possible, look ahead for a tone to compensate for the delay in the tone detector. */
+ for (i=0;i<3;i++)
+ {
+ pos++;
+ if (pos==DETECT_SIZE)
+ pos = 0;
+ if (pos == tonal->write_pos)
+ break;
+ info_out->tonality = MAX32(0, -.03 + MAX32(info_out->tonality, tonal->info[pos].tonality-.05));
+ }
tonal->read_subframe += len/120;
- while (tonal->read_subframe>=4)
+ while (tonal->read_subframe>=8)
{
- tonal->read_subframe -= 4;
+ tonal->read_subframe -= 8;
tonal->read_pos++;
}
if (tonal->read_pos>=DETECT_SIZE)
tonal->read_pos-=DETECT_SIZE;
- /* Compensate for the delay in the features themselves.
- FIXME: Need a better estimate the 10 I just made up */
- curr_lookahead = IMAX(curr_lookahead-10, 0);
+ /* The -1 is to compensate for the delay in the features themselves. */
+ curr_lookahead = IMAX(curr_lookahead-1, 0);
psum=0;
/* Summing the probability of transition patterns that involve music at
@@ -173,7 +179,7 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int
for (;i<DETECT_SIZE;i++)
psum += tonal->pspeech[i];
psum = psum*tonal->music_confidence + (1-psum)*tonal->speech_confidence;
- /*printf("%f %f %f\n", psum, info_out->music_prob, info_out->tonality);*/
+ /*printf("%f %f %f %f %f\n", psum, info_out->music_prob, info_out->vad_prob, info_out->activity_probability, info_out->tonality);*/
info_out->music_prob = psum;
}
@@ -216,19 +222,28 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
float noise_floor;
int remaining;
AnalysisInfo *info;
+ float hp_ener;
+ float tonality2[240];
+ float midE[8];
+ float spec_variability=0;
SAVE_STACK;
tonal->last_transition++;
- alpha = 1.f/IMIN(20, 1+tonal->count);
- alphaE = 1.f/IMIN(50, 1+tonal->count);
- alphaE2 = 1.f/IMIN(1000, 1+tonal->count);
+ alpha = 1.f/IMIN(10, 1+tonal->count);
+ alphaE = 1.f/IMIN(25, 1+tonal->count);
+ alphaE2 = 1.f/IMIN(500, 1+tonal->count);
+
+ /* len and offset are now at 24 kHz. */
+ len/= 2;
+ offset /= 2;
if (tonal->count<4)
tonal->music_prob = .5;
kfft = celt_mode->mdct.kfft[0];
if (tonal->count==0)
tonal->mem_fill = 240;
- downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, c1, c2, C);
+ tonal->hp_ener_accum += downmix(x, &tonal->inmem[tonal->mem_fill], tonal->downmix_state,
+ IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, c1, c2, C);
if (tonal->mem_fill+len < ANALYSIS_BUF_SIZE)
{
tonal->mem_fill += len;
@@ -236,6 +251,7 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
RESTORE_STACK;
return;
}
+ hp_ener = tonal->hp_ener_accum;
info = &tonal->info[tonal->write_pos++];
if (tonal->write_pos>=DETECT_SIZE)
tonal->write_pos-=DETECT_SIZE;
@@ -254,7 +270,8 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
}
OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240);
remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill);
- downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C);
+ tonal->hp_ener_accum = downmix(x, &tonal->inmem[240], tonal->downmix_state,
+ remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C);
tonal->mem_fill = 240 + remaining;
opus_fft(kfft, in, out, tonal->arch);
#ifndef FIXED_POINT
@@ -296,14 +313,21 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
mod2 *= mod2;
mod2 *= mod2;
- avg_mod = .25f*(d2A[i]+2.f*mod1+mod2);
+ avg_mod = .25f*(d2A[i]+mod1+2*mod2);
+ /* This introduces an extra delay of 2 frames in the detection. */
tonality[i] = 1.f/(1.f+40.f*16.f*pi4*avg_mod)-.015f;
+ /* No delay on this detection, but it's less reliable. */
+ tonality2[i] = 1.f/(1.f+40.f*16.f*pi4*mod2)-.015f;
A[i] = angle2;
dA[i] = d_angle2;
d2A[i] = mod2;
}
-
+ for (i=2;i<N2-1;i++)
+ {
+ float tt = MIN32(tonality2[i], MAX32(tonality2[i-1], tonality2[i+1]));
+ tonality[i] = .9*MAX32(tonality[i], tt-.1);
+ }
frame_tonality = 0;
max_frame_tonality = 0;
/*tw_sum = 0;*/
@@ -334,7 +358,7 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
binE *= 5.55e-17f;
#endif
E += binE;
- tE += binE*tonality[i];
+ tE += binE*MAX32(0, tonality[i]);
nE += binE*2.f*(.5f-noisiness[i]);
}
#ifndef FIXED_POINT
@@ -352,12 +376,24 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
frame_loudness += (float)sqrt(E+1e-10f);
logE[b] = (float)log(E+1e-10f);
- tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01f);
- tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1f);
- if (tonal->highE[b] < tonal->lowE[b]+1.f)
+ tonal->logE[tonal->E_count][b] = logE[b];
+ if (tonal->count==0)
+ tonal->highE[b] = tonal->lowE[b] = logE[b];
+ if (tonal->highE[b] > tonal->lowE[b] + 7.5)
{
- tonal->highE[b]+=.5f;
- tonal->lowE[b]-=.5f;
+ if (tonal->highE[b] - logE[b] > logE[b] - tonal->lowE[b])
+ tonal->highE[b] -= .01;
+ else
+ tonal->lowE[b] += .01;
+ }
+ if (logE[b] > tonal->highE[b])
+ {
+ tonal->highE[b] = logE[b];
+ tonal->lowE[b] = MAX32(tonal->highE[b]-15, tonal->lowE[b]);
+ } else if (logE[b] < tonal->lowE[b])
+ {
+ tonal->lowE[b] = logE[b];
+ tonal->highE[b] = MIN32(tonal->lowE[b]+15, tonal->highE[b]);
}
relativeE += (logE[b]-tonal->lowE[b])/(1e-15f+tonal->highE[b]-tonal->lowE[b]);
@@ -391,6 +427,26 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
tonal->prev_band_tonality[b] = band_tonality[b];
}
+ for (i=0;i<NB_FRAMES;i++)
+ {
+ int j;
+ float mindist = 1e15;
+ for (j=0;j<NB_FRAMES;j++)
+ {
+ int k;
+ float dist=0;
+ for (k=0;k<NB_TBANDS;k++)
+ {
+ float tmp;
+ tmp = tonal->logE[i][k] - tonal->logE[j][k];
+ dist += tmp*tmp;
+ }
+ if (j!=i)
+ mindist = MIN32(mindist, dist);
+ }
+ spec_variability += mindist;
+ }
+ spec_variability = sqrt(spec_variability/NB_FRAMES/NB_TBANDS);
bandwidth_mask = 0;
bandwidth = 0;
maxE = 0;
@@ -426,10 +482,26 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
if (E>.1*bandwidth_mask && E*1e9f > maxE && E > noise_floor*(band_end-band_start))
bandwidth = b;
}
+ /* Special case for the last two bands, for which we don't have spectrum but only
+ the energy above 12 kHz. */
+ {
+ float E = hp_ener*(1./(240*240));
+#ifdef FIXED_POINT
+ /* silk_resampler_down2_hp() shifted right by an extra 8 bits. */
+ E *= ((opus_int32)1 << 2*SIG_SHIFT)*256.f;
+#endif
+ maxE = MAX32(maxE, E);
+ tonal->meanE[b] = MAX32((1-alphaE2)*tonal->meanE[b], E);
+ E = MAX32(E, tonal->meanE[b]);
+ /* Use a simple follower with 13 dB/Bark slope for spreading function */
+ bandwidth_mask = MAX32(.05f*bandwidth_mask, E);
+ if (E>.1*bandwidth_mask && E*1e9f > maxE && E > noise_floor*160)
+ bandwidth = 20;
+ }
if (tonal->count<=2)
bandwidth = 20;
frame_loudness = 20*(float)log10(frame_loudness);
- tonal->Etracker = MAX32(tonal->Etracker-.03f, frame_loudness);
+ tonal->Etracker = MAX32(tonal->Etracker-.003f, frame_loudness);
tonal->lowECount *= (1-alphaE);
if (frame_loudness < tonal->Etracker-30)
tonal->lowECount += alphaE;
@@ -441,6 +513,13 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
sum += dct_table[i*16+b]*logE[b];
BFCC[i] = sum;
}
+ for (i=0;i<8;i++)
+ {
+ float sum=0;
+ for (b=0;b<16;b++)
+ sum += dct_table[i*16+b]*.5*(tonal->highE[b]+tonal->lowE[b]);
+ midE[i] = sum;
+ }
frame_stationarity /= NB_TBANDS;
relativeE /= NB_TBANDS;
@@ -479,6 +558,8 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
for (i=0;i<9;i++)
tonal->std[i] = (1-alpha)*tonal->std[i] + alpha*features[i]*features[i];
}
+ for (i=0;i<4;i++)
+ features[i] = BFCC[i]-midE[i];
for (i=0;i<8;i++)
{
@@ -489,6 +570,7 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
}
for (i=0;i<9;i++)
features[11+i] = (float)sqrt(tonal->std[i]) - std_feature_bias[i];
+ features[18] = spec_variability-.78;;
features[20] = info->tonality - 0.154723;
features[21] = info->activity - 0.724643;
features[22] = frame_stationarity - 0.743717;
@@ -503,8 +585,6 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
/* Probability of active audio (as opposed to silence) */
frame_probs[1] = .5f*frame_probs[1]+.5f;
frame_probs[1] *= frame_probs[1];
- /* Consider that silence has a 50-50 probability. */
- frame_probs[0] = frame_probs[1]*frame_probs[0] + (1-frame_probs[1])*.5f;
/* Probability of speech or music vs noise */
info->activity_probability = frame_probs[1];
@@ -527,12 +607,32 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
float music0;
float p, q;
+ /* More silence transitions for speech than for music. */
+ tau = .001f*tonal->music_prob + .01f*(1-tonal->music_prob);
+ p = MAX16(.05f,MIN16(.95f,frame_probs[1]));
+ q = MAX16(.05f,MIN16(.95f,tonal->vad_prob));
+ beta = .02f+.05f*ABS16(p-q)/(p*(1-q)+q*(1-p));
+ /* p0 and p1 are the probabilities of inactive and active audio at this
+ frame using only information from previous frame and applying the
+ state transition model */
+ p0 = (1-tonal->vad_prob)*(1-tau) + tonal->vad_prob *tau;
+ p1 = tonal->vad_prob *(1-tau) + (1-tonal->vad_prob)*tau;
+ /* We apply the current probability with exponent beta to work around
+ the fact that the probability estimates aren't independent. */
+ p0 *= (float)pow(1-frame_probs[1], beta);
+ p1 *= (float)pow(frame_probs[1], beta);
+ /* Normalise the probabilities to get the Markov probability of active audio. */
+ tonal->vad_prob = p1/(p0+p1);
+ info->vad_prob = tonal->vad_prob;
+ /* Consider that silence has a 50-50 probability of being speech or music. */
+ frame_probs[0] = tonal->vad_prob*frame_probs[0] + (1-tonal->vad_prob)*.5f;
+
/* One transition every 3 minutes of active audio */
- tau = .00005f*frame_probs[1];
+ tau = .0001f;
/* Adapt beta based on how "unexpected" the new prob is */
p = MAX16(.05f,MIN16(.95f,frame_probs[0]));
q = MAX16(.05f,MIN16(.95f,tonal->music_prob));
- beta = .01f+.05f*ABS16(p-q)/(p*(1-q)+q*(1-p));
+ beta = .02f+.05f*ABS16(p-q)/(p*(1-q)+q*(1-p));
/* p0 and p1 are the probabilities of speech and music at this frame
using only information from previous frame and applying the
state transition model */
@@ -546,6 +646,7 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
tonal->music_prob = p1/(p0+p1);
info->music_prob = tonal->music_prob;
+ /*printf("%f %f %f %f\n", frame_probs[0], frame_probs[1], tonal->music_prob, tonal->vad_prob);*/
/* This chunk of code deals with delayed decision. */
psum=1e-20f;
/* Instantaneous probability of speech and music, with beta pre-applied. */
@@ -617,9 +718,11 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
#else
info->music_prob = 0;
#endif
- /*for (i=0;i<25;i++)
+#ifdef MLP_TRAINING
+ for (i=0;i<25;i++)
printf("%f ", features[i]);
- printf("\n");*/
+ printf("\n");
+#endif
info->bandwidth = bandwidth;
/*printf("%d %d\n", info->bandwidth, info->opus_bandwidth);*/
@@ -635,6 +738,7 @@ void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, co
int offset;
int pcm_len;
+ analysis_frame_size -= analysis_frame_size&1;
if (analysis_pcm != NULL)
{
/* Avoid overflow/wrap-around of the analysis buffer */
@@ -643,9 +747,9 @@ void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, co
pcm_len = analysis_frame_size - analysis->analysis_offset;
offset = analysis->analysis_offset;
while (pcm_len>0) {
- tonality_analysis(analysis, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix);
- offset += 480;
- pcm_len -= 480;
+ tonality_analysis(analysis, celt_mode, analysis_pcm, IMIN(960, pcm_len), offset, c1, c2, C, lsb_depth, downmix);
+ offset += 960;
+ pcm_len -= 960;
}
analysis->analysis_offset = analysis_frame_size;
diff --git a/src/analysis.h b/src/analysis.h
index 86bd6340..ce9a989f 100644
--- a/src/analysis.h
+++ b/src/analysis.h
@@ -33,11 +33,14 @@
#define NB_FRAMES 8
#define NB_TBANDS 18
-#define NB_TOT_BANDS 21
+#define NB_TOT_BANDS 19
#define ANALYSIS_BUF_SIZE 720 /* 30 ms at 24 kHz (the analysis buffer is filled at the downsampled rate) */
#define DETECT_SIZE 200
+/* Uncomment this to print the MLP features on stdout. */
+/*#define MLP_TRAINING*/
+
typedef struct {
int arch;
#define TONALITY_ANALYSIS_RESET_START angle
@@ -49,13 +52,15 @@ typedef struct {
float prev_band_tonality[NB_TBANDS];
float prev_tonality;
float E[NB_FRAMES][NB_TBANDS];
+ float logE[NB_FRAMES][NB_TBANDS];
float lowE[NB_TBANDS];
float highE[NB_TBANDS];
- float meanE[NB_TOT_BANDS];
+ float meanE[NB_TOT_BANDS+1];
float mem[32];
float cmean[8];
float std[9];
float music_prob;
+ float vad_prob;
float Etracker;
float lowECount;
int E_count;
@@ -76,6 +81,8 @@ typedef struct {
int write_pos;
int read_pos;
int read_subframe;
+ float hp_ener_accum;
+ opus_val32 downmix_state[3];
AnalysisInfo info[DETECT_SIZE];
} TonalityAnalysisState;
diff --git a/src/mlp_data.c b/src/mlp_data.c
index 3222bece..ac18ab1b 100644
--- a/src/mlp_data.c
+++ b/src/mlp_data.c
@@ -4,104 +4,104 @@
#include "mlp.h"
-/* RMS error was 0.230027, seed was 1452289367 */
-/* 0.009100 0.069938 (0.230027 0.230027) 1.24058e-07 5543 */
+/* RMS error was 0.307058, seed was 1479787111 */
+/* 0.006588 0.033188 (0.307072 0.307058) 3.61895e-07 6271 10 */
static const float weights[450] = {
/* hidden layer */
--1.20927f, -0.0275523f, 0.0304442f, -0.071791f, -0.0897356f,
-0.100996f, -0.0492634f, 0.070213f, 0.0187071f, 0.0042668f,
-0.0644589f, -0.10967f, -0.119688f, -0.00888386f, 0.170952f,
-0.174562f, -0.265435f, -0.0635892f, -0.284755f, -1.06453f,
-0.202855f, 2.31084f, -2.763f, -0.420894f, 0.698811f,
-6.46418f, 0.0662341f, 0.0758173f, 0.0511722f, 0.0426484f,
-0.115711f, -0.263815f, -0.0113386f, -0.189737f, -0.0929912f,
--0.287827f, 0.0925463f, 0.0286792f, -0.0199793f, -0.193071f,
-0.258586f, 0.018504f, 0.116125f, 0.099269f, -0.00781962f,
--0.266017f, 0.283733f, 10.5488f, -0.658286f, 0.836758f,
-13.1168f, -5.02553f, -1.0969f, -0.0738116f, 0.0204736f,
-0.0110775f, -0.00198985f, 0.00426824f, 0.148998f, 0.0755275f,
-0.112213f, -0.0518501f, 0.028398f, 0.0240943f, -0.0503666f,
--0.149506f, -0.133575f, -0.137328f, 0.116275f, 0.238077f,
-0.080265f, 0.0387349f, 0.09185f, 4.04867f, 3.2435f,
--0.7155f, 8.14792f, -29.8969f, 1.1575f, -0.124794f,
-0.0226943f, -0.0470538f, -0.0334476f, 0.0360859f, 0.0447789f,
--0.00258532f, -0.0192054f, -0.113082f, 0.109513f, -0.0437787f,
-0.0382349f, -0.00994462f, -0.155653f, 0.171922f, -0.222151f,
--0.523565f, -0.0454432f, -0.556888f, 0.761537f, -2.70075f,
--0.883015f, 0.887168f, 0.746329f, -0.363477f, 0.360424f,
-0.034755f, -0.015404f, 0.00688472f, -0.00949269f, 0.0625642f,
--0.050711f, 0.0370223f, 0.0149561f, 0.060385f, -0.0709806f,
--0.036509f, 0.099007f, -0.0397276f, 0.285237f, 0.127836f,
--0.15154f, 0.265848f, -0.0832318f, 0.0520659f, 0.897805f,
-0.439215f, -3.00803f, 1.93755f, -0.408725f, 0.300142f,
--1.42001f, 0.118794f, -0.04621f, 0.050757f, -0.0239654f,
--0.0629488f, -0.0083243f, -0.108989f, -0.0326831f, 0.104277f,
--0.0667274f, 0.0475941f, 0.069182f, -0.0574944f, -0.137823f,
--0.206978f, -0.162035f, -0.208444f, 0.141751f, -0.289377f,
--0.7875f, 0.0911f, 0.174999f, -2.03406f, 3.06743f,
-1.22255f, 2.10659f, 0.0779022f, -0.220946f, 0.137124f,
--0.0625512f, -0.073468f, 0.174861f, -0.139417f, 0.0967417f,
-0.0830658f, -0.223662f, 0.103016f, -0.102317f, 0.225611f,
-0.154375f, 0.187856f, -0.00878193f, 0.128648f, -0.371477f,
--0.479037f, 0.156541f, 1.10304f, -1.26162f, 0.086939f,
--0.143269f, 2.18318f, -2.88831f, 0.101126f, -0.308315f,
-0.222068f, -0.227709f, -0.00855236f, 0.0107035f, 0.00774349f,
--0.0185316f, 0.0306039f, -0.233612f, 0.0807309f, -0.029933f,
-0.151942f, -0.267724f, 0.0484763f, 0.132192f, -0.230059f,
-0.357879f, 0.075414f, 0.110637f, -1.27818f, 3.3101f,
-0.831064f, -0.212367f, -20.704f, -1.1492f, 0.0312941f,
--0.0208507f, -0.00804196f, 0.0110407f, 0.027599f, 0.00193594f,
--0.0135057f, -0.00614977f, 0.0505432f, -0.0108098f, 0.000826042f,
--0.0243765f, -0.323055f, 0.0682748f, -0.55873f, -0.103042f,
-0.174935f, -0.126558f, -0.104518f, 0.422479f, -0.0683178f,
--1.44811f, 0.702109f, 0.712138f, -0.420112f, 2.59746f,
--0.0297689f, -0.0453044f, -0.0330312f, -0.0344518f, -0.0260442f,
--0.0610515f, 0.0916816f, 0.0256295f, -0.105187f, 0.0771212f,
--0.0898792f, -0.186163f, -0.321019f, -0.225689f, 0.175825f,
-0.252939f, 0.738898f, 2.41919f, 0.114505f, -0.314026f,
-0.607983f, 1.73201f, -2.09609f, -0.609339f, 1.18997f,
-0.113871f, -0.177673f, -0.0785783f, -0.348033f, -0.0949274f,
--0.0191062f, 0.335823f, -0.0578655f, 0.131259f, -0.118687f,
--0.132123f, -0.239624f, 0.000738732f, -0.185936f, -0.13077f,
--0.436439f, -0.141664f, 0.0353391f, -0.0536557f, -0.0964537f,
-0.221853f, 1.94264f, -1.78544f, 3.8254f, 3.74598f,
-2.37071f, -1.42709f, 0.0463179f, -0.0568602f, 0.0529534f,
--0.103245f, -0.340972f, 0.101934f, -0.810811f, 0.176158f,
-0.469658f, 0.0248864f, -0.10734f, -0.143827f, -0.0457131f,
-0.779219f, -0.142152f, 0.0394297f, 0.160772f, -0.707623f,
--0.608236f, 1.07106f, -1.27037f, 2.27722f, 6.3688f,
-0.519837f, -3.33262f, -0.126443f, -0.0943922f, 0.0265837f,
-0.0620709f, 0.0113266f, -0.255811f, -0.0735781f, -0.0638952f,
--0.09543f, -0.204965f, 0.00454999f, 0.0554974f, -0.16251f,
--0.573836f, 0.258764f, 0.19895f, 0.0219289f, -0.376757f,
--0.508578f, -0.0767061f, -0.654512f, 4.48901f, 3.38949f,
--2.34533f, -11.0766f, 4.35799f, 1.66794f, -0.0513934f,
--0.0685787f, -0.0112154f, 0.000464661f, -0.234848f, -0.338596f,
--0.142242f, -0.167476f, -0.140324f, -0.104829f, -0.104195f,
-0.0110351f, -0.112668f, 0.0872292f, -0.170777f, -0.0876985f,
-0.123348f, -0.156758f, 0.199038f, -0.056107f, 0.899269f,
-0.0820197f, -1.295f, 0.0295294f, 2.27577f, -0.940993f,
--0.0100104f, -0.111541f, -0.132193f, -0.11037f, 0.0371375f,
--0.0180172f, -0.0105591f, 0.0197043f, 0.04099f, -0.0538671f,
--0.102347f, -0.0470742f, 0.178034f, -0.267772f, -0.105789f,
--0.105376f, 0.0623262f, -0.042906f, 0.176528f, -0.160076f,
--2.28483f, -1.92619f, 0.218149f, 9.67107f, 3.30399f,
--1.75951f, 0.129671f, 0.118305f, 0.140766f, 0.0678099f,
-0.00313175f, -0.0144533f, -0.0310217f, -0.0245139f, 0.136948f,
-0.150137f, 0.112326f, -0.0755033f, -0.280984f, -0.249342f,
--0.681657f, 0.0315246f, 0.294968f, 0.0407062f, 0.282759f,
--0.344185f, -7.32828f, -0.220036f, -0.560418f, -1.87191f,
--7.10132f,
+0.346452f, -0.000986403f, 0.0238469f, 0.00787237f, -0.00327841f,
+-0.00144448f, -0.00834152f, 0.00662626f, 0.00827114f, -0.00625709f,
+-0.0111221f, -0.0101708f, -0.0220237f, -0.00579342f, 0.0148173f,
+-0.0502682f, 0.0129419f, -0.0202994f, -0.039075f, 0.0529568f,
+0.00938264f, -0.667579f, 0.817718f, 0.212861f, -0.592366f,
+0.182686f, 0.104437f, 0.0418449f, 0.0561753f, 0.186168f,
+0.0837228f, -0.0311366f, 0.00600564f, -0.0454366f, -0.0530324f,
+-0.0216303f, -0.0526483f, -0.0203131f, 0.0659832f, 0.0568102f,
+0.00508217f, -0.0504551f, -0.109038f, -0.0575989f, -0.0545253f,
+-0.499059f, -0.00813707f, 4.57328f, 0.565364f, -2.05258f,
+7.44677f, -1.00112f, -0.438463f, -0.0253055f, -0.0719549f,
+-0.0384969f, -0.0320396f, 0.0327494f, 0.0315827f, 0.0106198f,
+0.00800117f, 0.0197368f, 0.042733f, 0.0292928f, 0.00690012f,
+-0.0710759f, -0.00229299f, 0.123645f, -0.0151665f, 0.113013f,
+0.109826f, 0.20492f, 0.0322878f, 0.50752f, -0.767495f,
+-0.182978f, -0.288763f, -0.340734f, 0.0473468f, 0.0397243f,
+0.0974594f, 0.0526624f, 0.0725072f, -0.0974313f, -0.21646f,
+-0.134403f, -0.0713687f, 0.0559797f, 0.0945215f, -0.037486f,
+0.170421f, -0.299144f, 0.119146f, -0.0586984f, -0.0649505f,
+0.147221f, 0.213973f, -0.370208f, 0.0286924f, 2.32318f,
+-1.11646f, -1.33634f, -1.3209f, -1.33462f, 0.851591f,
+-0.0387395f, 0.00394112f, 0.0538956f, 0.0142521f, 0.0826941f,
+-0.00891185f, 0.031608f, -0.0519062f, 0.0840755f, 0.00711923f,
+0.0553999f, -0.0140473f, 0.289961f, 0.0159519f, 0.408393f,
+0.0706347f, -0.0821191f, 0.094304f, 0.258852f, -0.0872749f,
+-1.31161f, 1.76215f, 1.1734f, -0.218017f, 0.200685f,
+0.113221f, 0.0438638f, -0.11757f, -0.00730474f, -0.034559f,
+-0.0527583f, 0.124873f, -0.0630957f, 0.0199118f, 0.0204139f,
+-0.072775f, 0.0698309f, 0.109033f, 0.418322f, 0.0581625f,
+0.445942f, -0.0967659f, -0.475703f, -0.0364438f, -0.25813f,
+-0.0526156f, 1.95193f, -1.89237f, -0.2298f, 0.484577f,
+-0.70406f, -3.37848f, 0.037487f, 0.0842667f, 0.0862521f,
+0.0203154f, -0.0523732f, -0.120697f, -0.0424331f, -0.0358556f,
+-0.0237288f, -0.0563953f, 0.0413413f, 0.0725612f, 0.484948f,
+-0.0337245f, 0.0919432f, -0.0418379f, -0.289835f, 0.108197f,
+-0.731154f, 0.046635f, -0.0115669f, 1.96781f, 0.22253f,
+-1.44544f, -70.1674f, 1.56784f, -0.0231552f, 0.0226579f,
+0.0368208f, -0.182944f, -0.0664526f, -0.0419537f, -0.0599649f,
+0.177215f, -0.032529f, 0.0417212f, -0.020503f, -0.169923f,
+-0.00776405f, -0.0282886f, 0.582991f, 0.0255885f, -0.0161892f,
+0.0409108f, 1.13725f, 0.149326f, -3.03502f, 4.07061f,
+-1.22442f, -0.955553f, -0.652578f, -0.442688f, -0.0290697f,
+-0.246261f, -0.0545186f, -0.211004f, 0.0218397f, 0.0645374f,
+-0.0239089f, 0.0607779f, -0.00397812f, 0.103708f, 0.0654522f,
+0.0402445f, -0.161771f, -0.119996f, 0.0662066f, -0.0221619f,
+0.156356f, 0.158121f, -0.453376f, 0.100038f, -0.0380598f,
+-0.655184f, -0.874463f, 0.441085f, -0.0189581f, -1.64911f,
+0.116019f, 0.0778868f, 0.022377f, 0.0567152f, -0.0536254f,
+-0.0525457f, -0.0163194f, -0.023485f, -0.0900654f, -0.042401f,
+-0.0452115f, -0.0980355f, -0.30054f, -0.0340618f, -0.315325f,
+-0.0454255f, 0.0550435f, -0.117775f, 0.745518f, -0.111094f,
+-4.59615f, -2.11679f, 0.377472f, -0.398044f, 0.100191f,
+0.987546f, -0.105993f, 0.211353f, 0.0199354f, -0.110528f,
+0.0587012f, -0.146575f, 0.0240706f, 0.0808704f, 0.0662973f,
+-0.133194f, 0.063903f, -0.0663394f, 0.0840598f, -0.157854f,
+-0.0208042f, -0.0415707f, 0.0616584f, 0.134096f, 0.0971693f,
+0.0607324f, 2.18513f, 0.424779f, 1.18391f, 0.800226f,
+4.50274f, -1.97694f, -0.0996252f, 0.00959504f, -0.0687779f,
+0.0108385f, 0.0209571f, -0.0232143f, 0.0324491f, -0.0284241f,
+0.0349509f, 0.00658307f, 0.0670332f, 0.0135798f, -0.0242892f,
+0.114423f, 0.0906438f, -0.00625305f, 0.0991216f, 0.0018775f,
+-0.0631878f, 0.00596324f, 1.98581f, -0.35355f, 0.000361734f,
+1.20065f, 0.845175f, -0.812598f, -0.0566753f, 0.2034f,
+0.108179f, -0.0565119f, 0.0033704f, -0.138371f, -0.0822029f,
+-0.0330501f, -0.0454151f, -0.0938773f, -0.0865522f, 0.296783f,
+0.51366f, -0.138525f, 0.138197f, -0.280676f, 0.00719433f,
+0.368148f, 0.78515f, -0.0206407f, 0.501422f, 4.04115f,
+3.22981f, 1.79516f, 18.177f, -0.764188f, -0.0305839f,
+-0.0102731f, -0.0188846f, -0.0703807f, 0.0311089f, 0.000158915f,
+-0.0778763f, 0.0275067f, -0.00954754f, 0.0199833f, 0.0500232f,
+0.0661436f, 0.469303f, 0.50695f, -0.218825f, 0.00533274f,
+-0.512075f, -1.9125f, 0.639009f, -0.148785f, 1.4833f,
+0.852433f, 1.34851f, -0.734623f, 0.210932f, -0.789634f,
+-0.0114672f, 0.150499f, 0.0381371f, 0.0691792f, -8.753e-06f,
+0.0263655f, 0.0247899f, 0.0172167f, 0.00014426f, -0.00153884f,
+0.0203275f, -0.0194041f, -0.0110974f, 0.108975f, -0.0697057f,
+0.0964956f, -0.10174f, -0.0626501f, 0.0680276f, 0.0215191f,
+-1.26794f, 3.33681f, 0.366054f, -3.0353f, -1.45105f,
+-1.05225f, 0.0302715f, 0.0330566f, 0.0275157f, 0.0161609f,
+-0.10335f, -0.144689f, -0.052541f, -0.024565f, -0.169548f,
+-0.215189f, -0.0575187f, 0.00427752f, -0.0549234f, 0.103446f,
+-0.0833558f, 0.0133471f, -0.0620358f, -0.134324f, -0.291327f,
+-0.043899f, 0.135107f, -0.0220095f, 0.302584f, -1.94763f,
+-0.32957f,
/* output layer */
-8.55144, 2.0822, 0.240592, 1.26638, 0.0309585,
--1.09841, 0.861549, -1.53704, 1.07356, 4.39194,
--2.60476, 0.375094, 0.122941, 0.00326393, 0.777163,
--2.03171, -0.944556, 4.02958, -0.260741, 0.556385,
--0.220568, -1.77121, -0.858706, -1.52023, -0.784162,
-0.345948, -0.0488489, -0.323381, -0.752573, 0.517346,
-0.876475, -1.44056, -0.382276, -1.55409, };
+-0.138449, 23.1555, 2.02078, 4.63684, 1.63151,
+-2.36045, 1.80583, 0.923012, -1.55375, 1.01092,
+-0.215321, -0.642902, 2.25817, -0.491944, 2.25117,
+-2.19328, -1.98994, 5.62795, 0.957799, 0.472934,
+3.45822, 0.248562, -0.117404, -0.274879, 0.683224,
+0.199083, -1.49602, -2.40808, -2.27289, -1.6856,
+0.325958, 1.21798, 3.14007, 1.50236, };
static const int topo[3] = {25, 16, 2};
diff --git a/src/mlp_train.c b/src/mlp_train.c
index dfb88c67..b175bb0d 100644
--- a/src/mlp_train.c
+++ b/src/mlp_train.c
@@ -138,13 +138,16 @@ double compute_gradient(MLPTrain *net, float *inputs, float *outputs, int nbSamp
for (s=0;s<nbSamples;s++)
{
float *in, *out;
+ float inp[inDim];
in = inputs+s*inDim;
out = outputs + s*outDim;
+ for (j=0;j<inDim;j++)
+ inp[j] = in[j];
for (i=0;i<hiddenDim;i++)
{
double sum = W0[i*(inDim+1)];
for (j=0;j<inDim;j++)
- sum += W0[i*(inDim+1)+j+1]*in[j];
+ sum += W0[i*(inDim+1)+j+1]*inp[j];
hidden[i] = tansig_approx(sum);
}
for (i=0;i<outDim;i++)
@@ -156,14 +159,14 @@ double compute_gradient(MLPTrain *net, float *inputs, float *outputs, int nbSamp
error[i] = out[i] - netOut[i];
if (out[i] == 0) error[i] *= .0;
error_rate[i] += fabs(error[i])>1;
- if (i==0) error[i] *= 3;
+ if (i==0) error[i] *= 5;
rms += error[i]*error[i];
/*error[i] = error[i]/(1+fabs(error[i]));*/
}
/* Back-propagate error */
for (i=0;i<outDim;i++)
{
- float grad = 1-netOut[i]*netOut[i];
+ double grad = 1-netOut[i]*netOut[i];
W1_grad[i*(hiddenDim+1)] += error[i]*grad;
for (j=0;j<hiddenDim;j++)
W1_grad[i*(hiddenDim+1)+j+1] += grad*error[i]*hidden[j];
@@ -177,7 +180,7 @@ double compute_gradient(MLPTrain *net, float *inputs, float *outputs, int nbSamp
grad *= 1-hidden[i]*hidden[i];
W0_grad[i*(inDim+1)] += grad;
for (j=0;j<inDim;j++)
- W0_grad[i*(inDim+1)+j+1] += grad*in[j];
+ W0_grad[i*(inDim+1)+j+1] += grad*inp[j];
}
}
return rms;
@@ -476,6 +479,9 @@ int main(int argc, char **argv)
fprintf (stderr, "Got %d samples\n", nbSamples);
net = mlp_init(topo, 3, inputs, outputs, nbSamples);
rms = mlp_train_backprop(net, inputs, outputs, nbSamples, nbEpoch, 1);
+ printf ("#ifdef HAVE_CONFIG_H\n");
+ printf ("#include \"config.h\"\n");
+ printf ("#endif\n\n");
printf ("#include \"mlp.h\"\n\n");
printf ("/* RMS error was %f, seed was %u */\n\n", rms, seed);
printf ("static const float weights[%d] = {\n", (topo[0]+1)*topo[1] + (topo[1]+1)*topo[2]);
diff --git a/src/opus_encoder.c b/src/opus_encoder.c
index bb94ec1a..ad776c48 100644
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -577,25 +577,81 @@ static opus_int32 user_bitrate_to_bitrate(OpusEncoder *st, int frame_size, int m
#else
#define PCM2VAL(x) SCALEIN(x)
#endif
-void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C)
+
+static opus_val32 silk_resampler_down2_hp(
+ opus_val32 *S, /* I/O State vector [ 3 ]: S[0], S[1] for the output path, S[2] for the high-pass path */
+ opus_val32 *out, /* O Output signal [ floor(len/2) ] */
+ const opus_val32 *in, /* I Input signal [ len ] */
+ int inLen /* I Number of input samples */
+)
+{
+ int k, len2 = inLen/2;
+ opus_val32 in32, out32, out32_hp, Y, X;
+ opus_val64 hp_ener = 0;
+ /* Decimates the input by two: each pair of input samples produces one
+ output sample. The return value is the accumulated energy of the
+ high-pass branch (out32_hp), which reuses the even-sample section and
+ runs the odd-sample section on the negated odd sample with its own
+ state S[2]. No Q-format conversion is done here; samples are used
+ exactly as passed in. */
+ for( k = 0; k < len2; k++ ) {
+ /* Even input sample */
+ in32 = in[ 2 * k ];
+
+ /* All-pass section for even input sample */
+ Y = SUB32( in32, S[ 0 ] );
+ X = MULT16_32_Q15(QCONST16(0.6074371f, 15), Y);
+ out32 = ADD32( S[ 0 ], X );
+ S[ 0 ] = ADD32( in32, X );
+ out32_hp = out32;
+ /* Odd input sample */
+ in32 = in[ 2 * k + 1 ];
+
+ /* All-pass section for odd input sample, and add to output of previous section */
+ Y = SUB32( in32, S[ 1 ] );
+ X = MULT16_32_Q15(QCONST16(0.15063f, 15), Y);
+ out32 = ADD32( out32, S[ 1 ] );
+ out32 = ADD32( out32, X );
+ S[ 1 ] = ADD32( in32, X );
+ /* Same section applied to the negated odd sample, added to the high-pass branch */
+ Y = SUB32( -in32, S[ 2 ] );
+ X = MULT16_32_Q15(QCONST16(0.15063f, 15), Y);
+ out32_hp = ADD32( out32_hp, S[ 2 ] );
+ out32_hp = ADD32( out32_hp, X );
+ S[ 2 ] = ADD32( -in32, X );
+
+ hp_ener += out32_hp*(opus_val64)out32_hp;
+ /* Store HALF32() of the summed sections as the decimated output sample */
+ out[ k ] = HALF32(out32);
+ }
+#ifdef FIXED_POINT
+ /* len2 can be up to 480, so we shift by 8 more to make it fit. */
+ hp_ener = hp_ener >> (2*SIG_SHIFT + 8);
+#endif
+ return hp_ener;
+}
+
+opus_val32 downmix_float(const void *_x, opus_val32 *sub, opus_val32 S[3], int subframe, int offset, int c1, int c2, int C)
{
+ VARDECL(opus_val32, tmp);
const float *x;
opus_val32 scale;
int j;
+
+ if (subframe==0) return 0;
+ subframe *= 2;
+ offset *= 2;
+ ALLOC(tmp, subframe, opus_val32);
+
x = (const float *)_x;
for (j=0;j<subframe;j++)
- sub[j] = PCM2VAL(x[(j+offset)*C+c1]);
+ tmp[j] = PCM2VAL(x[(j+offset)*C+c1]);
if (c2>-1)
{
for (j=0;j<subframe;j++)
- sub[j] += PCM2VAL(x[(j+offset)*C+c2]);
+ tmp[j] += PCM2VAL(x[(j+offset)*C+c2]);
} else if (c2==-2)
{
int c;
for (c=1;c<C;c++)
{
for (j=0;j<subframe;j++)
- sub[j] += PCM2VAL(x[(j+offset)*C+c]);
+ tmp[j] += PCM2VAL(x[(j+offset)*C+c]);
}
}
#ifdef FIXED_POINT
@@ -608,29 +664,38 @@ void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, in
else if (c2>-1)
scale /= 2;
for (j=0;j<subframe;j++)
- sub[j] *= scale;
+ tmp[j] *= scale;
+ return silk_resampler_down2_hp(S, sub, tmp, subframe);
}
#endif
-void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C)
+opus_val32 downmix_int(const void *_x, opus_val32 *sub, opus_val32 S[3], int subframe, int offset, int c1, int c2, int C)
{
+ VARDECL(opus_val32, tmp);
const opus_int16 *x;
opus_val32 scale;
int j;
+ ALLOC_STACK;
+
+ if (subframe==0) return 0;
+ subframe *= 2;
+ offset *= 2;
+ ALLOC(tmp, subframe, opus_val32);
+
x = (const opus_int16 *)_x;
for (j=0;j<subframe;j++)
- sub[j] = x[(j+offset)*C+c1];
+ tmp[j] = x[(j+offset)*C+c1];
if (c2>-1)
{
for (j=0;j<subframe;j++)
- sub[j] += x[(j+offset)*C+c2];
+ tmp[j] += x[(j+offset)*C+c2];
} else if (c2==-2)
{
int c;
for (c=1;c<C;c++)
{
for (j=0;j<subframe;j++)
- sub[j] += x[(j+offset)*C+c];
+ tmp[j] += x[(j+offset)*C+c];
}
}
#ifdef FIXED_POINT
@@ -643,7 +708,8 @@ void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int
else if (c2>-1)
scale /= 2;
for (j=0;j<subframe;j++)
- sub[j] *= scale;
+ tmp[j] *= scale;
+ return silk_resampler_down2_hp(S, sub, tmp, subframe);
}
opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs)
@@ -866,7 +932,9 @@ static int is_digital_silence(const opus_val16* pcm, int frame_size, int channel
{
int silence = 0;
opus_val32 sample_max = 0;
-
+#ifdef MLP_TRAINING
+ return 0;
+#endif
sample_max = celt_maxabs16(pcm, frame_size*channels);
#ifdef FIXED_POINT
diff --git a/src/opus_private.h b/src/opus_private.h
index a731cc55..25104de1 100644
--- a/src/opus_private.h
+++ b/src/opus_private.h
@@ -84,9 +84,9 @@ int get_mono_channel(const ChannelLayout *layout, int stream_id, int prev);
#define OPUS_SET_FORCE_MODE_REQUEST 11002
#define OPUS_SET_FORCE_MODE(x) OPUS_SET_FORCE_MODE_REQUEST, __opus_check_int(x)
-typedef void (*downmix_func)(const void *, opus_val32 *, int, int, int, int, int);
-void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C);
-void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C);
+typedef opus_val32 (*downmix_func)(const void *, opus_val32 *, opus_val32[3], int, int, int, int, int);
+opus_val32 downmix_float(const void *_x, opus_val32 *sub, opus_val32 S[3], int subframe, int offset, int c1, int c2, int C);
+opus_val32 downmix_int(const void *_x, opus_val32 *sub, opus_val32 S[3], int subframe, int offset, int c1, int c2, int C);
int encode_size(int size, unsigned char *data);