6 files changed, 99 insertions, 39 deletions
diff --git a/libavcodec/alacenc.c b/libavcodec/alacenc.c
index 5706ae9f4d..d9ff2b8e0a 100644
--- a/libavcodec/alacenc.c
+++ b/libavcodec/alacenc.c
@@ -378,6 +378,7 @@ static void write_compressed_frame(AlacEncodeContext *s)
 static av_cold int alac_encode_init(AVCodecContext *avctx)
 {
     AlacEncodeContext *s    = avctx->priv_data;
+    int ret;
     uint8_t *alac_extradata = av_mallocz(ALAC_EXTRADATA_SIZE+1);
 
     avctx->frame_size      = DEFAULT_FRAME_SIZE;
@@ -455,9 +456,10 @@ static av_cold int alac_encode_init(AVCodecContext *avctx)
     avctx->coded_frame->key_frame = 1;
 
     s->avctx = avctx;
-    ff_lpc_init(&s->lpc_ctx);
+    ret = ff_lpc_init(&s->lpc_ctx, avctx->frame_size, s->max_prediction_order,
+                      AV_LPC_TYPE_LEVINSON);
 
-    return 0;
+    return ret;
 }
 
 static int alac_encode_frame(AVCodecContext *avctx, uint8_t *frame,
@@ -513,6 +515,8 @@ verbatim:
 
 static av_cold int alac_encode_close(AVCodecContext *avctx)
 {
+    AlacEncodeContext *s = avctx->priv_data;
+    ff_lpc_end(&s->lpc_ctx);
     av_freep(&avctx->extradata);
     avctx->extradata_size = 0;
     av_freep(&avctx->coded_frame);
diff --git a/libavcodec/flacenc.c b/libavcodec/flacenc.c
index d6f0f87376..2bfe56e8eb 100644
--- a/libavcodec/flacenc.c
+++ b/libavcodec/flacenc.c
@@ -211,13 +211,11 @@ static av_cold int flac_encode_init(AVCodecContext *avctx)
     int freq = avctx->sample_rate;
     int channels = avctx->channels;
     FlacEncodeContext *s = avctx->priv_data;
-    int i, level;
+    int i, level, ret;
     uint8_t *streaminfo;
 
     s->avctx = avctx;
 
-    ff_lpc_init(&s->lpc_ctx);
-
     if (avctx->sample_fmt != AV_SAMPLE_FMT_S16)
         return -1;
 
@@ -438,9 +436,12 @@ static av_cold int flac_encode_init(AVCodecContext *avctx)
     if (!avctx->coded_frame)
         return AVERROR(ENOMEM);
 
+    ret = ff_lpc_init(&s->lpc_ctx, avctx->frame_size,
+                      s->options.max_prediction_order, AV_LPC_TYPE_LEVINSON);
+
     dprint_compression_options(s);
 
-    return 0;
+    return ret;
 }
 
 
@@ -1316,6 +1317,7 @@ static av_cold int flac_encode_close(AVCodecContext *avctx)
     if (avctx->priv_data) {
         FlacEncodeContext *s = avctx->priv_data;
         av_freep(&s->md5ctx);
+        ff_lpc_end(&s->lpc_ctx);
     }
     av_freep(&avctx->extradata);
     avctx->extradata_size = 0;
diff --git a/libavcodec/lpc.c b/libavcodec/lpc.c
index 3a93c9f673..fd51491716 100644
--- a/libavcodec/lpc.c
+++ b/libavcodec/lpc.c
@@ -28,7 +28,7 @@
 /**
  * Apply Welch window function to audio block
  */
-static void apply_welch_window(const int32_t *data, int len, double *w_data)
+static void apply_welch_window_c(const int32_t *data, int len, double *w_data)
 {
     int i, n2;
     double w;
@@ -54,24 +54,16 @@ static void apply_welch_window(const int32_t *data, int len, double *w_data)
  * Calculate autocorrelation data from audio samples
  * A Welch window function is applied before calculation.
  */
-static void lpc_compute_autocorr_c(const int32_t *data, int len, int lag,
+static void lpc_compute_autocorr_c(const double *data, int len, int lag,
                              double *autoc)
 {
     int i, j;
-    double tmp[len + lag + 1];
-    double *data1= tmp + lag;
-
-    apply_welch_window(data, len, data1);
-
-    for(j=0; j<lag; j++)
-        data1[j-lag]= 0.0;
-    data1[len] = 0.0;
 
     for(j=0; j<lag; j+=2){
         double sum0 = 1.0, sum1 = 1.0;
         for(i=j; i<len; i++){
-            sum0 += data1[i] * data1[i-j];
-            sum1 += data1[i] * data1[i-j-1];
+            sum0 += data[i] * data[i-j];
+            sum1 += data[i] * data[i-j-1];
         }
         autoc[j  ] = sum0;
         autoc[j+1] = sum1;
@@ -80,8 +72,8 @@ static void lpc_compute_autocorr_c(const int32_t *data, int len, int lag,
     if(j==lag){
         double sum = 1.0;
         for(i=j-1; i<len; i+=2){
-            sum += data1[i  ] * data1[i-j  ]
-                 + data1[i+1] * data1[i-j+1];
+            sum += data[i  ] * data[i-j  ]
+                 + data[i+1] * data[i-j+1];
         }
         autoc[j] = sum;
     }
@@ -177,8 +169,19 @@ int ff_lpc_calc_coefs(LPCContext *s,
     assert(max_order >= MIN_LPC_ORDER && max_order <= MAX_LPC_ORDER &&
            lpc_type > AV_LPC_TYPE_FIXED);
 
+    /* reinit LPC context if parameters have changed */
+    if (blocksize != s->blocksize || max_order != s->max_order ||
+        lpc_type  != s->lpc_type) {
+        ff_lpc_end(s);
+        ff_lpc_init(s, blocksize, max_order, lpc_type);
+    }
+
     if (lpc_type == AV_LPC_TYPE_LEVINSON) {
-        s->lpc_compute_autocorr(samples, blocksize, max_order, autoc);
+        double *windowed_samples = s->windowed_samples + max_order;
+
+        s->lpc_apply_welch_window(samples, blocksize, windowed_samples);
+
+        s->lpc_compute_autocorr(windowed_samples, blocksize, max_order, autoc);
 
         compute_lpc_coefs(autoc, max_order, &lpc[0][0], MAX_LPC_ORDER, 0, 1);
 
@@ -236,10 +239,32 @@ int ff_lpc_calc_coefs(LPCContext *s,
     return opt_order;
 }
 
-av_cold void ff_lpc_init(LPCContext *s)
+av_cold int ff_lpc_init(LPCContext *s, int blocksize, int max_order,
+                        enum AVLPCType lpc_type) /* returns 0 or AVERROR(ENOMEM) */
 {
+    s->blocksize = blocksize; /* kept so ff_lpc_calc_coefs() can spot changes */
+    s->max_order = max_order;
+    s->lpc_type  = lpc_type;
+
+    if (lpc_type == AV_LPC_TYPE_LEVINSON) { /* only this type gets a buffer */
+        s->windowed_samples = av_mallocz((blocksize + max_order + 2) *
+                                         sizeof(*s->windowed_samples));
+        if (!s->windowed_samples) /* parameters above are already stored */
+            return AVERROR(ENOMEM);
+    } else {
+        s->windowed_samples = NULL;
+    }
+
+    s->lpc_apply_welch_window = apply_welch_window_c; /* C defaults first */
     s->lpc_compute_autocorr = lpc_compute_autocorr_c;
 
     if (HAVE_MMX)
         ff_lpc_init_x86(s);
+
+    return 0;
+}
+
+av_cold void ff_lpc_end(LPCContext *s)
+{
+    av_freep(&s->windowed_samples); /* buffer set up by ff_lpc_init() */
 }
diff --git a/libavcodec/lpc.h b/libavcodec/lpc.h
index a307793374..96b66df909 100644
--- a/libavcodec/lpc.h
+++ b/libavcodec/lpc.h
@@ -37,16 +37,35 @@
 
 
 typedef struct LPCContext {
+    int blocksize; /**< block size given to ff_lpc_init() */
+    int max_order; /**< maximum order given to ff_lpc_init() */
+    enum AVLPCType lpc_type; /**< LPC type given to ff_lpc_init() */
+    double *windowed_samples; /**< window output buffer; NULL unless Levinson */
+
+    /**
+     * Apply a Welch window to an array of input samples.
+     * The output samples have the same scale as the input, but are in double
+     * sample format.
+     * @param data    input samples
+     * @param len     number of input samples
+     * @param w_data  output samples
+     */
+    void (*lpc_apply_welch_window)(const int32_t *data, int len,
+                                   double *w_data);
     /**
      * Perform autocorrelation on input samples with delay of 0 to lag.
      * @param data  input samples.
-     *              no alignment needed.
+     *              constraints: no alignment needed, but must have at
+     *              least lag*sizeof(double) valid bytes preceding it, and
+     *              size must be at least (len+1)*sizeof(double) if data is
+     *              16-byte aligned or (len+2)*sizeof(double) if data is
+     *              unaligned.
      * @param len   number of input samples to process
      * @param lag   maximum delay to calculate
      * @param autoc output autocorrelation coefficients.
      *              constraints: array size must be at least lag+1.
      */
-    void (*lpc_compute_autocorr)(const int32_t *data, int len, int lag,
+    void (*lpc_compute_autocorr)(const double *data, int len, int lag,
                                  double *autoc);
 } LPCContext;
 
@@ -64,9 +83,15 @@ int ff_lpc_calc_coefs(LPCContext *s,
 /**
  * Initialize LPCContext.
  */
-void ff_lpc_init(LPCContext *s);
+int ff_lpc_init(LPCContext *s, int blocksize, int max_order,
+                enum AVLPCType lpc_type);
 void ff_lpc_init_x86(LPCContext *s);
 
+/**
+ * Uninitialize LPCContext.
+ */
+void ff_lpc_end(LPCContext *s);
+
 #ifdef LPC_USE_DOUBLE
 #define LPC_TYPE double
 #else
diff --git a/libavcodec/ra144enc.c b/libavcodec/ra144enc.c
index 3f8694eb8f..2c0a6b1fbd 100644
--- a/libavcodec/ra144enc.c
+++ b/libavcodec/ra144enc.c
@@ -36,6 +36,7 @@
 static av_cold int ra144_encode_init(AVCodecContext * avctx)
 {
     RA144Context *ractx;
+    int ret;
 
     if (avctx->sample_fmt != AV_SAMPLE_FMT_S16) {
         av_log(avctx, AV_LOG_ERROR, "invalid sample format\n");
@@ -52,7 +53,16 @@ static av_cold int ra144_encode_init(AVCodecContext * avctx)
     ractx->lpc_coef[0] = ractx->lpc_tables[0];
     ractx->lpc_coef[1] = ractx->lpc_tables[1];
     ractx->avctx = avctx;
-    ff_lpc_init(&ractx->lpc_ctx);
+    ret = ff_lpc_init(&ractx->lpc_ctx, avctx->frame_size, LPC_ORDER,
+                      AV_LPC_TYPE_LEVINSON);
+    return ret;
+}
+
+
+static av_cold int ra144_encode_close(AVCodecContext *avctx)
+{
+    RA144Context *ractx = avctx->priv_data;
+    ff_lpc_end(&ractx->lpc_ctx); /* pairs with ff_lpc_init() in ra144_encode_init() */
     return 0;
 }
 
@@ -506,5 +516,6 @@ AVCodec ra_144_encoder =
     sizeof(RA144Context),
     ra144_encode_init,
     ra144_encode_frame,
+    ra144_encode_close,
     .long_name = NULL_IF_CONFIG_SMALL("RealAudio 1.0 (14.4K) encoder"),
 };
diff --git a/libavcodec/x86/lpc_mmx.c b/libavcodec/x86/lpc_mmx.c
index 19aad9860f..3a0a1f0f6f 100644
--- a/libavcodec/x86/lpc_mmx.c
+++ b/libavcodec/x86/lpc_mmx.c
@@ -69,21 +69,13 @@ static void apply_welch_window_sse2(const int32_t *data, int len, double *w_data
 #undef WELCH
 }
 
-static void lpc_compute_autocorr_sse2(const int32_t *data, int len, int lag,
+static void lpc_compute_autocorr_sse2(const double *data, int len, int lag,
                                    double *autoc)
 {
-    double tmp[len + lag + 2];
-    double *data1 = tmp + lag;
     int j;
 
-    if((x86_reg)data1 & 15)
-        data1++;
-
-    apply_welch_window_sse2(data, len, data1);
-
-    for(j=0; j<lag; j++)
-        data1[j-lag]= 0.0;
-    data1[len] = 0.0;
+    if((x86_reg)data & 15)
+        data++;
 
     for(j=0; j<lag; j+=2){
         x86_reg i = -len*sizeof(double);
@@ -114,7 +106,7 @@ static void lpc_compute_autocorr_sse2(const int32_t *data, int len, int lag,
                 "movsd     %%xmm1,  8(%1)           \n\t"
                 "movsd     %%xmm2, 16(%1)           \n\t"
                 :"+&r"(i)
-                :"r"(autoc+j), "r"(data1+len), "r"(data1+len-j)
+                :"r"(autoc+j), "r"(data+len), "r"(data+len-j)
                 :"memory"
             );
         } else {
@@ -137,7 +129,7 @@ static void lpc_compute_autocorr_sse2(const int32_t *data, int len, int lag,
                 "movsd     %%xmm0, %1               \n\t"
                 "movsd     %%xmm1, %2               \n\t"
                 :"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1])
-                :"r"(data1+len), "r"(data1+len-j)
+                :"r"(data+len), "r"(data+len-j)
             );
         }
     }
@@ -148,6 +140,7 @@ av_cold void ff_lpc_init_x86(LPCContext *c)
     int mm_flags = av_get_cpu_flags();
 
     if (mm_flags & (AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW)) {
+        c->lpc_apply_welch_window = apply_welch_window_sse2;
         c->lpc_compute_autocorr = lpc_compute_autocorr_sse2;
     }
 }