dcadec: remove scaling in lfe_interpolation_fir

The scaling factor is constant, so it is faster to scale the FIR coefficients in the tables during compilation.

Signed-off-by: Janne Grunau <janne-libav@jannau.net>
author: Christophe Gisquet <christophe.gisquet@gmail.com> 2014-02-14 15:03:06 +0000
committer: Janne Grunau <janne-libav@jannau.net> 2014-02-28 13:00:47 +0100
commit: 87ec849fe9acba075c843e67bcd01f256f481a18 (patch)
tree: e071371104bda0f397ec2b647f90ae8d5a898bc1 /libavcodec/arm
parent: a55546f48d55e3d1155840541b2be5f4f8cf18ab (diff)
download: ffmpeg-87ec849fe9acba075c843e67bcd01f256f481a18.tar.gz
3 files changed, 5 insertions, 21 deletions
diff --git a/libavcodec/arm/dcadsp_init_arm.c b/libavcodec/arm/dcadsp_init_arm.c
index 2ea12895de..d91c787d79 100644
--- a/libavcodec/arm/dcadsp_init_arm.c
+++ b/libavcodec/arm/dcadsp_init_arm.c
@@ -24,15 +24,11 @@
 #include "libavutil/attributes.h"
 #include "libavcodec/dcadsp.h"
 
-void ff_dca_lfe_fir0_neon(float *out, const float *in, const float *coefs,
-                          float scale);
-void ff_dca_lfe_fir1_neon(float *out, const float *in, const float *coefs,
-                          float scale);
+void ff_dca_lfe_fir0_neon(float *out, const float *in, const float *coefs);
+void ff_dca_lfe_fir1_neon(float *out, const float *in, const float *coefs);
 
-void ff_dca_lfe_fir32_vfp(float *out, const float *in, const float *coefs,
-                          float scale);
-void ff_dca_lfe_fir64_vfp(float *out, const float *in, const float *coefs,
-                          float scale);
+void ff_dca_lfe_fir32_vfp(float *out, const float *in, const float *coefs);
+void ff_dca_lfe_fir64_vfp(float *out, const float *in, const float *coefs);
 
 void ff_dca_qmf_32_subbands_vfp(float samples_in[32][8], int sb_act,
                                 SynthFilterContext *synth, FFTContext *imdct,
diff --git a/libavcodec/arm/dcadsp_neon.S b/libavcodec/arm/dcadsp_neon.S
index c798fea7f7..735c4c28e5 100644
--- a/libavcodec/arm/dcadsp_neon.S
+++ b/libavcodec/arm/dcadsp_neon.S
@@ -22,7 +22,6 @@
 
 function ff_dca_lfe_fir0_neon, export=1
         push            {r4-r6,lr}
-NOVFP   vmov            s0,  r3                 @ scale
         mov             r3,  #32                @ decifactor
         mov             r6,  #256/32
         b               dca_lfe_fir
@@ -30,7 +29,6 @@ endfunc
 
 function ff_dca_lfe_fir1_neon, export=1
         push            {r4-r6,lr}
-NOVFP   vmov            s0,  r3                 @ scale
         mov             r3,  #64                @ decifactor
         mov             r6,  #256/64
 dca_lfe_fir:
@@ -57,8 +55,7 @@ dca_lfe_fir:
         subs            r3,  r3,  #1
         vadd.f32        d4,  d4,  d5
         vadd.f32        d6,  d6,  d7
-        vpadd.f32       d4,  d4,  d6
-        vmul.f32        d5,  d4,  d0[0]
+        vpadd.f32       d5,  d4,  d6
         vst1.32         {d5[0]},  [r0,:32]!
         vst1.32         {d5[1]},  [r4,:32]!
         bne             1b
diff --git a/libavcodec/arm/dcadsp_vfp.S b/libavcodec/arm/dcadsp_vfp.S
index edabc29e23..c9114d499a 100644
--- a/libavcodec/arm/dcadsp_vfp.S
+++ b/libavcodec/arm/dcadsp_vfp.S
@@ -27,8 +27,6 @@ PCOEF         .req    a3
 OLDFPSCR      .req    a4
 COUNTER       .req    ip
 
-SCALE32       .req    s28  @ use vector of 4 in place of 9th scalar when decifactor=32 / JMAX=8
-SCALE64       .req    s0   @ spare register in scalar bank when decifactor=64 / JMAX=4
 IN0           .req    s4
 IN1           .req    s5
 IN2           .req    s6
@@ -76,9 +74,6 @@ POST3         .req    s27
         vldr    COEF5, [PCOEF, #X + (1*JMAX + 1) * Y]
         vldr    COEF6, [PCOEF, #X + (2*JMAX + 1) * Y]
  .endif
- .ifnc "\tail",""
-        vmul.f  POST0, POST0, SCALE\decifactor  @ vector operation (SCALE may be scalar)
- .endif
  .ifnc "\head",""
         vldr    COEF7, [PCOEF, #X + (3*JMAX + 1) * Y]
    .ifc "\tail",""
@@ -129,7 +124,6 @@ POST3         .req    s27
 
 .macro dca_lfe_fir  decifactor
 function ff_dca_lfe_fir\decifactor\()_vfp, export=1
-NOVFP   vmov    s0, r3
         fmrx    OLDFPSCR, FPSCR
         ldr     ip, =0x03030000         @ RunFast mode, short vectors of length 4, stride 1
         fmxr    FPSCR, ip
@@ -140,7 +134,6 @@ NOVFP   vmov    s0, r3
  .if \decifactor == 32
   .set JMAX, 8
         vpush   {s16-s31}
-        vmov    SCALE32, s0             @ duplicate scalar across vector
         vldr    IN4, [PIN, #-4*4]
         vldr    IN5, [PIN, #-5*4]
         vldr    IN6, [PIN, #-6*4]
@@ -186,8 +179,6 @@ endfunc
         .unreq  OLDFPSCR
         .unreq  COUNTER
 
-        .unreq  SCALE32
-        .unreq  SCALE64
         .unreq  IN0
         .unreq  IN1
         .unreq  IN2
author	Christophe Gisquet <christophe.gisquet@gmail.com>	2014-02-14 15:03:06 +0000
committer	Janne Grunau <janne-libav@jannau.net>	2014-02-28 13:00:47 +0100
commit	87ec849fe9acba075c843e67bcd01f256f481a18 (patch)
tree	e071371104bda0f397ec2b647f90ae8d5a898bc1 /libavcodec/arm
parent	a55546f48d55e3d1155840541b2be5f4f8cf18ab (diff)
download	ffmpeg-87ec849fe9acba075c843e67bcd01f256f481a18.tar.gz