diff options
author | Christophe Gisquet <christophe.gisquet@gmail.com> | 2014-02-14 15:03:06 +0000 |
---|---|---|
committer | Janne Grunau <janne-libav@jannau.net> | 2014-02-28 13:00:47 +0100 |
commit | 87ec849fe9acba075c843e67bcd01f256f481a18 (patch) | |
tree | e071371104bda0f397ec2b647f90ae8d5a898bc1 /libavcodec/arm | |
parent | a55546f48d55e3d1155840541b2be5f4f8cf18ab (diff) | |
download | ffmpeg-87ec849fe9acba075c843e67bcd01f256f481a18.tar.gz |
dcadec: remove scaling in lfe_interpolation_fir
The scaling factor is constant, so it is faster to pre-scale the
FIR coefficients in the tables at compile time than to multiply every output sample by the scale at run time.
Signed-off-by: Janne Grunau <janne-libav@jannau.net>
Diffstat (limited to 'libavcodec/arm')
-rw-r--r-- | libavcodec/arm/dcadsp_init_arm.c | 12 | ||||
-rw-r--r-- | libavcodec/arm/dcadsp_neon.S | 5 | ||||
-rw-r--r-- | libavcodec/arm/dcadsp_vfp.S | 9 |
3 files changed, 5 insertions, 21 deletions
diff --git a/libavcodec/arm/dcadsp_init_arm.c b/libavcodec/arm/dcadsp_init_arm.c index 2ea12895de..d91c787d79 100644 --- a/libavcodec/arm/dcadsp_init_arm.c +++ b/libavcodec/arm/dcadsp_init_arm.c @@ -24,15 +24,11 @@ #include "libavutil/attributes.h" #include "libavcodec/dcadsp.h" -void ff_dca_lfe_fir0_neon(float *out, const float *in, const float *coefs, - float scale); -void ff_dca_lfe_fir1_neon(float *out, const float *in, const float *coefs, - float scale); +void ff_dca_lfe_fir0_neon(float *out, const float *in, const float *coefs); +void ff_dca_lfe_fir1_neon(float *out, const float *in, const float *coefs); -void ff_dca_lfe_fir32_vfp(float *out, const float *in, const float *coefs, - float scale); -void ff_dca_lfe_fir64_vfp(float *out, const float *in, const float *coefs, - float scale); +void ff_dca_lfe_fir32_vfp(float *out, const float *in, const float *coefs); +void ff_dca_lfe_fir64_vfp(float *out, const float *in, const float *coefs); void ff_dca_qmf_32_subbands_vfp(float samples_in[32][8], int sb_act, SynthFilterContext *synth, FFTContext *imdct, diff --git a/libavcodec/arm/dcadsp_neon.S b/libavcodec/arm/dcadsp_neon.S index c798fea7f7..735c4c28e5 100644 --- a/libavcodec/arm/dcadsp_neon.S +++ b/libavcodec/arm/dcadsp_neon.S @@ -22,7 +22,6 @@ function ff_dca_lfe_fir0_neon, export=1 push {r4-r6,lr} -NOVFP vmov s0, r3 @ scale mov r3, #32 @ decifactor mov r6, #256/32 b dca_lfe_fir @@ -30,7 +29,6 @@ endfunc function ff_dca_lfe_fir1_neon, export=1 push {r4-r6,lr} -NOVFP vmov s0, r3 @ scale mov r3, #64 @ decifactor mov r6, #256/64 dca_lfe_fir: @@ -57,8 +55,7 @@ dca_lfe_fir: subs r3, r3, #1 vadd.f32 d4, d4, d5 vadd.f32 d6, d6, d7 - vpadd.f32 d4, d4, d6 - vmul.f32 d5, d4, d0[0] + vpadd.f32 d5, d4, d6 vst1.32 {d5[0]}, [r0,:32]! vst1.32 {d5[1]}, [r4,:32]! 
bne 1b diff --git a/libavcodec/arm/dcadsp_vfp.S b/libavcodec/arm/dcadsp_vfp.S index edabc29e23..c9114d499a 100644 --- a/libavcodec/arm/dcadsp_vfp.S +++ b/libavcodec/arm/dcadsp_vfp.S @@ -27,8 +27,6 @@ PCOEF .req a3 OLDFPSCR .req a4 COUNTER .req ip -SCALE32 .req s28 @ use vector of 4 in place of 9th scalar when decifactor=32 / JMAX=8 -SCALE64 .req s0 @ spare register in scalar bank when decifactor=64 / JMAX=4 IN0 .req s4 IN1 .req s5 IN2 .req s6 @@ -76,9 +74,6 @@ POST3 .req s27 vldr COEF5, [PCOEF, #X + (1*JMAX + 1) * Y] vldr COEF6, [PCOEF, #X + (2*JMAX + 1) * Y] .endif - .ifnc "\tail","" - vmul.f POST0, POST0, SCALE\decifactor @ vector operation (SCALE may be scalar) - .endif .ifnc "\head","" vldr COEF7, [PCOEF, #X + (3*JMAX + 1) * Y] .ifc "\tail","" @@ -129,7 +124,6 @@ POST3 .req s27 .macro dca_lfe_fir decifactor function ff_dca_lfe_fir\decifactor\()_vfp, export=1 -NOVFP vmov s0, r3 fmrx OLDFPSCR, FPSCR ldr ip, =0x03030000 @ RunFast mode, short vectors of length 4, stride 1 fmxr FPSCR, ip @@ -140,7 +134,6 @@ NOVFP vmov s0, r3 .if \decifactor == 32 .set JMAX, 8 vpush {s16-s31} - vmov SCALE32, s0 @ duplicate scalar across vector vldr IN4, [PIN, #-4*4] vldr IN5, [PIN, #-5*4] vldr IN6, [PIN, #-6*4] @@ -186,8 +179,6 @@ endfunc .unreq OLDFPSCR .unreq COUNTER - .unreq SCALE32 - .unreq SCALE64 .unreq IN0 .unreq IN1 .unreq IN2 |