diff options
author | foo86 <foobaz86@gmail.com> | 2016-01-16 11:07:08 +0300 |
---|---|---|
committer | Hendrik Leppkes <h.leppkes@gmail.com> | 2016-01-31 17:09:38 +0100 |
commit | 46089967722f74e794865a044f5f682f26628802 (patch) | |
tree | b4ca91d42d3eb0da3229d217323565738c101f87 /libavcodec/arm | |
parent | b552f3afa2a76142c9aa87a89e31e75423b4cd3b (diff) | |
download | ffmpeg-46089967722f74e794865a044f5f682f26628802.tar.gz |
avcodec/dca: remove old decoder
Remove all files and functions which are not going to be reused,
and temporarily disable all functions and FATE tests which will be reused.
Diffstat (limited to 'libavcodec/arm')
-rw-r--r-- | libavcodec/arm/Makefile | 9 | ||||
-rw-r--r-- | libavcodec/arm/dca.h | 1 | ||||
-rw-r--r-- | libavcodec/arm/dcadsp_init_arm.c | 53 | ||||
-rw-r--r-- | libavcodec/arm/dcadsp_neon.S | 64 | ||||
-rw-r--r-- | libavcodec/arm/dcadsp_vfp.S | 476 |
5 files changed, 3 insertions, 600 deletions
diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile index 6a29a5fbb7..b2f5a5aec5 100644 --- a/libavcodec/arm/Makefile +++ b/libavcodec/arm/Makefile @@ -36,8 +36,7 @@ OBJS-$(CONFIG_VP8DSP) += arm/vp8dsp_init_arm.o # decoders/encoders OBJS-$(CONFIG_AAC_DECODER) += arm/aacpsdsp_init_arm.o \ arm/sbrdsp_init_arm.o -OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_init_arm.o \ - arm/synth_filter_init_arm.o +#OBJS-$(CONFIG_DCA_DECODER) += arm/synth_filter_init_arm.o OBJS-$(CONFIG_HEVC_DECODER) += arm/hevcdsp_init_arm.o OBJS-$(CONFIG_MLP_DECODER) += arm/mlpdsp_init_arm.o OBJS-$(CONFIG_RV40_DECODER) += arm/rv40dsp_init_arm.o @@ -88,8 +87,7 @@ VFP-OBJS-$(CONFIG_FMTCONVERT) += arm/fmtconvert_vfp.o VFP-OBJS-$(CONFIG_MDCT) += arm/mdct_vfp.o # decoders/encoders -VFP-OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_vfp.o \ - arm/synth_filter_vfp.o +#VFP-OBJS-$(CONFIG_DCA_DECODER) += arm/synth_filter_vfp.o # NEON optimizations @@ -128,8 +126,7 @@ NEON-OBJS-$(CONFIG_VP8DSP) += arm/vp8dsp_init_neon.o \ NEON-OBJS-$(CONFIG_AAC_DECODER) += arm/aacpsdsp_neon.o \ arm/sbrdsp_neon.o NEON-OBJS-$(CONFIG_LLAUDDSP) += arm/lossless_audiodsp_neon.o -NEON-OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_neon.o \ - arm/synth_filter_neon.o +#NEON-OBJS-$(CONFIG_DCA_DECODER) += arm/synth_filter_neon.o NEON-OBJS-$(CONFIG_HEVC_DECODER) += arm/hevcdsp_init_neon.o \ arm/hevcdsp_deblock_neon.o \ arm/hevcdsp_idct_neon.o \ diff --git a/libavcodec/arm/dca.h b/libavcodec/arm/dca.h index 6e87111a32..ae4b730a8a 100644 --- a/libavcodec/arm/dca.h +++ b/libavcodec/arm/dca.h @@ -24,7 +24,6 @@ #include <stdint.h> #include "config.h" -#include "libavcodec/dcadsp.h" #include "libavcodec/mathops.h" #if HAVE_ARMV6_INLINE && AV_GCC_VERSION_AT_LEAST(4,4) && !CONFIG_THUMB diff --git a/libavcodec/arm/dcadsp_init_arm.c b/libavcodec/arm/dcadsp_init_arm.c deleted file mode 100644 index febb4445d2..0000000000 --- a/libavcodec/arm/dcadsp_init_arm.c +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2010 Mans Rullgard <mans@mansr.com> - * - 
* This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "config.h" - -#include "libavutil/arm/cpu.h" -#include "libavutil/attributes.h" -#include "libavcodec/dcadsp.h" - -void ff_dca_lfe_fir0_neon(float *out, const float *in, const float *coefs); -void ff_dca_lfe_fir1_neon(float *out, const float *in, const float *coefs); - -void ff_dca_lfe_fir32_vfp(float *out, const float *in, const float *coefs); -void ff_dca_lfe_fir64_vfp(float *out, const float *in, const float *coefs); - -void ff_dca_qmf_32_subbands_vfp(float samples_in[32][8], int sb_act, - SynthFilterContext *synth, FFTContext *imdct, - float synth_buf_ptr[512], - int *synth_buf_offset, float synth_buf2[32], - const float window[512], float *samples_out, - float raXin[32], float scale); - -av_cold void ff_dcadsp_init_arm(DCADSPContext *s) -{ - int cpu_flags = av_get_cpu_flags(); - - if (have_vfp_vm(cpu_flags)) { - s->lfe_fir[0] = ff_dca_lfe_fir32_vfp; - s->lfe_fir[1] = ff_dca_lfe_fir64_vfp; - s->qmf_32_subbands = ff_dca_qmf_32_subbands_vfp; - } - if (have_neon(cpu_flags)) { - s->lfe_fir[0] = ff_dca_lfe_fir0_neon; - s->lfe_fir[1] = ff_dca_lfe_fir1_neon; - } -} diff --git a/libavcodec/arm/dcadsp_neon.S b/libavcodec/arm/dcadsp_neon.S deleted file mode 100644 index 101fee0884..0000000000 --- 
a/libavcodec/arm/dcadsp_neon.S +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2010 Mans Rullgard <mans@mansr.com> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/arm/asm.S" - -function ff_dca_lfe_fir0_neon, export=1 - push {r4-r6,lr} - mov r3, #32 @ decifactor - mov r6, #256/32 - b dca_lfe_fir -endfunc - -function ff_dca_lfe_fir1_neon, export=1 - push {r4-r6,lr} - mov r3, #64 @ decifactor - mov r6, #256/64 -dca_lfe_fir: - add r4, r0, r3, lsl #2 @ out2 - add r5, r2, #256*4-16 @ cf1 - sub r1, r1, #12 - mov lr, #-16 -1: - vmov.f32 q2, #0.0 @ v0 - vmov.f32 q3, #0.0 @ v1 - mov r12, r6 -2: - vld1.32 {q8}, [r2,:128]! @ cf0 - vld1.32 {q9}, [r5,:128], lr @ cf1 - vld1.32 {q1}, [r1], lr @ in - subs r12, r12, #4 - vrev64.32 q10, q8 - vmla.f32 q3, q1, q9 - vmla.f32 d4, d2, d21 - vmla.f32 d5, d3, d20 - bne 2b - - add r1, r1, r6, lsl #2 - subs r3, r3, #1 - vadd.f32 d4, d4, d5 - vadd.f32 d6, d6, d7 - vpadd.f32 d5, d4, d6 - vst1.32 {d5[0]}, [r0,:32]! - vst1.32 {d5[1]}, [r4,:32]! 
- bne 1b - - pop {r4-r6,pc} -endfunc diff --git a/libavcodec/arm/dcadsp_vfp.S b/libavcodec/arm/dcadsp_vfp.S deleted file mode 100644 index 2e09f0ee5d..0000000000 --- a/libavcodec/arm/dcadsp_vfp.S +++ /dev/null @@ -1,476 +0,0 @@ -/* - * Copyright (c) 2013 RISC OS Open Ltd - * Author: Ben Avison <bavison@riscosopen.org> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/arm/asm.S" - -POUT .req a1 -PIN .req a2 -PCOEF .req a3 -OLDFPSCR .req a4 -COUNTER .req ip - -IN0 .req s4 -IN1 .req s5 -IN2 .req s6 -IN3 .req s7 -IN4 .req s0 -IN5 .req s1 -IN6 .req s2 -IN7 .req s3 -COEF0 .req s8 @ coefficient elements -COEF1 .req s9 -COEF2 .req s10 -COEF3 .req s11 -COEF4 .req s12 -COEF5 .req s13 -COEF6 .req s14 -COEF7 .req s15 -ACCUM0 .req s16 @ double-buffered multiply-accumulate results -ACCUM4 .req s20 -POST0 .req s24 @ do long-latency post-multiply in this vector in parallel -POST1 .req s25 -POST2 .req s26 -POST3 .req s27 - - -.macro inner_loop decifactor, dir, tail, head - .ifc "\dir","up" - .set X, 0 - .set Y, 4 - .else - .set X, 4*JMAX*4 - 4 - .set Y, -4 - .endif - .ifnc "\head","" - vldr COEF0, [PCOEF, #X + (0*JMAX + 0) * Y] - vldr COEF1, [PCOEF, #X + (1*JMAX + 0) * Y] - vldr COEF2, [PCOEF, #X + (2*JMAX + 0) * Y] - vldr COEF3, [PCOEF, #X + 
(3*JMAX + 0) * Y] - .endif - .ifnc "\tail","" - vadd.f POST0, ACCUM0, ACCUM4 @ vector operation - .endif - .ifnc "\head","" - vmul.f ACCUM0, COEF0, IN0 @ vector = vector * scalar - vldr COEF4, [PCOEF, #X + (0*JMAX + 1) * Y] - vldr COEF5, [PCOEF, #X + (1*JMAX + 1) * Y] - vldr COEF6, [PCOEF, #X + (2*JMAX + 1) * Y] - .endif - .ifnc "\head","" - vldr COEF7, [PCOEF, #X + (3*JMAX + 1) * Y] - .ifc "\tail","" - vmul.f ACCUM4, COEF4, IN1 @ vector operation - .endif - vldr COEF0, [PCOEF, #X + (0*JMAX + 2) * Y] - vldr COEF1, [PCOEF, #X + (1*JMAX + 2) * Y] - .ifnc "\tail","" - vmul.f ACCUM4, COEF4, IN1 @ vector operation - .endif - vldr COEF2, [PCOEF, #X + (2*JMAX + 2) * Y] - vldr COEF3, [PCOEF, #X + (3*JMAX + 2) * Y] - .endif - .ifnc "\tail","" - vstmia POUT!, {POST0-POST3} - .endif - .ifnc "\head","" - vmla.f ACCUM0, COEF0, IN2 @ vector = vector * scalar - vldr COEF4, [PCOEF, #X + (0*JMAX + 3) * Y] - vldr COEF5, [PCOEF, #X + (1*JMAX + 3) * Y] - vldr COEF6, [PCOEF, #X + (2*JMAX + 3) * Y] - vldr COEF7, [PCOEF, #X + (3*JMAX + 3) * Y] - vmla.f ACCUM4, COEF4, IN3 @ vector = vector * scalar - .if \decifactor == 32 - vldr COEF0, [PCOEF, #X + (0*JMAX + 4) * Y] - vldr COEF1, [PCOEF, #X + (1*JMAX + 4) * Y] - vldr COEF2, [PCOEF, #X + (2*JMAX + 4) * Y] - vldr COEF3, [PCOEF, #X + (3*JMAX + 4) * Y] - vmla.f ACCUM0, COEF0, IN4 @ vector = vector * scalar - vldr COEF4, [PCOEF, #X + (0*JMAX + 5) * Y] - vldr COEF5, [PCOEF, #X + (1*JMAX + 5) * Y] - vldr COEF6, [PCOEF, #X + (2*JMAX + 5) * Y] - vldr COEF7, [PCOEF, #X + (3*JMAX + 5) * Y] - vmla.f ACCUM4, COEF4, IN5 @ vector = vector * scalar - vldr COEF0, [PCOEF, #X + (0*JMAX + 6) * Y] - vldr COEF1, [PCOEF, #X + (1*JMAX + 6) * Y] - vldr COEF2, [PCOEF, #X + (2*JMAX + 6) * Y] - vldr COEF3, [PCOEF, #X + (3*JMAX + 6) * Y] - vmla.f ACCUM0, COEF0, IN6 @ vector = vector * scalar - vldr COEF4, [PCOEF, #X + (0*JMAX + 7) * Y] - vldr COEF5, [PCOEF, #X + (1*JMAX + 7) * Y] - vldr COEF6, [PCOEF, #X + (2*JMAX + 7) * Y] - vldr COEF7, [PCOEF, #X + (3*JMAX + 7) * 
Y] - vmla.f ACCUM4, COEF4, IN7 @ vector = vector * scalar - .endif - .endif -.endm - -.macro dca_lfe_fir decifactor -function ff_dca_lfe_fir\decifactor\()_vfp, export=1 - fmrx OLDFPSCR, FPSCR - ldr ip, =0x03030000 @ RunFast mode, short vectors of length 4, stride 1 - fmxr FPSCR, ip - vldr IN0, [PIN, #-0*4] - vldr IN1, [PIN, #-1*4] - vldr IN2, [PIN, #-2*4] - vldr IN3, [PIN, #-3*4] - .if \decifactor == 32 - .set JMAX, 8 - vpush {s16-s31} - vldr IN4, [PIN, #-4*4] - vldr IN5, [PIN, #-5*4] - vldr IN6, [PIN, #-6*4] - vldr IN7, [PIN, #-7*4] - .else - .set JMAX, 4 - vpush {s16-s27} - .endif - - mov COUNTER, #\decifactor/4 - 1 - inner_loop \decifactor, up,, head -1: add PCOEF, PCOEF, #4*JMAX*4 - subs COUNTER, COUNTER, #1 - inner_loop \decifactor, up, tail, head - bne 1b - inner_loop \decifactor, up, tail - - mov COUNTER, #\decifactor/4 - 1 - inner_loop \decifactor, down,, head -1: sub PCOEF, PCOEF, #4*JMAX*4 - subs COUNTER, COUNTER, #1 - inner_loop \decifactor, down, tail, head - bne 1b - inner_loop \decifactor, down, tail - - .if \decifactor == 32 - vpop {s16-s31} - .else - vpop {s16-s27} - .endif - fmxr FPSCR, OLDFPSCR - bx lr -endfunc -.endm - - dca_lfe_fir 64 - .ltorg - dca_lfe_fir 32 - - .unreq POUT - .unreq PIN - .unreq PCOEF - .unreq OLDFPSCR - .unreq COUNTER - - .unreq IN0 - .unreq IN1 - .unreq IN2 - .unreq IN3 - .unreq IN4 - .unreq IN5 - .unreq IN6 - .unreq IN7 - .unreq COEF0 - .unreq COEF1 - .unreq COEF2 - .unreq COEF3 - .unreq COEF4 - .unreq COEF5 - .unreq COEF6 - .unreq COEF7 - .unreq ACCUM0 - .unreq ACCUM4 - .unreq POST0 - .unreq POST1 - .unreq POST2 - .unreq POST3 - - -IN .req a1 -SBACT .req a2 -OLDFPSCR .req a3 -IMDCT .req a4 -WINDOW .req v1 -OUT .req v2 -BUF .req v3 -SCALEINT .req v4 @ only used in softfp case -COUNT .req v5 - -SCALE .req s0 - -/* Stack layout differs in softfp and hardfp cases: - * - * hardfp - * fp -> 6 arg words saved by caller - * a3,a4,v1-v3,v5,fp,lr on entry (a3 just to pad to 8 bytes) - * s16-s23 on entry - * align 16 - * buf -> 
8*32*4 bytes buffer - * s0 on entry - * sp -> 3 arg words for callee - * - * softfp - * fp -> 7 arg words saved by caller - * a4,v1-v5,fp,lr on entry - * s16-s23 on entry - * align 16 - * buf -> 8*32*4 bytes buffer - * sp -> 4 arg words for callee - */ - -/* void ff_dca_qmf_32_subbands_vfp(float samples_in[32][8], int sb_act, - * SynthFilterContext *synth, FFTContext *imdct, - * float (*synth_buf_ptr)[512], - * int *synth_buf_offset, float (*synth_buf2)[32], - * const float (*window)[512], float *samples_out, - * float (*raXin)[32], float scale); - */ -function ff_dca_qmf_32_subbands_vfp, export=1 -VFP push {a3-a4,v1-v3,v5,fp,lr} -NOVFP push {a4,v1-v5,fp,lr} - add fp, sp, #8*4 - vpush {s16-s23} - @ The buffer pointed at by raXin isn't big enough for us to do a - @ complete matrix transposition as we want to, so allocate an - @ alternative buffer from the stack. Align to 4 words for speed. - sub BUF, sp, #8*32*4 - bic BUF, BUF, #15 - mov sp, BUF - ldr lr, =0x03330000 @ RunFast mode, short vectors of length 4, stride 2 - fmrx OLDFPSCR, FPSCR - fmxr FPSCR, lr - @ COUNT is used to count down 2 things at once: - @ bits 0-4 are the number of word pairs remaining in the output row - @ bits 5-31 are the number of words to copy (with possible negation) - @ from the source matrix before we start zeroing the remainder - mov COUNT, #(-4 << 5) + 16 - adds COUNT, COUNT, SBACT, lsl #5 - bmi 2f -1: - vldr s8, [IN, #(0*8+0)*4] - vldr s10, [IN, #(0*8+1)*4] - vldr s12, [IN, #(0*8+2)*4] - vldr s14, [IN, #(0*8+3)*4] - vldr s16, [IN, #(0*8+4)*4] - vldr s18, [IN, #(0*8+5)*4] - vldr s20, [IN, #(0*8+6)*4] - vldr s22, [IN, #(0*8+7)*4] - vneg.f s8, s8 - vldr s9, [IN, #(1*8+0)*4] - vldr s11, [IN, #(1*8+1)*4] - vldr s13, [IN, #(1*8+2)*4] - vldr s15, [IN, #(1*8+3)*4] - vneg.f s16, s16 - vldr s17, [IN, #(1*8+4)*4] - vldr s19, [IN, #(1*8+5)*4] - vldr s21, [IN, #(1*8+6)*4] - vldr s23, [IN, #(1*8+7)*4] - vstr d4, [BUF, #(0*32+0)*4] - vstr d5, [BUF, #(1*32+0)*4] - vstr d6, [BUF, #(2*32+0)*4] - vstr 
d7, [BUF, #(3*32+0)*4] - vstr d8, [BUF, #(4*32+0)*4] - vstr d9, [BUF, #(5*32+0)*4] - vstr d10, [BUF, #(6*32+0)*4] - vstr d11, [BUF, #(7*32+0)*4] - vldr s9, [IN, #(3*8+0)*4] - vldr s11, [IN, #(3*8+1)*4] - vldr s13, [IN, #(3*8+2)*4] - vldr s15, [IN, #(3*8+3)*4] - vldr s17, [IN, #(3*8+4)*4] - vldr s19, [IN, #(3*8+5)*4] - vldr s21, [IN, #(3*8+6)*4] - vldr s23, [IN, #(3*8+7)*4] - vneg.f s9, s9 - vldr s8, [IN, #(2*8+0)*4] - vldr s10, [IN, #(2*8+1)*4] - vldr s12, [IN, #(2*8+2)*4] - vldr s14, [IN, #(2*8+3)*4] - vneg.f s17, s17 - vldr s16, [IN, #(2*8+4)*4] - vldr s18, [IN, #(2*8+5)*4] - vldr s20, [IN, #(2*8+6)*4] - vldr s22, [IN, #(2*8+7)*4] - vstr d4, [BUF, #(0*32+2)*4] - vstr d5, [BUF, #(1*32+2)*4] - vstr d6, [BUF, #(2*32+2)*4] - vstr d7, [BUF, #(3*32+2)*4] - vstr d8, [BUF, #(4*32+2)*4] - vstr d9, [BUF, #(5*32+2)*4] - vstr d10, [BUF, #(6*32+2)*4] - vstr d11, [BUF, #(7*32+2)*4] - add IN, IN, #4*8*4 - add BUF, BUF, #4*4 - subs COUNT, COUNT, #(4 << 5) + 2 - bpl 1b -2: @ Now deal with trailing < 4 samples - adds COUNT, COUNT, #3 << 5 - bmi 4f @ sb_act was a multiple of 4 - bics lr, COUNT, #0x1F - bne 3f - @ sb_act was n*4+1 - vldr s8, [IN, #(0*8+0)*4] - vldr s10, [IN, #(0*8+1)*4] - vldr s12, [IN, #(0*8+2)*4] - vldr s14, [IN, #(0*8+3)*4] - vldr s16, [IN, #(0*8+4)*4] - vldr s18, [IN, #(0*8+5)*4] - vldr s20, [IN, #(0*8+6)*4] - vldr s22, [IN, #(0*8+7)*4] - vneg.f s8, s8 - vldr s9, zero - vldr s11, zero - vldr s13, zero - vldr s15, zero - vneg.f s16, s16 - vldr s17, zero - vldr s19, zero - vldr s21, zero - vldr s23, zero - vstr d4, [BUF, #(0*32+0)*4] - vstr d5, [BUF, #(1*32+0)*4] - vstr d6, [BUF, #(2*32+0)*4] - vstr d7, [BUF, #(3*32+0)*4] - vstr d8, [BUF, #(4*32+0)*4] - vstr d9, [BUF, #(5*32+0)*4] - vstr d10, [BUF, #(6*32+0)*4] - vstr d11, [BUF, #(7*32+0)*4] - add BUF, BUF, #2*4 - sub COUNT, COUNT, #1 - b 4f -3: @ sb_act was n*4+2 or n*4+3, so do the first 2 - vldr s8, [IN, #(0*8+0)*4] - vldr s10, [IN, #(0*8+1)*4] - vldr s12, [IN, #(0*8+2)*4] - vldr s14, [IN, #(0*8+3)*4] - vldr 
s16, [IN, #(0*8+4)*4] - vldr s18, [IN, #(0*8+5)*4] - vldr s20, [IN, #(0*8+6)*4] - vldr s22, [IN, #(0*8+7)*4] - vneg.f s8, s8 - vldr s9, [IN, #(1*8+0)*4] - vldr s11, [IN, #(1*8+1)*4] - vldr s13, [IN, #(1*8+2)*4] - vldr s15, [IN, #(1*8+3)*4] - vneg.f s16, s16 - vldr s17, [IN, #(1*8+4)*4] - vldr s19, [IN, #(1*8+5)*4] - vldr s21, [IN, #(1*8+6)*4] - vldr s23, [IN, #(1*8+7)*4] - vstr d4, [BUF, #(0*32+0)*4] - vstr d5, [BUF, #(1*32+0)*4] - vstr d6, [BUF, #(2*32+0)*4] - vstr d7, [BUF, #(3*32+0)*4] - vstr d8, [BUF, #(4*32+0)*4] - vstr d9, [BUF, #(5*32+0)*4] - vstr d10, [BUF, #(6*32+0)*4] - vstr d11, [BUF, #(7*32+0)*4] - add BUF, BUF, #2*4 - sub COUNT, COUNT, #(2 << 5) + 1 - bics lr, COUNT, #0x1F - bne 4f - @ sb_act was n*4+3 - vldr s8, [IN, #(2*8+0)*4] - vldr s10, [IN, #(2*8+1)*4] - vldr s12, [IN, #(2*8+2)*4] - vldr s14, [IN, #(2*8+3)*4] - vldr s16, [IN, #(2*8+4)*4] - vldr s18, [IN, #(2*8+5)*4] - vldr s20, [IN, #(2*8+6)*4] - vldr s22, [IN, #(2*8+7)*4] - vldr s9, zero - vldr s11, zero - vldr s13, zero - vldr s15, zero - vldr s17, zero - vldr s19, zero - vldr s21, zero - vldr s23, zero - vstr d4, [BUF, #(0*32+0)*4] - vstr d5, [BUF, #(1*32+0)*4] - vstr d6, [BUF, #(2*32+0)*4] - vstr d7, [BUF, #(3*32+0)*4] - vstr d8, [BUF, #(4*32+0)*4] - vstr d9, [BUF, #(5*32+0)*4] - vstr d10, [BUF, #(6*32+0)*4] - vstr d11, [BUF, #(7*32+0)*4] - add BUF, BUF, #2*4 - sub COUNT, COUNT, #1 -4: @ Now fill the remainder with 0 - vldr s8, zero - vldr s9, zero - ands COUNT, COUNT, #0x1F - beq 6f -5: vstr d4, [BUF, #(0*32+0)*4] - vstr d4, [BUF, #(1*32+0)*4] - vstr d4, [BUF, #(2*32+0)*4] - vstr d4, [BUF, #(3*32+0)*4] - vstr d4, [BUF, #(4*32+0)*4] - vstr d4, [BUF, #(5*32+0)*4] - vstr d4, [BUF, #(6*32+0)*4] - vstr d4, [BUF, #(7*32+0)*4] - add BUF, BUF, #2*4 - subs COUNT, COUNT, #1 - bne 5b -6: - fmxr FPSCR, OLDFPSCR - ldr WINDOW, [fp, #3*4] - ldr OUT, [fp, #4*4] - sub BUF, BUF, #32*4 -NOVFP ldr SCALEINT, [fp, #6*4] - mov COUNT, #8 -VFP vpush {SCALE} -VFP sub sp, sp, #3*4 -NOVFP sub sp, sp, #4*4 -7: -VFP ldr 
a1, [fp, #-7*4] @ imdct -NOVFP ldr a1, [fp, #-8*4] - ldmia fp, {a2-a4} -VFP stmia sp, {WINDOW, OUT, BUF} -NOVFP stmia sp, {WINDOW, OUT, BUF, SCALEINT} -VFP vldr SCALE, [sp, #3*4] - bl X(ff_synth_filter_float_vfp) - add OUT, OUT, #32*4 - add BUF, BUF, #32*4 - subs COUNT, COUNT, #1 - bne 7b - -A sub sp, fp, #(8+8)*4 -T sub fp, fp, #(8+8)*4 -T mov sp, fp - vpop {s16-s23} -VFP pop {a3-a4,v1-v3,v5,fp,pc} -NOVFP pop {a4,v1-v5,fp,pc} -endfunc - - .unreq IN - .unreq SBACT - .unreq OLDFPSCR - .unreq IMDCT - .unreq WINDOW - .unreq OUT - .unreq BUF - .unreq SCALEINT - .unreq COUNT - - .unreq SCALE - - .align 2 -zero: .word 0 |