From e3fcb14347466095839c2a3c47ebecff02da891e Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Fri, 24 Jan 2014 11:55:16 +0100 Subject: dsputil: Split off IDCT bits into their own context --- libavcodec/arm/idctdsp_arm.S | 120 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) create mode 100644 libavcodec/arm/idctdsp_arm.S (limited to 'libavcodec/arm/idctdsp_arm.S') diff --git a/libavcodec/arm/idctdsp_arm.S b/libavcodec/arm/idctdsp_arm.S new file mode 100644 index 0000000000..34f467e86f --- /dev/null +++ b/libavcodec/arm/idctdsp_arm.S @@ -0,0 +1,120 @@ +@ +@ ARMv4-optimized IDCT functions +@ Copyright (c) 2004 AGAWA Koji +@ +@ This file is part of Libav. +@ +@ Libav is free software; you can redistribute it and/or +@ modify it under the terms of the GNU Lesser General Public +@ License as published by the Free Software Foundation; either +@ version 2.1 of the License, or (at your option) any later version. +@ +@ Libav is distributed in the hope that it will be useful, +@ but WITHOUT ANY WARRANTY; without even the implied warranty of +@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +@ Lesser General Public License for more details. +@ +@ You should have received a copy of the GNU Lesser General Public +@ License along with Libav; if not, write to the Free Software +@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +@ + +#include "config.h" +#include "libavutil/arm/asm.S" + +@ void ff_add_pixels_clamped_arm(int16_t *block, uint8_t *dest, int stride) +function ff_add_pixels_clamped_arm, export=1, align=5 + push {r4-r10} + mov r10, #8 +1: + ldr r4, [r1] /* load dest */ + /* block[0] and block[1]*/ + ldrsh r5, [r0] + ldrsh r7, [r0, #2] + and r6, r4, #0xFF + and r8, r4, #0xFF00 + add r6, r6, r5 + add r8, r7, r8, lsr #8 + mvn r5, r5 + mvn r7, r7 + tst r6, #0x100 + it ne + movne r6, r5, lsr #24 + tst r8, #0x100 + it ne + movne r8, r7, lsr #24 + mov r9, r6 + ldrsh r5, [r0, #4] /* moved form [A] */ + orr r9, r9, r8, lsl #8 + /* block[2] and block[3] */ + /* [A] */ + ldrsh r7, [r0, #6] + and r6, r4, #0xFF0000 + and r8, r4, #0xFF000000 + add r6, r5, r6, lsr #16 + add r8, r7, r8, lsr #24 + mvn r5, r5 + mvn r7, r7 + tst r6, #0x100 + it ne + movne r6, r5, lsr #24 + tst r8, #0x100 + it ne + movne r8, r7, lsr #24 + orr r9, r9, r6, lsl #16 + ldr r4, [r1, #4] /* moved form [B] */ + orr r9, r9, r8, lsl #24 + /* store dest */ + ldrsh r5, [r0, #8] /* moved form [C] */ + str r9, [r1] + + /* load dest */ + /* [B] */ + /* block[4] and block[5] */ + /* [C] */ + ldrsh r7, [r0, #10] + and r6, r4, #0xFF + and r8, r4, #0xFF00 + add r6, r6, r5 + add r8, r7, r8, lsr #8 + mvn r5, r5 + mvn r7, r7 + tst r6, #0x100 + it ne + movne r6, r5, lsr #24 + tst r8, #0x100 + it ne + movne r8, r7, lsr #24 + mov r9, r6 + ldrsh r5, [r0, #12] /* moved from [D] */ + orr r9, r9, r8, lsl #8 + /* block[6] and block[7] */ + /* [D] */ + ldrsh r7, [r0, #14] + and r6, r4, #0xFF0000 + and r8, r4, #0xFF000000 + add r6, r5, r6, lsr #16 + add r8, r7, r8, lsr #24 + mvn r5, r5 + mvn r7, r7 + tst r6, #0x100 + it ne + movne r6, r5, lsr #24 + tst r8, #0x100 + it ne + movne r8, r7, lsr #24 + orr r9, r9, r6, lsl #16 + add r0, r0, #16 /* moved from [E] */ + orr r9, r9, r8, lsl #24 + subs r10, r10, #1 /* moved from [F] */ + /* store dest */ + str r9, [r1, #4] + + /* [E] */ + /* [F] */ + add r1, r1, r2 + bne 1b + + pop {r4-r10} + bx lr +endfunc -- cgit v1.2.1