author | Martin Storsjö <martin@martin.st> | 2017-02-05 22:55:20 +0200 |
---|---|---|
committer | Martin Storsjö <martin@martin.st> | 2017-02-09 12:31:53 +0200 |
commit | 47b3c2c18d1897f3c753ba0cec4b2d7aa24526af (patch) | |
tree | a13cb9c5b58916e5ec3fc06122fd0ec826ce78f9 | |
parent | 115476018d2c97df7e9b4445fe8f6cc7420ab91f (diff) | |
arm: vp9itxfm: Move the load_add_store macro out from the itxfm16 pass2 function
This allows reusing the macro for a separate implementation of the
pass2 function.
Signed-off-by: Martin Storsjö <martin@martin.st>
-rw-r--r-- | libavcodec/arm/vp9itxfm_neon.S | 72 |
1 file changed, 36 insertions, 36 deletions
diff --git a/libavcodec/arm/vp9itxfm_neon.S b/libavcodec/arm/vp9itxfm_neon.S
index fd53a20a73..b3188bc711 100644
--- a/libavcodec/arm/vp9itxfm_neon.S
+++ b/libavcodec/arm/vp9itxfm_neon.S
@@ -657,6 +657,42 @@ function iadst16
         bx              lr
 endfunc
 
+.macro load_add_store coef0, coef1, coef2, coef3
+        vrshr.s16       \coef0, \coef0, #6
+        vrshr.s16       \coef1, \coef1, #6
+
+        vld1.32         {d4[]},   [r0,:32], r1
+        vld1.32         {d4[1]},  [r3,:32], r1
+        vrshr.s16       \coef2, \coef2, #6
+        vrshr.s16       \coef3, \coef3, #6
+        vld1.32         {d5[]},   [r0,:32], r1
+        vld1.32         {d5[1]},  [r3,:32], r1
+        vaddw.u8        \coef0, \coef0, d4
+        vld1.32         {d6[]},   [r0,:32], r1
+        vld1.32         {d6[1]},  [r3,:32], r1
+        vaddw.u8        \coef1, \coef1, d5
+        vld1.32         {d7[]},   [r0,:32], r1
+        vld1.32         {d7[1]},  [r3,:32], r1
+
+        vqmovun.s16     d4,  \coef0
+        vqmovun.s16     d5,  \coef1
+        sub             r0,  r0,  r1, lsl #2
+        sub             r3,  r3,  r1, lsl #2
+        vaddw.u8        \coef2, \coef2, d6
+        vaddw.u8        \coef3, \coef3, d7
+        vst1.32         {d4[0]},  [r0,:32], r1
+        vst1.32         {d4[1]},  [r3,:32], r1
+        vqmovun.s16     d6,  \coef2
+        vst1.32         {d5[0]},  [r0,:32], r1
+        vst1.32         {d5[1]},  [r3,:32], r1
+        vqmovun.s16     d7,  \coef3
+
+        vst1.32         {d6[0]},  [r0,:32], r1
+        vst1.32         {d6[1]},  [r3,:32], r1
+        vst1.32         {d7[0]},  [r0,:32], r1
+        vst1.32         {d7[1]},  [r3,:32], r1
+.endm
+
 .macro itxfm16_1d_funcs txfm
 @ Read a vertical 4x16 slice out of a 16x16 matrix, do a transform on it,
 @ transpose into a horizontal 16x4 slice and store.
@@ -739,44 +775,8 @@ function \txfm\()16_1d_4x16_pass2_neon
         lsl             r1,  r1,  #1
 
         bl              \txfm\()16
-.macro load_add_store coef0, coef1, coef2, coef3
-        vrshr.s16       \coef0, \coef0, #6
-        vrshr.s16       \coef1, \coef1, #6
-
-        vld1.32         {d4[]},   [r0,:32], r1
-        vld1.32         {d4[1]},  [r3,:32], r1
-        vrshr.s16       \coef2, \coef2, #6
-        vrshr.s16       \coef3, \coef3, #6
-        vld1.32         {d5[]},   [r0,:32], r1
-        vld1.32         {d5[1]},  [r3,:32], r1
-        vaddw.u8        \coef0, \coef0, d4
-        vld1.32         {d6[]},   [r0,:32], r1
-        vld1.32         {d6[1]},  [r3,:32], r1
-        vaddw.u8        \coef1, \coef1, d5
-        vld1.32         {d7[]},   [r0,:32], r1
-        vld1.32         {d7[1]},  [r3,:32], r1
-
-        vqmovun.s16     d4,  \coef0
-        vqmovun.s16     d5,  \coef1
-        sub             r0,  r0,  r1, lsl #2
-        sub             r3,  r3,  r1, lsl #2
-        vaddw.u8        \coef2, \coef2, d6
-        vaddw.u8        \coef3, \coef3, d7
-        vst1.32         {d4[0]},  [r0,:32], r1
-        vst1.32         {d4[1]},  [r3,:32], r1
-        vqmovun.s16     d6,  \coef2
-        vst1.32         {d5[0]},  [r0,:32], r1
-        vst1.32         {d5[1]},  [r3,:32], r1
-        vqmovun.s16     d7,  \coef3
-
-        vst1.32         {d6[0]},  [r0,:32], r1
-        vst1.32         {d6[1]},  [r3,:32], r1
-        vst1.32         {d7[0]},  [r0,:32], r1
-        vst1.32         {d7[1]},  [r3,:32], r1
-.endm
         load_add_store  q8,  q9,  q10, q11
         load_add_store  q12, q13, q14, q15
-.purgem load_add_store
 
         pop             {pc}
 endfunc
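For readers less familiar with NEON, the following scalar C sketch (not part of the patch; the function and helper names are made up for illustration) shows what one load_add_store invocation computes. Each of the four q-registers holds eight 16-bit coefficients: the low four lanes belong to the row addressed through r0 and the high four lanes to the mirrored row addressed through r3. The coefficients are rounding-shifted down by 6, added to the existing destination pixels, saturated back to 8 bits, and stored.

```c
#include <stddef.h>
#include <stdint.h>

/* Illustrative scalar equivalent of one load_add_store invocation
 * (hypothetical names, not FFmpeg API). */
static uint8_t clip_uint8(int v)
{
    return v < 0 ? 0 : v > 255 ? 255 : (uint8_t)v;
}

static void load_add_store_c(uint8_t *dst0, uint8_t *dst1, ptrdiff_t stride,
                             const int16_t coef[4][8])
{
    for (int r = 0; r < 4; r++) {              /* one q-register per step */
        for (int i = 0; i < 4; i++) {
            int c0 = (coef[r][i]     + 32) >> 6;   /* vrshr.s16 #6 (rounding) */
            int c1 = (coef[r][4 + i] + 32) >> 6;
            dst0[i] = clip_uint8(dst0[i] + c0);    /* vaddw.u8 + vqmovun.s16 */
            dst1[i] = clip_uint8(dst1[i] + c1);
        }
        dst0 += stride;                        /* post-increment by r1 in the asm */
        dst1 += stride;
    }
}
```

In the assembly version the loads for the upcoming rows are interleaved with the shifts and widening adds to hide memory latency, which is why the instruction order differs from this straight-line sketch.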