summary refs log tree commit diff
diff options
context:
space:
mode:
author Martin Storsjö <martin@martin.st> 2017-02-05 22:55:20 +0200
committer Martin Storsjö <martin@martin.st> 2017-02-09 12:31:53 +0200
commit 47b3c2c18d1897f3c753ba0cec4b2d7aa24526af (patch)
tree a13cb9c5b58916e5ec3fc06122fd0ec826ce78f9
parent 115476018d2c97df7e9b4445fe8f6cc7420ab91f (diff)
download ffmpeg-47b3c2c18d1897f3c753ba0cec4b2d7aa24526af.tar.gz
arm: vp9itxfm: Move the load_add_store macro out from the itxfm16 pass2 function
This allows reusing the macro for a separate implementation of the pass2 function.
Signed-off-by: Martin Storsjö <martin@martin.st>
-rw-r--r-- libavcodec/arm/vp9itxfm_neon.S 72
1 files changed, 36 insertions, 36 deletions
diff --git a/libavcodec/arm/vp9itxfm_neon.S b/libavcodec/arm/vp9itxfm_neon.S
index fd53a20a73..b3188bc711 100644
--- a/libavcodec/arm/vp9itxfm_neon.S
+++ b/libavcodec/arm/vp9itxfm_neon.S
@@ -657,6 +657,42 @@ function iadst16
bx lr
endfunc
+.macro load_add_store coef0, coef1, coef2, coef3 @ round four q regs of s16 values by 6 bits, add the u8 bytes read via r0/r3, saturate to u8 and store back; clobbers d4-d7
+ vrshr.s16 \coef0, \coef0, #6 @ rounding shift right by 6 on every s16 lane, in place
+ vrshr.s16 \coef1, \coef1, #6
+
+ vld1.32 {d4[]}, [r0,:32], r1 @ d4 = 32-bit word at [r0] copied to both lanes (:32 = aligned access); r0 += r1
+ vld1.32 {d4[1]}, [r3,:32], r1 @ overwrite lane 1 of d4 with the word at [r3]; r3 += r1
+ vrshr.s16 \coef2, \coef2, #6 @ NOTE(review): arithmetic is interleaved with the loads, presumably for scheduling - keep the order
+ vrshr.s16 \coef3, \coef3, #6
+ vld1.32 {d5[]}, [r0,:32], r1 @ d5: next word via r0 ...
+ vld1.32 {d5[1]}, [r3,:32], r1 @ ... and next word via r3 in lane 1
+ vaddw.u8 \coef0, \coef0, d4 @ zero-extend the 8 bytes of d4 to 16 bit and add them to the first coef register
+ vld1.32 {d6[]}, [r0,:32], r1
+ vld1.32 {d6[1]}, [r3,:32], r1
+ vaddw.u8 \coef1, \coef1, d5
+ vld1.32 {d7[]}, [r0,:32], r1 @ fourth and last load via r0; r0 is now 4*r1 past its entry value
+ vld1.32 {d7[1]}, [r3,:32], r1 @ same for r3
+
+ vqmovun.s16 d4, \coef0 @ narrow s16 -> u8 with unsigned saturation, reusing d4 for the result
+ vqmovun.s16 d5, \coef1
+ sub r0, r0, r1, lsl #2 @ r0 -= 4*r1: rewind to the first word that was loaded
+ sub r3, r3, r1, lsl #2 @ likewise for r3
+ vaddw.u8 \coef2, \coef2, d6
+ vaddw.u8 \coef3, \coef3, d7
+ vst1.32 {d4[0]}, [r0,:32], r1 @ lane 0 goes back to the address it was loaded from via r0; r0 += r1
+ vst1.32 {d4[1]}, [r3,:32], r1 @ lane 1 goes back via r3; r3 += r1
+ vqmovun.s16 d6, \coef2
+ vst1.32 {d5[0]}, [r0,:32], r1
+ vst1.32 {d5[1]}, [r3,:32], r1
+ vqmovun.s16 d7, \coef3
+
+ vst1.32 {d6[0]}, [r0,:32], r1
+ vst1.32 {d6[1]}, [r3,:32], r1
+ vst1.32 {d7[0]}, [r0,:32], r1
+ vst1.32 {d7[1]}, [r3,:32], r1 @ on exit r0 and r3 are each 4*r1 past their entry values, ready for a following invocation
+.endm
+
.macro itxfm16_1d_funcs txfm
@ Read a vertical 4x16 slice out of a 16x16 matrix, do a transform on it,
@ transpose into a horizontal 16x4 slice and store.
@@ -739,44 +775,8 @@ function \txfm\()16_1d_4x16_pass2_neon
lsl r1, r1, #1
bl \txfm\()16
-.macro load_add_store coef0, coef1, coef2, coef3
- vrshr.s16 \coef0, \coef0, #6
- vrshr.s16 \coef1, \coef1, #6
-
- vld1.32 {d4[]}, [r0,:32], r1
- vld1.32 {d4[1]}, [r3,:32], r1
- vrshr.s16 \coef2, \coef2, #6
- vrshr.s16 \coef3, \coef3, #6
- vld1.32 {d5[]}, [r0,:32], r1
- vld1.32 {d5[1]}, [r3,:32], r1
- vaddw.u8 \coef0, \coef0, d4
- vld1.32 {d6[]}, [r0,:32], r1
- vld1.32 {d6[1]}, [r3,:32], r1
- vaddw.u8 \coef1, \coef1, d5
- vld1.32 {d7[]}, [r0,:32], r1
- vld1.32 {d7[1]}, [r3,:32], r1
-
- vqmovun.s16 d4, \coef0
- vqmovun.s16 d5, \coef1
- sub r0, r0, r1, lsl #2
- sub r3, r3, r1, lsl #2
- vaddw.u8 \coef2, \coef2, d6
- vaddw.u8 \coef3, \coef3, d7
- vst1.32 {d4[0]}, [r0,:32], r1
- vst1.32 {d4[1]}, [r3,:32], r1
- vqmovun.s16 d6, \coef2
- vst1.32 {d5[0]}, [r0,:32], r1
- vst1.32 {d5[1]}, [r3,:32], r1
- vqmovun.s16 d7, \coef3
-
- vst1.32 {d6[0]}, [r0,:32], r1
- vst1.32 {d6[1]}, [r3,:32], r1
- vst1.32 {d7[0]}, [r0,:32], r1
- vst1.32 {d7[1]}, [r3,:32], r1
-.endm
load_add_store q8, q9, q10, q11
load_add_store q12, q13, q14, q15
-.purgem load_add_store
pop {pc}
endfunc