diff options
author | Derek Buitenhuis <derek.buitenhuis@gmail.com> | 2016-04-24 12:51:34 +0100 |
---|---|---|
committer | Derek Buitenhuis <derek.buitenhuis@gmail.com> | 2016-04-24 12:51:42 +0100 |
commit | 87b8e9500874930667ac966ea2fabdd6222ef6e0 (patch) | |
tree | 96c5df610b7b75913666b263d6de7ea0d1ee0957 /libavcodec/aarch64 | |
parent | 4fe4c5c3761a92fdaf8b3bbb21c00fb40b08f156 (diff) | |
parent | cdb1665f70def544ddab3e3ed3763ef99c8b3873 (diff) | |
download | ffmpeg-87b8e9500874930667ac966ea2fabdd6222ef6e0.tar.gz |
Merge commit 'cdb1665f70def544ddab3e3ed3763ef99c8b3873'
* commit 'cdb1665f70def544ddab3e3ed3763ef99c8b3873':
aarch64: Make transpose_4x4H do a regular transpose
Merged-by: Derek Buitenhuis <derek.buitenhuis@gmail.com>
Diffstat (limited to 'libavcodec/aarch64')
-rw-r--r-- | libavcodec/aarch64/h264idct_neon.S | 24 | ||||
-rw-r--r-- | libavcodec/aarch64/neon.S | 12 |
2 files changed, 18 insertions, 18 deletions
diff --git a/libavcodec/aarch64/h264idct_neon.S b/libavcodec/aarch64/h264idct_neon.S
index 91f1e773c4..fa414f73b2 100644
--- a/libavcodec/aarch64/h264idct_neon.S
+++ b/libavcodec/aarch64/h264idct_neon.S
@@ -33,25 +33,25 @@ function ff_h264_idct_add_neon, export=1
         sshr            v17.4H, v3.4H, #1
         st1             {v30.8H}, [x1], #16
         sub             v5.4H, v0.4H, v2.4H
-        add             v6.4H, v1.4H, v17.4H
-        sub             v7.4H, v16.4H, v3.4H
-        add             v0.4H, v4.4H, v6.4H
-        add             v1.4H, v5.4H, v7.4H
-        sub             v3.4H, v4.4H, v6.4H
-        sub             v2.4H, v5.4H, v7.4H
+        sub             v6.4H, v16.4H, v3.4H
+        add             v7.4H, v1.4H, v17.4H
+        add             v0.4H, v4.4H, v7.4H
+        add             v1.4H, v5.4H, v6.4H
+        sub             v2.4H, v5.4H, v6.4H
+        sub             v3.4H, v4.4H, v7.4H
 
         transpose_4x4H  v0, v1, v2, v3, v4, v5, v6, v7
 
-        add             v4.4H, v0.4H, v3.4H
+        add             v4.4H, v0.4H, v2.4H
         ld1             {v18.S}[0], [x0], x2
-        sshr            v16.4H, v2.4H, #1
+        sshr            v16.4H, v3.4H, #1
         sshr            v17.4H, v1.4H, #1
-        ld1             {v19.S}[1], [x0], x2
-        sub             v5.4H, v0.4H, v3.4H
         ld1             {v18.S}[1], [x0], x2
+        sub             v5.4H, v0.4H, v2.4H
+        ld1             {v19.S}[1], [x0], x2
         add             v6.4H, v16.4H, v1.4H
         ins             v4.D[1], v5.D[0]
-        sub             v7.4H, v2.4H, v17.4H
+        sub             v7.4H, v17.4H, v3.4H
         ld1             {v19.S}[0], [x0], x2
         ins             v6.D[1], v7.D[0]
         sub             x0, x0, x2, lsl #2
@@ -68,8 +68,8 @@ function ff_h264_idct_add_neon, export=1
         sqxtun          v1.8B, v1.8H
 
         st1             {v0.S}[0], [x0], x2
-        st1             {v1.S}[1], [x0], x2
         st1             {v0.S}[1], [x0], x2
+        st1             {v1.S}[1], [x0], x2
         st1             {v1.S}[0], [x0], x2
 
         sub             x1, x1, #32
diff --git a/libavcodec/aarch64/neon.S b/libavcodec/aarch64/neon.S
index a227cbd3f6..0fddbecae3 100644
--- a/libavcodec/aarch64/neon.S
+++ b/libavcodec/aarch64/neon.S
@@ -107,12 +107,12 @@
 .macro transpose_4x4H r0, r1, r2, r3, r4, r5, r6, r7
         trn1            \r4\().4H, \r0\().4H, \r1\().4H
         trn2            \r5\().4H, \r0\().4H, \r1\().4H
-        trn1            \r7\().4H, \r2\().4H, \r3\().4H
-        trn2            \r6\().4H, \r2\().4H, \r3\().4H
-        trn1            \r0\().2S, \r4\().2S, \r7\().2S
-        trn2            \r3\().2S, \r4\().2S, \r7\().2S
-        trn1            \r1\().2S, \r5\().2S, \r6\().2S
-        trn2            \r2\().2S, \r5\().2S, \r6\().2S
+        trn1            \r6\().4H, \r2\().4H, \r3\().4H
+        trn2            \r7\().4H, \r2\().4H, \r3\().4H
+        trn1            \r0\().2S, \r4\().2S, \r6\().2S
+        trn2            \r2\().2S, \r4\().2S, \r6\().2S
+        trn1            \r1\().2S, \r5\().2S, \r7\().2S
+        trn2            \r3\().2S, \r5\().2S, \r7\().2S
 .endm
 
 .macro transpose_8x8H r0, r1, r2, r3, r4, r5, r6, r7, r8, r9