summaryrefslogtreecommitdiff
path: root/libavutil/x86
diff options
context:
space:
mode:
authorJames Almer <jamrial@gmail.com>2014-04-16 20:15:35 -0300
committerMichael Niedermayer <michaelni@gmx.at>2014-04-17 14:15:09 +0200
commit76ed71a72bffb45027923e4da5f6fc6a97bfb218 (patch)
tree975fdcf1b1c1165b9eed695c45b5efc942a294d4 /libavutil/x86
parent443261cbbdaac2eaba5fada318fa596bd5ab3e4a (diff)
downloadffmpeg-76ed71a72bffb45027923e4da5f6fc6a97bfb218.tar.gz
x86: move horizontal add macros to x86util
Also port relevant AVX2/XOP optimizations from x264 with permission to relicense to LGPL from the corresponding authors Signed-off-by: James Almer <jamrial@gmail.com> Reviewed-by: "Ronald S. Bultje" <rsbultje@gmail.com> Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavutil/x86')
-rw-r--r--libavutil/x86/x86util.asm33
1 files changed, 33 insertions, 0 deletions
diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index df58cadf63..67d7905132 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -273,6 +273,39 @@
%endif
%endmacro
+%macro HADDD 2 ; sum junk
+%if sizeof%1 == 32
+%define %2 xmm%2
+ vextracti128 %2, %1, 1
+%define %1 xmm%1
+ paddd %1, %2
+%endif
+%if mmsize >= 16
+%if cpuflag(xop) && sizeof%1 == 16
+ vphadddq %1, %1
+%endif
+ movhlps %2, %1
+ paddd %1, %2
+%endif
+%if notcpuflag(xop) || sizeof%1 != 16
+ PSHUFLW %2, %1, q0032
+ paddd %1, %2
+%endif
+%undef %1
+%undef %2
+%endmacro
+
+%macro HADDW 2 ; reg, tmp
+%if cpuflag(xop) && sizeof%1 == 16
+ vphaddwq %1, %1
+ movhlps %2, %1
+ paddd %1, %2
+%else
+ pmaddwd %1, [pw_1]
+ HADDD %1, %2
+%endif
+%endmacro
+
%macro PALIGNR 4-5
%if cpuflag(ssse3)
%if %0==5