diff options
author | James Almer <jamrial@gmail.com> | 2014-04-16 20:15:35 -0300 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-04-17 14:15:09 +0200 |
commit | 76ed71a72bffb45027923e4da5f6fc6a97bfb218 (patch) | |
tree | 975fdcf1b1c1165b9eed695c45b5efc942a294d4 /libavutil/x86 | |
parent | 443261cbbdaac2eaba5fada318fa596bd5ab3e4a (diff) | |
download | ffmpeg-76ed71a72bffb45027923e4da5f6fc6a97bfb218.tar.gz |
x86: move horizontal add macros to x86util
Also port relevant AVX2/XOP optimizations from x264 with permission
to relicense to LGPL from the corresponding authors
Signed-off-by: James Almer <jamrial@gmail.com>
Reviewed-by: "Ronald S. Bultje" <rsbultje@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavutil/x86')
-rw-r--r-- | libavutil/x86/x86util.asm | 33 |
1 file changed, 33 insertions, 0 deletions
diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index df58cadf63..67d7905132 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -273,6 +273,65 @@
 %endif
 %endmacro
 
+;-----------------------------------------------------------------------------
+; HADDD sum, junk
+; Horizontally adds the packed dwords of %1 and leaves the total in the low
+; dword of %1. The other elements of %1 are left in an unspecified state.
+;   %1 (sum)  in/out vector register
+;   %2 (junk) scratch vector register, clobbered
+; For 32-byte operands both arguments are aliased to xmm-prefixed names for
+; the rest of the macro; the %undef lines at the end remove those aliases.
+;-----------------------------------------------------------------------------
+%macro HADDD 2 ; sum junk
+%if sizeof%1 == 32
+%define %2 xmm%2
+    vextracti128 %2, %1, 1      ; junk = upper 128-bit lane of sum
+%define %1 xmm%1
+    paddd   %1, %2              ; fold the upper lane into the lower one
+%endif
+%if mmsize >= 16
+%if cpuflag(xop) && sizeof%1 == 16
+    vphadddq %1, %1             ; XOP: add adjacent dword pairs into qwords
+%endif
+    movhlps %2, %1              ; junk.lo64 = sum.hi64
+    paddd   %1, %2
+%endif
+%if notcpuflag(xop) || sizeof%1 != 16
+%if cpuflag(mmxext)
+    PSHUFLW %2, %1, q0032       ; junk.dword0 = sum.dword1
+%else ; mmx
+    ; pshufw (what PSHUFLW presumably emits for mm registers) is not part
+    ; of plain MMX, so copy and shift the high dword down instead
+    mova    %2, %1
+    psrlq   %2, 32              ; junk.dword0 = sum.dword1
+%endif
+    paddd   %1, %2
+%endif
+%undef %1
+%undef %2
+%endmacro
+
+;-----------------------------------------------------------------------------
+; HADDW reg, tmp
+; Horizontally adds the packed words of %1 (as signed values) and leaves the
+; total in the low dword of %1.
+;   %1 (reg) in/out vector register
+;   %2 (tmp) scratch vector register, clobbered
+; The non-XOP path reads the constant pw_1 from memory; it is not defined
+; here, so the including file has to provide it (presumably packed words
+; of 1 - confirm at the definition).
+;-----------------------------------------------------------------------------
+%macro HADDW 2 ; reg, tmp
+%if cpuflag(xop) && sizeof%1 == 16
+    vphaddwq  %1, %1            ; XOP: add each group of 4 words into a qword
+    movhlps   %2, %1            ; tmp.lo64 = reg.hi64
+    paddd     %1, %2
+%else
+    pmaddwd %1, [pw_1]          ; multiply by pw_1, add adjacent pairs into dwords
+    HADDD   %1, %2
+%endif
+%endmacro
+
 %macro PALIGNR 4-5
 %if cpuflag(ssse3)
 %if %0==5